[llvm] [RISCV][llvm] Support fminimum, fmaximum, fminnum, fmaxnum, fminimumnum, fmaximumnum codegen for zvfbfa (PR #171794)
Brandon Wu via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 11 02:16:28 PST 2025
https://github.com/4vtomat created https://github.com/llvm/llvm-project/pull/171794
This patch adds support for both scalable vectors and fixed-length vectors.
It also enables the fsetcc pattern match for zvfbfa so that fminimum and
fmaximum lower correctly.
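
As a rough illustration (function names below are only for the example), the kind of
bf16 IR this enables codegen for, in both fixed-length and scalable form, looks like:

define <2 x bfloat> @fixed_max(<2 x bfloat> %a, <2 x bfloat> %b) {
  %v = call <2 x bfloat> @llvm.maximum.v2bf16(<2 x bfloat> %a, <2 x bfloat> %b)
  ret <2 x bfloat> %v
}

define <vscale x 2 x bfloat> @scalable_min(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) {
  %v = call <vscale x 2 x bfloat> @llvm.minimum.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b)
  ret <vscale x 2 x bfloat> %v
}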
From 120e35994ad43c65143d64d1e9b3fa3504e2d76f Mon Sep 17 00:00:00 2001
From: Brandon Wu <songwu0813 at gmail.com>
Date: Thu, 11 Dec 2025 01:08:11 -0800
Subject: [PATCH 1/2] pre-commit test
---
.../RISCV/rvv/fixed-vectors-fmaximum.ll | 378 +++-
.../RISCV/rvv/fixed-vectors-fmaximumnum.ll | 1537 +++++++++++++++++
.../RISCV/rvv/fixed-vectors-fminimum.ll | 378 +++-
.../RISCV/rvv/fixed-vectors-fminimumnum.ll | 1537 +++++++++++++++++
.../CodeGen/RISCV/rvv/fixed-vectors-vfmax.ll | 660 ++++++-
.../CodeGen/RISCV/rvv/fixed-vectors-vfmin.ll | 660 ++++++-
.../test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll | 778 +++++++--
.../CodeGen/RISCV/rvv/fmaximumnum-sdnode.ll | 927 ++++++++--
.../test/CodeGen/RISCV/rvv/fminimum-sdnode.ll | 778 +++++++--
.../CodeGen/RISCV/rvv/fminimumnum-sdnode.ll | 927 ++++++++--
llvm/test/CodeGen/RISCV/rvv/vfmax-sdnode.ll | 874 ++++++++--
llvm/test/CodeGen/RISCV/rvv/vfmin-sdnode.ll | 874 ++++++++--
12 files changed, 9314 insertions(+), 994 deletions(-)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll
index 6ee2e204bcfe3..299361374f2f4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll
@@ -1,12 +1,236 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA
+
+define <2 x bfloat> @vfmax_v2bf16_vv(<2 x bfloat> %a, <2 x bfloat> %b) {
+; ZVFH-LABEL: vfmax_v2bf16_vv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v10, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFH-NEXT: vmfeq.vv v0, v10, v10
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v8, v9
+; ZVFH-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFH-NEXT: vmerge.vvm v9, v10, v8, v0
+; ZVFH-NEXT: vmfeq.vv v0, v8, v8
+; ZVFH-NEXT: vmerge.vvm v8, v8, v10, v0
+; ZVFH-NEXT: vfmax.vv v9, v8, v9
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_v2bf16_vv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v8, v9
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0
+; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8
+; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0
+; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_v2bf16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 2, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vmfeq.vv v0, v10, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vmerge.vvm v9, v10, v8, v0
+; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT: vmerge.vvm v8, v8, v10, v0
+; ZVFBFA-NEXT: vfmax.vv v9, v8, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
+ %v = call <2 x bfloat> @llvm.maximum.v2bf16(<2 x bfloat> %a, <2 x bfloat> %b)
+ ret <2 x bfloat> %v
+}
+
+define <4 x bfloat> @vfmax_v4bf16_vv(<4 x bfloat> %a, <4 x bfloat> %b) {
+; ZVFH-LABEL: vfmax_v4bf16_vv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v10, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFH-NEXT: vmfeq.vv v0, v10, v10
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v8, v9
+; ZVFH-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFH-NEXT: vmerge.vvm v9, v10, v8, v0
+; ZVFH-NEXT: vmfeq.vv v0, v8, v8
+; ZVFH-NEXT: vmerge.vvm v8, v8, v10, v0
+; ZVFH-NEXT: vfmax.vv v9, v8, v9
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_v4bf16_vv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v8, v9
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0
+; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8
+; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0
+; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_v4bf16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 4, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT: vmfeq.vv v0, v10, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT: vmerge.vvm v9, v10, v8, v0
+; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT: vmerge.vvm v8, v8, v10, v0
+; ZVFBFA-NEXT: vfmax.vv v9, v8, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
+ %v = call <4 x bfloat> @llvm.maximum.v4bf16(<4 x bfloat> %a, <4 x bfloat> %b)
+ ret <4 x bfloat> %v
+}
+
+define <8 x bfloat> @vfmax_v8bf16_vv(<8 x bfloat> %a, <8 x bfloat> %b) {
+; ZVFH-LABEL: vfmax_v8bf16_vv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v10, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFH-NEXT: vmfeq.vv v0, v10, v10
+; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v12, v9
+; ZVFH-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFH-NEXT: vmerge.vvm v8, v10, v12, v0
+; ZVFH-NEXT: vmfeq.vv v0, v12, v12
+; ZVFH-NEXT: vmerge.vvm v10, v12, v10, v0
+; ZVFH-NEXT: vfmax.vv v10, v10, v8
+; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v10
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_v8bf16_vv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v12, v9
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v12, v0
+; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12
+; ZVFHMIN-NEXT: vmerge.vvm v10, v12, v10, v0
+; ZVFHMIN-NEXT: vfmax.vv v10, v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v10
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_v8bf16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 8, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT: vmfeq.vv v0, v10, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT: vmerge.vvm v8, v10, v12, v0
+; ZVFBFA-NEXT: vmfeq.vv v0, v12, v12
+; ZVFBFA-NEXT: vmerge.vvm v10, v12, v10, v0
+; ZVFBFA-NEXT: vfmax.vv v10, v10, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT: ret
+ %v = call <8 x bfloat> @llvm.maximum.v8bf16(<8 x bfloat> %a, <8 x bfloat> %b)
+ ret <8 x bfloat> %v
+}
+
+define <16 x bfloat> @vfmax_v16bf16_vv(<16 x bfloat> %a, <16 x bfloat> %b) {
+; ZVFH-LABEL: vfmax_v16bf16_vv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v12, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFH-NEXT: vmfeq.vv v0, v12, v12
+; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v16, v10
+; ZVFH-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFH-NEXT: vmerge.vvm v8, v12, v16, v0
+; ZVFH-NEXT: vmfeq.vv v0, v16, v16
+; ZVFH-NEXT: vmerge.vvm v12, v16, v12, v0
+; ZVFH-NEXT: vfmax.vv v12, v12, v8
+; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v12
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_v16bf16_vv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v12, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v16, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v16, v0
+; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16
+; ZVFHMIN-NEXT: vmerge.vvm v12, v16, v12, v0
+; ZVFHMIN-NEXT: vfmax.vv v12, v12, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v12
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_v16bf16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 16, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vmfeq.vv v0, v12, v12
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vmerge.vvm v8, v12, v16, v0
+; ZVFBFA-NEXT: vmfeq.vv v0, v16, v16
+; ZVFBFA-NEXT: vmerge.vvm v12, v16, v12, v0
+; ZVFBFA-NEXT: vfmax.vv v12, v12, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: ret
+ %v = call <16 x bfloat> @llvm.maximum.v16bf16(<16 x bfloat> %a, <16 x bfloat> %b)
+ ret <16 x bfloat> %v
+}
define <2 x half> @vfmax_v2f16_vv(<2 x half> %a, <2 x half> %b) {
; ZVFH-LABEL: vfmax_v2f16_vv:
@@ -35,6 +259,23 @@ define <2 x half> @vfmax_v2f16_vv(<2 x half> %a, <2 x half> %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_v2f16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vmfeq.vv v0, v10, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vmerge.vvm v9, v10, v8, v0
+; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT: vmerge.vvm v8, v8, v10, v0
+; ZVFBFA-NEXT: vfmax.vv v9, v8, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%v = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> %b)
ret <2 x half> %v
}
@@ -66,6 +307,23 @@ define <4 x half> @vfmax_v4f16_vv(<4 x half> %a, <4 x half> %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_v4f16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT: vmfeq.vv v0, v10, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT: vmerge.vvm v9, v10, v8, v0
+; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT: vmerge.vvm v8, v8, v10, v0
+; ZVFBFA-NEXT: vfmax.vv v9, v8, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%v = call <4 x half> @llvm.maximum.v4f16(<4 x half> %a, <4 x half> %b)
ret <4 x half> %v
}
@@ -97,6 +355,23 @@ define <8 x half> @vfmax_v8f16_vv(<8 x half> %a, <8 x half> %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_v8f16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT: vmfeq.vv v0, v10, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT: vmerge.vvm v8, v10, v12, v0
+; ZVFBFA-NEXT: vmfeq.vv v0, v12, v12
+; ZVFBFA-NEXT: vmerge.vvm v10, v12, v10, v0
+; ZVFBFA-NEXT: vfmax.vv v10, v10, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT: ret
%v = call <8 x half> @llvm.maximum.v8f16(<8 x half> %a, <8 x half> %b)
ret <8 x half> %v
}
@@ -128,6 +403,23 @@ define <16 x half> @vfmax_v16f16_vv(<16 x half> %a, <16 x half> %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_v16f16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vmfeq.vv v0, v12, v12
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vmerge.vvm v8, v12, v16, v0
+; ZVFBFA-NEXT: vmfeq.vv v0, v16, v16
+; ZVFBFA-NEXT: vmerge.vvm v12, v16, v12, v0
+; ZVFBFA-NEXT: vfmax.vv v12, v12, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: ret
%v = call <16 x half> @llvm.maximum.v16f16(<16 x half> %a, <16 x half> %b)
ret <16 x half> %v
}
@@ -262,6 +554,17 @@ define <2 x half> @vfmax_v2f16_vv_nnan(<2 x half> %a, <2 x half> %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_v2f16_vv_nnan:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v9, v9, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%v = call nnan <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> %b)
ret <2 x half> %v
}
@@ -295,6 +598,26 @@ define <2 x half> @vfmax_v2f16_vv_nnana(<2 x half> %a, <2 x half> %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_v2f16_vv_nnana:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vfadd.vv v9, v10, v10
+; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v10, v9
+; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vmerge.vvm v10, v8, v9, v0
+; ZVFBFA-NEXT: vmfeq.vv v0, v9, v9
+; ZVFBFA-NEXT: vmerge.vvm v8, v9, v8, v0
+; ZVFBFA-NEXT: vfmax.vv v9, v10, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%c = fadd nnan <2 x half> %a, %a
%v = call <2 x half> @llvm.maximum.v2f16(<2 x half> %c, <2 x half> %b)
ret <2 x half> %v
@@ -329,6 +652,26 @@ define <2 x half> @vfmax_v2f16_vv_nnanb(<2 x half> %a, <2 x half> %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_v2f16_vv_nnanb:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vfadd.vv v8, v10, v10
+; ZVFBFA-NEXT: vmfeq.vv v0, v9, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v10, v8
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vmerge.vvm v10, v9, v8, v0
+; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT: vmerge.vvm v8, v8, v9, v0
+; ZVFBFA-NEXT: vfmax.vv v9, v8, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%c = fadd nnan <2 x half> %b, %b
%v = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> %c)
ret <2 x half> %v
@@ -373,6 +716,33 @@ define <4 x half> @vfmax_v2f16_vv_nnan_insert_subvector(<2 x half> %a, <2 x half
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_v2f16_vv_nnan_insert_subvector:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v11, v8
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vfadd.vv v9, v11, v11
+; ZVFBFA-NEXT: vfadd.vv v8, v8, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v11, v9
+; ZVFBFA-NEXT: vfncvt.f.f.w v9, v8
+; ZVFBFA-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vslideup.vi v11, v9, 2
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v11
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT: vmerge.vvm v10, v8, v9, v0
+; ZVFBFA-NEXT: vmfeq.vv v0, v9, v9
+; ZVFBFA-NEXT: vmerge.vvm v8, v9, v8, v0
+; ZVFBFA-NEXT: vfmax.vv v9, v8, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%d = fadd nnan <2 x half> %a, %a
%e = fadd nnan <2 x half> %b, %b
%f = call <4 x half> @llvm.vector.insert.v2f32.v4f32(<4 x half> poison, <2 x half> %d, i64 0)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximumnum.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximumnum.ll
index c8cea368f905e..1a5e5af61b8ef 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximumnum.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximumnum.ll
@@ -1,6 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc --mtriple=riscv64-linux-gnu --mattr=+v,+zvfh < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc --mtriple=riscv64-linux-gnu --mattr=+v,+zvfhmin,+zfh < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA
define <2 x double> @max_v2f64(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: max_v2f64:
@@ -107,6 +111,17 @@ define <2 x half> @max_v2f16(<2 x half> %a, <2 x half> %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: max_v2f16:
+; ZVFBFA: # %bb.0: # %entry
+; ZVFBFA-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v9, v9, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
entry:
%c = call <2 x half> @llvm.maximumnum.v2f16(<2 x half> %a, <2 x half> %b)
ret <2 x half> %c
@@ -129,6 +144,17 @@ define <4 x half> @max_v4f16(<4 x half> %a, <4 x half> %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: max_v4f16:
+; ZVFBFA: # %bb.0: # %entry
+; ZVFBFA-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v9, v9, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
entry:
%c = call <4 x half> @llvm.maximumnum.v4f16(<4 x half> %a, <4 x half> %b)
ret <4 x half> %c
@@ -151,6 +177,17 @@ define <8 x half> @max_v8f16(<8 x half> %a, <8 x half> %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: max_v8f16:
+; ZVFBFA: # %bb.0: # %entry
+; ZVFBFA-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v10, v12, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT: ret
entry:
%c = call <8 x half> @llvm.maximumnum.v8f16(<8 x half> %a, <8 x half> %b)
ret <8 x half> %c
@@ -173,6 +210,17 @@ define <9 x half> @max_v9f16(<9 x half> %a, <9 x half> %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: max_v9f16:
+; ZVFBFA: # %bb.0: # %entry
+; ZVFBFA-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v10
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v12, v16, v12
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: ret
entry:
%c = call <9 x half> @llvm.maximumnum.v9f16(<9 x half> %a, <9 x half> %b)
ret <9 x half> %c
@@ -195,7 +243,1496 @@ define <16 x half> @max_v16f16(<16 x half> %a, <16 x half> %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: max_v16f16:
+; ZVFBFA: # %bb.0: # %entry
+; ZVFBFA-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v10
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v12, v16, v12
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: ret
entry:
%c = call <16 x half> @llvm.maximumnum.v16f16(<16 x half> %a, <16 x half> %b)
ret <16 x half> %c
}
+
+define <2 x bfloat> @max_v2bf16(<2 x bfloat> %a, <2 x bfloat> %b) {
+; ZVFH-LABEL: max_v2bf16:
+; ZVFH: # %bb.0: # %entry
+; ZVFH-NEXT: addi sp, sp, -32
+; ZVFH-NEXT: .cfi_def_cfa_offset 32
+; ZVFH-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: .cfi_offset ra, -8
+; ZVFH-NEXT: .cfi_offset s0, -16
+; ZVFH-NEXT: .cfi_offset s1, -24
+; ZVFH-NEXT: .cfi_offset s2, -32
+; ZVFH-NEXT: fmv.x.w s0, fa0
+; ZVFH-NEXT: fmv.x.w s1, fa2
+; ZVFH-NEXT: fmv.x.w a0, fa1
+; ZVFH-NEXT: fmv.x.w a1, fa3
+; ZVFH-NEXT: slli a1, a1, 16
+; ZVFH-NEXT: slli a0, a0, 16
+; ZVFH-NEXT: fmv.w.x fa5, a1
+; ZVFH-NEXT: fmv.w.x fa4, a0
+; ZVFH-NEXT: fmax.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w a0, fa0
+; ZVFH-NEXT: slli s1, s1, 16
+; ZVFH-NEXT: slli s0, s0, 16
+; ZVFH-NEXT: slli s2, a0, 16
+; ZVFH-NEXT: fmv.w.x fa5, s1
+; ZVFH-NEXT: fmv.w.x fa4, s0
+; ZVFH-NEXT: fmax.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w a0, fa0
+; ZVFH-NEXT: slli a0, a0, 48
+; ZVFH-NEXT: srli a0, a0, 48
+; ZVFH-NEXT: or a0, a0, s2
+; ZVFH-NEXT: lui a1, 1048560
+; ZVFH-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; ZVFH-NEXT: vmv.s.x v8, a0
+; ZVFH-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; ZVFH-NEXT: vmv.x.s a0, v8
+; ZVFH-NEXT: vslidedown.vi v8, v8, 1
+; ZVFH-NEXT: or a0, a0, a1
+; ZVFH-NEXT: fmv.w.x fa0, a0
+; ZVFH-NEXT: vmv.x.s a0, v8
+; ZVFH-NEXT: or a0, a0, a1
+; ZVFH-NEXT: fmv.w.x fa1, a0
+; ZVFH-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: .cfi_restore ra
+; ZVFH-NEXT: .cfi_restore s0
+; ZVFH-NEXT: .cfi_restore s1
+; ZVFH-NEXT: .cfi_restore s2
+; ZVFH-NEXT: addi sp, sp, 32
+; ZVFH-NEXT: .cfi_def_cfa_offset 0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: max_v2bf16:
+; ZVFHMIN: # %bb.0: # %entry
+; ZVFHMIN-NEXT: addi sp, sp, -32
+; ZVFHMIN-NEXT: .cfi_def_cfa_offset 32
+; ZVFHMIN-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: .cfi_offset ra, -8
+; ZVFHMIN-NEXT: .cfi_offset s0, -16
+; ZVFHMIN-NEXT: .cfi_offset s1, -24
+; ZVFHMIN-NEXT: .cfi_offset s2, -32
+; ZVFHMIN-NEXT: fmv.x.w s0, fa0
+; ZVFHMIN-NEXT: fmv.x.w s1, fa2
+; ZVFHMIN-NEXT: fmv.x.w a0, fa1
+; ZVFHMIN-NEXT: fmv.x.w a1, fa3
+; ZVFHMIN-NEXT: slli a1, a1, 16
+; ZVFHMIN-NEXT: slli a0, a0, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, a1
+; ZVFHMIN-NEXT: fmv.w.x fa4, a0
+; ZVFHMIN-NEXT: fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w a0, fa0
+; ZVFHMIN-NEXT: slli s1, s1, 16
+; ZVFHMIN-NEXT: slli s0, s0, 16
+; ZVFHMIN-NEXT: slli s2, a0, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, s1
+; ZVFHMIN-NEXT: fmv.w.x fa4, s0
+; ZVFHMIN-NEXT: fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w a0, fa0
+; ZVFHMIN-NEXT: slli a0, a0, 48
+; ZVFHMIN-NEXT: srli a0, a0, 48
+; ZVFHMIN-NEXT: or a0, a0, s2
+; ZVFHMIN-NEXT: lui a1, 1048560
+; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vmv.s.x v8, a0
+; ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vmv.x.s a0, v8
+; ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1
+; ZVFHMIN-NEXT: or a0, a0, a1
+; ZVFHMIN-NEXT: fmv.w.x fa0, a0
+; ZVFHMIN-NEXT: vmv.x.s a0, v8
+; ZVFHMIN-NEXT: or a0, a0, a1
+; ZVFHMIN-NEXT: fmv.w.x fa1, a0
+; ZVFHMIN-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: .cfi_restore ra
+; ZVFHMIN-NEXT: .cfi_restore s0
+; ZVFHMIN-NEXT: .cfi_restore s1
+; ZVFHMIN-NEXT: .cfi_restore s2
+; ZVFHMIN-NEXT: addi sp, sp, 32
+; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: max_v2bf16:
+; ZVFBFA: # %bb.0: # %entry
+; ZVFBFA-NEXT: vsetivli zero, 2, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v9, v9, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
+entry:
+ %c = call <2 x bfloat> @llvm.maximumnum.v2bf16(<2 x bfloat> %a, <2 x bfloat> %b)
+ ret <2 x bfloat> %c
+}
+
+define <4 x bfloat> @max_v4bf16(<4 x bfloat> %a, <4 x bfloat> %b) {
+; ZVFH-LABEL: max_v4bf16:
+; ZVFH: # %bb.0: # %entry
+; ZVFH-NEXT: addi sp, sp, -80
+; ZVFH-NEXT: .cfi_def_cfa_offset 80
+; ZVFH-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s1, 56(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s2, 48(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s3, 40(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s4, 32(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s5, 24(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s6, 16(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s7, 8(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: .cfi_offset ra, -8
+; ZVFH-NEXT: .cfi_offset s0, -16
+; ZVFH-NEXT: .cfi_offset s1, -24
+; ZVFH-NEXT: .cfi_offset s2, -32
+; ZVFH-NEXT: .cfi_offset s3, -40
+; ZVFH-NEXT: .cfi_offset s4, -48
+; ZVFH-NEXT: .cfi_offset s5, -56
+; ZVFH-NEXT: .cfi_offset s6, -64
+; ZVFH-NEXT: .cfi_offset s7, -72
+; ZVFH-NEXT: mv s0, a0
+; ZVFH-NEXT: fmv.x.w s1, fa3
+; ZVFH-NEXT: fmv.x.w s2, fa7
+; ZVFH-NEXT: fmv.x.w s3, fa2
+; ZVFH-NEXT: fmv.x.w s4, fa6
+; ZVFH-NEXT: fmv.x.w s5, fa1
+; ZVFH-NEXT: fmv.x.w s6, fa5
+; ZVFH-NEXT: fmv.x.w a0, fa0
+; ZVFH-NEXT: fmv.x.w a1, fa4
+; ZVFH-NEXT: slli a1, a1, 16
+; ZVFH-NEXT: slli a0, a0, 16
+; ZVFH-NEXT: fmv.w.x fa5, a1
+; ZVFH-NEXT: fmv.w.x fa4, a0
+; ZVFH-NEXT: fmax.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w s7, fa0
+; ZVFH-NEXT: slli s6, s6, 16
+; ZVFH-NEXT: slli s5, s5, 16
+; ZVFH-NEXT: fmv.w.x fa5, s6
+; ZVFH-NEXT: fmv.w.x fa4, s5
+; ZVFH-NEXT: fmax.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w s5, fa0
+; ZVFH-NEXT: slli s4, s4, 16
+; ZVFH-NEXT: slli s3, s3, 16
+; ZVFH-NEXT: fmv.w.x fa5, s4
+; ZVFH-NEXT: fmv.w.x fa4, s3
+; ZVFH-NEXT: fmax.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w s3, fa0
+; ZVFH-NEXT: slli s2, s2, 16
+; ZVFH-NEXT: slli s1, s1, 16
+; ZVFH-NEXT: fmv.w.x fa5, s2
+; ZVFH-NEXT: fmv.w.x fa4, s1
+; ZVFH-NEXT: fmax.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w a0, fa0
+; ZVFH-NEXT: sh s7, 0(s0)
+; ZVFH-NEXT: sh s5, 2(s0)
+; ZVFH-NEXT: sh s3, 4(s0)
+; ZVFH-NEXT: sh a0, 6(s0)
+; ZVFH-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s1, 56(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s2, 48(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s3, 40(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s4, 32(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s5, 24(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s6, 16(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s7, 8(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: .cfi_restore ra
+; ZVFH-NEXT: .cfi_restore s0
+; ZVFH-NEXT: .cfi_restore s1
+; ZVFH-NEXT: .cfi_restore s2
+; ZVFH-NEXT: .cfi_restore s3
+; ZVFH-NEXT: .cfi_restore s4
+; ZVFH-NEXT: .cfi_restore s5
+; ZVFH-NEXT: .cfi_restore s6
+; ZVFH-NEXT: .cfi_restore s7
+; ZVFH-NEXT: addi sp, sp, 80
+; ZVFH-NEXT: .cfi_def_cfa_offset 0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: max_v4bf16:
+; ZVFHMIN: # %bb.0: # %entry
+; ZVFHMIN-NEXT: addi sp, sp, -80
+; ZVFHMIN-NEXT: .cfi_def_cfa_offset 80
+; ZVFHMIN-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s1, 56(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s2, 48(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s3, 40(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s4, 32(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s5, 24(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s6, 16(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s7, 8(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: .cfi_offset ra, -8
+; ZVFHMIN-NEXT: .cfi_offset s0, -16
+; ZVFHMIN-NEXT: .cfi_offset s1, -24
+; ZVFHMIN-NEXT: .cfi_offset s2, -32
+; ZVFHMIN-NEXT: .cfi_offset s3, -40
+; ZVFHMIN-NEXT: .cfi_offset s4, -48
+; ZVFHMIN-NEXT: .cfi_offset s5, -56
+; ZVFHMIN-NEXT: .cfi_offset s6, -64
+; ZVFHMIN-NEXT: .cfi_offset s7, -72
+; ZVFHMIN-NEXT: mv s0, a0
+; ZVFHMIN-NEXT: fmv.x.w s1, fa3
+; ZVFHMIN-NEXT: fmv.x.w s2, fa7
+; ZVFHMIN-NEXT: fmv.x.w s3, fa2
+; ZVFHMIN-NEXT: fmv.x.w s4, fa6
+; ZVFHMIN-NEXT: fmv.x.w s5, fa1
+; ZVFHMIN-NEXT: fmv.x.w s6, fa5
+; ZVFHMIN-NEXT: fmv.x.w a0, fa0
+; ZVFHMIN-NEXT: fmv.x.w a1, fa4
+; ZVFHMIN-NEXT: slli a1, a1, 16
+; ZVFHMIN-NEXT: slli a0, a0, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, a1
+; ZVFHMIN-NEXT: fmv.w.x fa4, a0
+; ZVFHMIN-NEXT: fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w s7, fa0
+; ZVFHMIN-NEXT: slli s6, s6, 16
+; ZVFHMIN-NEXT: slli s5, s5, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, s6
+; ZVFHMIN-NEXT: fmv.w.x fa4, s5
+; ZVFHMIN-NEXT: fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w s5, fa0
+; ZVFHMIN-NEXT: slli s4, s4, 16
+; ZVFHMIN-NEXT: slli s3, s3, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, s4
+; ZVFHMIN-NEXT: fmv.w.x fa4, s3
+; ZVFHMIN-NEXT: fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w s3, fa0
+; ZVFHMIN-NEXT: slli s2, s2, 16
+; ZVFHMIN-NEXT: slli s1, s1, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, s2
+; ZVFHMIN-NEXT: fmv.w.x fa4, s1
+; ZVFHMIN-NEXT: fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w a0, fa0
+; ZVFHMIN-NEXT: sh s7, 0(s0)
+; ZVFHMIN-NEXT: sh s5, 2(s0)
+; ZVFHMIN-NEXT: sh s3, 4(s0)
+; ZVFHMIN-NEXT: sh a0, 6(s0)
+; ZVFHMIN-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s1, 56(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s2, 48(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s3, 40(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s4, 32(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s5, 24(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s6, 16(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s7, 8(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: .cfi_restore ra
+; ZVFHMIN-NEXT: .cfi_restore s0
+; ZVFHMIN-NEXT: .cfi_restore s1
+; ZVFHMIN-NEXT: .cfi_restore s2
+; ZVFHMIN-NEXT: .cfi_restore s3
+; ZVFHMIN-NEXT: .cfi_restore s4
+; ZVFHMIN-NEXT: .cfi_restore s5
+; ZVFHMIN-NEXT: .cfi_restore s6
+; ZVFHMIN-NEXT: .cfi_restore s7
+; ZVFHMIN-NEXT: addi sp, sp, 80
+; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: max_v4bf16:
+; ZVFBFA: # %bb.0: # %entry
+; ZVFBFA-NEXT: vsetivli zero, 4, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v9, v9, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
+entry:
+ %c = call <4 x bfloat> @llvm.maximumnum.v4bf16(<4 x bfloat> %a, <4 x bfloat> %b)
+ ret <4 x bfloat> %c
+}
+
+define <8 x bfloat> @max_v8bf16(<8 x bfloat> %a, <8 x bfloat> %b) {
+; ZVFH-LABEL: max_v8bf16:
+; ZVFH: # %bb.0: # %entry
+; ZVFH-NEXT: addi sp, sp, -128
+; ZVFH-NEXT: .cfi_def_cfa_offset 128
+; ZVFH-NEXT: sd ra, 120(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s0, 112(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s1, 104(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s2, 96(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s3, 88(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s4, 80(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s5, 72(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s6, 64(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s7, 56(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s8, 48(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s9, 40(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s10, 32(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s11, 24(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: .cfi_offset ra, -8
+; ZVFH-NEXT: .cfi_offset s0, -16
+; ZVFH-NEXT: .cfi_offset s1, -24
+; ZVFH-NEXT: .cfi_offset s2, -32
+; ZVFH-NEXT: .cfi_offset s3, -40
+; ZVFH-NEXT: .cfi_offset s4, -48
+; ZVFH-NEXT: .cfi_offset s5, -56
+; ZVFH-NEXT: .cfi_offset s6, -64
+; ZVFH-NEXT: .cfi_offset s7, -72
+; ZVFH-NEXT: .cfi_offset s8, -80
+; ZVFH-NEXT: .cfi_offset s9, -88
+; ZVFH-NEXT: .cfi_offset s10, -96
+; ZVFH-NEXT: .cfi_offset s11, -104
+; ZVFH-NEXT: sd a7, 16(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: mv s2, a6
+; ZVFH-NEXT: mv s3, a5
+; ZVFH-NEXT: mv s4, a4
+; ZVFH-NEXT: mv s5, a3
+; ZVFH-NEXT: mv s6, a2
+; ZVFH-NEXT: mv s0, a0
+; ZVFH-NEXT: fmv.x.w a0, fa7
+; ZVFH-NEXT: sd a0, 8(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: fmv.x.w s10, fa6
+; ZVFH-NEXT: fmv.x.w s11, fa5
+; ZVFH-NEXT: fmv.x.w s9, fa4
+; ZVFH-NEXT: fmv.x.w s1, fa3
+; ZVFH-NEXT: fmv.x.w s7, fa2
+; ZVFH-NEXT: fmv.x.w s8, fa1
+; ZVFH-NEXT: fmv.x.w a0, fa0
+; ZVFH-NEXT: slli a1, a1, 16
+; ZVFH-NEXT: fmv.w.x fa5, a1
+; ZVFH-NEXT: slli a0, a0, 16
+; ZVFH-NEXT: fmv.w.x fa4, a0
+; ZVFH-NEXT: fmax.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w a0, fa0
+; ZVFH-NEXT: sd a0, 0(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: slli s6, s6, 16
+; ZVFH-NEXT: slli s8, s8, 16
+; ZVFH-NEXT: fmv.w.x fa5, s6
+; ZVFH-NEXT: fmv.w.x fa4, s8
+; ZVFH-NEXT: fmax.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w s6, fa0
+; ZVFH-NEXT: slli s5, s5, 16
+; ZVFH-NEXT: slli s7, s7, 16
+; ZVFH-NEXT: fmv.w.x fa5, s5
+; ZVFH-NEXT: fmv.w.x fa4, s7
+; ZVFH-NEXT: fmax.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w s5, fa0
+; ZVFH-NEXT: slli s4, s4, 16
+; ZVFH-NEXT: slli s1, s1, 16
+; ZVFH-NEXT: fmv.w.x fa5, s4
+; ZVFH-NEXT: fmv.w.x fa4, s1
+; ZVFH-NEXT: fmax.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w s4, fa0
+; ZVFH-NEXT: slli s3, s3, 16
+; ZVFH-NEXT: slli s9, s9, 16
+; ZVFH-NEXT: fmv.w.x fa5, s3
+; ZVFH-NEXT: fmv.w.x fa4, s9
+; ZVFH-NEXT: fmax.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w s1, fa0
+; ZVFH-NEXT: slli s2, s2, 16
+; ZVFH-NEXT: slli s11, s11, 16
+; ZVFH-NEXT: fmv.w.x fa5, s2
+; ZVFH-NEXT: fmv.w.x fa4, s11
+; ZVFH-NEXT: fmax.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w s2, fa0
+; ZVFH-NEXT: ld a0, 16(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: slli a0, a0, 16
+; ZVFH-NEXT: slli s10, s10, 16
+; ZVFH-NEXT: fmv.w.x fa5, a0
+; ZVFH-NEXT: fmv.w.x fa4, s10
+; ZVFH-NEXT: fmax.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w s3, fa0
+; ZVFH-NEXT: lhu a0, 128(sp)
+; ZVFH-NEXT: slli a0, a0, 16
+; ZVFH-NEXT: ld a1, 8(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: slli a1, a1, 16
+; ZVFH-NEXT: fmv.w.x fa5, a0
+; ZVFH-NEXT: fmv.w.x fa4, a1
+; ZVFH-NEXT: fmax.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w a0, fa0
+; ZVFH-NEXT: sh s1, 8(s0)
+; ZVFH-NEXT: sh s2, 10(s0)
+; ZVFH-NEXT: sh s3, 12(s0)
+; ZVFH-NEXT: sh a0, 14(s0)
+; ZVFH-NEXT: ld a0, 0(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: sh a0, 0(s0)
+; ZVFH-NEXT: sh s6, 2(s0)
+; ZVFH-NEXT: sh s5, 4(s0)
+; ZVFH-NEXT: sh s4, 6(s0)
+; ZVFH-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s0, 112(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s1, 104(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s2, 96(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s3, 88(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s4, 80(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s5, 72(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s6, 64(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s7, 56(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s8, 48(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s9, 40(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s10, 32(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s11, 24(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: .cfi_restore ra
+; ZVFH-NEXT: .cfi_restore s0
+; ZVFH-NEXT: .cfi_restore s1
+; ZVFH-NEXT: .cfi_restore s2
+; ZVFH-NEXT: .cfi_restore s3
+; ZVFH-NEXT: .cfi_restore s4
+; ZVFH-NEXT: .cfi_restore s5
+; ZVFH-NEXT: .cfi_restore s6
+; ZVFH-NEXT: .cfi_restore s7
+; ZVFH-NEXT: .cfi_restore s8
+; ZVFH-NEXT: .cfi_restore s9
+; ZVFH-NEXT: .cfi_restore s10
+; ZVFH-NEXT: .cfi_restore s11
+; ZVFH-NEXT: addi sp, sp, 128
+; ZVFH-NEXT: .cfi_def_cfa_offset 0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: max_v8bf16:
+; ZVFHMIN: # %bb.0: # %entry
+; ZVFHMIN-NEXT: addi sp, sp, -128
+; ZVFHMIN-NEXT: .cfi_def_cfa_offset 128
+; ZVFHMIN-NEXT: sd ra, 120(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s0, 112(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s1, 104(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s2, 96(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s3, 88(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s4, 80(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s5, 72(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s6, 64(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s7, 56(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s8, 48(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s9, 40(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s10, 32(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s11, 24(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: .cfi_offset ra, -8
+; ZVFHMIN-NEXT: .cfi_offset s0, -16
+; ZVFHMIN-NEXT: .cfi_offset s1, -24
+; ZVFHMIN-NEXT: .cfi_offset s2, -32
+; ZVFHMIN-NEXT: .cfi_offset s3, -40
+; ZVFHMIN-NEXT: .cfi_offset s4, -48
+; ZVFHMIN-NEXT: .cfi_offset s5, -56
+; ZVFHMIN-NEXT: .cfi_offset s6, -64
+; ZVFHMIN-NEXT: .cfi_offset s7, -72
+; ZVFHMIN-NEXT: .cfi_offset s8, -80
+; ZVFHMIN-NEXT: .cfi_offset s9, -88
+; ZVFHMIN-NEXT: .cfi_offset s10, -96
+; ZVFHMIN-NEXT: .cfi_offset s11, -104
+; ZVFHMIN-NEXT: sd a7, 16(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: mv s2, a6
+; ZVFHMIN-NEXT: mv s3, a5
+; ZVFHMIN-NEXT: mv s4, a4
+; ZVFHMIN-NEXT: mv s5, a3
+; ZVFHMIN-NEXT: mv s6, a2
+; ZVFHMIN-NEXT: mv s0, a0
+; ZVFHMIN-NEXT: fmv.x.w a0, fa7
+; ZVFHMIN-NEXT: sd a0, 8(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: fmv.x.w s10, fa6
+; ZVFHMIN-NEXT: fmv.x.w s11, fa5
+; ZVFHMIN-NEXT: fmv.x.w s9, fa4
+; ZVFHMIN-NEXT: fmv.x.w s1, fa3
+; ZVFHMIN-NEXT: fmv.x.w s7, fa2
+; ZVFHMIN-NEXT: fmv.x.w s8, fa1
+; ZVFHMIN-NEXT: fmv.x.w a0, fa0
+; ZVFHMIN-NEXT: slli a1, a1, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, a1
+; ZVFHMIN-NEXT: slli a0, a0, 16
+; ZVFHMIN-NEXT: fmv.w.x fa4, a0
+; ZVFHMIN-NEXT: fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w a0, fa0
+; ZVFHMIN-NEXT: sd a0, 0(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: slli s6, s6, 16
+; ZVFHMIN-NEXT: slli s8, s8, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, s6
+; ZVFHMIN-NEXT: fmv.w.x fa4, s8
+; ZVFHMIN-NEXT: fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w s6, fa0
+; ZVFHMIN-NEXT: slli s5, s5, 16
+; ZVFHMIN-NEXT: slli s7, s7, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, s5
+; ZVFHMIN-NEXT: fmv.w.x fa4, s7
+; ZVFHMIN-NEXT: fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w s5, fa0
+; ZVFHMIN-NEXT: slli s4, s4, 16
+; ZVFHMIN-NEXT: slli s1, s1, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, s4
+; ZVFHMIN-NEXT: fmv.w.x fa4, s1
+; ZVFHMIN-NEXT: fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w s4, fa0
+; ZVFHMIN-NEXT: slli s3, s3, 16
+; ZVFHMIN-NEXT: slli s9, s9, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, s3
+; ZVFHMIN-NEXT: fmv.w.x fa4, s9
+; ZVFHMIN-NEXT: fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w s1, fa0
+; ZVFHMIN-NEXT: slli s2, s2, 16
+; ZVFHMIN-NEXT: slli s11, s11, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, s2
+; ZVFHMIN-NEXT: fmv.w.x fa4, s11
+; ZVFHMIN-NEXT: fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w s2, fa0
+; ZVFHMIN-NEXT: ld a0, 16(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: slli a0, a0, 16
+; ZVFHMIN-NEXT: slli s10, s10, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: fmv.w.x fa4, s10
+; ZVFHMIN-NEXT: fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w s3, fa0
+; ZVFHMIN-NEXT: lhu a0, 128(sp)
+; ZVFHMIN-NEXT: slli a0, a0, 16
+; ZVFHMIN-NEXT: ld a1, 8(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: slli a1, a1, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: fmv.w.x fa4, a1
+; ZVFHMIN-NEXT: fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w a0, fa0
+; ZVFHMIN-NEXT: sh s1, 8(s0)
+; ZVFHMIN-NEXT: sh s2, 10(s0)
+; ZVFHMIN-NEXT: sh s3, 12(s0)
+; ZVFHMIN-NEXT: sh a0, 14(s0)
+; ZVFHMIN-NEXT: ld a0, 0(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: sh a0, 0(s0)
+; ZVFHMIN-NEXT: sh s6, 2(s0)
+; ZVFHMIN-NEXT: sh s5, 4(s0)
+; ZVFHMIN-NEXT: sh s4, 6(s0)
+; ZVFHMIN-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s0, 112(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s1, 104(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s2, 96(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s3, 88(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s4, 80(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s5, 72(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s6, 64(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s7, 56(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s8, 48(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s9, 40(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s10, 32(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s11, 24(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: .cfi_restore ra
+; ZVFHMIN-NEXT: .cfi_restore s0
+; ZVFHMIN-NEXT: .cfi_restore s1
+; ZVFHMIN-NEXT: .cfi_restore s2
+; ZVFHMIN-NEXT: .cfi_restore s3
+; ZVFHMIN-NEXT: .cfi_restore s4
+; ZVFHMIN-NEXT: .cfi_restore s5
+; ZVFHMIN-NEXT: .cfi_restore s6
+; ZVFHMIN-NEXT: .cfi_restore s7
+; ZVFHMIN-NEXT: .cfi_restore s8
+; ZVFHMIN-NEXT: .cfi_restore s9
+; ZVFHMIN-NEXT: .cfi_restore s10
+; ZVFHMIN-NEXT: .cfi_restore s11
+; ZVFHMIN-NEXT: addi sp, sp, 128
+; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: max_v8bf16:
+; ZVFBFA: # %bb.0: # %entry
+; ZVFBFA-NEXT: vsetivli zero, 8, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v10, v12, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT: ret
+entry:
+ %c = call <8 x bfloat> @llvm.maximumnum.v8bf16(<8 x bfloat> %a, <8 x bfloat> %b)
+ ret <8 x bfloat> %c
+}
+
+define <9 x bfloat> @max_v9bf16(<9 x bfloat> %a, <9 x bfloat> %b) {
+; ZVFH-LABEL: max_v9bf16:
+; ZVFH: # %bb.0: # %entry
+; ZVFH-NEXT: addi sp, sp, -128
+; ZVFH-NEXT: .cfi_def_cfa_offset 128
+; ZVFH-NEXT: sd ra, 120(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s0, 112(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s1, 104(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s2, 96(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s3, 88(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s4, 80(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s5, 72(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s6, 64(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s7, 56(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s8, 48(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s9, 40(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s10, 32(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s11, 24(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: .cfi_offset ra, -8
+; ZVFH-NEXT: .cfi_offset s0, -16
+; ZVFH-NEXT: .cfi_offset s1, -24
+; ZVFH-NEXT: .cfi_offset s2, -32
+; ZVFH-NEXT: .cfi_offset s3, -40
+; ZVFH-NEXT: .cfi_offset s4, -48
+; ZVFH-NEXT: .cfi_offset s5, -56
+; ZVFH-NEXT: .cfi_offset s6, -64
+; ZVFH-NEXT: .cfi_offset s7, -72
+; ZVFH-NEXT: .cfi_offset s8, -80
+; ZVFH-NEXT: .cfi_offset s9, -88
+; ZVFH-NEXT: .cfi_offset s10, -96
+; ZVFH-NEXT: .cfi_offset s11, -104
+; ZVFH-NEXT: sd a7, 8(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: mv s3, a6
+; ZVFH-NEXT: mv s5, a4
+; ZVFH-NEXT: mv s4, a3
+; ZVFH-NEXT: mv s6, a2
+; ZVFH-NEXT: sd a1, 16(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: mv s0, a0
+; ZVFH-NEXT: fmv.x.w a0, fa5
+; ZVFH-NEXT: sd a0, 0(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: fmv.x.w s9, fa4
+; ZVFH-NEXT: fmv.x.w s10, fa6
+; ZVFH-NEXT: fmv.x.w s7, fa7
+; ZVFH-NEXT: fmv.x.w s11, fa1
+; ZVFH-NEXT: fmv.x.w s1, fa0
+; ZVFH-NEXT: fmv.x.w s2, fa2
+; ZVFH-NEXT: fmv.x.w a0, fa3
+; ZVFH-NEXT: slli a5, a5, 16
+; ZVFH-NEXT: fmv.w.x fa5, a5
+; ZVFH-NEXT: slli a0, a0, 16
+; ZVFH-NEXT: fmv.w.x fa4, a0
+; ZVFH-NEXT: fmax.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w a0, fa0
+; ZVFH-NEXT: slli s5, s5, 16
+; ZVFH-NEXT: slli s2, s2, 16
+; ZVFH-NEXT: slli s8, a0, 48
+; ZVFH-NEXT: fmv.w.x fa5, s5
+; ZVFH-NEXT: fmv.w.x fa4, s2
+; ZVFH-NEXT: fmax.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w a0, fa0
+; ZVFH-NEXT: slli s6, s6, 16
+; ZVFH-NEXT: slli s1, s1, 16
+; ZVFH-NEXT: slli a0, a0, 48
+; ZVFH-NEXT: fmv.w.x fa5, s6
+; ZVFH-NEXT: fmv.w.x fa4, s1
+; ZVFH-NEXT: srli a0, a0, 16
+; ZVFH-NEXT: or s1, s8, a0
+; ZVFH-NEXT: fmax.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w a0, fa0
+; ZVFH-NEXT: lui s5, 16
+; ZVFH-NEXT: slli s4, s4, 16
+; ZVFH-NEXT: slli s11, s11, 16
+; ZVFH-NEXT: addi s5, s5, -1
+; ZVFH-NEXT: fmv.w.x fa5, s4
+; ZVFH-NEXT: fmv.w.x fa4, s11
+; ZVFH-NEXT: and s2, a0, s5
+; ZVFH-NEXT: fmax.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w a0, fa0
+; ZVFH-NEXT: lhu a1, 136(sp)
+; ZVFH-NEXT: slli a1, a1, 16
+; ZVFH-NEXT: slli s7, s7, 16
+; ZVFH-NEXT: slli a0, a0, 16
+; ZVFH-NEXT: fmv.w.x fa5, a1
+; ZVFH-NEXT: fmv.w.x fa4, s7
+; ZVFH-NEXT: or a0, s2, a0
+; ZVFH-NEXT: slli a0, a0, 32
+; ZVFH-NEXT: srli a0, a0, 32
+; ZVFH-NEXT: or s4, a0, s1
+; ZVFH-NEXT: fmax.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w a0, fa0
+; ZVFH-NEXT: lhu a1, 128(sp)
+; ZVFH-NEXT: slli a1, a1, 16
+; ZVFH-NEXT: slli s10, s10, 16
+; ZVFH-NEXT: slli s1, a0, 48
+; ZVFH-NEXT: fmv.w.x fa5, a1
+; ZVFH-NEXT: fmv.w.x fa4, s10
+; ZVFH-NEXT: fmax.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w a0, fa0
+; ZVFH-NEXT: slli s3, s3, 16
+; ZVFH-NEXT: slli s9, s9, 16
+; ZVFH-NEXT: slli a0, a0, 48
+; ZVFH-NEXT: fmv.w.x fa5, s3
+; ZVFH-NEXT: fmv.w.x fa4, s9
+; ZVFH-NEXT: srli s2, a0, 16
+; ZVFH-NEXT: fmax.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w a0, fa0
+; ZVFH-NEXT: ld a1, 8(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: slli a1, a1, 16
+; ZVFH-NEXT: ld a2, 0(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: slli a2, a2, 16
+; ZVFH-NEXT: and s3, a0, s5
+; ZVFH-NEXT: fmv.w.x fa5, a1
+; ZVFH-NEXT: fmv.w.x fa4, a2
+; ZVFH-NEXT: fmax.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w a0, fa0
+; ZVFH-NEXT: lhu a1, 144(sp)
+; ZVFH-NEXT: slli a1, a1, 16
+; ZVFH-NEXT: ld a2, 16(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: slli a2, a2, 16
+; ZVFH-NEXT: slli a0, a0, 16
+; ZVFH-NEXT: fmv.w.x fa5, a1
+; ZVFH-NEXT: fmv.w.x fa4, a2
+; ZVFH-NEXT: or a0, s3, a0
+; ZVFH-NEXT: slli a0, a0, 32
+; ZVFH-NEXT: srli a0, a0, 32
+; ZVFH-NEXT: or a0, a0, s1
+; ZVFH-NEXT: or s1, a0, s2
+; ZVFH-NEXT: fmax.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w a0, fa0
+; ZVFH-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; ZVFH-NEXT: vmv.v.x v8, s1
+; ZVFH-NEXT: sh a0, 16(s0)
+; ZVFH-NEXT: vsetvli zero, zero, e64, m2, tu, ma
+; ZVFH-NEXT: vmv.s.x v8, s4
+; ZVFH-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; ZVFH-NEXT: vse64.v v8, (s0)
+; ZVFH-NEXT: vslidedown.vi v8, v8, 1
+; ZVFH-NEXT: addi s0, s0, 8
+; ZVFH-NEXT: vse64.v v8, (s0)
+; ZVFH-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s0, 112(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s1, 104(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s2, 96(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s3, 88(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s4, 80(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s5, 72(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s6, 64(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s7, 56(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s8, 48(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s9, 40(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s10, 32(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s11, 24(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: .cfi_restore ra
+; ZVFH-NEXT: .cfi_restore s0
+; ZVFH-NEXT: .cfi_restore s1
+; ZVFH-NEXT: .cfi_restore s2
+; ZVFH-NEXT: .cfi_restore s3
+; ZVFH-NEXT: .cfi_restore s4
+; ZVFH-NEXT: .cfi_restore s5
+; ZVFH-NEXT: .cfi_restore s6
+; ZVFH-NEXT: .cfi_restore s7
+; ZVFH-NEXT: .cfi_restore s8
+; ZVFH-NEXT: .cfi_restore s9
+; ZVFH-NEXT: .cfi_restore s10
+; ZVFH-NEXT: .cfi_restore s11
+; ZVFH-NEXT: addi sp, sp, 128
+; ZVFH-NEXT: .cfi_def_cfa_offset 0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: max_v9bf16:
+; ZVFHMIN: # %bb.0: # %entry
+; ZVFHMIN-NEXT: addi sp, sp, -128
+; ZVFHMIN-NEXT: .cfi_def_cfa_offset 128
+; ZVFHMIN-NEXT: sd ra, 120(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s0, 112(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s1, 104(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s2, 96(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s3, 88(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s4, 80(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s5, 72(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s6, 64(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s7, 56(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s8, 48(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s9, 40(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s10, 32(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s11, 24(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: .cfi_offset ra, -8
+; ZVFHMIN-NEXT: .cfi_offset s0, -16
+; ZVFHMIN-NEXT: .cfi_offset s1, -24
+; ZVFHMIN-NEXT: .cfi_offset s2, -32
+; ZVFHMIN-NEXT: .cfi_offset s3, -40
+; ZVFHMIN-NEXT: .cfi_offset s4, -48
+; ZVFHMIN-NEXT: .cfi_offset s5, -56
+; ZVFHMIN-NEXT: .cfi_offset s6, -64
+; ZVFHMIN-NEXT: .cfi_offset s7, -72
+; ZVFHMIN-NEXT: .cfi_offset s8, -80
+; ZVFHMIN-NEXT: .cfi_offset s9, -88
+; ZVFHMIN-NEXT: .cfi_offset s10, -96
+; ZVFHMIN-NEXT: .cfi_offset s11, -104
+; ZVFHMIN-NEXT: sd a7, 8(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: mv s3, a6
+; ZVFHMIN-NEXT: mv s5, a4
+; ZVFHMIN-NEXT: mv s4, a3
+; ZVFHMIN-NEXT: mv s6, a2
+; ZVFHMIN-NEXT: sd a1, 16(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: mv s0, a0
+; ZVFHMIN-NEXT: fmv.x.w a0, fa5
+; ZVFHMIN-NEXT: sd a0, 0(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: fmv.x.w s9, fa4
+; ZVFHMIN-NEXT: fmv.x.w s10, fa6
+; ZVFHMIN-NEXT: fmv.x.w s7, fa7
+; ZVFHMIN-NEXT: fmv.x.w s11, fa1
+; ZVFHMIN-NEXT: fmv.x.w s1, fa0
+; ZVFHMIN-NEXT: fmv.x.w s2, fa2
+; ZVFHMIN-NEXT: fmv.x.w a0, fa3
+; ZVFHMIN-NEXT: slli a5, a5, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, a5
+; ZVFHMIN-NEXT: slli a0, a0, 16
+; ZVFHMIN-NEXT: fmv.w.x fa4, a0
+; ZVFHMIN-NEXT: fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w a0, fa0
+; ZVFHMIN-NEXT: slli s5, s5, 16
+; ZVFHMIN-NEXT: slli s2, s2, 16
+; ZVFHMIN-NEXT: slli s8, a0, 48
+; ZVFHMIN-NEXT: fmv.w.x fa5, s5
+; ZVFHMIN-NEXT: fmv.w.x fa4, s2
+; ZVFHMIN-NEXT: fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w a0, fa0
+; ZVFHMIN-NEXT: slli s6, s6, 16
+; ZVFHMIN-NEXT: slli s1, s1, 16
+; ZVFHMIN-NEXT: slli a0, a0, 48
+; ZVFHMIN-NEXT: fmv.w.x fa5, s6
+; ZVFHMIN-NEXT: fmv.w.x fa4, s1
+; ZVFHMIN-NEXT: srli a0, a0, 16
+; ZVFHMIN-NEXT: or s1, s8, a0
+; ZVFHMIN-NEXT: fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w a0, fa0
+; ZVFHMIN-NEXT: lui s5, 16
+; ZVFHMIN-NEXT: slli s4, s4, 16
+; ZVFHMIN-NEXT: slli s11, s11, 16
+; ZVFHMIN-NEXT: addi s5, s5, -1
+; ZVFHMIN-NEXT: fmv.w.x fa5, s4
+; ZVFHMIN-NEXT: fmv.w.x fa4, s11
+; ZVFHMIN-NEXT: and s2, a0, s5
+; ZVFHMIN-NEXT: fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w a0, fa0
+; ZVFHMIN-NEXT: lhu a1, 136(sp)
+; ZVFHMIN-NEXT: slli a1, a1, 16
+; ZVFHMIN-NEXT: slli s7, s7, 16
+; ZVFHMIN-NEXT: slli a0, a0, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, a1
+; ZVFHMIN-NEXT: fmv.w.x fa4, s7
+; ZVFHMIN-NEXT: or a0, s2, a0
+; ZVFHMIN-NEXT: slli a0, a0, 32
+; ZVFHMIN-NEXT: srli a0, a0, 32
+; ZVFHMIN-NEXT: or s4, a0, s1
+; ZVFHMIN-NEXT: fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w a0, fa0
+; ZVFHMIN-NEXT: lhu a1, 128(sp)
+; ZVFHMIN-NEXT: slli a1, a1, 16
+; ZVFHMIN-NEXT: slli s10, s10, 16
+; ZVFHMIN-NEXT: slli s1, a0, 48
+; ZVFHMIN-NEXT: fmv.w.x fa5, a1
+; ZVFHMIN-NEXT: fmv.w.x fa4, s10
+; ZVFHMIN-NEXT: fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w a0, fa0
+; ZVFHMIN-NEXT: slli s3, s3, 16
+; ZVFHMIN-NEXT: slli s9, s9, 16
+; ZVFHMIN-NEXT: slli a0, a0, 48
+; ZVFHMIN-NEXT: fmv.w.x fa5, s3
+; ZVFHMIN-NEXT: fmv.w.x fa4, s9
+; ZVFHMIN-NEXT: srli s2, a0, 16
+; ZVFHMIN-NEXT: fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w a0, fa0
+; ZVFHMIN-NEXT: ld a1, 8(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: slli a1, a1, 16
+; ZVFHMIN-NEXT: ld a2, 0(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: slli a2, a2, 16
+; ZVFHMIN-NEXT: and s3, a0, s5
+; ZVFHMIN-NEXT: fmv.w.x fa5, a1
+; ZVFHMIN-NEXT: fmv.w.x fa4, a2
+; ZVFHMIN-NEXT: fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w a0, fa0
+; ZVFHMIN-NEXT: lhu a1, 144(sp)
+; ZVFHMIN-NEXT: slli a1, a1, 16
+; ZVFHMIN-NEXT: ld a2, 16(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: slli a2, a2, 16
+; ZVFHMIN-NEXT: slli a0, a0, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, a1
+; ZVFHMIN-NEXT: fmv.w.x fa4, a2
+; ZVFHMIN-NEXT: or a0, s3, a0
+; ZVFHMIN-NEXT: slli a0, a0, 32
+; ZVFHMIN-NEXT: srli a0, a0, 32
+; ZVFHMIN-NEXT: or a0, a0, s1
+; ZVFHMIN-NEXT: or s1, a0, s2
+; ZVFHMIN-NEXT: fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w a0, fa0
+; ZVFHMIN-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v8, s1
+; ZVFHMIN-NEXT: sh a0, 16(s0)
+; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, tu, ma
+; ZVFHMIN-NEXT: vmv.s.x v8, s4
+; ZVFHMIN-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; ZVFHMIN-NEXT: vse64.v v8, (s0)
+; ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1
+; ZVFHMIN-NEXT: addi s0, s0, 8
+; ZVFHMIN-NEXT: vse64.v v8, (s0)
+; ZVFHMIN-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s0, 112(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s1, 104(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s2, 96(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s3, 88(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s4, 80(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s5, 72(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s6, 64(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s7, 56(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s8, 48(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s9, 40(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s10, 32(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s11, 24(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: .cfi_restore ra
+; ZVFHMIN-NEXT: .cfi_restore s0
+; ZVFHMIN-NEXT: .cfi_restore s1
+; ZVFHMIN-NEXT: .cfi_restore s2
+; ZVFHMIN-NEXT: .cfi_restore s3
+; ZVFHMIN-NEXT: .cfi_restore s4
+; ZVFHMIN-NEXT: .cfi_restore s5
+; ZVFHMIN-NEXT: .cfi_restore s6
+; ZVFHMIN-NEXT: .cfi_restore s7
+; ZVFHMIN-NEXT: .cfi_restore s8
+; ZVFHMIN-NEXT: .cfi_restore s9
+; ZVFHMIN-NEXT: .cfi_restore s10
+; ZVFHMIN-NEXT: .cfi_restore s11
+; ZVFHMIN-NEXT: addi sp, sp, 128
+; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: max_v9bf16:
+; ZVFBFA: # %bb.0: # %entry
+; ZVFBFA-NEXT: vsetivli zero, 16, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v10
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v12, v16, v12
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: ret
+entry:
+ %c = call <9 x bfloat> @llvm.maximumnum.v9bf16(<9 x bfloat> %a, <9 x bfloat> %b)
+ ret <9 x bfloat> %c
+}
+
+define <16 x bfloat> @max_v16bf16(<16 x bfloat> %a, <16 x bfloat> %b) {
+; ZVFH-LABEL: max_v16bf16:
+; ZVFH: # %bb.0: # %entry
+; ZVFH-NEXT: addi sp, sp, -192
+; ZVFH-NEXT: .cfi_def_cfa_offset 192
+; ZVFH-NEXT: sd ra, 184(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s0, 176(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s1, 168(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s2, 160(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s3, 152(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s4, 144(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s5, 136(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s6, 128(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s7, 120(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s8, 112(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s9, 104(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s10, 96(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s11, 88(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: .cfi_offset ra, -8
+; ZVFH-NEXT: .cfi_offset s0, -16
+; ZVFH-NEXT: .cfi_offset s1, -24
+; ZVFH-NEXT: .cfi_offset s2, -32
+; ZVFH-NEXT: .cfi_offset s3, -40
+; ZVFH-NEXT: .cfi_offset s4, -48
+; ZVFH-NEXT: .cfi_offset s5, -56
+; ZVFH-NEXT: .cfi_offset s6, -64
+; ZVFH-NEXT: .cfi_offset s7, -72
+; ZVFH-NEXT: .cfi_offset s8, -80
+; ZVFH-NEXT: .cfi_offset s9, -88
+; ZVFH-NEXT: .cfi_offset s10, -96
+; ZVFH-NEXT: .cfi_offset s11, -104
+; ZVFH-NEXT: sd a7, 80(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd a6, 64(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd a5, 48(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd a4, 32(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd a3, 16(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd a2, 8(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: mv s7, a1
+; ZVFH-NEXT: mv s0, a0
+; ZVFH-NEXT: fmv.x.w s11, fa7
+; ZVFH-NEXT: fmv.x.w s2, fa6
+; ZVFH-NEXT: fmv.x.w s8, fa5
+; ZVFH-NEXT: fmv.x.w s1, fa4
+; ZVFH-NEXT: fmv.x.w a0, fa0
+; ZVFH-NEXT: lhu s9, 224(sp)
+; ZVFH-NEXT: lhu a1, 200(sp)
+; ZVFH-NEXT: slli a0, a0, 16
+; ZVFH-NEXT: fmv.w.x fa5, a0
+; ZVFH-NEXT: lhu s4, 216(sp)
+; ZVFH-NEXT: slli a1, a1, 16
+; ZVFH-NEXT: fmv.w.x fa4, a1
+; ZVFH-NEXT: lhu s3, 208(sp)
+; ZVFH-NEXT: fmax.s fa0, fa5, fa4
+; ZVFH-NEXT: fmv.x.w s5, fa3
+; ZVFH-NEXT: fmv.x.w s6, fa2
+; ZVFH-NEXT: fmv.x.w s10, fa1
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w a0, fa0
+; ZVFH-NEXT: sd a0, 72(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: slli s3, s3, 16
+; ZVFH-NEXT: slli s10, s10, 16
+; ZVFH-NEXT: fmv.w.x fa5, s3
+; ZVFH-NEXT: fmv.w.x fa4, s10
+; ZVFH-NEXT: fmax.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w a0, fa0
+; ZVFH-NEXT: sd a0, 56(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: slli s4, s4, 16
+; ZVFH-NEXT: slli s6, s6, 16
+; ZVFH-NEXT: fmv.w.x fa5, s4
+; ZVFH-NEXT: fmv.w.x fa4, s6
+; ZVFH-NEXT: fmax.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w a0, fa0
+; ZVFH-NEXT: sd a0, 40(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: slli s9, s9, 16
+; ZVFH-NEXT: slli s5, s5, 16
+; ZVFH-NEXT: fmv.w.x fa5, s9
+; ZVFH-NEXT: fmv.w.x fa4, s5
+; ZVFH-NEXT: fmax.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w a0, fa0
+; ZVFH-NEXT: sd a0, 24(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: lhu a0, 232(sp)
+; ZVFH-NEXT: slli a0, a0, 16
+; ZVFH-NEXT: slli s1, s1, 16
+; ZVFH-NEXT: fmv.w.x fa5, a0
+; ZVFH-NEXT: fmv.w.x fa4, s1
+; ZVFH-NEXT: fmax.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w s1, fa0
+; ZVFH-NEXT: lhu a0, 240(sp)
+; ZVFH-NEXT: slli a0, a0, 16
+; ZVFH-NEXT: slli s8, s8, 16
+; ZVFH-NEXT: fmv.w.x fa5, a0
+; ZVFH-NEXT: fmv.w.x fa4, s8
+; ZVFH-NEXT: fmax.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w s8, fa0
+; ZVFH-NEXT: lhu a0, 248(sp)
+; ZVFH-NEXT: slli a0, a0, 16
+; ZVFH-NEXT: slli s2, s2, 16
+; ZVFH-NEXT: fmv.w.x fa5, a0
+; ZVFH-NEXT: fmv.w.x fa4, s2
+; ZVFH-NEXT: fmax.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w s2, fa0
+; ZVFH-NEXT: lhu a0, 256(sp)
+; ZVFH-NEXT: slli a0, a0, 16
+; ZVFH-NEXT: slli s11, s11, 16
+; ZVFH-NEXT: fmv.w.x fa5, a0
+; ZVFH-NEXT: fmv.w.x fa4, s11
+; ZVFH-NEXT: fmax.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w s11, fa0
+; ZVFH-NEXT: lhu a0, 264(sp)
+; ZVFH-NEXT: slli a0, a0, 16
+; ZVFH-NEXT: slli s7, s7, 16
+; ZVFH-NEXT: fmv.w.x fa5, a0
+; ZVFH-NEXT: fmv.w.x fa4, s7
+; ZVFH-NEXT: fmax.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w s7, fa0
+; ZVFH-NEXT: lhu a0, 272(sp)
+; ZVFH-NEXT: slli a0, a0, 16
+; ZVFH-NEXT: ld a1, 8(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: slli a1, a1, 16
+; ZVFH-NEXT: fmv.w.x fa5, a0
+; ZVFH-NEXT: fmv.w.x fa4, a1
+; ZVFH-NEXT: fmax.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w s6, fa0
+; ZVFH-NEXT: lhu a0, 280(sp)
+; ZVFH-NEXT: slli a0, a0, 16
+; ZVFH-NEXT: ld a1, 16(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: slli a1, a1, 16
+; ZVFH-NEXT: fmv.w.x fa5, a0
+; ZVFH-NEXT: fmv.w.x fa4, a1
+; ZVFH-NEXT: fmax.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w s3, fa0
+; ZVFH-NEXT: lhu a0, 288(sp)
+; ZVFH-NEXT: slli a0, a0, 16
+; ZVFH-NEXT: ld a1, 32(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: slli a1, a1, 16
+; ZVFH-NEXT: fmv.w.x fa5, a0
+; ZVFH-NEXT: fmv.w.x fa4, a1
+; ZVFH-NEXT: fmax.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w s4, fa0
+; ZVFH-NEXT: lhu a0, 296(sp)
+; ZVFH-NEXT: slli a0, a0, 16
+; ZVFH-NEXT: ld a1, 48(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: slli a1, a1, 16
+; ZVFH-NEXT: fmv.w.x fa5, a0
+; ZVFH-NEXT: fmv.w.x fa4, a1
+; ZVFH-NEXT: fmax.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w s5, fa0
+; ZVFH-NEXT: lhu a0, 304(sp)
+; ZVFH-NEXT: slli a0, a0, 16
+; ZVFH-NEXT: ld a1, 64(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: slli a1, a1, 16
+; ZVFH-NEXT: fmv.w.x fa5, a0
+; ZVFH-NEXT: fmv.w.x fa4, a1
+; ZVFH-NEXT: fmax.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w s10, fa0
+; ZVFH-NEXT: lhu a0, 312(sp)
+; ZVFH-NEXT: slli a0, a0, 16
+; ZVFH-NEXT: ld a1, 80(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: slli a1, a1, 16
+; ZVFH-NEXT: fmv.w.x fa5, a0
+; ZVFH-NEXT: fmv.w.x fa4, a1
+; ZVFH-NEXT: fmax.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w s9, fa0
+; ZVFH-NEXT: lhu a0, 320(sp)
+; ZVFH-NEXT: slli a0, a0, 16
+; ZVFH-NEXT: lhu a1, 192(sp)
+; ZVFH-NEXT: slli a1, a1, 16
+; ZVFH-NEXT: fmv.w.x fa5, a0
+; ZVFH-NEXT: fmv.w.x fa4, a1
+; ZVFH-NEXT: fmax.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w a0, fa0
+; ZVFH-NEXT: sh s7, 16(s0)
+; ZVFH-NEXT: sh s6, 18(s0)
+; ZVFH-NEXT: sh s3, 20(s0)
+; ZVFH-NEXT: sh s4, 22(s0)
+; ZVFH-NEXT: sh s1, 8(s0)
+; ZVFH-NEXT: sh s8, 10(s0)
+; ZVFH-NEXT: sh s2, 12(s0)
+; ZVFH-NEXT: sh s11, 14(s0)
+; ZVFH-NEXT: sh s5, 24(s0)
+; ZVFH-NEXT: sh s10, 26(s0)
+; ZVFH-NEXT: sh s9, 28(s0)
+; ZVFH-NEXT: sh a0, 30(s0)
+; ZVFH-NEXT: ld a0, 72(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: sh a0, 0(s0)
+; ZVFH-NEXT: ld a0, 56(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: sh a0, 2(s0)
+; ZVFH-NEXT: ld a0, 40(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: sh a0, 4(s0)
+; ZVFH-NEXT: ld a0, 24(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: sh a0, 6(s0)
+; ZVFH-NEXT: ld ra, 184(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s0, 176(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s1, 168(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s2, 160(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s3, 152(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s4, 144(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s5, 136(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s6, 128(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s7, 120(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s8, 112(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s9, 104(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s10, 96(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s11, 88(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: .cfi_restore ra
+; ZVFH-NEXT: .cfi_restore s0
+; ZVFH-NEXT: .cfi_restore s1
+; ZVFH-NEXT: .cfi_restore s2
+; ZVFH-NEXT: .cfi_restore s3
+; ZVFH-NEXT: .cfi_restore s4
+; ZVFH-NEXT: .cfi_restore s5
+; ZVFH-NEXT: .cfi_restore s6
+; ZVFH-NEXT: .cfi_restore s7
+; ZVFH-NEXT: .cfi_restore s8
+; ZVFH-NEXT: .cfi_restore s9
+; ZVFH-NEXT: .cfi_restore s10
+; ZVFH-NEXT: .cfi_restore s11
+; ZVFH-NEXT: addi sp, sp, 192
+; ZVFH-NEXT: .cfi_def_cfa_offset 0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: max_v16bf16:
+; ZVFHMIN: # %bb.0: # %entry
+; ZVFHMIN-NEXT: addi sp, sp, -192
+; ZVFHMIN-NEXT: .cfi_def_cfa_offset 192
+; ZVFHMIN-NEXT: sd ra, 184(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s0, 176(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s1, 168(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s2, 160(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s3, 152(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s4, 144(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s5, 136(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s6, 128(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s7, 120(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s8, 112(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s9, 104(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s10, 96(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s11, 88(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: .cfi_offset ra, -8
+; ZVFHMIN-NEXT: .cfi_offset s0, -16
+; ZVFHMIN-NEXT: .cfi_offset s1, -24
+; ZVFHMIN-NEXT: .cfi_offset s2, -32
+; ZVFHMIN-NEXT: .cfi_offset s3, -40
+; ZVFHMIN-NEXT: .cfi_offset s4, -48
+; ZVFHMIN-NEXT: .cfi_offset s5, -56
+; ZVFHMIN-NEXT: .cfi_offset s6, -64
+; ZVFHMIN-NEXT: .cfi_offset s7, -72
+; ZVFHMIN-NEXT: .cfi_offset s8, -80
+; ZVFHMIN-NEXT: .cfi_offset s9, -88
+; ZVFHMIN-NEXT: .cfi_offset s10, -96
+; ZVFHMIN-NEXT: .cfi_offset s11, -104
+; ZVFHMIN-NEXT: sd a7, 80(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd a6, 64(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd a5, 48(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd a4, 32(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd a3, 16(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd a2, 8(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: mv s7, a1
+; ZVFHMIN-NEXT: mv s0, a0
+; ZVFHMIN-NEXT: fmv.x.w s11, fa7
+; ZVFHMIN-NEXT: fmv.x.w s2, fa6
+; ZVFHMIN-NEXT: fmv.x.w s8, fa5
+; ZVFHMIN-NEXT: fmv.x.w s1, fa4
+; ZVFHMIN-NEXT: fmv.x.w a0, fa0
+; ZVFHMIN-NEXT: lhu s9, 224(sp)
+; ZVFHMIN-NEXT: lhu a1, 200(sp)
+; ZVFHMIN-NEXT: slli a0, a0, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: lhu s4, 216(sp)
+; ZVFHMIN-NEXT: slli a1, a1, 16
+; ZVFHMIN-NEXT: fmv.w.x fa4, a1
+; ZVFHMIN-NEXT: lhu s3, 208(sp)
+; ZVFHMIN-NEXT: fmax.s fa0, fa5, fa4
+; ZVFHMIN-NEXT: fmv.x.w s5, fa3
+; ZVFHMIN-NEXT: fmv.x.w s6, fa2
+; ZVFHMIN-NEXT: fmv.x.w s10, fa1
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w a0, fa0
+; ZVFHMIN-NEXT: sd a0, 72(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: slli s3, s3, 16
+; ZVFHMIN-NEXT: slli s10, s10, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, s3
+; ZVFHMIN-NEXT: fmv.w.x fa4, s10
+; ZVFHMIN-NEXT: fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w a0, fa0
+; ZVFHMIN-NEXT: sd a0, 56(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: slli s4, s4, 16
+; ZVFHMIN-NEXT: slli s6, s6, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, s4
+; ZVFHMIN-NEXT: fmv.w.x fa4, s6
+; ZVFHMIN-NEXT: fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w a0, fa0
+; ZVFHMIN-NEXT: sd a0, 40(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: slli s9, s9, 16
+; ZVFHMIN-NEXT: slli s5, s5, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, s9
+; ZVFHMIN-NEXT: fmv.w.x fa4, s5
+; ZVFHMIN-NEXT: fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w a0, fa0
+; ZVFHMIN-NEXT: sd a0, 24(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: lhu a0, 232(sp)
+; ZVFHMIN-NEXT: slli a0, a0, 16
+; ZVFHMIN-NEXT: slli s1, s1, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: fmv.w.x fa4, s1
+; ZVFHMIN-NEXT: fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w s1, fa0
+; ZVFHMIN-NEXT: lhu a0, 240(sp)
+; ZVFHMIN-NEXT: slli a0, a0, 16
+; ZVFHMIN-NEXT: slli s8, s8, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: fmv.w.x fa4, s8
+; ZVFHMIN-NEXT: fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w s8, fa0
+; ZVFHMIN-NEXT: lhu a0, 248(sp)
+; ZVFHMIN-NEXT: slli a0, a0, 16
+; ZVFHMIN-NEXT: slli s2, s2, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: fmv.w.x fa4, s2
+; ZVFHMIN-NEXT: fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w s2, fa0
+; ZVFHMIN-NEXT: lhu a0, 256(sp)
+; ZVFHMIN-NEXT: slli a0, a0, 16
+; ZVFHMIN-NEXT: slli s11, s11, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: fmv.w.x fa4, s11
+; ZVFHMIN-NEXT: fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w s11, fa0
+; ZVFHMIN-NEXT: lhu a0, 264(sp)
+; ZVFHMIN-NEXT: slli a0, a0, 16
+; ZVFHMIN-NEXT: slli s7, s7, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: fmv.w.x fa4, s7
+; ZVFHMIN-NEXT: fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w s7, fa0
+; ZVFHMIN-NEXT: lhu a0, 272(sp)
+; ZVFHMIN-NEXT: slli a0, a0, 16
+; ZVFHMIN-NEXT: ld a1, 8(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: slli a1, a1, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: fmv.w.x fa4, a1
+; ZVFHMIN-NEXT: fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w s6, fa0
+; ZVFHMIN-NEXT: lhu a0, 280(sp)
+; ZVFHMIN-NEXT: slli a0, a0, 16
+; ZVFHMIN-NEXT: ld a1, 16(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: slli a1, a1, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: fmv.w.x fa4, a1
+; ZVFHMIN-NEXT: fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w s3, fa0
+; ZVFHMIN-NEXT: lhu a0, 288(sp)
+; ZVFHMIN-NEXT: slli a0, a0, 16
+; ZVFHMIN-NEXT: ld a1, 32(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: slli a1, a1, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: fmv.w.x fa4, a1
+; ZVFHMIN-NEXT: fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w s4, fa0
+; ZVFHMIN-NEXT: lhu a0, 296(sp)
+; ZVFHMIN-NEXT: slli a0, a0, 16
+; ZVFHMIN-NEXT: ld a1, 48(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: slli a1, a1, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: fmv.w.x fa4, a1
+; ZVFHMIN-NEXT: fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w s5, fa0
+; ZVFHMIN-NEXT: lhu a0, 304(sp)
+; ZVFHMIN-NEXT: slli a0, a0, 16
+; ZVFHMIN-NEXT: ld a1, 64(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: slli a1, a1, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: fmv.w.x fa4, a1
+; ZVFHMIN-NEXT: fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w s10, fa0
+; ZVFHMIN-NEXT: lhu a0, 312(sp)
+; ZVFHMIN-NEXT: slli a0, a0, 16
+; ZVFHMIN-NEXT: ld a1, 80(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: slli a1, a1, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: fmv.w.x fa4, a1
+; ZVFHMIN-NEXT: fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w s9, fa0
+; ZVFHMIN-NEXT: lhu a0, 320(sp)
+; ZVFHMIN-NEXT: slli a0, a0, 16
+; ZVFHMIN-NEXT: lhu a1, 192(sp)
+; ZVFHMIN-NEXT: slli a1, a1, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: fmv.w.x fa4, a1
+; ZVFHMIN-NEXT: fmax.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w a0, fa0
+; ZVFHMIN-NEXT: sh s7, 16(s0)
+; ZVFHMIN-NEXT: sh s6, 18(s0)
+; ZVFHMIN-NEXT: sh s3, 20(s0)
+; ZVFHMIN-NEXT: sh s4, 22(s0)
+; ZVFHMIN-NEXT: sh s1, 8(s0)
+; ZVFHMIN-NEXT: sh s8, 10(s0)
+; ZVFHMIN-NEXT: sh s2, 12(s0)
+; ZVFHMIN-NEXT: sh s11, 14(s0)
+; ZVFHMIN-NEXT: sh s5, 24(s0)
+; ZVFHMIN-NEXT: sh s10, 26(s0)
+; ZVFHMIN-NEXT: sh s9, 28(s0)
+; ZVFHMIN-NEXT: sh a0, 30(s0)
+; ZVFHMIN-NEXT: ld a0, 72(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: sh a0, 0(s0)
+; ZVFHMIN-NEXT: ld a0, 56(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: sh a0, 2(s0)
+; ZVFHMIN-NEXT: ld a0, 40(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: sh a0, 4(s0)
+; ZVFHMIN-NEXT: ld a0, 24(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: sh a0, 6(s0)
+; ZVFHMIN-NEXT: ld ra, 184(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s0, 176(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s1, 168(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s2, 160(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s3, 152(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s4, 144(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s5, 136(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s6, 128(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s7, 120(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s8, 112(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s9, 104(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s10, 96(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s11, 88(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: .cfi_restore ra
+; ZVFHMIN-NEXT: .cfi_restore s0
+; ZVFHMIN-NEXT: .cfi_restore s1
+; ZVFHMIN-NEXT: .cfi_restore s2
+; ZVFHMIN-NEXT: .cfi_restore s3
+; ZVFHMIN-NEXT: .cfi_restore s4
+; ZVFHMIN-NEXT: .cfi_restore s5
+; ZVFHMIN-NEXT: .cfi_restore s6
+; ZVFHMIN-NEXT: .cfi_restore s7
+; ZVFHMIN-NEXT: .cfi_restore s8
+; ZVFHMIN-NEXT: .cfi_restore s9
+; ZVFHMIN-NEXT: .cfi_restore s10
+; ZVFHMIN-NEXT: .cfi_restore s11
+; ZVFHMIN-NEXT: addi sp, sp, 192
+; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: max_v16bf16:
+; ZVFBFA: # %bb.0: # %entry
+; ZVFBFA-NEXT: vsetivli zero, 16, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v10
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v12, v16, v12
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: ret
+entry:
+ %c = call <16 x bfloat> @llvm.maximumnum.v16bf16(<16 x bfloat> %a, <16 x bfloat> %b)
+ ret <16 x bfloat> %c
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll
index a95177a1de9a6..bcd86e5237918 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll
@@ -1,12 +1,236 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA
+
+define <2 x bfloat> @vfmin_v2bf16_vv(<2 x bfloat> %a, <2 x bfloat> %b) {
+; ZVFH-LABEL: vfmin_v2bf16_vv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v10, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFH-NEXT: vmfeq.vv v0, v10, v10
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v8, v9
+; ZVFH-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFH-NEXT: vmerge.vvm v9, v10, v8, v0
+; ZVFH-NEXT: vmfeq.vv v0, v8, v8
+; ZVFH-NEXT: vmerge.vvm v8, v8, v10, v0
+; ZVFH-NEXT: vfmin.vv v9, v8, v9
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_v2bf16_vv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v8, v9
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0
+; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8
+; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0
+; ZVFHMIN-NEXT: vfmin.vv v9, v8, v9
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_v2bf16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 2, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vmfeq.vv v0, v10, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vmerge.vvm v9, v10, v8, v0
+; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT: vmerge.vvm v8, v8, v10, v0
+; ZVFBFA-NEXT: vfmin.vv v9, v8, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
+ %v = call <2 x bfloat> @llvm.minimum.v2bf16(<2 x bfloat> %a, <2 x bfloat> %b)
+ ret <2 x bfloat> %v
+}
+
+define <4 x bfloat> @vfmin_v4bf16_vv(<4 x bfloat> %a, <4 x bfloat> %b) {
+; ZVFH-LABEL: vfmin_v4bf16_vv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v10, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFH-NEXT: vmfeq.vv v0, v10, v10
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v8, v9
+; ZVFH-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFH-NEXT: vmerge.vvm v9, v10, v8, v0
+; ZVFH-NEXT: vmfeq.vv v0, v8, v8
+; ZVFH-NEXT: vmerge.vvm v8, v8, v10, v0
+; ZVFH-NEXT: vfmin.vv v9, v8, v9
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_v4bf16_vv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v8, v9
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0
+; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8
+; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0
+; ZVFHMIN-NEXT: vfmin.vv v9, v8, v9
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_v4bf16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 4, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT: vmfeq.vv v0, v10, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT: vmerge.vvm v9, v10, v8, v0
+; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT: vmerge.vvm v8, v8, v10, v0
+; ZVFBFA-NEXT: vfmin.vv v9, v8, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
+ %v = call <4 x bfloat> @llvm.minimum.v4bf16(<4 x bfloat> %a, <4 x bfloat> %b)
+ ret <4 x bfloat> %v
+}
+
+define <8 x bfloat> @vfmin_v8bf16_vv(<8 x bfloat> %a, <8 x bfloat> %b) {
+; ZVFH-LABEL: vfmin_v8bf16_vv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v10, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFH-NEXT: vmfeq.vv v0, v10, v10
+; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v12, v9
+; ZVFH-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFH-NEXT: vmerge.vvm v8, v10, v12, v0
+; ZVFH-NEXT: vmfeq.vv v0, v12, v12
+; ZVFH-NEXT: vmerge.vvm v10, v12, v10, v0
+; ZVFH-NEXT: vfmin.vv v10, v10, v8
+; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v10
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_v8bf16_vv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v12, v9
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v12, v0
+; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12
+; ZVFHMIN-NEXT: vmerge.vvm v10, v12, v10, v0
+; ZVFHMIN-NEXT: vfmin.vv v10, v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v10
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_v8bf16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 8, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT: vmfeq.vv v0, v10, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT: vmerge.vvm v8, v10, v12, v0
+; ZVFBFA-NEXT: vmfeq.vv v0, v12, v12
+; ZVFBFA-NEXT: vmerge.vvm v10, v12, v10, v0
+; ZVFBFA-NEXT: vfmin.vv v10, v10, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT: ret
+ %v = call <8 x bfloat> @llvm.minimum.v8bf16(<8 x bfloat> %a, <8 x bfloat> %b)
+ ret <8 x bfloat> %v
+}
+
+define <16 x bfloat> @vfmin_v16bf16_vv(<16 x bfloat> %a, <16 x bfloat> %b) {
+; ZVFH-LABEL: vfmin_v16bf16_vv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v12, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFH-NEXT: vmfeq.vv v0, v12, v12
+; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v16, v10
+; ZVFH-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFH-NEXT: vmerge.vvm v8, v12, v16, v0
+; ZVFH-NEXT: vmfeq.vv v0, v16, v16
+; ZVFH-NEXT: vmerge.vvm v12, v16, v12, v0
+; ZVFH-NEXT: vfmin.vv v12, v12, v8
+; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v12
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_v16bf16_vv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v12, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v16, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v16, v0
+; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16
+; ZVFHMIN-NEXT: vmerge.vvm v12, v16, v12, v0
+; ZVFHMIN-NEXT: vfmin.vv v12, v12, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v12
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_v16bf16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 16, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vmfeq.vv v0, v12, v12
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vmerge.vvm v8, v12, v16, v0
+; ZVFBFA-NEXT: vmfeq.vv v0, v16, v16
+; ZVFBFA-NEXT: vmerge.vvm v12, v16, v12, v0
+; ZVFBFA-NEXT: vfmin.vv v12, v12, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: ret
+ %v = call <16 x bfloat> @llvm.minimum.v16bf16(<16 x bfloat> %a, <16 x bfloat> %b)
+ ret <16 x bfloat> %v
+}
define <2 x half> @vfmin_v2f16_vv(<2 x half> %a, <2 x half> %b) {
; ZVFH-LABEL: vfmin_v2f16_vv:
@@ -35,6 +259,23 @@ define <2 x half> @vfmin_v2f16_vv(<2 x half> %a, <2 x half> %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_v2f16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vmfeq.vv v0, v10, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vmerge.vvm v9, v10, v8, v0
+; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT: vmerge.vvm v8, v8, v10, v0
+; ZVFBFA-NEXT: vfmin.vv v9, v8, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%v = call <2 x half> @llvm.minimum.v2f16(<2 x half> %a, <2 x half> %b)
ret <2 x half> %v
}
@@ -66,6 +307,23 @@ define <4 x half> @vfmin_v4f16_vv(<4 x half> %a, <4 x half> %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_v4f16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT: vmfeq.vv v0, v10, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT: vmerge.vvm v9, v10, v8, v0
+; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT: vmerge.vvm v8, v8, v10, v0
+; ZVFBFA-NEXT: vfmin.vv v9, v8, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%v = call <4 x half> @llvm.minimum.v4f16(<4 x half> %a, <4 x half> %b)
ret <4 x half> %v
}
@@ -97,6 +355,23 @@ define <8 x half> @vfmin_v8f16_vv(<8 x half> %a, <8 x half> %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_v8f16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT: vmfeq.vv v0, v10, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT: vmerge.vvm v8, v10, v12, v0
+; ZVFBFA-NEXT: vmfeq.vv v0, v12, v12
+; ZVFBFA-NEXT: vmerge.vvm v10, v12, v10, v0
+; ZVFBFA-NEXT: vfmin.vv v10, v10, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT: ret
%v = call <8 x half> @llvm.minimum.v8f16(<8 x half> %a, <8 x half> %b)
ret <8 x half> %v
}
@@ -128,6 +403,23 @@ define <16 x half> @vfmin_v16f16_vv(<16 x half> %a, <16 x half> %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_v16f16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vmfeq.vv v0, v12, v12
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vmerge.vvm v8, v12, v16, v0
+; ZVFBFA-NEXT: vmfeq.vv v0, v16, v16
+; ZVFBFA-NEXT: vmerge.vvm v12, v16, v12, v0
+; ZVFBFA-NEXT: vfmin.vv v12, v12, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: ret
%v = call <16 x half> @llvm.minimum.v16f16(<16 x half> %a, <16 x half> %b)
ret <16 x half> %v
}
@@ -262,6 +554,17 @@ define <2 x half> @vfmin_v2f16_vv_nnan(<2 x half> %a, <2 x half> %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_v2f16_vv_nnan:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v9, v9, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%v = call nnan <2 x half> @llvm.minimum.v2f16(<2 x half> %a, <2 x half> %b)
ret <2 x half> %v
}
@@ -295,6 +598,26 @@ define <2 x half> @vfmin_v2f16_vv_nnana(<2 x half> %a, <2 x half> %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_v2f16_vv_nnana:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vfadd.vv v9, v10, v10
+; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v10, v9
+; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vmerge.vvm v10, v8, v9, v0
+; ZVFBFA-NEXT: vmfeq.vv v0, v9, v9
+; ZVFBFA-NEXT: vmerge.vvm v8, v9, v8, v0
+; ZVFBFA-NEXT: vfmin.vv v9, v10, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%c = fadd nnan <2 x half> %a, %a
%v = call <2 x half> @llvm.minimum.v2f16(<2 x half> %c, <2 x half> %b)
ret <2 x half> %v
@@ -329,6 +652,26 @@ define <2 x half> @vfmin_v2f16_vv_nnanb(<2 x half> %a, <2 x half> %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_v2f16_vv_nnanb:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vfadd.vv v8, v10, v10
+; ZVFBFA-NEXT: vmfeq.vv v0, v9, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v10, v8
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vmerge.vvm v10, v9, v8, v0
+; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT: vmerge.vvm v8, v8, v9, v0
+; ZVFBFA-NEXT: vfmin.vv v9, v8, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%c = fadd nnan <2 x half> %b, %b
%v = call <2 x half> @llvm.minimum.v2f16(<2 x half> %a, <2 x half> %c)
ret <2 x half> %v
@@ -373,6 +716,33 @@ define <4 x half> @vfmin_v2f16_vv_nnan_insert_subvector(<2 x half> %a, <2 x half
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_v2f16_vv_nnan_insert_subvector:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v11, v8
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vfadd.vv v9, v11, v11
+; ZVFBFA-NEXT: vfadd.vv v8, v8, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v11, v9
+; ZVFBFA-NEXT: vfncvt.f.f.w v9, v8
+; ZVFBFA-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vslideup.vi v11, v9, 2
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v11
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT: vmerge.vvm v10, v8, v9, v0
+; ZVFBFA-NEXT: vmfeq.vv v0, v9, v9
+; ZVFBFA-NEXT: vmerge.vvm v8, v9, v8, v0
+; ZVFBFA-NEXT: vfmin.vv v9, v8, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%d = fadd nnan <2 x half> %a, %a
%e = fadd nnan <2 x half> %b, %b
%f = call <4 x half> @llvm.vector.insert.v2f32.v4f32(<4 x half> poison, <2 x half> %d, i64 0)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimumnum.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimumnum.ll
index 36114d56aa0d6..eed806da19601 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimumnum.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimumnum.ll
@@ -1,6 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc --mtriple=riscv64-linux-gnu --mattr=+v,+zvfh < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc --mtriple=riscv64-linux-gnu --mattr=+v,+zvfhmin,+zfh < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA
define <2 x double> @min_v2f64(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: min_v2f64:
@@ -107,6 +111,17 @@ define <2 x half> @min_v2f16(<2 x half> %a, <2 x half> %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: min_v2f16:
+; ZVFBFA: # %bb.0: # %entry
+; ZVFBFA-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v9, v9, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
entry:
%c = call <2 x half> @llvm.minimumnum.v2f16(<2 x half> %a, <2 x half> %b)
ret <2 x half> %c
@@ -129,6 +144,17 @@ define <4 x half> @min_v4f16(<4 x half> %a, <4 x half> %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: min_v4f16:
+; ZVFBFA: # %bb.0: # %entry
+; ZVFBFA-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v9, v9, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
entry:
%c = call <4 x half> @llvm.minimumnum.v4f16(<4 x half> %a, <4 x half> %b)
ret <4 x half> %c
@@ -151,6 +177,17 @@ define <8 x half> @min_v8f16(<8 x half> %a, <8 x half> %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: min_v8f16:
+; ZVFBFA: # %bb.0: # %entry
+; ZVFBFA-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v10, v12, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT: ret
entry:
%c = call <8 x half> @llvm.minimumnum.v8f16(<8 x half> %a, <8 x half> %b)
ret <8 x half> %c
@@ -173,6 +210,17 @@ define <9 x half> @min_v9f16(<9 x half> %a, <9 x half> %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: min_v9f16:
+; ZVFBFA: # %bb.0: # %entry
+; ZVFBFA-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v10
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v12, v16, v12
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: ret
entry:
%c = call <9 x half> @llvm.minimumnum.v9f16(<9 x half> %a, <9 x half> %b)
ret <9 x half> %c
@@ -195,7 +243,1496 @@ define <16 x half> @min_v16f16(<16 x half> %a, <16 x half> %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: min_v16f16:
+; ZVFBFA: # %bb.0: # %entry
+; ZVFBFA-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v10
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v12, v16, v12
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: ret
entry:
%c = call <16 x half> @llvm.minimumnum.v16f16(<16 x half> %a, <16 x half> %b)
ret <16 x half> %c
}
+
+define <2 x bfloat> @min_v2bf16(<2 x bfloat> %a, <2 x bfloat> %b) {
+; ZVFH-LABEL: min_v2bf16:
+; ZVFH: # %bb.0: # %entry
+; ZVFH-NEXT: addi sp, sp, -32
+; ZVFH-NEXT: .cfi_def_cfa_offset 32
+; ZVFH-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: .cfi_offset ra, -8
+; ZVFH-NEXT: .cfi_offset s0, -16
+; ZVFH-NEXT: .cfi_offset s1, -24
+; ZVFH-NEXT: .cfi_offset s2, -32
+; ZVFH-NEXT: fmv.x.w s0, fa0
+; ZVFH-NEXT: fmv.x.w s1, fa2
+; ZVFH-NEXT: fmv.x.w a0, fa1
+; ZVFH-NEXT: fmv.x.w a1, fa3
+; ZVFH-NEXT: slli a1, a1, 16
+; ZVFH-NEXT: slli a0, a0, 16
+; ZVFH-NEXT: fmv.w.x fa5, a1
+; ZVFH-NEXT: fmv.w.x fa4, a0
+; ZVFH-NEXT: fmin.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w a0, fa0
+; ZVFH-NEXT: slli s1, s1, 16
+; ZVFH-NEXT: slli s0, s0, 16
+; ZVFH-NEXT: slli s2, a0, 16
+; ZVFH-NEXT: fmv.w.x fa5, s1
+; ZVFH-NEXT: fmv.w.x fa4, s0
+; ZVFH-NEXT: fmin.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w a0, fa0
+; ZVFH-NEXT: slli a0, a0, 48
+; ZVFH-NEXT: srli a0, a0, 48
+; ZVFH-NEXT: or a0, a0, s2
+; ZVFH-NEXT: lui a1, 1048560
+; ZVFH-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; ZVFH-NEXT: vmv.s.x v8, a0
+; ZVFH-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; ZVFH-NEXT: vmv.x.s a0, v8
+; ZVFH-NEXT: vslidedown.vi v8, v8, 1
+; ZVFH-NEXT: or a0, a0, a1
+; ZVFH-NEXT: fmv.w.x fa0, a0
+; ZVFH-NEXT: vmv.x.s a0, v8
+; ZVFH-NEXT: or a0, a0, a1
+; ZVFH-NEXT: fmv.w.x fa1, a0
+; ZVFH-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: .cfi_restore ra
+; ZVFH-NEXT: .cfi_restore s0
+; ZVFH-NEXT: .cfi_restore s1
+; ZVFH-NEXT: .cfi_restore s2
+; ZVFH-NEXT: addi sp, sp, 32
+; ZVFH-NEXT: .cfi_def_cfa_offset 0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: min_v2bf16:
+; ZVFHMIN: # %bb.0: # %entry
+; ZVFHMIN-NEXT: addi sp, sp, -32
+; ZVFHMIN-NEXT: .cfi_def_cfa_offset 32
+; ZVFHMIN-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: .cfi_offset ra, -8
+; ZVFHMIN-NEXT: .cfi_offset s0, -16
+; ZVFHMIN-NEXT: .cfi_offset s1, -24
+; ZVFHMIN-NEXT: .cfi_offset s2, -32
+; ZVFHMIN-NEXT: fmv.x.w s0, fa0
+; ZVFHMIN-NEXT: fmv.x.w s1, fa2
+; ZVFHMIN-NEXT: fmv.x.w a0, fa1
+; ZVFHMIN-NEXT: fmv.x.w a1, fa3
+; ZVFHMIN-NEXT: slli a1, a1, 16
+; ZVFHMIN-NEXT: slli a0, a0, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, a1
+; ZVFHMIN-NEXT: fmv.w.x fa4, a0
+; ZVFHMIN-NEXT: fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w a0, fa0
+; ZVFHMIN-NEXT: slli s1, s1, 16
+; ZVFHMIN-NEXT: slli s0, s0, 16
+; ZVFHMIN-NEXT: slli s2, a0, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, s1
+; ZVFHMIN-NEXT: fmv.w.x fa4, s0
+; ZVFHMIN-NEXT: fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w a0, fa0
+; ZVFHMIN-NEXT: slli a0, a0, 48
+; ZVFHMIN-NEXT: srli a0, a0, 48
+; ZVFHMIN-NEXT: or a0, a0, s2
+; ZVFHMIN-NEXT: lui a1, 1048560
+; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vmv.s.x v8, a0
+; ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vmv.x.s a0, v8
+; ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1
+; ZVFHMIN-NEXT: or a0, a0, a1
+; ZVFHMIN-NEXT: fmv.w.x fa0, a0
+; ZVFHMIN-NEXT: vmv.x.s a0, v8
+; ZVFHMIN-NEXT: or a0, a0, a1
+; ZVFHMIN-NEXT: fmv.w.x fa1, a0
+; ZVFHMIN-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: .cfi_restore ra
+; ZVFHMIN-NEXT: .cfi_restore s0
+; ZVFHMIN-NEXT: .cfi_restore s1
+; ZVFHMIN-NEXT: .cfi_restore s2
+; ZVFHMIN-NEXT: addi sp, sp, 32
+; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: min_v2bf16:
+; ZVFBFA: # %bb.0: # %entry
+; ZVFBFA-NEXT: vsetivli zero, 2, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v9, v9, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
+entry:
+ %c = call <2 x bfloat> @llvm.minimumnum.v2bf16(<2 x bfloat> %a, <2 x bfloat> %b)
+ ret <2 x bfloat> %c
+}
+
+define <4 x bfloat> @min_v4bf16(<4 x bfloat> %a, <4 x bfloat> %b) {
+; ZVFH-LABEL: min_v4bf16:
+; ZVFH: # %bb.0: # %entry
+; ZVFH-NEXT: addi sp, sp, -80
+; ZVFH-NEXT: .cfi_def_cfa_offset 80
+; ZVFH-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s1, 56(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s2, 48(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s3, 40(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s4, 32(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s5, 24(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s6, 16(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s7, 8(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: .cfi_offset ra, -8
+; ZVFH-NEXT: .cfi_offset s0, -16
+; ZVFH-NEXT: .cfi_offset s1, -24
+; ZVFH-NEXT: .cfi_offset s2, -32
+; ZVFH-NEXT: .cfi_offset s3, -40
+; ZVFH-NEXT: .cfi_offset s4, -48
+; ZVFH-NEXT: .cfi_offset s5, -56
+; ZVFH-NEXT: .cfi_offset s6, -64
+; ZVFH-NEXT: .cfi_offset s7, -72
+; ZVFH-NEXT: mv s0, a0
+; ZVFH-NEXT: fmv.x.w s1, fa3
+; ZVFH-NEXT: fmv.x.w s2, fa7
+; ZVFH-NEXT: fmv.x.w s3, fa2
+; ZVFH-NEXT: fmv.x.w s4, fa6
+; ZVFH-NEXT: fmv.x.w s5, fa1
+; ZVFH-NEXT: fmv.x.w s6, fa5
+; ZVFH-NEXT: fmv.x.w a0, fa0
+; ZVFH-NEXT: fmv.x.w a1, fa4
+; ZVFH-NEXT: slli a1, a1, 16
+; ZVFH-NEXT: slli a0, a0, 16
+; ZVFH-NEXT: fmv.w.x fa5, a1
+; ZVFH-NEXT: fmv.w.x fa4, a0
+; ZVFH-NEXT: fmin.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w s7, fa0
+; ZVFH-NEXT: slli s6, s6, 16
+; ZVFH-NEXT: slli s5, s5, 16
+; ZVFH-NEXT: fmv.w.x fa5, s6
+; ZVFH-NEXT: fmv.w.x fa4, s5
+; ZVFH-NEXT: fmin.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w s5, fa0
+; ZVFH-NEXT: slli s4, s4, 16
+; ZVFH-NEXT: slli s3, s3, 16
+; ZVFH-NEXT: fmv.w.x fa5, s4
+; ZVFH-NEXT: fmv.w.x fa4, s3
+; ZVFH-NEXT: fmin.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w s3, fa0
+; ZVFH-NEXT: slli s2, s2, 16
+; ZVFH-NEXT: slli s1, s1, 16
+; ZVFH-NEXT: fmv.w.x fa5, s2
+; ZVFH-NEXT: fmv.w.x fa4, s1
+; ZVFH-NEXT: fmin.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w a0, fa0
+; ZVFH-NEXT: sh s7, 0(s0)
+; ZVFH-NEXT: sh s5, 2(s0)
+; ZVFH-NEXT: sh s3, 4(s0)
+; ZVFH-NEXT: sh a0, 6(s0)
+; ZVFH-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s1, 56(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s2, 48(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s3, 40(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s4, 32(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s5, 24(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s6, 16(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s7, 8(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: .cfi_restore ra
+; ZVFH-NEXT: .cfi_restore s0
+; ZVFH-NEXT: .cfi_restore s1
+; ZVFH-NEXT: .cfi_restore s2
+; ZVFH-NEXT: .cfi_restore s3
+; ZVFH-NEXT: .cfi_restore s4
+; ZVFH-NEXT: .cfi_restore s5
+; ZVFH-NEXT: .cfi_restore s6
+; ZVFH-NEXT: .cfi_restore s7
+; ZVFH-NEXT: addi sp, sp, 80
+; ZVFH-NEXT: .cfi_def_cfa_offset 0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: min_v4bf16:
+; ZVFHMIN: # %bb.0: # %entry
+; ZVFHMIN-NEXT: addi sp, sp, -80
+; ZVFHMIN-NEXT: .cfi_def_cfa_offset 80
+; ZVFHMIN-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s1, 56(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s2, 48(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s3, 40(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s4, 32(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s5, 24(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s6, 16(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s7, 8(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: .cfi_offset ra, -8
+; ZVFHMIN-NEXT: .cfi_offset s0, -16
+; ZVFHMIN-NEXT: .cfi_offset s1, -24
+; ZVFHMIN-NEXT: .cfi_offset s2, -32
+; ZVFHMIN-NEXT: .cfi_offset s3, -40
+; ZVFHMIN-NEXT: .cfi_offset s4, -48
+; ZVFHMIN-NEXT: .cfi_offset s5, -56
+; ZVFHMIN-NEXT: .cfi_offset s6, -64
+; ZVFHMIN-NEXT: .cfi_offset s7, -72
+; ZVFHMIN-NEXT: mv s0, a0
+; ZVFHMIN-NEXT: fmv.x.w s1, fa3
+; ZVFHMIN-NEXT: fmv.x.w s2, fa7
+; ZVFHMIN-NEXT: fmv.x.w s3, fa2
+; ZVFHMIN-NEXT: fmv.x.w s4, fa6
+; ZVFHMIN-NEXT: fmv.x.w s5, fa1
+; ZVFHMIN-NEXT: fmv.x.w s6, fa5
+; ZVFHMIN-NEXT: fmv.x.w a0, fa0
+; ZVFHMIN-NEXT: fmv.x.w a1, fa4
+; ZVFHMIN-NEXT: slli a1, a1, 16
+; ZVFHMIN-NEXT: slli a0, a0, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, a1
+; ZVFHMIN-NEXT: fmv.w.x fa4, a0
+; ZVFHMIN-NEXT: fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w s7, fa0
+; ZVFHMIN-NEXT: slli s6, s6, 16
+; ZVFHMIN-NEXT: slli s5, s5, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, s6
+; ZVFHMIN-NEXT: fmv.w.x fa4, s5
+; ZVFHMIN-NEXT: fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w s5, fa0
+; ZVFHMIN-NEXT: slli s4, s4, 16
+; ZVFHMIN-NEXT: slli s3, s3, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, s4
+; ZVFHMIN-NEXT: fmv.w.x fa4, s3
+; ZVFHMIN-NEXT: fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w s3, fa0
+; ZVFHMIN-NEXT: slli s2, s2, 16
+; ZVFHMIN-NEXT: slli s1, s1, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, s2
+; ZVFHMIN-NEXT: fmv.w.x fa4, s1
+; ZVFHMIN-NEXT: fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w a0, fa0
+; ZVFHMIN-NEXT: sh s7, 0(s0)
+; ZVFHMIN-NEXT: sh s5, 2(s0)
+; ZVFHMIN-NEXT: sh s3, 4(s0)
+; ZVFHMIN-NEXT: sh a0, 6(s0)
+; ZVFHMIN-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s1, 56(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s2, 48(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s3, 40(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s4, 32(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s5, 24(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s6, 16(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s7, 8(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: .cfi_restore ra
+; ZVFHMIN-NEXT: .cfi_restore s0
+; ZVFHMIN-NEXT: .cfi_restore s1
+; ZVFHMIN-NEXT: .cfi_restore s2
+; ZVFHMIN-NEXT: .cfi_restore s3
+; ZVFHMIN-NEXT: .cfi_restore s4
+; ZVFHMIN-NEXT: .cfi_restore s5
+; ZVFHMIN-NEXT: .cfi_restore s6
+; ZVFHMIN-NEXT: .cfi_restore s7
+; ZVFHMIN-NEXT: addi sp, sp, 80
+; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: min_v4bf16:
+; ZVFBFA: # %bb.0: # %entry
+; ZVFBFA-NEXT: vsetivli zero, 4, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v9, v9, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
+entry:
+ %c = call <4 x bfloat> @llvm.minimumnum.v4bf16(<4 x bfloat> %a, <4 x bfloat> %b)
+ ret <4 x bfloat> %c
+}
+
+define <8 x bfloat> @min_v8bf16(<8 x bfloat> %a, <8 x bfloat> %b) {
+; ZVFH-LABEL: min_v8bf16:
+; ZVFH: # %bb.0: # %entry
+; ZVFH-NEXT: addi sp, sp, -128
+; ZVFH-NEXT: .cfi_def_cfa_offset 128
+; ZVFH-NEXT: sd ra, 120(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s0, 112(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s1, 104(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s2, 96(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s3, 88(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s4, 80(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s5, 72(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s6, 64(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s7, 56(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s8, 48(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s9, 40(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s10, 32(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s11, 24(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: .cfi_offset ra, -8
+; ZVFH-NEXT: .cfi_offset s0, -16
+; ZVFH-NEXT: .cfi_offset s1, -24
+; ZVFH-NEXT: .cfi_offset s2, -32
+; ZVFH-NEXT: .cfi_offset s3, -40
+; ZVFH-NEXT: .cfi_offset s4, -48
+; ZVFH-NEXT: .cfi_offset s5, -56
+; ZVFH-NEXT: .cfi_offset s6, -64
+; ZVFH-NEXT: .cfi_offset s7, -72
+; ZVFH-NEXT: .cfi_offset s8, -80
+; ZVFH-NEXT: .cfi_offset s9, -88
+; ZVFH-NEXT: .cfi_offset s10, -96
+; ZVFH-NEXT: .cfi_offset s11, -104
+; ZVFH-NEXT: sd a7, 16(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: mv s2, a6
+; ZVFH-NEXT: mv s3, a5
+; ZVFH-NEXT: mv s4, a4
+; ZVFH-NEXT: mv s5, a3
+; ZVFH-NEXT: mv s6, a2
+; ZVFH-NEXT: mv s0, a0
+; ZVFH-NEXT: fmv.x.w a0, fa7
+; ZVFH-NEXT: sd a0, 8(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: fmv.x.w s10, fa6
+; ZVFH-NEXT: fmv.x.w s11, fa5
+; ZVFH-NEXT: fmv.x.w s9, fa4
+; ZVFH-NEXT: fmv.x.w s1, fa3
+; ZVFH-NEXT: fmv.x.w s7, fa2
+; ZVFH-NEXT: fmv.x.w s8, fa1
+; ZVFH-NEXT: fmv.x.w a0, fa0
+; ZVFH-NEXT: slli a1, a1, 16
+; ZVFH-NEXT: fmv.w.x fa5, a1
+; ZVFH-NEXT: slli a0, a0, 16
+; ZVFH-NEXT: fmv.w.x fa4, a0
+; ZVFH-NEXT: fmin.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w a0, fa0
+; ZVFH-NEXT: sd a0, 0(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: slli s6, s6, 16
+; ZVFH-NEXT: slli s8, s8, 16
+; ZVFH-NEXT: fmv.w.x fa5, s6
+; ZVFH-NEXT: fmv.w.x fa4, s8
+; ZVFH-NEXT: fmin.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w s6, fa0
+; ZVFH-NEXT: slli s5, s5, 16
+; ZVFH-NEXT: slli s7, s7, 16
+; ZVFH-NEXT: fmv.w.x fa5, s5
+; ZVFH-NEXT: fmv.w.x fa4, s7
+; ZVFH-NEXT: fmin.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w s5, fa0
+; ZVFH-NEXT: slli s4, s4, 16
+; ZVFH-NEXT: slli s1, s1, 16
+; ZVFH-NEXT: fmv.w.x fa5, s4
+; ZVFH-NEXT: fmv.w.x fa4, s1
+; ZVFH-NEXT: fmin.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w s4, fa0
+; ZVFH-NEXT: slli s3, s3, 16
+; ZVFH-NEXT: slli s9, s9, 16
+; ZVFH-NEXT: fmv.w.x fa5, s3
+; ZVFH-NEXT: fmv.w.x fa4, s9
+; ZVFH-NEXT: fmin.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w s1, fa0
+; ZVFH-NEXT: slli s2, s2, 16
+; ZVFH-NEXT: slli s11, s11, 16
+; ZVFH-NEXT: fmv.w.x fa5, s2
+; ZVFH-NEXT: fmv.w.x fa4, s11
+; ZVFH-NEXT: fmin.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w s2, fa0
+; ZVFH-NEXT: ld a0, 16(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: slli a0, a0, 16
+; ZVFH-NEXT: slli s10, s10, 16
+; ZVFH-NEXT: fmv.w.x fa5, a0
+; ZVFH-NEXT: fmv.w.x fa4, s10
+; ZVFH-NEXT: fmin.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w s3, fa0
+; ZVFH-NEXT: lhu a0, 128(sp)
+; ZVFH-NEXT: slli a0, a0, 16
+; ZVFH-NEXT: ld a1, 8(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: slli a1, a1, 16
+; ZVFH-NEXT: fmv.w.x fa5, a0
+; ZVFH-NEXT: fmv.w.x fa4, a1
+; ZVFH-NEXT: fmin.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w a0, fa0
+; ZVFH-NEXT: sh s1, 8(s0)
+; ZVFH-NEXT: sh s2, 10(s0)
+; ZVFH-NEXT: sh s3, 12(s0)
+; ZVFH-NEXT: sh a0, 14(s0)
+; ZVFH-NEXT: ld a0, 0(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: sh a0, 0(s0)
+; ZVFH-NEXT: sh s6, 2(s0)
+; ZVFH-NEXT: sh s5, 4(s0)
+; ZVFH-NEXT: sh s4, 6(s0)
+; ZVFH-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s0, 112(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s1, 104(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s2, 96(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s3, 88(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s4, 80(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s5, 72(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s6, 64(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s7, 56(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s8, 48(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s9, 40(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s10, 32(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s11, 24(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: .cfi_restore ra
+; ZVFH-NEXT: .cfi_restore s0
+; ZVFH-NEXT: .cfi_restore s1
+; ZVFH-NEXT: .cfi_restore s2
+; ZVFH-NEXT: .cfi_restore s3
+; ZVFH-NEXT: .cfi_restore s4
+; ZVFH-NEXT: .cfi_restore s5
+; ZVFH-NEXT: .cfi_restore s6
+; ZVFH-NEXT: .cfi_restore s7
+; ZVFH-NEXT: .cfi_restore s8
+; ZVFH-NEXT: .cfi_restore s9
+; ZVFH-NEXT: .cfi_restore s10
+; ZVFH-NEXT: .cfi_restore s11
+; ZVFH-NEXT: addi sp, sp, 128
+; ZVFH-NEXT: .cfi_def_cfa_offset 0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: min_v8bf16:
+; ZVFHMIN: # %bb.0: # %entry
+; ZVFHMIN-NEXT: addi sp, sp, -128
+; ZVFHMIN-NEXT: .cfi_def_cfa_offset 128
+; ZVFHMIN-NEXT: sd ra, 120(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s0, 112(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s1, 104(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s2, 96(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s3, 88(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s4, 80(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s5, 72(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s6, 64(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s7, 56(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s8, 48(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s9, 40(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s10, 32(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s11, 24(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: .cfi_offset ra, -8
+; ZVFHMIN-NEXT: .cfi_offset s0, -16
+; ZVFHMIN-NEXT: .cfi_offset s1, -24
+; ZVFHMIN-NEXT: .cfi_offset s2, -32
+; ZVFHMIN-NEXT: .cfi_offset s3, -40
+; ZVFHMIN-NEXT: .cfi_offset s4, -48
+; ZVFHMIN-NEXT: .cfi_offset s5, -56
+; ZVFHMIN-NEXT: .cfi_offset s6, -64
+; ZVFHMIN-NEXT: .cfi_offset s7, -72
+; ZVFHMIN-NEXT: .cfi_offset s8, -80
+; ZVFHMIN-NEXT: .cfi_offset s9, -88
+; ZVFHMIN-NEXT: .cfi_offset s10, -96
+; ZVFHMIN-NEXT: .cfi_offset s11, -104
+; ZVFHMIN-NEXT: sd a7, 16(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: mv s2, a6
+; ZVFHMIN-NEXT: mv s3, a5
+; ZVFHMIN-NEXT: mv s4, a4
+; ZVFHMIN-NEXT: mv s5, a3
+; ZVFHMIN-NEXT: mv s6, a2
+; ZVFHMIN-NEXT: mv s0, a0
+; ZVFHMIN-NEXT: fmv.x.w a0, fa7
+; ZVFHMIN-NEXT: sd a0, 8(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: fmv.x.w s10, fa6
+; ZVFHMIN-NEXT: fmv.x.w s11, fa5
+; ZVFHMIN-NEXT: fmv.x.w s9, fa4
+; ZVFHMIN-NEXT: fmv.x.w s1, fa3
+; ZVFHMIN-NEXT: fmv.x.w s7, fa2
+; ZVFHMIN-NEXT: fmv.x.w s8, fa1
+; ZVFHMIN-NEXT: fmv.x.w a0, fa0
+; ZVFHMIN-NEXT: slli a1, a1, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, a1
+; ZVFHMIN-NEXT: slli a0, a0, 16
+; ZVFHMIN-NEXT: fmv.w.x fa4, a0
+; ZVFHMIN-NEXT: fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w a0, fa0
+; ZVFHMIN-NEXT: sd a0, 0(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: slli s6, s6, 16
+; ZVFHMIN-NEXT: slli s8, s8, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, s6
+; ZVFHMIN-NEXT: fmv.w.x fa4, s8
+; ZVFHMIN-NEXT: fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w s6, fa0
+; ZVFHMIN-NEXT: slli s5, s5, 16
+; ZVFHMIN-NEXT: slli s7, s7, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, s5
+; ZVFHMIN-NEXT: fmv.w.x fa4, s7
+; ZVFHMIN-NEXT: fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w s5, fa0
+; ZVFHMIN-NEXT: slli s4, s4, 16
+; ZVFHMIN-NEXT: slli s1, s1, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, s4
+; ZVFHMIN-NEXT: fmv.w.x fa4, s1
+; ZVFHMIN-NEXT: fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w s4, fa0
+; ZVFHMIN-NEXT: slli s3, s3, 16
+; ZVFHMIN-NEXT: slli s9, s9, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, s3
+; ZVFHMIN-NEXT: fmv.w.x fa4, s9
+; ZVFHMIN-NEXT: fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w s1, fa0
+; ZVFHMIN-NEXT: slli s2, s2, 16
+; ZVFHMIN-NEXT: slli s11, s11, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, s2
+; ZVFHMIN-NEXT: fmv.w.x fa4, s11
+; ZVFHMIN-NEXT: fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w s2, fa0
+; ZVFHMIN-NEXT: ld a0, 16(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: slli a0, a0, 16
+; ZVFHMIN-NEXT: slli s10, s10, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: fmv.w.x fa4, s10
+; ZVFHMIN-NEXT: fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w s3, fa0
+; ZVFHMIN-NEXT: lhu a0, 128(sp)
+; ZVFHMIN-NEXT: slli a0, a0, 16
+; ZVFHMIN-NEXT: ld a1, 8(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: slli a1, a1, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: fmv.w.x fa4, a1
+; ZVFHMIN-NEXT: fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w a0, fa0
+; ZVFHMIN-NEXT: sh s1, 8(s0)
+; ZVFHMIN-NEXT: sh s2, 10(s0)
+; ZVFHMIN-NEXT: sh s3, 12(s0)
+; ZVFHMIN-NEXT: sh a0, 14(s0)
+; ZVFHMIN-NEXT: ld a0, 0(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: sh a0, 0(s0)
+; ZVFHMIN-NEXT: sh s6, 2(s0)
+; ZVFHMIN-NEXT: sh s5, 4(s0)
+; ZVFHMIN-NEXT: sh s4, 6(s0)
+; ZVFHMIN-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s0, 112(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s1, 104(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s2, 96(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s3, 88(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s4, 80(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s5, 72(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s6, 64(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s7, 56(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s8, 48(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s9, 40(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s10, 32(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s11, 24(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: .cfi_restore ra
+; ZVFHMIN-NEXT: .cfi_restore s0
+; ZVFHMIN-NEXT: .cfi_restore s1
+; ZVFHMIN-NEXT: .cfi_restore s2
+; ZVFHMIN-NEXT: .cfi_restore s3
+; ZVFHMIN-NEXT: .cfi_restore s4
+; ZVFHMIN-NEXT: .cfi_restore s5
+; ZVFHMIN-NEXT: .cfi_restore s6
+; ZVFHMIN-NEXT: .cfi_restore s7
+; ZVFHMIN-NEXT: .cfi_restore s8
+; ZVFHMIN-NEXT: .cfi_restore s9
+; ZVFHMIN-NEXT: .cfi_restore s10
+; ZVFHMIN-NEXT: .cfi_restore s11
+; ZVFHMIN-NEXT: addi sp, sp, 128
+; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: min_v8bf16:
+; ZVFBFA: # %bb.0: # %entry
+; ZVFBFA-NEXT: vsetivli zero, 8, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v10, v12, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT: ret
+entry:
+ %c = call <8 x bfloat> @llvm.minimumnum.v8bf16(<8 x bfloat> %a, <8 x bfloat> %b)
+ ret <8 x bfloat> %c
+}
+
+define <9 x bfloat> @min_v9bf16(<9 x bfloat> %a, <9 x bfloat> %b) {
+; ZVFH-LABEL: min_v9bf16:
+; ZVFH: # %bb.0: # %entry
+; ZVFH-NEXT: addi sp, sp, -128
+; ZVFH-NEXT: .cfi_def_cfa_offset 128
+; ZVFH-NEXT: sd ra, 120(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s0, 112(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s1, 104(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s2, 96(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s3, 88(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s4, 80(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s5, 72(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s6, 64(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s7, 56(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s8, 48(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s9, 40(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s10, 32(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s11, 24(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: .cfi_offset ra, -8
+; ZVFH-NEXT: .cfi_offset s0, -16
+; ZVFH-NEXT: .cfi_offset s1, -24
+; ZVFH-NEXT: .cfi_offset s2, -32
+; ZVFH-NEXT: .cfi_offset s3, -40
+; ZVFH-NEXT: .cfi_offset s4, -48
+; ZVFH-NEXT: .cfi_offset s5, -56
+; ZVFH-NEXT: .cfi_offset s6, -64
+; ZVFH-NEXT: .cfi_offset s7, -72
+; ZVFH-NEXT: .cfi_offset s8, -80
+; ZVFH-NEXT: .cfi_offset s9, -88
+; ZVFH-NEXT: .cfi_offset s10, -96
+; ZVFH-NEXT: .cfi_offset s11, -104
+; ZVFH-NEXT: sd a7, 8(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: mv s3, a6
+; ZVFH-NEXT: mv s5, a4
+; ZVFH-NEXT: mv s4, a3
+; ZVFH-NEXT: mv s6, a2
+; ZVFH-NEXT: sd a1, 16(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: mv s0, a0
+; ZVFH-NEXT: fmv.x.w a0, fa5
+; ZVFH-NEXT: sd a0, 0(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: fmv.x.w s9, fa4
+; ZVFH-NEXT: fmv.x.w s10, fa6
+; ZVFH-NEXT: fmv.x.w s7, fa7
+; ZVFH-NEXT: fmv.x.w s11, fa1
+; ZVFH-NEXT: fmv.x.w s1, fa0
+; ZVFH-NEXT: fmv.x.w s2, fa2
+; ZVFH-NEXT: fmv.x.w a0, fa3
+; ZVFH-NEXT: slli a5, a5, 16
+; ZVFH-NEXT: fmv.w.x fa5, a5
+; ZVFH-NEXT: slli a0, a0, 16
+; ZVFH-NEXT: fmv.w.x fa4, a0
+; ZVFH-NEXT: fmin.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w a0, fa0
+; ZVFH-NEXT: slli s5, s5, 16
+; ZVFH-NEXT: slli s2, s2, 16
+; ZVFH-NEXT: slli s8, a0, 48
+; ZVFH-NEXT: fmv.w.x fa5, s5
+; ZVFH-NEXT: fmv.w.x fa4, s2
+; ZVFH-NEXT: fmin.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w a0, fa0
+; ZVFH-NEXT: slli s6, s6, 16
+; ZVFH-NEXT: slli s1, s1, 16
+; ZVFH-NEXT: slli a0, a0, 48
+; ZVFH-NEXT: fmv.w.x fa5, s6
+; ZVFH-NEXT: fmv.w.x fa4, s1
+; ZVFH-NEXT: srli a0, a0, 16
+; ZVFH-NEXT: or s1, s8, a0
+; ZVFH-NEXT: fmin.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w a0, fa0
+; ZVFH-NEXT: lui s5, 16
+; ZVFH-NEXT: slli s4, s4, 16
+; ZVFH-NEXT: slli s11, s11, 16
+; ZVFH-NEXT: addi s5, s5, -1
+; ZVFH-NEXT: fmv.w.x fa5, s4
+; ZVFH-NEXT: fmv.w.x fa4, s11
+; ZVFH-NEXT: and s2, a0, s5
+; ZVFH-NEXT: fmin.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w a0, fa0
+; ZVFH-NEXT: lhu a1, 136(sp)
+; ZVFH-NEXT: slli a1, a1, 16
+; ZVFH-NEXT: slli s7, s7, 16
+; ZVFH-NEXT: slli a0, a0, 16
+; ZVFH-NEXT: fmv.w.x fa5, a1
+; ZVFH-NEXT: fmv.w.x fa4, s7
+; ZVFH-NEXT: or a0, s2, a0
+; ZVFH-NEXT: slli a0, a0, 32
+; ZVFH-NEXT: srli a0, a0, 32
+; ZVFH-NEXT: or s4, a0, s1
+; ZVFH-NEXT: fmin.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w a0, fa0
+; ZVFH-NEXT: lhu a1, 128(sp)
+; ZVFH-NEXT: slli a1, a1, 16
+; ZVFH-NEXT: slli s10, s10, 16
+; ZVFH-NEXT: slli s1, a0, 48
+; ZVFH-NEXT: fmv.w.x fa5, a1
+; ZVFH-NEXT: fmv.w.x fa4, s10
+; ZVFH-NEXT: fmin.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w a0, fa0
+; ZVFH-NEXT: slli s3, s3, 16
+; ZVFH-NEXT: slli s9, s9, 16
+; ZVFH-NEXT: slli a0, a0, 48
+; ZVFH-NEXT: fmv.w.x fa5, s3
+; ZVFH-NEXT: fmv.w.x fa4, s9
+; ZVFH-NEXT: srli s2, a0, 16
+; ZVFH-NEXT: fmin.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w a0, fa0
+; ZVFH-NEXT: ld a1, 8(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: slli a1, a1, 16
+; ZVFH-NEXT: ld a2, 0(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: slli a2, a2, 16
+; ZVFH-NEXT: and s3, a0, s5
+; ZVFH-NEXT: fmv.w.x fa5, a1
+; ZVFH-NEXT: fmv.w.x fa4, a2
+; ZVFH-NEXT: fmin.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w a0, fa0
+; ZVFH-NEXT: lhu a1, 144(sp)
+; ZVFH-NEXT: slli a1, a1, 16
+; ZVFH-NEXT: ld a2, 16(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: slli a2, a2, 16
+; ZVFH-NEXT: slli a0, a0, 16
+; ZVFH-NEXT: fmv.w.x fa5, a1
+; ZVFH-NEXT: fmv.w.x fa4, a2
+; ZVFH-NEXT: or a0, s3, a0
+; ZVFH-NEXT: slli a0, a0, 32
+; ZVFH-NEXT: srli a0, a0, 32
+; ZVFH-NEXT: or a0, a0, s1
+; ZVFH-NEXT: or s1, a0, s2
+; ZVFH-NEXT: fmin.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w a0, fa0
+; ZVFH-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; ZVFH-NEXT: vmv.v.x v8, s1
+; ZVFH-NEXT: sh a0, 16(s0)
+; ZVFH-NEXT: vsetvli zero, zero, e64, m2, tu, ma
+; ZVFH-NEXT: vmv.s.x v8, s4
+; ZVFH-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; ZVFH-NEXT: vse64.v v8, (s0)
+; ZVFH-NEXT: vslidedown.vi v8, v8, 1
+; ZVFH-NEXT: addi s0, s0, 8
+; ZVFH-NEXT: vse64.v v8, (s0)
+; ZVFH-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s0, 112(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s1, 104(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s2, 96(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s3, 88(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s4, 80(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s5, 72(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s6, 64(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s7, 56(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s8, 48(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s9, 40(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s10, 32(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s11, 24(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: .cfi_restore ra
+; ZVFH-NEXT: .cfi_restore s0
+; ZVFH-NEXT: .cfi_restore s1
+; ZVFH-NEXT: .cfi_restore s2
+; ZVFH-NEXT: .cfi_restore s3
+; ZVFH-NEXT: .cfi_restore s4
+; ZVFH-NEXT: .cfi_restore s5
+; ZVFH-NEXT: .cfi_restore s6
+; ZVFH-NEXT: .cfi_restore s7
+; ZVFH-NEXT: .cfi_restore s8
+; ZVFH-NEXT: .cfi_restore s9
+; ZVFH-NEXT: .cfi_restore s10
+; ZVFH-NEXT: .cfi_restore s11
+; ZVFH-NEXT: addi sp, sp, 128
+; ZVFH-NEXT: .cfi_def_cfa_offset 0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: min_v9bf16:
+; ZVFHMIN: # %bb.0: # %entry
+; ZVFHMIN-NEXT: addi sp, sp, -128
+; ZVFHMIN-NEXT: .cfi_def_cfa_offset 128
+; ZVFHMIN-NEXT: sd ra, 120(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s0, 112(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s1, 104(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s2, 96(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s3, 88(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s4, 80(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s5, 72(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s6, 64(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s7, 56(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s8, 48(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s9, 40(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s10, 32(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s11, 24(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: .cfi_offset ra, -8
+; ZVFHMIN-NEXT: .cfi_offset s0, -16
+; ZVFHMIN-NEXT: .cfi_offset s1, -24
+; ZVFHMIN-NEXT: .cfi_offset s2, -32
+; ZVFHMIN-NEXT: .cfi_offset s3, -40
+; ZVFHMIN-NEXT: .cfi_offset s4, -48
+; ZVFHMIN-NEXT: .cfi_offset s5, -56
+; ZVFHMIN-NEXT: .cfi_offset s6, -64
+; ZVFHMIN-NEXT: .cfi_offset s7, -72
+; ZVFHMIN-NEXT: .cfi_offset s8, -80
+; ZVFHMIN-NEXT: .cfi_offset s9, -88
+; ZVFHMIN-NEXT: .cfi_offset s10, -96
+; ZVFHMIN-NEXT: .cfi_offset s11, -104
+; ZVFHMIN-NEXT: sd a7, 8(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: mv s3, a6
+; ZVFHMIN-NEXT: mv s5, a4
+; ZVFHMIN-NEXT: mv s4, a3
+; ZVFHMIN-NEXT: mv s6, a2
+; ZVFHMIN-NEXT: sd a1, 16(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: mv s0, a0
+; ZVFHMIN-NEXT: fmv.x.w a0, fa5
+; ZVFHMIN-NEXT: sd a0, 0(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: fmv.x.w s9, fa4
+; ZVFHMIN-NEXT: fmv.x.w s10, fa6
+; ZVFHMIN-NEXT: fmv.x.w s7, fa7
+; ZVFHMIN-NEXT: fmv.x.w s11, fa1
+; ZVFHMIN-NEXT: fmv.x.w s1, fa0
+; ZVFHMIN-NEXT: fmv.x.w s2, fa2
+; ZVFHMIN-NEXT: fmv.x.w a0, fa3
+; ZVFHMIN-NEXT: slli a5, a5, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, a5
+; ZVFHMIN-NEXT: slli a0, a0, 16
+; ZVFHMIN-NEXT: fmv.w.x fa4, a0
+; ZVFHMIN-NEXT: fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w a0, fa0
+; ZVFHMIN-NEXT: slli s5, s5, 16
+; ZVFHMIN-NEXT: slli s2, s2, 16
+; ZVFHMIN-NEXT: slli s8, a0, 48
+; ZVFHMIN-NEXT: fmv.w.x fa5, s5
+; ZVFHMIN-NEXT: fmv.w.x fa4, s2
+; ZVFHMIN-NEXT: fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w a0, fa0
+; ZVFHMIN-NEXT: slli s6, s6, 16
+; ZVFHMIN-NEXT: slli s1, s1, 16
+; ZVFHMIN-NEXT: slli a0, a0, 48
+; ZVFHMIN-NEXT: fmv.w.x fa5, s6
+; ZVFHMIN-NEXT: fmv.w.x fa4, s1
+; ZVFHMIN-NEXT: srli a0, a0, 16
+; ZVFHMIN-NEXT: or s1, s8, a0
+; ZVFHMIN-NEXT: fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w a0, fa0
+; ZVFHMIN-NEXT: lui s5, 16
+; ZVFHMIN-NEXT: slli s4, s4, 16
+; ZVFHMIN-NEXT: slli s11, s11, 16
+; ZVFHMIN-NEXT: addi s5, s5, -1
+; ZVFHMIN-NEXT: fmv.w.x fa5, s4
+; ZVFHMIN-NEXT: fmv.w.x fa4, s11
+; ZVFHMIN-NEXT: and s2, a0, s5
+; ZVFHMIN-NEXT: fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w a0, fa0
+; ZVFHMIN-NEXT: lhu a1, 136(sp)
+; ZVFHMIN-NEXT: slli a1, a1, 16
+; ZVFHMIN-NEXT: slli s7, s7, 16
+; ZVFHMIN-NEXT: slli a0, a0, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, a1
+; ZVFHMIN-NEXT: fmv.w.x fa4, s7
+; ZVFHMIN-NEXT: or a0, s2, a0
+; ZVFHMIN-NEXT: slli a0, a0, 32
+; ZVFHMIN-NEXT: srli a0, a0, 32
+; ZVFHMIN-NEXT: or s4, a0, s1
+; ZVFHMIN-NEXT: fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w a0, fa0
+; ZVFHMIN-NEXT: lhu a1, 128(sp)
+; ZVFHMIN-NEXT: slli a1, a1, 16
+; ZVFHMIN-NEXT: slli s10, s10, 16
+; ZVFHMIN-NEXT: slli s1, a0, 48
+; ZVFHMIN-NEXT: fmv.w.x fa5, a1
+; ZVFHMIN-NEXT: fmv.w.x fa4, s10
+; ZVFHMIN-NEXT: fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w a0, fa0
+; ZVFHMIN-NEXT: slli s3, s3, 16
+; ZVFHMIN-NEXT: slli s9, s9, 16
+; ZVFHMIN-NEXT: slli a0, a0, 48
+; ZVFHMIN-NEXT: fmv.w.x fa5, s3
+; ZVFHMIN-NEXT: fmv.w.x fa4, s9
+; ZVFHMIN-NEXT: srli s2, a0, 16
+; ZVFHMIN-NEXT: fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w a0, fa0
+; ZVFHMIN-NEXT: ld a1, 8(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: slli a1, a1, 16
+; ZVFHMIN-NEXT: ld a2, 0(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: slli a2, a2, 16
+; ZVFHMIN-NEXT: and s3, a0, s5
+; ZVFHMIN-NEXT: fmv.w.x fa5, a1
+; ZVFHMIN-NEXT: fmv.w.x fa4, a2
+; ZVFHMIN-NEXT: fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w a0, fa0
+; ZVFHMIN-NEXT: lhu a1, 144(sp)
+; ZVFHMIN-NEXT: slli a1, a1, 16
+; ZVFHMIN-NEXT: ld a2, 16(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: slli a2, a2, 16
+; ZVFHMIN-NEXT: slli a0, a0, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, a1
+; ZVFHMIN-NEXT: fmv.w.x fa4, a2
+; ZVFHMIN-NEXT: or a0, s3, a0
+; ZVFHMIN-NEXT: slli a0, a0, 32
+; ZVFHMIN-NEXT: srli a0, a0, 32
+; ZVFHMIN-NEXT: or a0, a0, s1
+; ZVFHMIN-NEXT: or s1, a0, s2
+; ZVFHMIN-NEXT: fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w a0, fa0
+; ZVFHMIN-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v8, s1
+; ZVFHMIN-NEXT: sh a0, 16(s0)
+; ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, tu, ma
+; ZVFHMIN-NEXT: vmv.s.x v8, s4
+; ZVFHMIN-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; ZVFHMIN-NEXT: vse64.v v8, (s0)
+; ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1
+; ZVFHMIN-NEXT: addi s0, s0, 8
+; ZVFHMIN-NEXT: vse64.v v8, (s0)
+; ZVFHMIN-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s0, 112(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s1, 104(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s2, 96(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s3, 88(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s4, 80(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s5, 72(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s6, 64(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s7, 56(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s8, 48(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s9, 40(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s10, 32(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s11, 24(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: .cfi_restore ra
+; ZVFHMIN-NEXT: .cfi_restore s0
+; ZVFHMIN-NEXT: .cfi_restore s1
+; ZVFHMIN-NEXT: .cfi_restore s2
+; ZVFHMIN-NEXT: .cfi_restore s3
+; ZVFHMIN-NEXT: .cfi_restore s4
+; ZVFHMIN-NEXT: .cfi_restore s5
+; ZVFHMIN-NEXT: .cfi_restore s6
+; ZVFHMIN-NEXT: .cfi_restore s7
+; ZVFHMIN-NEXT: .cfi_restore s8
+; ZVFHMIN-NEXT: .cfi_restore s9
+; ZVFHMIN-NEXT: .cfi_restore s10
+; ZVFHMIN-NEXT: .cfi_restore s11
+; ZVFHMIN-NEXT: addi sp, sp, 128
+; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: min_v9bf16:
+; ZVFBFA: # %bb.0: # %entry
+; ZVFBFA-NEXT: vsetivli zero, 16, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v10
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v12, v16, v12
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: ret
+entry:
+ %c = call <9 x bfloat> @llvm.minimumnum.v9bf16(<9 x bfloat> %a, <9 x bfloat> %b)
+ ret <9 x bfloat> %c
+}
+
+define <16 x bfloat> @min_v16bf16(<16 x bfloat> %a, <16 x bfloat> %b) {
+; ZVFH-LABEL: min_v16bf16:
+; ZVFH: # %bb.0: # %entry
+; ZVFH-NEXT: addi sp, sp, -192
+; ZVFH-NEXT: .cfi_def_cfa_offset 192
+; ZVFH-NEXT: sd ra, 184(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s0, 176(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s1, 168(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s2, 160(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s3, 152(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s4, 144(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s5, 136(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s6, 128(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s7, 120(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s8, 112(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s9, 104(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s10, 96(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd s11, 88(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: .cfi_offset ra, -8
+; ZVFH-NEXT: .cfi_offset s0, -16
+; ZVFH-NEXT: .cfi_offset s1, -24
+; ZVFH-NEXT: .cfi_offset s2, -32
+; ZVFH-NEXT: .cfi_offset s3, -40
+; ZVFH-NEXT: .cfi_offset s4, -48
+; ZVFH-NEXT: .cfi_offset s5, -56
+; ZVFH-NEXT: .cfi_offset s6, -64
+; ZVFH-NEXT: .cfi_offset s7, -72
+; ZVFH-NEXT: .cfi_offset s8, -80
+; ZVFH-NEXT: .cfi_offset s9, -88
+; ZVFH-NEXT: .cfi_offset s10, -96
+; ZVFH-NEXT: .cfi_offset s11, -104
+; ZVFH-NEXT: sd a7, 80(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd a6, 64(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd a5, 48(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd a4, 32(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd a3, 16(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: sd a2, 8(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: mv s7, a1
+; ZVFH-NEXT: mv s0, a0
+; ZVFH-NEXT: fmv.x.w s11, fa7
+; ZVFH-NEXT: fmv.x.w s2, fa6
+; ZVFH-NEXT: fmv.x.w s8, fa5
+; ZVFH-NEXT: fmv.x.w s1, fa4
+; ZVFH-NEXT: fmv.x.w a0, fa0
+; ZVFH-NEXT: lhu s9, 224(sp)
+; ZVFH-NEXT: lhu a1, 200(sp)
+; ZVFH-NEXT: slli a0, a0, 16
+; ZVFH-NEXT: fmv.w.x fa5, a0
+; ZVFH-NEXT: lhu s4, 216(sp)
+; ZVFH-NEXT: slli a1, a1, 16
+; ZVFH-NEXT: fmv.w.x fa4, a1
+; ZVFH-NEXT: lhu s3, 208(sp)
+; ZVFH-NEXT: fmin.s fa0, fa5, fa4
+; ZVFH-NEXT: fmv.x.w s5, fa3
+; ZVFH-NEXT: fmv.x.w s6, fa2
+; ZVFH-NEXT: fmv.x.w s10, fa1
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w a0, fa0
+; ZVFH-NEXT: sd a0, 72(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: slli s3, s3, 16
+; ZVFH-NEXT: slli s10, s10, 16
+; ZVFH-NEXT: fmv.w.x fa5, s3
+; ZVFH-NEXT: fmv.w.x fa4, s10
+; ZVFH-NEXT: fmin.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w a0, fa0
+; ZVFH-NEXT: sd a0, 56(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: slli s4, s4, 16
+; ZVFH-NEXT: slli s6, s6, 16
+; ZVFH-NEXT: fmv.w.x fa5, s4
+; ZVFH-NEXT: fmv.w.x fa4, s6
+; ZVFH-NEXT: fmin.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w a0, fa0
+; ZVFH-NEXT: sd a0, 40(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: slli s9, s9, 16
+; ZVFH-NEXT: slli s5, s5, 16
+; ZVFH-NEXT: fmv.w.x fa5, s9
+; ZVFH-NEXT: fmv.w.x fa4, s5
+; ZVFH-NEXT: fmin.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w a0, fa0
+; ZVFH-NEXT: sd a0, 24(sp) # 8-byte Folded Spill
+; ZVFH-NEXT: lhu a0, 232(sp)
+; ZVFH-NEXT: slli a0, a0, 16
+; ZVFH-NEXT: slli s1, s1, 16
+; ZVFH-NEXT: fmv.w.x fa5, a0
+; ZVFH-NEXT: fmv.w.x fa4, s1
+; ZVFH-NEXT: fmin.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w s1, fa0
+; ZVFH-NEXT: lhu a0, 240(sp)
+; ZVFH-NEXT: slli a0, a0, 16
+; ZVFH-NEXT: slli s8, s8, 16
+; ZVFH-NEXT: fmv.w.x fa5, a0
+; ZVFH-NEXT: fmv.w.x fa4, s8
+; ZVFH-NEXT: fmin.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w s8, fa0
+; ZVFH-NEXT: lhu a0, 248(sp)
+; ZVFH-NEXT: slli a0, a0, 16
+; ZVFH-NEXT: slli s2, s2, 16
+; ZVFH-NEXT: fmv.w.x fa5, a0
+; ZVFH-NEXT: fmv.w.x fa4, s2
+; ZVFH-NEXT: fmin.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w s2, fa0
+; ZVFH-NEXT: lhu a0, 256(sp)
+; ZVFH-NEXT: slli a0, a0, 16
+; ZVFH-NEXT: slli s11, s11, 16
+; ZVFH-NEXT: fmv.w.x fa5, a0
+; ZVFH-NEXT: fmv.w.x fa4, s11
+; ZVFH-NEXT: fmin.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w s11, fa0
+; ZVFH-NEXT: lhu a0, 264(sp)
+; ZVFH-NEXT: slli a0, a0, 16
+; ZVFH-NEXT: slli s7, s7, 16
+; ZVFH-NEXT: fmv.w.x fa5, a0
+; ZVFH-NEXT: fmv.w.x fa4, s7
+; ZVFH-NEXT: fmin.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w s7, fa0
+; ZVFH-NEXT: lhu a0, 272(sp)
+; ZVFH-NEXT: slli a0, a0, 16
+; ZVFH-NEXT: ld a1, 8(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: slli a1, a1, 16
+; ZVFH-NEXT: fmv.w.x fa5, a0
+; ZVFH-NEXT: fmv.w.x fa4, a1
+; ZVFH-NEXT: fmin.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w s6, fa0
+; ZVFH-NEXT: lhu a0, 280(sp)
+; ZVFH-NEXT: slli a0, a0, 16
+; ZVFH-NEXT: ld a1, 16(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: slli a1, a1, 16
+; ZVFH-NEXT: fmv.w.x fa5, a0
+; ZVFH-NEXT: fmv.w.x fa4, a1
+; ZVFH-NEXT: fmin.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w s3, fa0
+; ZVFH-NEXT: lhu a0, 288(sp)
+; ZVFH-NEXT: slli a0, a0, 16
+; ZVFH-NEXT: ld a1, 32(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: slli a1, a1, 16
+; ZVFH-NEXT: fmv.w.x fa5, a0
+; ZVFH-NEXT: fmv.w.x fa4, a1
+; ZVFH-NEXT: fmin.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w s4, fa0
+; ZVFH-NEXT: lhu a0, 296(sp)
+; ZVFH-NEXT: slli a0, a0, 16
+; ZVFH-NEXT: ld a1, 48(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: slli a1, a1, 16
+; ZVFH-NEXT: fmv.w.x fa5, a0
+; ZVFH-NEXT: fmv.w.x fa4, a1
+; ZVFH-NEXT: fmin.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w s5, fa0
+; ZVFH-NEXT: lhu a0, 304(sp)
+; ZVFH-NEXT: slli a0, a0, 16
+; ZVFH-NEXT: ld a1, 64(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: slli a1, a1, 16
+; ZVFH-NEXT: fmv.w.x fa5, a0
+; ZVFH-NEXT: fmv.w.x fa4, a1
+; ZVFH-NEXT: fmin.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w s10, fa0
+; ZVFH-NEXT: lhu a0, 312(sp)
+; ZVFH-NEXT: slli a0, a0, 16
+; ZVFH-NEXT: ld a1, 80(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: slli a1, a1, 16
+; ZVFH-NEXT: fmv.w.x fa5, a0
+; ZVFH-NEXT: fmv.w.x fa4, a1
+; ZVFH-NEXT: fmin.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w s9, fa0
+; ZVFH-NEXT: lhu a0, 320(sp)
+; ZVFH-NEXT: slli a0, a0, 16
+; ZVFH-NEXT: lhu a1, 192(sp)
+; ZVFH-NEXT: slli a1, a1, 16
+; ZVFH-NEXT: fmv.w.x fa5, a0
+; ZVFH-NEXT: fmv.w.x fa4, a1
+; ZVFH-NEXT: fmin.s fa0, fa4, fa5
+; ZVFH-NEXT: call __truncsfbf2
+; ZVFH-NEXT: fmv.x.w a0, fa0
+; ZVFH-NEXT: sh s7, 16(s0)
+; ZVFH-NEXT: sh s6, 18(s0)
+; ZVFH-NEXT: sh s3, 20(s0)
+; ZVFH-NEXT: sh s4, 22(s0)
+; ZVFH-NEXT: sh s1, 8(s0)
+; ZVFH-NEXT: sh s8, 10(s0)
+; ZVFH-NEXT: sh s2, 12(s0)
+; ZVFH-NEXT: sh s11, 14(s0)
+; ZVFH-NEXT: sh s5, 24(s0)
+; ZVFH-NEXT: sh s10, 26(s0)
+; ZVFH-NEXT: sh s9, 28(s0)
+; ZVFH-NEXT: sh a0, 30(s0)
+; ZVFH-NEXT: ld a0, 72(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: sh a0, 0(s0)
+; ZVFH-NEXT: ld a0, 56(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: sh a0, 2(s0)
+; ZVFH-NEXT: ld a0, 40(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: sh a0, 4(s0)
+; ZVFH-NEXT: ld a0, 24(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: sh a0, 6(s0)
+; ZVFH-NEXT: ld ra, 184(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s0, 176(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s1, 168(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s2, 160(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s3, 152(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s4, 144(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s5, 136(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s6, 128(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s7, 120(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s8, 112(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s9, 104(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s10, 96(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: ld s11, 88(sp) # 8-byte Folded Reload
+; ZVFH-NEXT: .cfi_restore ra
+; ZVFH-NEXT: .cfi_restore s0
+; ZVFH-NEXT: .cfi_restore s1
+; ZVFH-NEXT: .cfi_restore s2
+; ZVFH-NEXT: .cfi_restore s3
+; ZVFH-NEXT: .cfi_restore s4
+; ZVFH-NEXT: .cfi_restore s5
+; ZVFH-NEXT: .cfi_restore s6
+; ZVFH-NEXT: .cfi_restore s7
+; ZVFH-NEXT: .cfi_restore s8
+; ZVFH-NEXT: .cfi_restore s9
+; ZVFH-NEXT: .cfi_restore s10
+; ZVFH-NEXT: .cfi_restore s11
+; ZVFH-NEXT: addi sp, sp, 192
+; ZVFH-NEXT: .cfi_def_cfa_offset 0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: min_v16bf16:
+; ZVFHMIN: # %bb.0: # %entry
+; ZVFHMIN-NEXT: addi sp, sp, -192
+; ZVFHMIN-NEXT: .cfi_def_cfa_offset 192
+; ZVFHMIN-NEXT: sd ra, 184(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s0, 176(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s1, 168(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s2, 160(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s3, 152(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s4, 144(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s5, 136(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s6, 128(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s7, 120(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s8, 112(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s9, 104(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s10, 96(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd s11, 88(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: .cfi_offset ra, -8
+; ZVFHMIN-NEXT: .cfi_offset s0, -16
+; ZVFHMIN-NEXT: .cfi_offset s1, -24
+; ZVFHMIN-NEXT: .cfi_offset s2, -32
+; ZVFHMIN-NEXT: .cfi_offset s3, -40
+; ZVFHMIN-NEXT: .cfi_offset s4, -48
+; ZVFHMIN-NEXT: .cfi_offset s5, -56
+; ZVFHMIN-NEXT: .cfi_offset s6, -64
+; ZVFHMIN-NEXT: .cfi_offset s7, -72
+; ZVFHMIN-NEXT: .cfi_offset s8, -80
+; ZVFHMIN-NEXT: .cfi_offset s9, -88
+; ZVFHMIN-NEXT: .cfi_offset s10, -96
+; ZVFHMIN-NEXT: .cfi_offset s11, -104
+; ZVFHMIN-NEXT: sd a7, 80(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd a6, 64(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd a5, 48(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd a4, 32(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd a3, 16(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: sd a2, 8(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: mv s7, a1
+; ZVFHMIN-NEXT: mv s0, a0
+; ZVFHMIN-NEXT: fmv.x.w s11, fa7
+; ZVFHMIN-NEXT: fmv.x.w s2, fa6
+; ZVFHMIN-NEXT: fmv.x.w s8, fa5
+; ZVFHMIN-NEXT: fmv.x.w s1, fa4
+; ZVFHMIN-NEXT: fmv.x.w a0, fa0
+; ZVFHMIN-NEXT: lhu s9, 224(sp)
+; ZVFHMIN-NEXT: lhu a1, 200(sp)
+; ZVFHMIN-NEXT: slli a0, a0, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: lhu s4, 216(sp)
+; ZVFHMIN-NEXT: slli a1, a1, 16
+; ZVFHMIN-NEXT: fmv.w.x fa4, a1
+; ZVFHMIN-NEXT: lhu s3, 208(sp)
+; ZVFHMIN-NEXT: fmin.s fa0, fa5, fa4
+; ZVFHMIN-NEXT: fmv.x.w s5, fa3
+; ZVFHMIN-NEXT: fmv.x.w s6, fa2
+; ZVFHMIN-NEXT: fmv.x.w s10, fa1
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w a0, fa0
+; ZVFHMIN-NEXT: sd a0, 72(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: slli s3, s3, 16
+; ZVFHMIN-NEXT: slli s10, s10, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, s3
+; ZVFHMIN-NEXT: fmv.w.x fa4, s10
+; ZVFHMIN-NEXT: fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w a0, fa0
+; ZVFHMIN-NEXT: sd a0, 56(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: slli s4, s4, 16
+; ZVFHMIN-NEXT: slli s6, s6, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, s4
+; ZVFHMIN-NEXT: fmv.w.x fa4, s6
+; ZVFHMIN-NEXT: fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w a0, fa0
+; ZVFHMIN-NEXT: sd a0, 40(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: slli s9, s9, 16
+; ZVFHMIN-NEXT: slli s5, s5, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, s9
+; ZVFHMIN-NEXT: fmv.w.x fa4, s5
+; ZVFHMIN-NEXT: fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w a0, fa0
+; ZVFHMIN-NEXT: sd a0, 24(sp) # 8-byte Folded Spill
+; ZVFHMIN-NEXT: lhu a0, 232(sp)
+; ZVFHMIN-NEXT: slli a0, a0, 16
+; ZVFHMIN-NEXT: slli s1, s1, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: fmv.w.x fa4, s1
+; ZVFHMIN-NEXT: fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w s1, fa0
+; ZVFHMIN-NEXT: lhu a0, 240(sp)
+; ZVFHMIN-NEXT: slli a0, a0, 16
+; ZVFHMIN-NEXT: slli s8, s8, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: fmv.w.x fa4, s8
+; ZVFHMIN-NEXT: fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w s8, fa0
+; ZVFHMIN-NEXT: lhu a0, 248(sp)
+; ZVFHMIN-NEXT: slli a0, a0, 16
+; ZVFHMIN-NEXT: slli s2, s2, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: fmv.w.x fa4, s2
+; ZVFHMIN-NEXT: fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w s2, fa0
+; ZVFHMIN-NEXT: lhu a0, 256(sp)
+; ZVFHMIN-NEXT: slli a0, a0, 16
+; ZVFHMIN-NEXT: slli s11, s11, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: fmv.w.x fa4, s11
+; ZVFHMIN-NEXT: fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w s11, fa0
+; ZVFHMIN-NEXT: lhu a0, 264(sp)
+; ZVFHMIN-NEXT: slli a0, a0, 16
+; ZVFHMIN-NEXT: slli s7, s7, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: fmv.w.x fa4, s7
+; ZVFHMIN-NEXT: fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w s7, fa0
+; ZVFHMIN-NEXT: lhu a0, 272(sp)
+; ZVFHMIN-NEXT: slli a0, a0, 16
+; ZVFHMIN-NEXT: ld a1, 8(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: slli a1, a1, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: fmv.w.x fa4, a1
+; ZVFHMIN-NEXT: fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w s6, fa0
+; ZVFHMIN-NEXT: lhu a0, 280(sp)
+; ZVFHMIN-NEXT: slli a0, a0, 16
+; ZVFHMIN-NEXT: ld a1, 16(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: slli a1, a1, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: fmv.w.x fa4, a1
+; ZVFHMIN-NEXT: fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w s3, fa0
+; ZVFHMIN-NEXT: lhu a0, 288(sp)
+; ZVFHMIN-NEXT: slli a0, a0, 16
+; ZVFHMIN-NEXT: ld a1, 32(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: slli a1, a1, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: fmv.w.x fa4, a1
+; ZVFHMIN-NEXT: fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w s4, fa0
+; ZVFHMIN-NEXT: lhu a0, 296(sp)
+; ZVFHMIN-NEXT: slli a0, a0, 16
+; ZVFHMIN-NEXT: ld a1, 48(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: slli a1, a1, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: fmv.w.x fa4, a1
+; ZVFHMIN-NEXT: fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w s5, fa0
+; ZVFHMIN-NEXT: lhu a0, 304(sp)
+; ZVFHMIN-NEXT: slli a0, a0, 16
+; ZVFHMIN-NEXT: ld a1, 64(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: slli a1, a1, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: fmv.w.x fa4, a1
+; ZVFHMIN-NEXT: fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w s10, fa0
+; ZVFHMIN-NEXT: lhu a0, 312(sp)
+; ZVFHMIN-NEXT: slli a0, a0, 16
+; ZVFHMIN-NEXT: ld a1, 80(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: slli a1, a1, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: fmv.w.x fa4, a1
+; ZVFHMIN-NEXT: fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w s9, fa0
+; ZVFHMIN-NEXT: lhu a0, 320(sp)
+; ZVFHMIN-NEXT: slli a0, a0, 16
+; ZVFHMIN-NEXT: lhu a1, 192(sp)
+; ZVFHMIN-NEXT: slli a1, a1, 16
+; ZVFHMIN-NEXT: fmv.w.x fa5, a0
+; ZVFHMIN-NEXT: fmv.w.x fa4, a1
+; ZVFHMIN-NEXT: fmin.s fa0, fa4, fa5
+; ZVFHMIN-NEXT: call __truncsfbf2
+; ZVFHMIN-NEXT: fmv.x.w a0, fa0
+; ZVFHMIN-NEXT: sh s7, 16(s0)
+; ZVFHMIN-NEXT: sh s6, 18(s0)
+; ZVFHMIN-NEXT: sh s3, 20(s0)
+; ZVFHMIN-NEXT: sh s4, 22(s0)
+; ZVFHMIN-NEXT: sh s1, 8(s0)
+; ZVFHMIN-NEXT: sh s8, 10(s0)
+; ZVFHMIN-NEXT: sh s2, 12(s0)
+; ZVFHMIN-NEXT: sh s11, 14(s0)
+; ZVFHMIN-NEXT: sh s5, 24(s0)
+; ZVFHMIN-NEXT: sh s10, 26(s0)
+; ZVFHMIN-NEXT: sh s9, 28(s0)
+; ZVFHMIN-NEXT: sh a0, 30(s0)
+; ZVFHMIN-NEXT: ld a0, 72(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: sh a0, 0(s0)
+; ZVFHMIN-NEXT: ld a0, 56(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: sh a0, 2(s0)
+; ZVFHMIN-NEXT: ld a0, 40(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: sh a0, 4(s0)
+; ZVFHMIN-NEXT: ld a0, 24(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: sh a0, 6(s0)
+; ZVFHMIN-NEXT: ld ra, 184(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s0, 176(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s1, 168(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s2, 160(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s3, 152(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s4, 144(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s5, 136(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s6, 128(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s7, 120(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s8, 112(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s9, 104(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s10, 96(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: ld s11, 88(sp) # 8-byte Folded Reload
+; ZVFHMIN-NEXT: .cfi_restore ra
+; ZVFHMIN-NEXT: .cfi_restore s0
+; ZVFHMIN-NEXT: .cfi_restore s1
+; ZVFHMIN-NEXT: .cfi_restore s2
+; ZVFHMIN-NEXT: .cfi_restore s3
+; ZVFHMIN-NEXT: .cfi_restore s4
+; ZVFHMIN-NEXT: .cfi_restore s5
+; ZVFHMIN-NEXT: .cfi_restore s6
+; ZVFHMIN-NEXT: .cfi_restore s7
+; ZVFHMIN-NEXT: .cfi_restore s8
+; ZVFHMIN-NEXT: .cfi_restore s9
+; ZVFHMIN-NEXT: .cfi_restore s10
+; ZVFHMIN-NEXT: .cfi_restore s11
+; ZVFHMIN-NEXT: addi sp, sp, 192
+; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: min_v16bf16:
+; ZVFBFA: # %bb.0: # %entry
+; ZVFBFA-NEXT: vsetivli zero, 16, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v10
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v12, v16, v12
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: ret
+entry:
+ %c = call <16 x bfloat> @llvm.minimumnum.v16bf16(<16 x bfloat> %a, <16 x bfloat> %b)
+ ret <16 x bfloat> %c
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax.ll
index 44362efa1fe83..179d88cecc7f8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax.ll
@@ -1,12 +1,516 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA
+
+define <2 x bfloat> @vfmax_v2bf16_vv(<2 x bfloat> %a, <2 x bfloat> %b) {
+; ZVFH-LABEL: vfmax_v2bf16_vv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v10, v9
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v9, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFH-NEXT: vfmax.vv v9, v9, v10
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_v2bf16_vv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfmax.vv v9, v9, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_v2bf16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 2, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v9, v9, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
+ %v = call <2 x bfloat> @llvm.maxnum.v2bf16(<2 x bfloat> %a, <2 x bfloat> %b)
+ ret <2 x bfloat> %v
+}
+
+define <2 x bfloat> @vfmax_v2bf16_vf(<2 x bfloat> %a, bfloat %b) {
+; ZVFH-LABEL: vfmax_v2bf16_vf:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: fmv.x.h a0, fa0
+; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFH-NEXT: vmv.v.x v9, a0
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v10, v8
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v8, v9
+; ZVFH-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFH-NEXT: vfmax.vv v9, v10, v8
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_v2bf16_vf:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v9, a0
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v8, v9
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfmax.vv v9, v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_v2bf16_vf:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 2, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfmv.v.f v9, fa0
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v9, v10, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
+ %head = insertelement <2 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <2 x bfloat> %head, <2 x bfloat> poison, <2 x i32> zeroinitializer
+ %v = call <2 x bfloat> @llvm.maxnum.v2bf16(<2 x bfloat> %a, <2 x bfloat> %splat)
+ ret <2 x bfloat> %v
+}
+
+define <2 x bfloat> @vfmax_v2bf16_fv(<2 x bfloat> %a, bfloat %b) {
+; ZVFH-LABEL: vfmax_v2bf16_fv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: fmv.x.h a0, fa0
+; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFH-NEXT: vmv.v.x v9, a0
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v10, v8
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v8, v9
+; ZVFH-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFH-NEXT: vfmax.vv v9, v8, v10
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_v2bf16_fv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v9, a0
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v8, v9
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfmax.vv v9, v8, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_v2bf16_fv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 2, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfmv.v.f v9, fa0
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v9, v8, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
+ %head = insertelement <2 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <2 x bfloat> %head, <2 x bfloat> poison, <2 x i32> zeroinitializer
+ %v = call <2 x bfloat> @llvm.maxnum.v2bf16(<2 x bfloat> %splat, <2 x bfloat> %a)
+ ret <2 x bfloat> %v
+}
+
+define <4 x bfloat> @vfmax_v4bf16_vv(<4 x bfloat> %a, <4 x bfloat> %b) {
+; ZVFH-LABEL: vfmax_v4bf16_vv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v10, v9
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v9, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFH-NEXT: vfmax.vv v9, v9, v10
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_v4bf16_vv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfmax.vv v9, v9, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_v4bf16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 4, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v9, v9, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
+ %v = call <4 x bfloat> @llvm.maxnum.v4bf16(<4 x bfloat> %a, <4 x bfloat> %b)
+ ret <4 x bfloat> %v
+}
+
+define <4 x bfloat> @vfmax_v4bf16_vf(<4 x bfloat> %a, bfloat %b) {
+; ZVFH-LABEL: vfmax_v4bf16_vf:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: fmv.x.h a0, fa0
+; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFH-NEXT: vmv.v.x v9, a0
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v10, v8
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v8, v9
+; ZVFH-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFH-NEXT: vfmax.vv v9, v10, v8
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_v4bf16_vf:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v9, a0
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v8, v9
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfmax.vv v9, v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_v4bf16_vf:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 4, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfmv.v.f v9, fa0
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v9, v10, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
+ %head = insertelement <4 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <4 x bfloat> %head, <4 x bfloat> poison, <4 x i32> zeroinitializer
+ %v = call <4 x bfloat> @llvm.maxnum.v4bf16(<4 x bfloat> %a, <4 x bfloat> %splat)
+ ret <4 x bfloat> %v
+}
+
+define <4 x bfloat> @vfmax_v4bf16_fv(<4 x bfloat> %a, bfloat %b) {
+; ZVFH-LABEL: vfmax_v4bf16_fv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: fmv.x.h a0, fa0
+; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFH-NEXT: vmv.v.x v9, a0
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v10, v8
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v8, v9
+; ZVFH-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFH-NEXT: vfmax.vv v9, v8, v10
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_v4bf16_fv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v9, a0
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v8, v9
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfmax.vv v9, v8, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_v4bf16_fv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 4, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfmv.v.f v9, fa0
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v9, v8, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
+ %head = insertelement <4 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <4 x bfloat> %head, <4 x bfloat> poison, <4 x i32> zeroinitializer
+ %v = call <4 x bfloat> @llvm.maxnum.v4bf16(<4 x bfloat> %splat, <4 x bfloat> %a)
+ ret <4 x bfloat> %v
+}
+
+define <8 x bfloat> @vfmax_v8bf16_vv(<8 x bfloat> %a, <8 x bfloat> %b) {
+; ZVFH-LABEL: vfmax_v8bf16_vv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v10, v9
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v12, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFH-NEXT: vfmax.vv v10, v12, v10
+; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v10
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_v8bf16_vv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v12, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfmax.vv v10, v12, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v10
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_v8bf16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 8, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v10, v12, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT: ret
+ %v = call <8 x bfloat> @llvm.maxnum.v8bf16(<8 x bfloat> %a, <8 x bfloat> %b)
+ ret <8 x bfloat> %v
+}
+
+define <8 x bfloat> @vfmax_v8bf16_vf(<8 x bfloat> %a, bfloat %b) {
+; ZVFH-LABEL: vfmax_v8bf16_vf:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: fmv.x.h a0, fa0
+; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFH-NEXT: vmv.v.x v12, a0
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v10, v8
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v8, v12
+; ZVFH-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFH-NEXT: vfmax.vv v10, v10, v8
+; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v10
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_v8bf16_vf:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v12, a0
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v8, v12
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfmax.vv v10, v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v10
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_v8bf16_vf:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 8, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfmv.v.f v12, fa0
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v12
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v10, v10, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT: ret
+ %head = insertelement <8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <8 x bfloat> %head, <8 x bfloat> poison, <8 x i32> zeroinitializer
+ %v = call <8 x bfloat> @llvm.maxnum.v8bf16(<8 x bfloat> %a, <8 x bfloat> %splat)
+ ret <8 x bfloat> %v
+}
+
+define <8 x bfloat> @vfmax_v8bf16_fv(<8 x bfloat> %a, bfloat %b) {
+; ZVFH-LABEL: vfmax_v8bf16_fv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: fmv.x.h a0, fa0
+; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFH-NEXT: vmv.v.x v12, a0
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v10, v8
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v8, v12
+; ZVFH-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFH-NEXT: vfmax.vv v10, v8, v10
+; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v10
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_v8bf16_fv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v12, a0
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v8, v12
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfmax.vv v10, v8, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v10
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_v8bf16_fv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 8, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfmv.v.f v12, fa0
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v12
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v10, v8, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT: ret
+ %head = insertelement <8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <8 x bfloat> %head, <8 x bfloat> poison, <8 x i32> zeroinitializer
+ %v = call <8 x bfloat> @llvm.maxnum.v8bf16(<8 x bfloat> %splat, <8 x bfloat> %a)
+ ret <8 x bfloat> %v
+}
+
+define <16 x bfloat> @vfmax_v16bf16_vv(<16 x bfloat> %a, <16 x bfloat> %b) {
+; ZVFH-LABEL: vfmax_v16bf16_vv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v12, v10
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v16, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFH-NEXT: vfmax.vv v12, v16, v12
+; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v12
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_v16bf16_vv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v12, v10
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v16, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vfmax.vv v12, v16, v12
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v12
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_v16bf16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 16, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v10
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v12, v16, v12
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: ret
+ %v = call <16 x bfloat> @llvm.maxnum.v16bf16(<16 x bfloat> %a, <16 x bfloat> %b)
+ ret <16 x bfloat> %v
+}
+
+define <16 x bfloat> @vfmax_v16bf16_vf(<16 x bfloat> %a, bfloat %b) {
+; ZVFH-LABEL: vfmax_v16bf16_vf:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: fmv.x.h a0, fa0
+; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFH-NEXT: vmv.v.x v16, a0
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v12, v8
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v8, v16
+; ZVFH-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFH-NEXT: vfmax.vv v12, v12, v8
+; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v12
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_v16bf16_vf:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v16, a0
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v12, v8
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v8, v16
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vfmax.vv v12, v12, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v12
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_v16bf16_vf:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 16, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfmv.v.f v16, fa0
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v16
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v12, v12, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: ret
+ %head = insertelement <16 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <16 x bfloat> %head, <16 x bfloat> poison, <16 x i32> zeroinitializer
+ %v = call <16 x bfloat> @llvm.maxnum.v16bf16(<16 x bfloat> %a, <16 x bfloat> %splat)
+ ret <16 x bfloat> %v
+}
+
+define <16 x bfloat> @vfmax_v16bf16_fv(<16 x bfloat> %a, bfloat %b) {
+; ZVFH-LABEL: vfmax_v16bf16_fv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: fmv.x.h a0, fa0
+; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFH-NEXT: vmv.v.x v16, a0
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v12, v8
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v8, v16
+; ZVFH-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFH-NEXT: vfmax.vv v12, v8, v12
+; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v12
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_v16bf16_fv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v16, a0
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v12, v8
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v8, v16
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vfmax.vv v12, v8, v12
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v12
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_v16bf16_fv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 16, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfmv.v.f v16, fa0
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v16
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v12, v8, v12
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: ret
+ %head = insertelement <16 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <16 x bfloat> %head, <16 x bfloat> poison, <16 x i32> zeroinitializer
+ %v = call <16 x bfloat> @llvm.maxnum.v16bf16(<16 x bfloat> %splat, <16 x bfloat> %a)
+ ret <16 x bfloat> %v
+}

 define <2 x half> @vfmax_v2f16_vv(<2 x half> %a, <2 x half> %b) {
; ZVFH-LABEL: vfmax_v2f16_vv:
@@ -25,6 +529,17 @@ define <2 x half> @vfmax_v2f16_vv(<2 x half> %a, <2 x half> %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_v2f16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v9, v9, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%v = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> %b)
ret <2 x half> %v
}
@@ -48,6 +563,19 @@ define <2 x half> @vfmax_v2f16_vf(<2 x half> %a, half %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_v2f16_vf:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fmv.x.w a0, fa0
+; ZVFBFA-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vmv.v.x v9, a0
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v9, v10, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%head = insertelement <2 x half> poison, half %b, i32 0
%splat = shufflevector <2 x half> %head, <2 x half> poison, <2 x i32> zeroinitializer
%v = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> %splat)
@@ -73,6 +601,19 @@ define <2 x half> @vfmax_v2f16_fv(<2 x half> %a, half %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_v2f16_fv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fmv.x.w a0, fa0
+; ZVFBFA-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vmv.v.x v9, a0
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v9, v8, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%head = insertelement <2 x half> poison, half %b, i32 0
%splat = shufflevector <2 x half> %head, <2 x half> poison, <2 x i32> zeroinitializer
%v = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %splat, <2 x half> %a)
@@ -96,6 +637,17 @@ define <4 x half> @vfmax_v4f16_vv(<4 x half> %a, <4 x half> %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_v4f16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v9, v9, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%v = call <4 x half> @llvm.maxnum.v4f16(<4 x half> %a, <4 x half> %b)
ret <4 x half> %v
}
@@ -119,6 +671,19 @@ define <4 x half> @vfmax_v4f16_vf(<4 x half> %a, half %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_v4f16_vf:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fmv.x.w a0, fa0
+; ZVFBFA-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vmv.v.x v9, a0
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v9, v10, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%head = insertelement <4 x half> poison, half %b, i32 0
%splat = shufflevector <4 x half> %head, <4 x half> poison, <4 x i32> zeroinitializer
%v = call <4 x half> @llvm.maxnum.v4f16(<4 x half> %a, <4 x half> %splat)
@@ -144,6 +709,19 @@ define <4 x half> @vfmax_v4f16_fv(<4 x half> %a, half %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_v4f16_fv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fmv.x.w a0, fa0
+; ZVFBFA-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vmv.v.x v9, a0
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v9, v8, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%head = insertelement <4 x half> poison, half %b, i32 0
%splat = shufflevector <4 x half> %head, <4 x half> poison, <4 x i32> zeroinitializer
%v = call <4 x half> @llvm.maxnum.v4f16(<4 x half> %splat, <4 x half> %a)
@@ -167,6 +745,17 @@ define <8 x half> @vfmax_v8f16_vv(<8 x half> %a, <8 x half> %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_v8f16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v10, v12, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT: ret
%v = call <8 x half> @llvm.maxnum.v8f16(<8 x half> %a, <8 x half> %b)
ret <8 x half> %v
}
@@ -190,6 +779,19 @@ define <8 x half> @vfmax_v8f16_vf(<8 x half> %a, half %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_v8f16_vf:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fmv.x.w a0, fa0
+; ZVFBFA-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFBFA-NEXT: vmv.v.x v12, a0
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v12
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v10, v10, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT: ret
%head = insertelement <8 x half> poison, half %b, i32 0
%splat = shufflevector <8 x half> %head, <8 x half> poison, <8 x i32> zeroinitializer
%v = call <8 x half> @llvm.maxnum.v8f16(<8 x half> %a, <8 x half> %splat)
@@ -215,6 +817,19 @@ define <8 x half> @vfmax_v8f16_fv(<8 x half> %a, half %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_v8f16_fv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fmv.x.w a0, fa0
+; ZVFBFA-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFBFA-NEXT: vmv.v.x v12, a0
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v12
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v10, v8, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT: ret
%head = insertelement <8 x half> poison, half %b, i32 0
%splat = shufflevector <8 x half> %head, <8 x half> poison, <8 x i32> zeroinitializer
%v = call <8 x half> @llvm.maxnum.v8f16(<8 x half> %splat, <8 x half> %a)
@@ -238,6 +853,17 @@ define <16 x half> @vfmax_v16f16_vv(<16 x half> %a, <16 x half> %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_v16f16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v10
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v12, v16, v12
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: ret
%v = call <16 x half> @llvm.maxnum.v16f16(<16 x half> %a, <16 x half> %b)
ret <16 x half> %v
}
@@ -261,6 +887,19 @@ define <16 x half> @vfmax_v16f16_vf(<16 x half> %a, half %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_v16f16_vf:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fmv.x.w a0, fa0
+; ZVFBFA-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFBFA-NEXT: vmv.v.x v16, a0
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v16
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v12, v12, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: ret
%head = insertelement <16 x half> poison, half %b, i32 0
%splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
%v = call <16 x half> @llvm.maxnum.v16f16(<16 x half> %a, <16 x half> %splat)
@@ -286,6 +925,19 @@ define <16 x half> @vfmax_v16f16_fv(<16 x half> %a, half %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_v16f16_fv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fmv.x.w a0, fa0
+; ZVFBFA-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFBFA-NEXT: vmv.v.x v16, a0
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v16
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v12, v8, v12
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: ret
%head = insertelement <16 x half> poison, half %b, i32 0
%splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
%v = call <16 x half> @llvm.maxnum.v16f16(<16 x half> %splat, <16 x half> %a)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin.ll
index c9bb99d6cb3d6..2228292d9e66b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin.ll
@@ -1,12 +1,516 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA
+
+define <2 x bfloat> @vfmin_v2bf16_vv(<2 x bfloat> %a, <2 x bfloat> %b) {
+; ZVFH-LABEL: vfmin_v2bf16_vv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v10, v9
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v9, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFH-NEXT: vfmin.vv v9, v9, v10
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_v2bf16_vv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfmin.vv v9, v9, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_v2bf16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 2, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v9, v9, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
+ %v = call <2 x bfloat> @llvm.minnum.v2bf16(<2 x bfloat> %a, <2 x bfloat> %b)
+ ret <2 x bfloat> %v
+}
+
+define <2 x bfloat> @vfmin_v2bf16_vf(<2 x bfloat> %a, bfloat %b) {
+; ZVFH-LABEL: vfmin_v2bf16_vf:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: fmv.x.h a0, fa0
+; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFH-NEXT: vmv.v.x v9, a0
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v10, v8
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v8, v9
+; ZVFH-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFH-NEXT: vfmin.vv v9, v10, v8
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_v2bf16_vf:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v9, a0
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v8, v9
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfmin.vv v9, v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_v2bf16_vf:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 2, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfmv.v.f v9, fa0
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v9, v10, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
+ %head = insertelement <2 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <2 x bfloat> %head, <2 x bfloat> poison, <2 x i32> zeroinitializer
+ %v = call <2 x bfloat> @llvm.minnum.v2bf16(<2 x bfloat> %a, <2 x bfloat> %splat)
+ ret <2 x bfloat> %v
+}
+
+define <2 x bfloat> @vfmin_v2bf16_fv(<2 x bfloat> %a, bfloat %b) {
+; ZVFH-LABEL: vfmin_v2bf16_fv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: fmv.x.h a0, fa0
+; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFH-NEXT: vmv.v.x v9, a0
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v10, v8
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v8, v9
+; ZVFH-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFH-NEXT: vfmin.vv v9, v8, v10
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_v2bf16_fv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v9, a0
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v8, v9
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfmin.vv v9, v8, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_v2bf16_fv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 2, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfmv.v.f v9, fa0
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v9, v8, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
+ %head = insertelement <2 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <2 x bfloat> %head, <2 x bfloat> poison, <2 x i32> zeroinitializer
+ %v = call <2 x bfloat> @llvm.minnum.v2bf16(<2 x bfloat> %splat, <2 x bfloat> %a)
+ ret <2 x bfloat> %v
+}
+
+define <4 x bfloat> @vfmin_v4bf16_vv(<4 x bfloat> %a, <4 x bfloat> %b) {
+; ZVFH-LABEL: vfmin_v4bf16_vv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v10, v9
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v9, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFH-NEXT: vfmin.vv v9, v9, v10
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_v4bf16_vv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfmin.vv v9, v9, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_v4bf16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 4, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v9, v9, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
+ %v = call <4 x bfloat> @llvm.minnum.v4bf16(<4 x bfloat> %a, <4 x bfloat> %b)
+ ret <4 x bfloat> %v
+}
+
+define <4 x bfloat> @vfmin_v4bf16_vf(<4 x bfloat> %a, bfloat %b) {
+; ZVFH-LABEL: vfmin_v4bf16_vf:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: fmv.x.h a0, fa0
+; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFH-NEXT: vmv.v.x v9, a0
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v10, v8
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v8, v9
+; ZVFH-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFH-NEXT: vfmin.vv v9, v10, v8
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_v4bf16_vf:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v9, a0
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v8, v9
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfmin.vv v9, v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_v4bf16_vf:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 4, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfmv.v.f v9, fa0
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v9, v10, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
+ %head = insertelement <4 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <4 x bfloat> %head, <4 x bfloat> poison, <4 x i32> zeroinitializer
+ %v = call <4 x bfloat> @llvm.minnum.v4bf16(<4 x bfloat> %a, <4 x bfloat> %splat)
+ ret <4 x bfloat> %v
+}
+
+define <4 x bfloat> @vfmin_v4bf16_fv(<4 x bfloat> %a, bfloat %b) {
+; ZVFH-LABEL: vfmin_v4bf16_fv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: fmv.x.h a0, fa0
+; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFH-NEXT: vmv.v.x v9, a0
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v10, v8
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v8, v9
+; ZVFH-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFH-NEXT: vfmin.vv v9, v8, v10
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_v4bf16_fv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v9, a0
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v8, v9
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfmin.vv v9, v8, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_v4bf16_fv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 4, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfmv.v.f v9, fa0
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v9, v8, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
+ %head = insertelement <4 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <4 x bfloat> %head, <4 x bfloat> poison, <4 x i32> zeroinitializer
+ %v = call <4 x bfloat> @llvm.minnum.v4bf16(<4 x bfloat> %splat, <4 x bfloat> %a)
+ ret <4 x bfloat> %v
+}
+
+define <8 x bfloat> @vfmin_v8bf16_vv(<8 x bfloat> %a, <8 x bfloat> %b) {
+; ZVFH-LABEL: vfmin_v8bf16_vv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v10, v9
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v12, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFH-NEXT: vfmin.vv v10, v12, v10
+; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v10
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_v8bf16_vv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v12, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfmin.vv v10, v12, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v10
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_v8bf16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 8, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v10, v12, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT: ret
+ %v = call <8 x bfloat> @llvm.minnum.v8bf16(<8 x bfloat> %a, <8 x bfloat> %b)
+ ret <8 x bfloat> %v
+}
+
+define <8 x bfloat> @vfmin_v8bf16_vf(<8 x bfloat> %a, bfloat %b) {
+; ZVFH-LABEL: vfmin_v8bf16_vf:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: fmv.x.h a0, fa0
+; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFH-NEXT: vmv.v.x v12, a0
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v10, v8
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v8, v12
+; ZVFH-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFH-NEXT: vfmin.vv v10, v10, v8
+; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v10
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_v8bf16_vf:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v12, a0
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v8, v12
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfmin.vv v10, v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v10
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_v8bf16_vf:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 8, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfmv.v.f v12, fa0
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v12
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v10, v10, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT: ret
+ %head = insertelement <8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <8 x bfloat> %head, <8 x bfloat> poison, <8 x i32> zeroinitializer
+ %v = call <8 x bfloat> @llvm.minnum.v8bf16(<8 x bfloat> %a, <8 x bfloat> %splat)
+ ret <8 x bfloat> %v
+}
+
+define <8 x bfloat> @vfmin_v8bf16_fv(<8 x bfloat> %a, bfloat %b) {
+; ZVFH-LABEL: vfmin_v8bf16_fv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: fmv.x.h a0, fa0
+; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFH-NEXT: vmv.v.x v12, a0
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v10, v8
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v8, v12
+; ZVFH-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFH-NEXT: vfmin.vv v10, v8, v10
+; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v10
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_v8bf16_fv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v12, a0
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v8, v12
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfmin.vv v10, v8, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v10
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_v8bf16_fv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 8, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfmv.v.f v12, fa0
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v12
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v10, v8, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT: ret
+ %head = insertelement <8 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <8 x bfloat> %head, <8 x bfloat> poison, <8 x i32> zeroinitializer
+ %v = call <8 x bfloat> @llvm.minnum.v8bf16(<8 x bfloat> %splat, <8 x bfloat> %a)
+ ret <8 x bfloat> %v
+}
+
+define <16 x bfloat> @vfmin_v16bf16_vv(<16 x bfloat> %a, <16 x bfloat> %b) {
+; ZVFH-LABEL: vfmin_v16bf16_vv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v12, v10
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v16, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFH-NEXT: vfmin.vv v12, v16, v12
+; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v12
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_v16bf16_vv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v12, v10
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v16, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vfmin.vv v12, v16, v12
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v12
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_v16bf16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 16, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v10
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v12, v16, v12
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: ret
+ %v = call <16 x bfloat> @llvm.minnum.v16bf16(<16 x bfloat> %a, <16 x bfloat> %b)
+ ret <16 x bfloat> %v
+}
+
+define <16 x bfloat> @vfmin_v16bf16_vf(<16 x bfloat> %a, bfloat %b) {
+; ZVFH-LABEL: vfmin_v16bf16_vf:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: fmv.x.h a0, fa0
+; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFH-NEXT: vmv.v.x v16, a0
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v12, v8
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v8, v16
+; ZVFH-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFH-NEXT: vfmin.vv v12, v12, v8
+; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v12
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_v16bf16_vf:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v16, a0
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v12, v8
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v8, v16
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vfmin.vv v12, v12, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v12
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_v16bf16_vf:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 16, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfmv.v.f v16, fa0
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v16
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v12, v12, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: ret
+ %head = insertelement <16 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <16 x bfloat> %head, <16 x bfloat> poison, <16 x i32> zeroinitializer
+ %v = call <16 x bfloat> @llvm.minnum.v16bf16(<16 x bfloat> %a, <16 x bfloat> %splat)
+ ret <16 x bfloat> %v
+}
+
+define <16 x bfloat> @vfmin_v16bf16_fv(<16 x bfloat> %a, bfloat %b) {
+; ZVFH-LABEL: vfmin_v16bf16_fv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: fmv.x.h a0, fa0
+; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFH-NEXT: vmv.v.x v16, a0
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v12, v8
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v8, v16
+; ZVFH-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFH-NEXT: vfmin.vv v12, v8, v12
+; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v12
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_v16bf16_fv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v16, a0
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v12, v8
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v8, v16
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vfmin.vv v12, v8, v12
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v12
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_v16bf16_fv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 16, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfmv.v.f v16, fa0
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v16
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v12, v8, v12
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: ret
+ %head = insertelement <16 x bfloat> poison, bfloat %b, i32 0
+ %splat = shufflevector <16 x bfloat> %head, <16 x bfloat> poison, <16 x i32> zeroinitializer
+ %v = call <16 x bfloat> @llvm.minnum.v16bf16(<16 x bfloat> %splat, <16 x bfloat> %a)
+ ret <16 x bfloat> %v
+}

 define <2 x half> @vfmin_v2f16_vv(<2 x half> %a, <2 x half> %b) {
; ZVFH-LABEL: vfmin_v2f16_vv:
@@ -25,6 +529,17 @@ define <2 x half> @vfmin_v2f16_vv(<2 x half> %a, <2 x half> %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_v2f16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v9, v9, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%v = call <2 x half> @llvm.minnum.v2f16(<2 x half> %a, <2 x half> %b)
ret <2 x half> %v
}
@@ -48,6 +563,19 @@ define <2 x half> @vfmin_v2f16_vf(<2 x half> %a, half %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_v2f16_vf:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fmv.x.w a0, fa0
+; ZVFBFA-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vmv.v.x v9, a0
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v9, v10, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%head = insertelement <2 x half> poison, half %b, i32 0
%splat = shufflevector <2 x half> %head, <2 x half> poison, <2 x i32> zeroinitializer
%v = call <2 x half> @llvm.minnum.v2f16(<2 x half> %a, <2 x half> %splat)
@@ -73,6 +601,19 @@ define <2 x half> @vfmin_v2f16_fv(<2 x half> %a, half %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_v2f16_fv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fmv.x.w a0, fa0
+; ZVFBFA-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vmv.v.x v9, a0
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v9, v8, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%head = insertelement <2 x half> poison, half %b, i32 0
%splat = shufflevector <2 x half> %head, <2 x half> poison, <2 x i32> zeroinitializer
%v = call <2 x half> @llvm.minnum.v2f16(<2 x half> %splat, <2 x half> %a)
@@ -96,6 +637,17 @@ define <4 x half> @vfmin_v4f16_vv(<4 x half> %a, <4 x half> %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_v4f16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v9, v9, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%v = call <4 x half> @llvm.minnum.v4f16(<4 x half> %a, <4 x half> %b)
ret <4 x half> %v
}
@@ -119,6 +671,19 @@ define <4 x half> @vfmin_v4f16_vf(<4 x half> %a, half %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_v4f16_vf:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fmv.x.w a0, fa0
+; ZVFBFA-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vmv.v.x v9, a0
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v9, v10, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%head = insertelement <4 x half> poison, half %b, i32 0
%splat = shufflevector <4 x half> %head, <4 x half> poison, <4 x i32> zeroinitializer
%v = call <4 x half> @llvm.minnum.v4f16(<4 x half> %a, <4 x half> %splat)
@@ -144,6 +709,19 @@ define <4 x half> @vfmin_v4f16_fv(<4 x half> %a, half %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_v4f16_fv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fmv.x.w a0, fa0
+; ZVFBFA-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vmv.v.x v9, a0
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v9, v8, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%head = insertelement <4 x half> poison, half %b, i32 0
%splat = shufflevector <4 x half> %head, <4 x half> poison, <4 x i32> zeroinitializer
%v = call <4 x half> @llvm.minnum.v4f16(<4 x half> %splat, <4 x half> %a)
@@ -167,6 +745,17 @@ define <8 x half> @vfmin_v8f16_vv(<8 x half> %a, <8 x half> %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_v8f16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v10, v12, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT: ret
%v = call <8 x half> @llvm.minnum.v8f16(<8 x half> %a, <8 x half> %b)
ret <8 x half> %v
}
@@ -190,6 +779,19 @@ define <8 x half> @vfmin_v8f16_vf(<8 x half> %a, half %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_v8f16_vf:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fmv.x.w a0, fa0
+; ZVFBFA-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFBFA-NEXT: vmv.v.x v12, a0
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v12
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v10, v10, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT: ret
%head = insertelement <8 x half> poison, half %b, i32 0
%splat = shufflevector <8 x half> %head, <8 x half> poison, <8 x i32> zeroinitializer
%v = call <8 x half> @llvm.minnum.v8f16(<8 x half> %a, <8 x half> %splat)
@@ -215,6 +817,19 @@ define <8 x half> @vfmin_v8f16_fv(<8 x half> %a, half %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_v8f16_fv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fmv.x.w a0, fa0
+; ZVFBFA-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFBFA-NEXT: vmv.v.x v12, a0
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v12
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v10, v8, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT: ret
%head = insertelement <8 x half> poison, half %b, i32 0
%splat = shufflevector <8 x half> %head, <8 x half> poison, <8 x i32> zeroinitializer
%v = call <8 x half> @llvm.minnum.v8f16(<8 x half> %splat, <8 x half> %a)
@@ -238,6 +853,17 @@ define <16 x half> @vfmin_v16f16_vv(<16 x half> %a, <16 x half> %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_v16f16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v10
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v12, v16, v12
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: ret
%v = call <16 x half> @llvm.minnum.v16f16(<16 x half> %a, <16 x half> %b)
ret <16 x half> %v
}
@@ -261,6 +887,19 @@ define <16 x half> @vfmin_v16f16_vf(<16 x half> %a, half %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_v16f16_vf:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fmv.x.w a0, fa0
+; ZVFBFA-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFBFA-NEXT: vmv.v.x v16, a0
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v16
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v12, v12, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: ret
%head = insertelement <16 x half> poison, half %b, i32 0
%splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
%v = call <16 x half> @llvm.minnum.v16f16(<16 x half> %a, <16 x half> %splat)
@@ -286,6 +925,19 @@ define <16 x half> @vfmin_v16f16_fv(<16 x half> %a, half %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_v16f16_fv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fmv.x.w a0, fa0
+; ZVFBFA-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFBFA-NEXT: vmv.v.x v16, a0
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v16
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v12, v8, v12
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: ret
%head = insertelement <16 x half> poison, half %b, i32 0
%splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer
%v = call <16 x half> @llvm.minnum.v16f16(<16 x half> %splat, <16 x half> %a)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll
index fd70f95ed53c6..9a3d27becd644 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll
@@ -11,178 +11,472 @@
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+zfbfmin,+zvfbfmin,+v,+m -target-abi=ilp32d \
; RUN: -verify-machineinstrs -early-live-intervals < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+zfbfmin,+zvfbfmin,+v,+m -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+zfbfmin,+zvfbfmin,+v,+m -target-abi=lp64d \
-; RUN: -verify-machineinstrs -early-live-intervals < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v,+m \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v,+m \
+; RUN: -verify-machineinstrs -early-live-intervals < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA

 define <vscale x 1 x bfloat> @vfmax_nxv1bf16_vv(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b) {
-; CHECK-LABEL: vfmax_nxv1bf16_vv:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v10, v10
-; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
-; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT: vmerge.vvm v9, v10, v8, v0
-; CHECK-NEXT: vmfeq.vv v0, v8, v8
-; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0
-; CHECK-NEXT: vfmax.vv v9, v8, v9
-; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmax_nxv1bf16_vv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v10, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFH-NEXT: vmfeq.vv v0, v10, v10
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v8, v9
+; ZVFH-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFH-NEXT: vmerge.vvm v9, v10, v8, v0
+; ZVFH-NEXT: vmfeq.vv v0, v8, v8
+; ZVFH-NEXT: vmerge.vvm v8, v8, v10, v0
+; ZVFH-NEXT: vfmax.vv v9, v8, v9
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_nxv1bf16_vv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v8, v9
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0
+; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8
+; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0
+; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_nxv1bf16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vmfeq.vv v0, v10, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vmerge.vvm v9, v10, v8, v0
+; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT: vmerge.vvm v8, v8, v10, v0
+; ZVFBFA-NEXT: vfmax.vv v9, v8, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%v = call <vscale x 1 x bfloat> @llvm.maximum.nxv1bf16(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b)
ret <vscale x 1 x bfloat> %v
 }

 define <vscale x 2 x bfloat> @vfmax_nxv2bf16_vv(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) {
-; CHECK-LABEL: vfmax_nxv2bf16_vv:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v10, v10
-; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
-; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT: vmerge.vvm v9, v10, v8, v0
-; CHECK-NEXT: vmfeq.vv v0, v8, v8
-; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0
-; CHECK-NEXT: vfmax.vv v9, v8, v9
-; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmax_nxv2bf16_vv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v10, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFH-NEXT: vmfeq.vv v0, v10, v10
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v8, v9
+; ZVFH-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFH-NEXT: vmerge.vvm v9, v10, v8, v0
+; ZVFH-NEXT: vmfeq.vv v0, v8, v8
+; ZVFH-NEXT: vmerge.vvm v8, v8, v10, v0
+; ZVFH-NEXT: vfmax.vv v9, v8, v9
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_nxv2bf16_vv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v8, v9
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0
+; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8
+; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0
+; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_nxv2bf16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT: vmfeq.vv v0, v10, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT: vmerge.vvm v9, v10, v8, v0
+; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT: vmerge.vvm v8, v8, v10, v0
+; ZVFBFA-NEXT: vfmax.vv v9, v8, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%v = call <vscale x 2 x bfloat> @llvm.maximum.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b)
ret <vscale x 2 x bfloat> %v
 }

 define <vscale x 4 x bfloat> @vfmax_nxv4bf16_vv(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) {
-; CHECK-LABEL: vfmax_nxv4bf16_vv:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v10, v10
-; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT: vmfeq.vv v0, v12, v12
-; CHECK-NEXT: vmerge.vvm v10, v12, v10, v0
-; CHECK-NEXT: vfmax.vv v10, v10, v8
-; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmax_nxv4bf16_vv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v10, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFH-NEXT: vmfeq.vv v0, v10, v10
+; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v12, v9
+; ZVFH-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFH-NEXT: vmerge.vvm v8, v10, v12, v0
+; ZVFH-NEXT: vmfeq.vv v0, v12, v12
+; ZVFH-NEXT: vmerge.vvm v10, v12, v10, v0
+; ZVFH-NEXT: vfmax.vv v10, v10, v8
+; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v10
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_nxv4bf16_vv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v12, v9
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v12, v0
+; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12
+; ZVFHMIN-NEXT: vmerge.vvm v10, v12, v10, v0
+; ZVFHMIN-NEXT: vfmax.vv v10, v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v10
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_nxv4bf16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT: vmfeq.vv v0, v10, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT: vmerge.vvm v8, v10, v12, v0
+; ZVFBFA-NEXT: vmfeq.vv v0, v12, v12
+; ZVFBFA-NEXT: vmerge.vvm v10, v12, v10, v0
+; ZVFBFA-NEXT: vfmax.vv v10, v10, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT: ret
%v = call <vscale x 4 x bfloat> @llvm.maximum.nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b)
ret <vscale x 4 x bfloat> %v
 }

 define <vscale x 8 x bfloat> @vfmax_nxv8bf16_vv(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) {
-; CHECK-LABEL: vfmax_nxv8bf16_vv:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v12, v12
-; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
-; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vmerge.vvm v8, v12, v16, v0
-; CHECK-NEXT: vmfeq.vv v0, v16, v16
-; CHECK-NEXT: vmerge.vvm v12, v16, v12, v0
-; CHECK-NEXT: vfmax.vv v12, v12, v8
-; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmax_nxv8bf16_vv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v12, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFH-NEXT: vmfeq.vv v0, v12, v12
+; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v16, v10
+; ZVFH-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFH-NEXT: vmerge.vvm v8, v12, v16, v0
+; ZVFH-NEXT: vmfeq.vv v0, v16, v16
+; ZVFH-NEXT: vmerge.vvm v12, v16, v12, v0
+; ZVFH-NEXT: vfmax.vv v12, v12, v8
+; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v12
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_nxv8bf16_vv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v12, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v16, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v16, v0
+; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16
+; ZVFHMIN-NEXT: vmerge.vvm v12, v16, v12, v0
+; ZVFHMIN-NEXT: vfmax.vv v12, v12, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v12
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_nxv8bf16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vmfeq.vv v0, v12, v12
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vmerge.vvm v8, v12, v16, v0
+; ZVFBFA-NEXT: vmfeq.vv v0, v16, v16
+; ZVFBFA-NEXT: vmerge.vvm v12, v16, v12, v0
+; ZVFBFA-NEXT: vfmax.vv v12, v12, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: ret
%v = call <vscale x 8 x bfloat> @llvm.maximum.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b)
ret <vscale x 8 x bfloat> %v
 }

 define <vscale x 16 x bfloat> @vfmax_nxv16bf16_vv(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %b) {
-; CHECK-LABEL: vfmax_nxv16bf16_vv:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
-; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
-; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v24, v24
-; CHECK-NEXT: vmfeq.vv v7, v16, v16
-; CHECK-NEXT: vmerge.vvm v8, v24, v16, v0
-; CHECK-NEXT: vmv1r.v v0, v7
-; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0
-; CHECK-NEXT: vfmax.vv v16, v16, v8
-; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmax_nxv16bf16_vv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v16, v12
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v24, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT: vmfeq.vv v0, v24, v24
+; ZVFH-NEXT: vmfeq.vv v7, v16, v16
+; ZVFH-NEXT: vmerge.vvm v8, v24, v16, v0
+; ZVFH-NEXT: vmv1r.v v0, v7
+; ZVFH-NEXT: vmerge.vvm v16, v16, v24, v0
+; ZVFH-NEXT: vfmax.vv v16, v16, v8
+; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v16
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_nxv16bf16_vv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v16, v12
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v24, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vmfeq.vv v0, v24, v24
+; ZVFHMIN-NEXT: vmfeq.vv v7, v16, v16
+; ZVFHMIN-NEXT: vmerge.vvm v8, v24, v16, v0
+; ZVFHMIN-NEXT: vmv1r.v v0, v7
+; ZVFHMIN-NEXT: vmerge.vvm v16, v16, v24, v0
+; ZVFHMIN-NEXT: vfmax.vv v16, v16, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v16
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_nxv16bf16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vmfeq.vv v0, v24, v24
+; ZVFBFA-NEXT: vmfeq.vv v7, v16, v16
+; ZVFBFA-NEXT: vmerge.vvm v8, v24, v16, v0
+; ZVFBFA-NEXT: vmv1r.v v0, v7
+; ZVFBFA-NEXT: vmerge.vvm v16, v16, v24, v0
+; ZVFBFA-NEXT: vfmax.vv v16, v16, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT: ret
%v = call <vscale x 16 x bfloat> @llvm.maximum.nxv16bf16(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %b)
ret <vscale x 16 x bfloat> %v
}
define <vscale x 32 x bfloat> @vfmax_nxv32bf16_vv(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b) nounwind {
-; CHECK-LABEL: vfmax_nxv32bf16_vv:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
-; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT: vmv8r.v v0, v16
-; CHECK-NEXT: vmv8r.v v24, v8
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v0
-; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v24
-; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v8, v8
-; CHECK-NEXT: vmfeq.vv v3, v16, v16
-; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
-; CHECK-NEXT: vmv1r.v v0, v3
-; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0
-; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v4
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vfmax.vv v8, v8, v16
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v20
-; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v8, v8
-; CHECK-NEXT: vmfeq.vv v7, v24, v24
-; CHECK-NEXT: vmerge.vvm v16, v8, v24, v0
-; CHECK-NEXT: vmv1r.v v0, v7
-; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0
-; CHECK-NEXT: vfmax.vv v16, v8, v16
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24
-; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
-; CHECK-NEXT: add sp, sp, a0
-; CHECK-NEXT: addi sp, sp, 16
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmax_nxv32bf16_vv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: addi sp, sp, -16
+; ZVFH-NEXT: csrr a0, vlenb
+; ZVFH-NEXT: slli a0, a0, 4
+; ZVFH-NEXT: sub sp, sp, a0
+; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vmv8r.v v0, v16
+; ZVFH-NEXT: vmv8r.v v24, v8
+; ZVFH-NEXT: addi a0, sp, 16
+; ZVFH-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v16, v0
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v8, v24
+; ZVFH-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT: vmfeq.vv v0, v8, v8
+; ZVFH-NEXT: vmfeq.vv v3, v16, v16
+; ZVFH-NEXT: vmerge.vvm v24, v8, v16, v0
+; ZVFH-NEXT: csrr a0, vlenb
+; ZVFH-NEXT: slli a0, a0, 3
+; ZVFH-NEXT: add a0, sp, a0
+; ZVFH-NEXT: addi a0, a0, 16
+; ZVFH-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFH-NEXT: vmv1r.v v0, v3
+; ZVFH-NEXT: vmerge.vvm v8, v16, v8, v0
+; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v24, v4
+; ZVFH-NEXT: csrr a0, vlenb
+; ZVFH-NEXT: slli a0, a0, 3
+; ZVFH-NEXT: add a0, sp, a0
+; ZVFH-NEXT: addi a0, a0, 16
+; ZVFH-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; ZVFH-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT: vfmax.vv v8, v8, v16
+; ZVFH-NEXT: csrr a0, vlenb
+; ZVFH-NEXT: slli a0, a0, 3
+; ZVFH-NEXT: add a0, sp, a0
+; ZVFH-NEXT: addi a0, a0, 16
+; ZVFH-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVFH-NEXT: addi a0, sp, 16
+; ZVFH-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v8, v20
+; ZVFH-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT: vmfeq.vv v0, v8, v8
+; ZVFH-NEXT: vmfeq.vv v7, v24, v24
+; ZVFH-NEXT: vmerge.vvm v16, v8, v24, v0
+; ZVFH-NEXT: vmv1r.v v0, v7
+; ZVFH-NEXT: vmerge.vvm v8, v24, v8, v0
+; ZVFH-NEXT: vfmax.vv v16, v8, v16
+; ZVFH-NEXT: csrr a0, vlenb
+; ZVFH-NEXT: slli a0, a0, 3
+; ZVFH-NEXT: add a0, sp, a0
+; ZVFH-NEXT: addi a0, a0, 16
+; ZVFH-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v24
+; ZVFH-NEXT: vfncvtbf16.f.f.w v12, v16
+; ZVFH-NEXT: csrr a0, vlenb
+; ZVFH-NEXT: slli a0, a0, 4
+; ZVFH-NEXT: add sp, sp, a0
+; ZVFH-NEXT: addi sp, sp, 16
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_nxv32bf16_vv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: addi sp, sp, -16
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 4
+; ZVFHMIN-NEXT: sub sp, sp, a0
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vmv8r.v v0, v16
+; ZVFHMIN-NEXT: vmv8r.v v24, v8
+; ZVFHMIN-NEXT: addi a0, sp, 16
+; ZVFHMIN-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v16, v0
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v8, v24
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8
+; ZVFHMIN-NEXT: vmfeq.vv v3, v16, v16
+; ZVFHMIN-NEXT: vmerge.vvm v24, v8, v16, v0
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT: vmv1r.v v0, v3
+; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v8, v0
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v24, v4
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmax.vv v8, v8, v16
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT: addi a0, sp, 16
+; ZVFHMIN-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v8, v20
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8
+; ZVFHMIN-NEXT: vmfeq.vv v7, v24, v24
+; ZVFHMIN-NEXT: vmerge.vvm v16, v8, v24, v0
+; ZVFHMIN-NEXT: vmv1r.v v0, v7
+; ZVFHMIN-NEXT: vmerge.vvm v8, v24, v8, v0
+; ZVFHMIN-NEXT: vfmax.vv v16, v8, v16
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v24
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v12, v16
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 4
+; ZVFHMIN-NEXT: add sp, sp, a0
+; ZVFHMIN-NEXT: addi sp, sp, 16
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_nxv32bf16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: addi sp, sp, -16
+; ZVFBFA-NEXT: csrr a0, vlenb
+; ZVFBFA-NEXT: slli a0, a0, 4
+; ZVFBFA-NEXT: sub sp, sp, a0
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vmv8r.v v0, v16
+; ZVFBFA-NEXT: vmv8r.v v24, v8
+; ZVFBFA-NEXT: addi a0, sp, 16
+; ZVFBFA-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v0
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v24
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT: vmfeq.vv v3, v16, v16
+; ZVFBFA-NEXT: vmerge.vvm v24, v8, v16, v0
+; ZVFBFA-NEXT: csrr a0, vlenb
+; ZVFBFA-NEXT: slli a0, a0, 3
+; ZVFBFA-NEXT: add a0, sp, a0
+; ZVFBFA-NEXT: addi a0, a0, 16
+; ZVFBFA-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFBFA-NEXT: vmv1r.v v0, v3
+; ZVFBFA-NEXT: vmerge.vvm v8, v16, v8, v0
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v4
+; ZVFBFA-NEXT: csrr a0, vlenb
+; ZVFBFA-NEXT: slli a0, a0, 3
+; ZVFBFA-NEXT: add a0, sp, a0
+; ZVFBFA-NEXT: addi a0, a0, 16
+; ZVFBFA-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v8, v8, v16
+; ZVFBFA-NEXT: csrr a0, vlenb
+; ZVFBFA-NEXT: slli a0, a0, 3
+; ZVFBFA-NEXT: add a0, sp, a0
+; ZVFBFA-NEXT: addi a0, a0, 16
+; ZVFBFA-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVFBFA-NEXT: addi a0, sp, 16
+; ZVFBFA-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v20
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT: vmfeq.vv v7, v24, v24
+; ZVFBFA-NEXT: vmerge.vvm v16, v8, v24, v0
+; ZVFBFA-NEXT: vmv1r.v v0, v7
+; ZVFBFA-NEXT: vmerge.vvm v8, v24, v8, v0
+; ZVFBFA-NEXT: vfmax.vv v16, v8, v16
+; ZVFBFA-NEXT: csrr a0, vlenb
+; ZVFBFA-NEXT: slli a0, a0, 3
+; ZVFBFA-NEXT: add a0, sp, a0
+; ZVFBFA-NEXT: addi a0, a0, 16
+; ZVFBFA-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v24
+; ZVFBFA-NEXT: vfncvt.f.f.w v12, v16
+; ZVFBFA-NEXT: csrr a0, vlenb
+; ZVFBFA-NEXT: slli a0, a0, 4
+; ZVFBFA-NEXT: add sp, sp, a0
+; ZVFBFA-NEXT: addi sp, sp, 16
+; ZVFBFA-NEXT: ret
%v = call <vscale x 32 x bfloat> @llvm.maximum.nxv32bf16(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b)
ret <vscale x 32 x bfloat> %v
}
@@ -214,6 +508,23 @@ define <vscale x 1 x half> @vfmax_nxv1f16_vv(<vscale x 1 x half> %a, <vscale x 1
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_nxv1f16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vmfeq.vv v0, v10, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vmerge.vvm v9, v10, v8, v0
+; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT: vmerge.vvm v8, v8, v10, v0
+; ZVFBFA-NEXT: vfmax.vv v9, v8, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%v = call <vscale x 1 x half> @llvm.maximum.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x half> %b)
ret <vscale x 1 x half> %v
}
@@ -245,6 +556,23 @@ define <vscale x 2 x half> @vfmax_nxv2f16_vv(<vscale x 2 x half> %a, <vscale x 2
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_nxv2f16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT: vmfeq.vv v0, v10, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT: vmerge.vvm v9, v10, v8, v0
+; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT: vmerge.vvm v8, v8, v10, v0
+; ZVFBFA-NEXT: vfmax.vv v9, v8, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%v = call <vscale x 2 x half> @llvm.maximum.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b)
ret <vscale x 2 x half> %v
}
@@ -276,6 +604,23 @@ define <vscale x 4 x half> @vfmax_nxv4f16_vv(<vscale x 4 x half> %a, <vscale x 4
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_nxv4f16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT: vmfeq.vv v0, v10, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT: vmerge.vvm v8, v10, v12, v0
+; ZVFBFA-NEXT: vmfeq.vv v0, v12, v12
+; ZVFBFA-NEXT: vmerge.vvm v10, v12, v10, v0
+; ZVFBFA-NEXT: vfmax.vv v10, v10, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT: ret
%v = call <vscale x 4 x half> @llvm.maximum.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b)
ret <vscale x 4 x half> %v
}
@@ -307,6 +652,23 @@ define <vscale x 8 x half> @vfmax_nxv8f16_vv(<vscale x 8 x half> %a, <vscale x 8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_nxv8f16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vmfeq.vv v0, v12, v12
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vmerge.vvm v8, v12, v16, v0
+; ZVFBFA-NEXT: vmfeq.vv v0, v16, v16
+; ZVFBFA-NEXT: vmerge.vvm v12, v16, v12, v0
+; ZVFBFA-NEXT: vfmax.vv v12, v12, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: ret
%v = call <vscale x 8 x half> @llvm.maximum.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b)
ret <vscale x 8 x half> %v
}
@@ -337,6 +699,22 @@ define <vscale x 16 x half> @vfmax_nxv16f16_vv(<vscale x 16 x half> %a, <vscale
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_nxv16f16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vmfeq.vv v0, v24, v24
+; ZVFBFA-NEXT: vmfeq.vv v7, v16, v16
+; ZVFBFA-NEXT: vmerge.vvm v8, v24, v16, v0
+; ZVFBFA-NEXT: vmv1r.v v0, v7
+; ZVFBFA-NEXT: vmerge.vvm v16, v16, v24, v0
+; ZVFBFA-NEXT: vfmax.vv v16, v16, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT: ret
%v = call <vscale x 16 x half> @llvm.maximum.nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x half> %b)
ret <vscale x 16 x half> %v
}
@@ -415,6 +793,69 @@ define <vscale x 32 x half> @vfmax_nxv32f16_vv(<vscale x 32 x half> %a, <vscale
; ZVFHMIN-NEXT: add sp, sp, a0
; ZVFHMIN-NEXT: addi sp, sp, 16
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_nxv32f16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: addi sp, sp, -16
+; ZVFBFA-NEXT: csrr a0, vlenb
+; ZVFBFA-NEXT: slli a0, a0, 4
+; ZVFBFA-NEXT: sub sp, sp, a0
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vmv8r.v v0, v16
+; ZVFBFA-NEXT: vmv8r.v v24, v8
+; ZVFBFA-NEXT: addi a0, sp, 16
+; ZVFBFA-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v0
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v24
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT: vmfeq.vv v3, v16, v16
+; ZVFBFA-NEXT: vmerge.vvm v24, v8, v16, v0
+; ZVFBFA-NEXT: csrr a0, vlenb
+; ZVFBFA-NEXT: slli a0, a0, 3
+; ZVFBFA-NEXT: add a0, sp, a0
+; ZVFBFA-NEXT: addi a0, a0, 16
+; ZVFBFA-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFBFA-NEXT: vmv1r.v v0, v3
+; ZVFBFA-NEXT: vmerge.vvm v8, v16, v8, v0
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v4
+; ZVFBFA-NEXT: csrr a0, vlenb
+; ZVFBFA-NEXT: slli a0, a0, 3
+; ZVFBFA-NEXT: add a0, sp, a0
+; ZVFBFA-NEXT: addi a0, a0, 16
+; ZVFBFA-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v8, v8, v16
+; ZVFBFA-NEXT: csrr a0, vlenb
+; ZVFBFA-NEXT: slli a0, a0, 3
+; ZVFBFA-NEXT: add a0, sp, a0
+; ZVFBFA-NEXT: addi a0, a0, 16
+; ZVFBFA-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVFBFA-NEXT: addi a0, sp, 16
+; ZVFBFA-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v20
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT: vmfeq.vv v7, v24, v24
+; ZVFBFA-NEXT: vmerge.vvm v16, v8, v24, v0
+; ZVFBFA-NEXT: vmv1r.v v0, v7
+; ZVFBFA-NEXT: vmerge.vvm v8, v24, v8, v0
+; ZVFBFA-NEXT: vfmax.vv v16, v8, v16
+; ZVFBFA-NEXT: csrr a0, vlenb
+; ZVFBFA-NEXT: slli a0, a0, 3
+; ZVFBFA-NEXT: add a0, sp, a0
+; ZVFBFA-NEXT: addi a0, a0, 16
+; ZVFBFA-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v24
+; ZVFBFA-NEXT: vfncvt.f.f.w v12, v16
+; ZVFBFA-NEXT: csrr a0, vlenb
+; ZVFBFA-NEXT: slli a0, a0, 4
+; ZVFBFA-NEXT: add sp, sp, a0
+; ZVFBFA-NEXT: addi sp, sp, 16
+; ZVFBFA-NEXT: ret
%v = call <vscale x 32 x half> @llvm.maximum.nxv32f16(<vscale x 32 x half> %a, <vscale x 32 x half> %b)
ret <vscale x 32 x half> %v
}
@@ -564,6 +1005,17 @@ define <vscale x 1 x half> @vfmax_nxv1f16_vv_nnan(<vscale x 1 x half> %a, <vscal
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_nxv1f16_vv_nnan:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v9, v9, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%v = call nnan <vscale x 1 x half> @llvm.maximum.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x half> %b)
ret <vscale x 1 x half> %v
}
@@ -595,6 +1047,24 @@ define <vscale x 1 x half> @vfmax_nxv1f16_vv_nnana(<vscale x 1 x half> %a, <vsca
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_nxv1f16_vv_nnana:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vfadd.vv v9, v10, v10
+; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
+; ZVFBFA-NEXT: vfncvt.f.f.w v10, v9
+; ZVFBFA-NEXT: vmv1r.v v9, v8
+; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v10, v0.t
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v9, v9, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%c = fadd nnan <vscale x 1 x half> %a, %a
%v = call <vscale x 1 x half> @llvm.maximum.nxv1f16(<vscale x 1 x half> %c, <vscale x 1 x half> %b)
ret <vscale x 1 x half> %v
@@ -627,6 +1097,24 @@ define <vscale x 1 x half> @vfmax_nxv1f16_vv_nnanb(<vscale x 1 x half> %a, <vsca
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_nxv1f16_vv_nnanb:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vfadd.vv v8, v10, v10
+; ZVFBFA-NEXT: vmfeq.vv v0, v9, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
+; ZVFBFA-NEXT: vfncvt.f.f.w v10, v8
+; ZVFBFA-NEXT: vmv1r.v v8, v9
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v10, v0.t
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v9, v9, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%c = fadd nnan <vscale x 1 x half> %b, %b
%v = call <vscale x 1 x half> @llvm.maximum.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x half> %c)
ret <vscale x 1 x half> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/fmaximumnum-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fmaximumnum-sdnode.ll
index dce5004d03e16..20f4a4d939ce1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fmaximumnum-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fmaximumnum-sdnode.ll
@@ -11,33 +11,83 @@
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
; RUN: --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v \
+; RUN: -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFBFA
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v \
+; RUN: -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFBFA
define <vscale x 1 x bfloat> @vfmax_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb) {
-; CHECK-LABEL: vfmax_vv_nxv1bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
-; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
-; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT: vfmax.vv v9, v9, v10
-; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmax_vv_nxv1bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v10, v9
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v9, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFH-NEXT: vfmax.vv v9, v9, v10
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_vv_nxv1bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfmax.vv v9, v9, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_vv_nxv1bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v9, v9, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%vc = call <vscale x 1 x bfloat> @llvm.maximumnum.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb)
ret <vscale x 1 x bfloat> %vc
}
define <vscale x 1 x bfloat> @vfmax_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b) {
-; CHECK-LABEL: vfmax_vf_nxv1bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
-; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT: vfmax.vf v9, v9, fa5
-; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmax_vf_nxv1bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v9, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFH-NEXT: vfmax.vf v9, v9, fa5
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_vf_nxv1bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfmax.vf v9, v9, fa5
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_vf_nxv1bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vfmax.vf v9, v9, fa5
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <vscale x 1 x bfloat> %head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
%vc = call <vscale x 1 x bfloat> @llvm.maximumnum.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %splat)
@@ -45,31 +95,75 @@ define <vscale x 1 x bfloat> @vfmax_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloa
}
define <vscale x 2 x bfloat> @vfmax_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb) {
-; CHECK-LABEL: vfmax_vv_nxv2bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
-; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
-; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT: vfmax.vv v9, v9, v10
-; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmax_vv_nxv2bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v10, v9
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v9, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFH-NEXT: vfmax.vv v9, v9, v10
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_vv_nxv2bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfmax.vv v9, v9, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_vv_nxv2bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v9, v9, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%vc = call <vscale x 2 x bfloat> @llvm.maximumnum.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb)
ret <vscale x 2 x bfloat> %vc
}
define <vscale x 2 x bfloat> @vfmax_vf_nxv2bf16(<vscale x 2 x bfloat> %va, bfloat %b) {
-; CHECK-LABEL: vfmax_vf_nxv2bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
-; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT: vfmax.vf v9, v9, fa5
-; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmax_vf_nxv2bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v9, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFH-NEXT: vfmax.vf v9, v9, fa5
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_vf_nxv2bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfmax.vf v9, v9, fa5
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_vf_nxv2bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT: vfmax.vf v9, v9, fa5
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <vscale x 2 x bfloat> %head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
%vc = call <vscale x 2 x bfloat> @llvm.maximumnum.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %splat)
@@ -77,31 +171,75 @@ define <vscale x 2 x bfloat> @vfmax_vf_nxv2bf16(<vscale x 2 x bfloat> %va, bfloa
}
define <vscale x 4 x bfloat> @vfmax_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb) {
-; CHECK-LABEL: vfmax_vv_nxv4bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfmax.vv v10, v12, v10
-; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmax_vv_nxv4bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v10, v9
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v12, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFH-NEXT: vfmax.vv v10, v12, v10
+; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v10
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_vv_nxv4bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v12, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfmax.vv v10, v12, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v10
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_vv_nxv4bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v10, v12, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT: ret
%vc = call <vscale x 4 x bfloat> @llvm.maximumnum.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb)
ret <vscale x 4 x bfloat> %vc
}
define <vscale x 4 x bfloat> @vfmax_vf_nxv4bf16(<vscale x 4 x bfloat> %va, bfloat %b) {
-; CHECK-LABEL: vfmax_vf_nxv4bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfmax.vf v10, v10, fa5
-; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmax_vf_nxv4bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v10, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFH-NEXT: vfmax.vf v10, v10, fa5
+; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v10
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_vf_nxv4bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfmax.vf v10, v10, fa5
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v10
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_vf_nxv4bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT: vfmax.vf v10, v10, fa5
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <vscale x 4 x bfloat> %head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
%vc = call <vscale x 4 x bfloat> @llvm.maximumnum.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %splat)
@@ -109,31 +247,75 @@ define <vscale x 4 x bfloat> @vfmax_vf_nxv4bf16(<vscale x 4 x bfloat> %va, bfloa
}
define <vscale x 8 x bfloat> @vfmax_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) {
-; CHECK-LABEL: vfmax_vv_nxv8bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
-; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vfmax.vv v12, v16, v12
-; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmax_vv_nxv8bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v12, v10
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v16, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFH-NEXT: vfmax.vv v12, v16, v12
+; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v12
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_vv_nxv8bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v12, v10
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v16, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vfmax.vv v12, v16, v12
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v12
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_vv_nxv8bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v10
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v12, v16, v12
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: ret
%vc = call <vscale x 8 x bfloat> @llvm.maximumnum.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb)
ret <vscale x 8 x bfloat> %vc
}
define <vscale x 8 x bfloat> @vfmax_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
-; CHECK-LABEL: vfmax_vf_nxv8bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vfmax.vf v12, v12, fa5
-; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmax_vf_nxv8bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v12, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFH-NEXT: vfmax.vf v12, v12, fa5
+; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v12
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_vf_nxv8bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v12, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vfmax.vf v12, v12, fa5
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v12
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_vf_nxv8bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vfmax.vf v12, v12, fa5
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
%vc = call <vscale x 8 x bfloat> @llvm.maximumnum.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %splat)
@@ -141,16 +323,38 @@ define <vscale x 8 x bfloat> @vfmax_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloa
}
define <vscale x 8 x bfloat> @vfmax_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
-; CHECK-LABEL: vfmax_fv_nxv8bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vfmax.vf v12, v12, fa5
-; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmax_fv_nxv8bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v12, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFH-NEXT: vfmax.vf v12, v12, fa5
+; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v12
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_fv_nxv8bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v12, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vfmax.vf v12, v12, fa5
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v12
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_fv_nxv8bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vfmax.vf v12, v12, fa5
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
%vc = call <vscale x 8 x bfloat> @llvm.maximumnum.nxv8bf16(<vscale x 8 x bfloat> %splat, <vscale x 8 x bfloat> %va)
@@ -158,31 +362,75 @@ define <vscale x 8 x bfloat> @vfmax_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloa
}
define <vscale x 16 x bfloat> @vfmax_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb) {
-; CHECK-LABEL: vfmax_vv_nxv16bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
-; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
-; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vfmax.vv v16, v24, v16
-; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmax_vv_nxv16bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v16, v12
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v24, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT: vfmax.vv v16, v24, v16
+; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v16
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_vv_nxv16bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v16, v12
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v24, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmax.vv v16, v24, v16
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v16
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_vv_nxv16bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v16, v24, v16
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT: ret
%vc = call <vscale x 16 x bfloat> @llvm.maximumnum.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb)
ret <vscale x 16 x bfloat> %vc
}
define <vscale x 16 x bfloat> @vfmax_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bfloat %b) {
-; CHECK-LABEL: vfmax_vf_nxv16bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
-; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vfmax.vf v16, v16, fa5
-; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmax_vf_nxv16bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v16, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT: vfmax.vf v16, v16, fa5
+; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v16
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_vf_nxv16bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v16, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmax.vf v16, v16, fa5
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v16
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_vf_nxv16bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vfmax.vf v16, v16, fa5
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <vscale x 16 x bfloat> %head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
%vc = call <vscale x 16 x bfloat> @llvm.maximumnum.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %splat)
@@ -190,78 +438,213 @@ define <vscale x 16 x bfloat> @vfmax_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bf
}
define <vscale x 32 x bfloat> @vfmax_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb) {
-; CHECK-LABEL: vfmax_vv_nxv32bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
-; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
-; CHECK-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vfmax.vv v0, v0, v8
-; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v0
-; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vfmax.vv v16, v16, v24
-; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add sp, sp, a0
-; CHECK-NEXT: .cfi_def_cfa sp, 16
-; CHECK-NEXT: addi sp, sp, 16
-; CHECK-NEXT: .cfi_def_cfa_offset 0
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmax_vv_nxv32bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: addi sp, sp, -16
+; ZVFH-NEXT: .cfi_def_cfa_offset 16
+; ZVFH-NEXT: csrr a0, vlenb
+; ZVFH-NEXT: slli a0, a0, 3
+; ZVFH-NEXT: sub sp, sp, a0
+; ZVFH-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v24, v16
+; ZVFH-NEXT: addi a0, sp, 16
+; ZVFH-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v0, v8
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v24, v20
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v16, v12
+; ZVFH-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFH-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT: vfmax.vv v0, v0, v8
+; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v0
+; ZVFH-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT: vfmax.vv v16, v16, v24
+; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v12, v16
+; ZVFH-NEXT: csrr a0, vlenb
+; ZVFH-NEXT: slli a0, a0, 3
+; ZVFH-NEXT: add sp, sp, a0
+; ZVFH-NEXT: .cfi_def_cfa sp, 16
+; ZVFH-NEXT: addi sp, sp, 16
+; ZVFH-NEXT: .cfi_def_cfa_offset 0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_vv_nxv32bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: addi sp, sp, -16
+; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: sub sp, sp, a0
+; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v24, v16
+; ZVFHMIN-NEXT: addi a0, sp, 16
+; ZVFHMIN-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v0, v8
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v24, v20
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v16, v12
+; ZVFHMIN-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmax.vv v0, v0, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v0
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmax.vv v16, v16, v24
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v12, v16
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: add sp, sp, a0
+; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16
+; ZVFHMIN-NEXT: addi sp, sp, 16
+; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_vv_nxv32bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: addi sp, sp, -16
+; ZVFBFA-NEXT: .cfi_def_cfa_offset 16
+; ZVFBFA-NEXT: csrr a0, vlenb
+; ZVFBFA-NEXT: slli a0, a0, 3
+; ZVFBFA-NEXT: sub sp, sp, a0
+; ZVFBFA-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v16
+; ZVFBFA-NEXT: addi a0, sp, 16
+; ZVFBFA-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFBFA-NEXT: vfwcvt.f.f.v v0, v8
+; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v20
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFBFA-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v0, v0, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v0
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v16, v16, v24
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v12, v16
+; ZVFBFA-NEXT: csrr a0, vlenb
+; ZVFBFA-NEXT: slli a0, a0, 3
+; ZVFBFA-NEXT: add sp, sp, a0
+; ZVFBFA-NEXT: .cfi_def_cfa sp, 16
+; ZVFBFA-NEXT: addi sp, sp, 16
+; ZVFBFA-NEXT: .cfi_def_cfa_offset 0
+; ZVFBFA-NEXT: ret
%vc = call <vscale x 32 x bfloat> @llvm.maximumnum.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb)
ret <vscale x 32 x bfloat> %vc
}
define <vscale x 32 x bfloat> @vfmax_vf_nxv32bf16(<vscale x 32 x bfloat> %va, bfloat %b) {
-; CHECK-LABEL: vfmax_vf_nxv32bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; CHECK-NEXT: fmv.x.h a0, fa0
-; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
-; CHECK-NEXT: addi a1, sp, 16
-; CHECK-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
-; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
-; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma
-; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vfmax.vv v0, v8, v0
-; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v0
-; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vfmax.vv v16, v24, v16
-; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add sp, sp, a0
-; CHECK-NEXT: .cfi_def_cfa sp, 16
-; CHECK-NEXT: addi sp, sp, 16
-; CHECK-NEXT: .cfi_def_cfa_offset 0
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmax_vf_nxv32bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: addi sp, sp, -16
+; ZVFH-NEXT: .cfi_def_cfa_offset 16
+; ZVFH-NEXT: csrr a0, vlenb
+; ZVFH-NEXT: slli a0, a0, 3
+; ZVFH-NEXT: sub sp, sp, a0
+; ZVFH-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFH-NEXT: fmv.x.h a0, fa0
+; ZVFH-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v16, v8
+; ZVFH-NEXT: addi a1, sp, 16
+; ZVFH-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v24, v12
+; ZVFH-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; ZVFH-NEXT: vmv.v.x v8, a0
+; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v0, v8
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v16, v12
+; ZVFH-NEXT: addi a0, sp, 16
+; ZVFH-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFH-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT: vfmax.vv v0, v8, v0
+; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v0
+; ZVFH-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT: vfmax.vv v16, v24, v16
+; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v12, v16
+; ZVFH-NEXT: csrr a0, vlenb
+; ZVFH-NEXT: slli a0, a0, 3
+; ZVFH-NEXT: add sp, sp, a0
+; ZVFH-NEXT: .cfi_def_cfa sp, 16
+; ZVFH-NEXT: addi sp, sp, 16
+; ZVFH-NEXT: .cfi_def_cfa_offset 0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_vf_nxv32bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: addi sp, sp, -16
+; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: sub sp, sp, a0
+; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v16, v8
+; ZVFHMIN-NEXT: addi a1, sp, 16
+; ZVFHMIN-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v24, v12
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v8, a0
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v0, v8
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v16, v12
+; ZVFHMIN-NEXT: addi a0, sp, 16
+; ZVFHMIN-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmax.vv v0, v8, v0
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v0
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmax.vv v16, v24, v16
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v12, v16
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: add sp, sp, a0
+; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16
+; ZVFHMIN-NEXT: addi sp, sp, 16
+; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_vf_nxv32bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: addi sp, sp, -16
+; ZVFBFA-NEXT: .cfi_def_cfa_offset 16
+; ZVFBFA-NEXT: csrr a0, vlenb
+; ZVFBFA-NEXT: slli a0, a0, 3
+; ZVFBFA-NEXT: sub sp, sp, a0
+; ZVFBFA-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m8, ta, ma
+; ZVFBFA-NEXT: vfmv.v.f v16, fa0
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v8
+; ZVFBFA-NEXT: addi a0, sp, 16
+; ZVFBFA-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFBFA-NEXT: vfwcvt.f.f.v v0, v12
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v16
+; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v20
+; ZVFBFA-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v16, v16, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v16, v0, v24
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v12, v16
+; ZVFBFA-NEXT: csrr a0, vlenb
+; ZVFBFA-NEXT: slli a0, a0, 3
+; ZVFBFA-NEXT: add sp, sp, a0
+; ZVFBFA-NEXT: .cfi_def_cfa sp, 16
+; ZVFBFA-NEXT: addi sp, sp, 16
+; ZVFBFA-NEXT: .cfi_def_cfa_offset 0
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <vscale x 32 x bfloat> %head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer
%vc = call <vscale x 32 x bfloat> @llvm.maximumnum.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %splat)
@@ -285,6 +668,17 @@ define <vscale x 1 x half> @vfmax_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_vv_nxv1f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v9, v9, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%vc = call <vscale x 1 x half> @llvm.maximumnum.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb)
ret <vscale x 1 x half> %vc
}
@@ -306,6 +700,17 @@ define <vscale x 1 x half> @vfmax_vf_nxv1f16(<vscale x 1 x half> %va, half %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_vf_nxv1f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fcvt.s.h fa5, fa0
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vfmax.vf v9, v9, fa5
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 1 x half> poison, half %b, i32 0
%splat = shufflevector <vscale x 1 x half> %head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
%vc = call <vscale x 1 x half> @llvm.maximumnum.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %splat)
@@ -329,6 +734,17 @@ define <vscale x 2 x half> @vfmax_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_vv_nxv2f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v9, v9, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%vc = call <vscale x 2 x half> @llvm.maximumnum.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb)
ret <vscale x 2 x half> %vc
}
@@ -350,6 +766,17 @@ define <vscale x 2 x half> @vfmax_vf_nxv2f16(<vscale x 2 x half> %va, half %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_vf_nxv2f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fcvt.s.h fa5, fa0
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT: vfmax.vf v9, v9, fa5
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 2 x half> poison, half %b, i32 0
%splat = shufflevector <vscale x 2 x half> %head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
%vc = call <vscale x 2 x half> @llvm.maximumnum.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %splat)
@@ -373,6 +800,17 @@ define <vscale x 4 x half> @vfmax_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_vv_nxv4f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v10, v12, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT: ret
%vc = call <vscale x 4 x half> @llvm.maximumnum.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb)
ret <vscale x 4 x half> %vc
}
@@ -394,6 +832,17 @@ define <vscale x 4 x half> @vfmax_vf_nxv4f16(<vscale x 4 x half> %va, half %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_vf_nxv4f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fcvt.s.h fa5, fa0
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT: vfmax.vf v10, v10, fa5
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 4 x half> poison, half %b, i32 0
%splat = shufflevector <vscale x 4 x half> %head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
%vc = call <vscale x 4 x half> @llvm.maximumnum.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %splat)
@@ -417,6 +866,17 @@ define <vscale x 8 x half> @vfmax_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_vv_nxv8f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v10
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v12, v16, v12
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: ret
%vc = call <vscale x 8 x half> @llvm.maximumnum.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb)
ret <vscale x 8 x half> %vc
}
@@ -438,6 +898,17 @@ define <vscale x 8 x half> @vfmax_vf_nxv8f16(<vscale x 8 x half> %va, half %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_vf_nxv8f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fcvt.s.h fa5, fa0
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vfmax.vf v12, v12, fa5
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 8 x half> poison, half %b, i32 0
%splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
%vc = call <vscale x 8 x half> @llvm.maximumnum.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %splat)
@@ -461,6 +932,17 @@ define <vscale x 8 x half> @vfmax_fv_nxv8f16(<vscale x 8 x half> %va, half %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_fv_nxv8f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fcvt.s.h fa5, fa0
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vfmax.vf v12, v12, fa5
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 8 x half> poison, half %b, i32 0
%splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
%vc = call <vscale x 8 x half> @llvm.maximumnum.nxv8f16(<vscale x 8 x half> %splat, <vscale x 8 x half> %va)
@@ -484,6 +966,17 @@ define <vscale x 16 x half> @vfmax_vv_nxv16f16(<vscale x 16 x half> %va, <vscale
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_vv_nxv16f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v16, v24, v16
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT: ret
%vc = call <vscale x 16 x half> @llvm.maximumnum.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb)
ret <vscale x 16 x half> %vc
}
@@ -505,6 +998,17 @@ define <vscale x 16 x half> @vfmax_vf_nxv16f16(<vscale x 16 x half> %va, half %b
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_vf_nxv16f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fcvt.s.h fa5, fa0
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vfmax.vf v16, v16, fa5
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 16 x half> poison, half %b, i32 0
%splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
%vc = call <vscale x 16 x half> @llvm.maximumnum.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %splat)
@@ -549,6 +1053,38 @@ define <vscale x 32 x half> @vfmax_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
; ZVFHMIN-NEXT: addi sp, sp, 16
; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_vv_nxv32f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: addi sp, sp, -16
+; ZVFBFA-NEXT: .cfi_def_cfa_offset 16
+; ZVFBFA-NEXT: csrr a0, vlenb
+; ZVFBFA-NEXT: slli a0, a0, 3
+; ZVFBFA-NEXT: sub sp, sp, a0
+; ZVFBFA-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v16
+; ZVFBFA-NEXT: addi a0, sp, 16
+; ZVFBFA-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFBFA-NEXT: vfwcvt.f.f.v v0, v8
+; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v20
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFBFA-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v0, v0, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v0
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v16, v16, v24
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v12, v16
+; ZVFBFA-NEXT: csrr a0, vlenb
+; ZVFBFA-NEXT: slli a0, a0, 3
+; ZVFBFA-NEXT: add sp, sp, a0
+; ZVFBFA-NEXT: .cfi_def_cfa sp, 16
+; ZVFBFA-NEXT: addi sp, sp, 16
+; ZVFBFA-NEXT: .cfi_def_cfa_offset 0
+; ZVFBFA-NEXT: ret
%vc = call <vscale x 32 x half> @llvm.maximumnum.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb)
ret <vscale x 32 x half> %vc
}
@@ -596,6 +1132,43 @@ define <vscale x 32 x half> @vfmax_vf_nxv32f16(<vscale x 32 x half> %va, half %b
; ZVFHMIN-NEXT: addi sp, sp, 16
; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_vf_nxv32f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: addi sp, sp, -16
+; ZVFBFA-NEXT: .cfi_def_cfa_offset 16
+; ZVFBFA-NEXT: csrr a0, vlenb
+; ZVFBFA-NEXT: slli a0, a0, 3
+; ZVFBFA-NEXT: sub sp, sp, a0
+; ZVFBFA-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFBFA-NEXT: fmv.x.h a0, fa0
+; ZVFBFA-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFBFA-NEXT: addi a1, sp, 16
+; ZVFBFA-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v12
+; ZVFBFA-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; ZVFBFA-NEXT: vmv.v.x v8, a0
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v0, v8
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFBFA-NEXT: addi a0, sp, 16
+; ZVFBFA-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v0, v8, v0
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v0
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v16, v24, v16
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v12, v16
+; ZVFBFA-NEXT: csrr a0, vlenb
+; ZVFBFA-NEXT: slli a0, a0, 3
+; ZVFBFA-NEXT: add sp, sp, a0
+; ZVFBFA-NEXT: .cfi_def_cfa sp, 16
+; ZVFBFA-NEXT: addi sp, sp, 16
+; ZVFBFA-NEXT: .cfi_def_cfa_offset 0
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 32 x half> poison, half %b, i32 0
%splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
%vc = call <vscale x 32 x half> @llvm.maximumnum.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %splat)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll
index 339f97a73ee52..bb0025d85ab1d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll
@@ -11,178 +11,472 @@
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+zfbfmin,+zvfbfmin,+v,+m -target-abi=ilp32d \
; RUN: -verify-machineinstrs -early-live-intervals < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+zfbfmin,+zvfbfmin,+v,+m -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+zfbfmin,+zvfbfmin,+v,+m -target-abi=lp64d \
-; RUN: -verify-machineinstrs -early-live-intervals < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v,+m \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v,+m \
+; RUN: -verify-machineinstrs -early-live-intervals < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA
define <vscale x 1 x bfloat> @vfmin_nxv1bf16_vv(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b) {
-; CHECK-LABEL: vfmin_nxv1bf16_vv:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v10, v10
-; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
-; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT: vmerge.vvm v9, v10, v8, v0
-; CHECK-NEXT: vmfeq.vv v0, v8, v8
-; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0
-; CHECK-NEXT: vfmin.vv v9, v8, v9
-; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmin_nxv1bf16_vv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v10, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFH-NEXT: vmfeq.vv v0, v10, v10
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v8, v9
+; ZVFH-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFH-NEXT: vmerge.vvm v9, v10, v8, v0
+; ZVFH-NEXT: vmfeq.vv v0, v8, v8
+; ZVFH-NEXT: vmerge.vvm v8, v8, v10, v0
+; ZVFH-NEXT: vfmin.vv v9, v8, v9
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_nxv1bf16_vv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v8, v9
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0
+; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8
+; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0
+; ZVFHMIN-NEXT: vfmin.vv v9, v8, v9
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_nxv1bf16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vmfeq.vv v0, v10, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vmerge.vvm v9, v10, v8, v0
+; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT: vmerge.vvm v8, v8, v10, v0
+; ZVFBFA-NEXT: vfmin.vv v9, v8, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%v = call <vscale x 1 x bfloat> @llvm.minimum.nxv1bf16(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b)
ret <vscale x 1 x bfloat> %v
}
define <vscale x 2 x bfloat> @vfmin_nxv2bf16_vv(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) {
-; CHECK-LABEL: vfmin_nxv2bf16_vv:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v10, v10
-; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
-; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT: vmerge.vvm v9, v10, v8, v0
-; CHECK-NEXT: vmfeq.vv v0, v8, v8
-; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0
-; CHECK-NEXT: vfmin.vv v9, v8, v9
-; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmin_nxv2bf16_vv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v10, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFH-NEXT: vmfeq.vv v0, v10, v10
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v8, v9
+; ZVFH-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFH-NEXT: vmerge.vvm v9, v10, v8, v0
+; ZVFH-NEXT: vmfeq.vv v0, v8, v8
+; ZVFH-NEXT: vmerge.vvm v8, v8, v10, v0
+; ZVFH-NEXT: vfmin.vv v9, v8, v9
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_nxv2bf16_vv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v8, v9
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0
+; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8
+; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0
+; ZVFHMIN-NEXT: vfmin.vv v9, v8, v9
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_nxv2bf16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT: vmfeq.vv v0, v10, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT: vmerge.vvm v9, v10, v8, v0
+; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT: vmerge.vvm v8, v8, v10, v0
+; ZVFBFA-NEXT: vfmin.vv v9, v8, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%v = call <vscale x 2 x bfloat> @llvm.minimum.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b)
ret <vscale x 2 x bfloat> %v
}
define <vscale x 4 x bfloat> @vfmin_nxv4bf16_vv(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) {
-; CHECK-LABEL: vfmin_nxv4bf16_vv:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v10, v10
-; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vmerge.vvm v8, v10, v12, v0
-; CHECK-NEXT: vmfeq.vv v0, v12, v12
-; CHECK-NEXT: vmerge.vvm v10, v12, v10, v0
-; CHECK-NEXT: vfmin.vv v10, v10, v8
-; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmin_nxv4bf16_vv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v10, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFH-NEXT: vmfeq.vv v0, v10, v10
+; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v12, v9
+; ZVFH-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFH-NEXT: vmerge.vvm v8, v10, v12, v0
+; ZVFH-NEXT: vmfeq.vv v0, v12, v12
+; ZVFH-NEXT: vmerge.vvm v10, v12, v10, v0
+; ZVFH-NEXT: vfmin.vv v10, v10, v8
+; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v10
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_nxv4bf16_vv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v12, v9
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v12, v0
+; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12
+; ZVFHMIN-NEXT: vmerge.vvm v10, v12, v10, v0
+; ZVFHMIN-NEXT: vfmin.vv v10, v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v10
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_nxv4bf16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT: vmfeq.vv v0, v10, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT: vmerge.vvm v8, v10, v12, v0
+; ZVFBFA-NEXT: vmfeq.vv v0, v12, v12
+; ZVFBFA-NEXT: vmerge.vvm v10, v12, v10, v0
+; ZVFBFA-NEXT: vfmin.vv v10, v10, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT: ret
%v = call <vscale x 4 x bfloat> @llvm.minimum.nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b)
ret <vscale x 4 x bfloat> %v
}
define <vscale x 8 x bfloat> @vfmin_nxv8bf16_vv(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) {
-; CHECK-LABEL: vfmin_nxv8bf16_vv:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v12, v12
-; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
-; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vmerge.vvm v8, v12, v16, v0
-; CHECK-NEXT: vmfeq.vv v0, v16, v16
-; CHECK-NEXT: vmerge.vvm v12, v16, v12, v0
-; CHECK-NEXT: vfmin.vv v12, v12, v8
-; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmin_nxv8bf16_vv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v12, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFH-NEXT: vmfeq.vv v0, v12, v12
+; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v16, v10
+; ZVFH-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFH-NEXT: vmerge.vvm v8, v12, v16, v0
+; ZVFH-NEXT: vmfeq.vv v0, v16, v16
+; ZVFH-NEXT: vmerge.vvm v12, v16, v12, v0
+; ZVFH-NEXT: vfmin.vv v12, v12, v8
+; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v12
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_nxv8bf16_vv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v12, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v16, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v16, v0
+; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16
+; ZVFHMIN-NEXT: vmerge.vvm v12, v16, v12, v0
+; ZVFHMIN-NEXT: vfmin.vv v12, v12, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v12
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_nxv8bf16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vmfeq.vv v0, v12, v12
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vmerge.vvm v8, v12, v16, v0
+; ZVFBFA-NEXT: vmfeq.vv v0, v16, v16
+; ZVFBFA-NEXT: vmerge.vvm v12, v16, v12, v0
+; ZVFBFA-NEXT: vfmin.vv v12, v12, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: ret
%v = call <vscale x 8 x bfloat> @llvm.minimum.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b)
ret <vscale x 8 x bfloat> %v
}
define <vscale x 16 x bfloat> @vfmin_nxv16bf16_vv(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %b) {
-; CHECK-LABEL: vfmin_nxv16bf16_vv:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
-; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
-; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v24, v24
-; CHECK-NEXT: vmfeq.vv v7, v16, v16
-; CHECK-NEXT: vmerge.vvm v8, v24, v16, v0
-; CHECK-NEXT: vmv1r.v v0, v7
-; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0
-; CHECK-NEXT: vfmin.vv v16, v16, v8
-; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmin_nxv16bf16_vv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v16, v12
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v24, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT: vmfeq.vv v0, v24, v24
+; ZVFH-NEXT: vmfeq.vv v7, v16, v16
+; ZVFH-NEXT: vmerge.vvm v8, v24, v16, v0
+; ZVFH-NEXT: vmv1r.v v0, v7
+; ZVFH-NEXT: vmerge.vvm v16, v16, v24, v0
+; ZVFH-NEXT: vfmin.vv v16, v16, v8
+; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v16
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_nxv16bf16_vv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v16, v12
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v24, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vmfeq.vv v0, v24, v24
+; ZVFHMIN-NEXT: vmfeq.vv v7, v16, v16
+; ZVFHMIN-NEXT: vmerge.vvm v8, v24, v16, v0
+; ZVFHMIN-NEXT: vmv1r.v v0, v7
+; ZVFHMIN-NEXT: vmerge.vvm v16, v16, v24, v0
+; ZVFHMIN-NEXT: vfmin.vv v16, v16, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v16
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_nxv16bf16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vmfeq.vv v0, v24, v24
+; ZVFBFA-NEXT: vmfeq.vv v7, v16, v16
+; ZVFBFA-NEXT: vmerge.vvm v8, v24, v16, v0
+; ZVFBFA-NEXT: vmv1r.v v0, v7
+; ZVFBFA-NEXT: vmerge.vvm v16, v16, v24, v0
+; ZVFBFA-NEXT: vfmin.vv v16, v16, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT: ret
%v = call <vscale x 16 x bfloat> @llvm.minimum.nxv16bf16(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %b)
ret <vscale x 16 x bfloat> %v
}
define <vscale x 32 x bfloat> @vfmin_nxv32bf16_vv(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b) nounwind {
-; CHECK-LABEL: vfmin_nxv32bf16_vv:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
-; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT: vmv8r.v v0, v16
-; CHECK-NEXT: vmv8r.v v24, v8
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v0
-; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v24
-; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v8, v8
-; CHECK-NEXT: vmfeq.vv v3, v16, v16
-; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
-; CHECK-NEXT: vmv1r.v v0, v3
-; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0
-; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v4
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vfmin.vv v8, v8, v16
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v20
-; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vmfeq.vv v0, v8, v8
-; CHECK-NEXT: vmfeq.vv v7, v24, v24
-; CHECK-NEXT: vmerge.vvm v16, v8, v24, v0
-; CHECK-NEXT: vmv1r.v v0, v7
-; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0
-; CHECK-NEXT: vfmin.vv v16, v8, v16
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24
-; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
-; CHECK-NEXT: add sp, sp, a0
-; CHECK-NEXT: addi sp, sp, 16
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmin_nxv32bf16_vv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: addi sp, sp, -16
+; ZVFH-NEXT: csrr a0, vlenb
+; ZVFH-NEXT: slli a0, a0, 4
+; ZVFH-NEXT: sub sp, sp, a0
+; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vmv8r.v v0, v16
+; ZVFH-NEXT: vmv8r.v v24, v8
+; ZVFH-NEXT: addi a0, sp, 16
+; ZVFH-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v16, v0
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v8, v24
+; ZVFH-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT: vmfeq.vv v0, v8, v8
+; ZVFH-NEXT: vmfeq.vv v3, v16, v16
+; ZVFH-NEXT: vmerge.vvm v24, v8, v16, v0
+; ZVFH-NEXT: csrr a0, vlenb
+; ZVFH-NEXT: slli a0, a0, 3
+; ZVFH-NEXT: add a0, sp, a0
+; ZVFH-NEXT: addi a0, a0, 16
+; ZVFH-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFH-NEXT: vmv1r.v v0, v3
+; ZVFH-NEXT: vmerge.vvm v8, v16, v8, v0
+; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v24, v4
+; ZVFH-NEXT: csrr a0, vlenb
+; ZVFH-NEXT: slli a0, a0, 3
+; ZVFH-NEXT: add a0, sp, a0
+; ZVFH-NEXT: addi a0, a0, 16
+; ZVFH-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; ZVFH-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT: vfmin.vv v8, v8, v16
+; ZVFH-NEXT: csrr a0, vlenb
+; ZVFH-NEXT: slli a0, a0, 3
+; ZVFH-NEXT: add a0, sp, a0
+; ZVFH-NEXT: addi a0, a0, 16
+; ZVFH-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVFH-NEXT: addi a0, sp, 16
+; ZVFH-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v8, v20
+; ZVFH-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT: vmfeq.vv v0, v8, v8
+; ZVFH-NEXT: vmfeq.vv v7, v24, v24
+; ZVFH-NEXT: vmerge.vvm v16, v8, v24, v0
+; ZVFH-NEXT: vmv1r.v v0, v7
+; ZVFH-NEXT: vmerge.vvm v8, v24, v8, v0
+; ZVFH-NEXT: vfmin.vv v16, v8, v16
+; ZVFH-NEXT: csrr a0, vlenb
+; ZVFH-NEXT: slli a0, a0, 3
+; ZVFH-NEXT: add a0, sp, a0
+; ZVFH-NEXT: addi a0, a0, 16
+; ZVFH-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v24
+; ZVFH-NEXT: vfncvtbf16.f.f.w v12, v16
+; ZVFH-NEXT: csrr a0, vlenb
+; ZVFH-NEXT: slli a0, a0, 4
+; ZVFH-NEXT: add sp, sp, a0
+; ZVFH-NEXT: addi sp, sp, 16
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_nxv32bf16_vv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: addi sp, sp, -16
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 4
+; ZVFHMIN-NEXT: sub sp, sp, a0
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vmv8r.v v0, v16
+; ZVFHMIN-NEXT: vmv8r.v v24, v8
+; ZVFHMIN-NEXT: addi a0, sp, 16
+; ZVFHMIN-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v16, v0
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v8, v24
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8
+; ZVFHMIN-NEXT: vmfeq.vv v3, v16, v16
+; ZVFHMIN-NEXT: vmerge.vvm v24, v8, v16, v0
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT: vmv1r.v v0, v3
+; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v8, v0
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v24, v4
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmin.vv v8, v8, v16
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT: addi a0, sp, 16
+; ZVFHMIN-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v8, v20
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8
+; ZVFHMIN-NEXT: vmfeq.vv v7, v24, v24
+; ZVFHMIN-NEXT: vmerge.vvm v16, v8, v24, v0
+; ZVFHMIN-NEXT: vmv1r.v v0, v7
+; ZVFHMIN-NEXT: vmerge.vvm v8, v24, v8, v0
+; ZVFHMIN-NEXT: vfmin.vv v16, v8, v16
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v24
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v12, v16
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 4
+; ZVFHMIN-NEXT: add sp, sp, a0
+; ZVFHMIN-NEXT: addi sp, sp, 16
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_nxv32bf16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: addi sp, sp, -16
+; ZVFBFA-NEXT: csrr a0, vlenb
+; ZVFBFA-NEXT: slli a0, a0, 4
+; ZVFBFA-NEXT: sub sp, sp, a0
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vmv8r.v v0, v16
+; ZVFBFA-NEXT: vmv8r.v v24, v8
+; ZVFBFA-NEXT: addi a0, sp, 16
+; ZVFBFA-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v0
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v24
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT: vmfeq.vv v3, v16, v16
+; ZVFBFA-NEXT: vmerge.vvm v24, v8, v16, v0
+; ZVFBFA-NEXT: csrr a0, vlenb
+; ZVFBFA-NEXT: slli a0, a0, 3
+; ZVFBFA-NEXT: add a0, sp, a0
+; ZVFBFA-NEXT: addi a0, a0, 16
+; ZVFBFA-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFBFA-NEXT: vmv1r.v v0, v3
+; ZVFBFA-NEXT: vmerge.vvm v8, v16, v8, v0
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v4
+; ZVFBFA-NEXT: csrr a0, vlenb
+; ZVFBFA-NEXT: slli a0, a0, 3
+; ZVFBFA-NEXT: add a0, sp, a0
+; ZVFBFA-NEXT: addi a0, a0, 16
+; ZVFBFA-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v8, v8, v16
+; ZVFBFA-NEXT: csrr a0, vlenb
+; ZVFBFA-NEXT: slli a0, a0, 3
+; ZVFBFA-NEXT: add a0, sp, a0
+; ZVFBFA-NEXT: addi a0, a0, 16
+; ZVFBFA-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVFBFA-NEXT: addi a0, sp, 16
+; ZVFBFA-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v20
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT: vmfeq.vv v7, v24, v24
+; ZVFBFA-NEXT: vmerge.vvm v16, v8, v24, v0
+; ZVFBFA-NEXT: vmv1r.v v0, v7
+; ZVFBFA-NEXT: vmerge.vvm v8, v24, v8, v0
+; ZVFBFA-NEXT: vfmin.vv v16, v8, v16
+; ZVFBFA-NEXT: csrr a0, vlenb
+; ZVFBFA-NEXT: slli a0, a0, 3
+; ZVFBFA-NEXT: add a0, sp, a0
+; ZVFBFA-NEXT: addi a0, a0, 16
+; ZVFBFA-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v24
+; ZVFBFA-NEXT: vfncvt.f.f.w v12, v16
+; ZVFBFA-NEXT: csrr a0, vlenb
+; ZVFBFA-NEXT: slli a0, a0, 4
+; ZVFBFA-NEXT: add sp, sp, a0
+; ZVFBFA-NEXT: addi sp, sp, 16
+; ZVFBFA-NEXT: ret
%v = call <vscale x 32 x bfloat> @llvm.minimum.nxv32bf16(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b)
ret <vscale x 32 x bfloat> %v
}
@@ -214,6 +508,23 @@ define <vscale x 1 x half> @vfmin_nxv1f16_vv(<vscale x 1 x half> %a, <vscale x 1
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_nxv1f16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vmfeq.vv v0, v10, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vmerge.vvm v9, v10, v8, v0
+; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT: vmerge.vvm v8, v8, v10, v0
+; ZVFBFA-NEXT: vfmin.vv v9, v8, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%v = call <vscale x 1 x half> @llvm.minimum.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x half> %b)
ret <vscale x 1 x half> %v
}
@@ -245,6 +556,23 @@ define <vscale x 2 x half> @vfmin_nxv2f16_vv(<vscale x 2 x half> %a, <vscale x 2
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_nxv2f16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT: vmfeq.vv v0, v10, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT: vmerge.vvm v9, v10, v8, v0
+; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT: vmerge.vvm v8, v8, v10, v0
+; ZVFBFA-NEXT: vfmin.vv v9, v8, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%v = call <vscale x 2 x half> @llvm.minimum.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b)
ret <vscale x 2 x half> %v
}
@@ -276,6 +604,23 @@ define <vscale x 4 x half> @vfmin_nxv4f16_vv(<vscale x 4 x half> %a, <vscale x 4
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_nxv4f16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT: vmfeq.vv v0, v10, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT: vmerge.vvm v8, v10, v12, v0
+; ZVFBFA-NEXT: vmfeq.vv v0, v12, v12
+; ZVFBFA-NEXT: vmerge.vvm v10, v12, v10, v0
+; ZVFBFA-NEXT: vfmin.vv v10, v10, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT: ret
%v = call <vscale x 4 x half> @llvm.minimum.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b)
ret <vscale x 4 x half> %v
}
@@ -307,6 +652,23 @@ define <vscale x 8 x half> @vfmin_nxv8f16_vv(<vscale x 8 x half> %a, <vscale x 8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_nxv8f16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vmfeq.vv v0, v12, v12
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vmerge.vvm v8, v12, v16, v0
+; ZVFBFA-NEXT: vmfeq.vv v0, v16, v16
+; ZVFBFA-NEXT: vmerge.vvm v12, v16, v12, v0
+; ZVFBFA-NEXT: vfmin.vv v12, v12, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: ret
%v = call <vscale x 8 x half> @llvm.minimum.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b)
ret <vscale x 8 x half> %v
}
@@ -337,6 +699,22 @@ define <vscale x 16 x half> @vfmin_nxv16f16_vv(<vscale x 16 x half> %a, <vscale
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_nxv16f16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vmfeq.vv v0, v24, v24
+; ZVFBFA-NEXT: vmfeq.vv v7, v16, v16
+; ZVFBFA-NEXT: vmerge.vvm v8, v24, v16, v0
+; ZVFBFA-NEXT: vmv1r.v v0, v7
+; ZVFBFA-NEXT: vmerge.vvm v16, v16, v24, v0
+; ZVFBFA-NEXT: vfmin.vv v16, v16, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT: ret
%v = call <vscale x 16 x half> @llvm.minimum.nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x half> %b)
ret <vscale x 16 x half> %v
}
@@ -415,6 +793,69 @@ define <vscale x 32 x half> @vfmin_nxv32f16_vv(<vscale x 32 x half> %a, <vscale
; ZVFHMIN-NEXT: add sp, sp, a0
; ZVFHMIN-NEXT: addi sp, sp, 16
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_nxv32f16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: addi sp, sp, -16
+; ZVFBFA-NEXT: csrr a0, vlenb
+; ZVFBFA-NEXT: slli a0, a0, 4
+; ZVFBFA-NEXT: sub sp, sp, a0
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vmv8r.v v0, v16
+; ZVFBFA-NEXT: vmv8r.v v24, v8
+; ZVFBFA-NEXT: addi a0, sp, 16
+; ZVFBFA-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v0
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v24
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT: vmfeq.vv v3, v16, v16
+; ZVFBFA-NEXT: vmerge.vvm v24, v8, v16, v0
+; ZVFBFA-NEXT: csrr a0, vlenb
+; ZVFBFA-NEXT: slli a0, a0, 3
+; ZVFBFA-NEXT: add a0, sp, a0
+; ZVFBFA-NEXT: addi a0, a0, 16
+; ZVFBFA-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFBFA-NEXT: vmv1r.v v0, v3
+; ZVFBFA-NEXT: vmerge.vvm v8, v16, v8, v0
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v4
+; ZVFBFA-NEXT: csrr a0, vlenb
+; ZVFBFA-NEXT: slli a0, a0, 3
+; ZVFBFA-NEXT: add a0, sp, a0
+; ZVFBFA-NEXT: addi a0, a0, 16
+; ZVFBFA-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v8, v8, v16
+; ZVFBFA-NEXT: csrr a0, vlenb
+; ZVFBFA-NEXT: slli a0, a0, 3
+; ZVFBFA-NEXT: add a0, sp, a0
+; ZVFBFA-NEXT: addi a0, a0, 16
+; ZVFBFA-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVFBFA-NEXT: addi a0, sp, 16
+; ZVFBFA-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v20
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT: vmfeq.vv v7, v24, v24
+; ZVFBFA-NEXT: vmerge.vvm v16, v8, v24, v0
+; ZVFBFA-NEXT: vmv1r.v v0, v7
+; ZVFBFA-NEXT: vmerge.vvm v8, v24, v8, v0
+; ZVFBFA-NEXT: vfmin.vv v16, v8, v16
+; ZVFBFA-NEXT: csrr a0, vlenb
+; ZVFBFA-NEXT: slli a0, a0, 3
+; ZVFBFA-NEXT: add a0, sp, a0
+; ZVFBFA-NEXT: addi a0, a0, 16
+; ZVFBFA-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v24
+; ZVFBFA-NEXT: vfncvt.f.f.w v12, v16
+; ZVFBFA-NEXT: csrr a0, vlenb
+; ZVFBFA-NEXT: slli a0, a0, 4
+; ZVFBFA-NEXT: add sp, sp, a0
+; ZVFBFA-NEXT: addi sp, sp, 16
+; ZVFBFA-NEXT: ret
%v = call <vscale x 32 x half> @llvm.minimum.nxv32f16(<vscale x 32 x half> %a, <vscale x 32 x half> %b)
ret <vscale x 32 x half> %v
}
@@ -564,6 +1005,17 @@ define <vscale x 1 x half> @vfmin_nxv1f16_vv_nnan(<vscale x 1 x half> %a, <vscal
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_nxv1f16_vv_nnan:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v9, v9, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%v = call nnan <vscale x 1 x half> @llvm.minimum.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x half> %b)
ret <vscale x 1 x half> %v
}
@@ -595,6 +1047,24 @@ define <vscale x 1 x half> @vfmin_nxv1f16_vv_nnana(<vscale x 1 x half> %a, <vsca
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_nxv1f16_vv_nnana:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vfadd.vv v9, v10, v10
+; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
+; ZVFBFA-NEXT: vfncvt.f.f.w v10, v9
+; ZVFBFA-NEXT: vmv1r.v v9, v8
+; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v10, v0.t
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v9, v9, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%c = fadd nnan <vscale x 1 x half> %a, %a
%v = call <vscale x 1 x half> @llvm.minimum.nxv1f16(<vscale x 1 x half> %c, <vscale x 1 x half> %b)
ret <vscale x 1 x half> %v
@@ -627,6 +1097,24 @@ define <vscale x 1 x half> @vfmin_nxv1f16_vv_nnanb(<vscale x 1 x half> %a, <vsca
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_nxv1f16_vv_nnanb:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vfadd.vv v8, v10, v10
+; ZVFBFA-NEXT: vmfeq.vv v0, v9, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
+; ZVFBFA-NEXT: vfncvt.f.f.w v10, v8
+; ZVFBFA-NEXT: vmv1r.v v8, v9
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v10, v0.t
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v9, v9, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%c = fadd nnan <vscale x 1 x half> %b, %b
%v = call <vscale x 1 x half> @llvm.minimum.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x half> %c)
ret <vscale x 1 x half> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/fminimumnum-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fminimumnum-sdnode.ll
index a52625d9e8ef4..f2106256c7937 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fminimumnum-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fminimumnum-sdnode.ll
@@ -11,33 +11,83 @@
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
; RUN: --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v \
+; RUN: -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFBFA
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v \
+; RUN: -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFBFA
define <vscale x 1 x bfloat> @vfmin_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb) {
-; CHECK-LABEL: vfmin_vv_nxv1bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
-; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
-; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT: vfmin.vv v9, v9, v10
-; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmin_vv_nxv1bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v10, v9
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v9, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFH-NEXT: vfmin.vv v9, v9, v10
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_vv_nxv1bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfmin.vv v9, v9, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_vv_nxv1bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v9, v9, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%vc = call <vscale x 1 x bfloat> @llvm.minimumnum.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb)
ret <vscale x 1 x bfloat> %vc
}
define <vscale x 1 x bfloat> @vfmin_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloat %b) {
-; CHECK-LABEL: vfmin_vf_nxv1bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
-; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT: vfmin.vf v9, v9, fa5
-; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmin_vf_nxv1bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v9, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFH-NEXT: vfmin.vf v9, v9, fa5
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_vf_nxv1bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfmin.vf v9, v9, fa5
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_vf_nxv1bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vfmin.vf v9, v9, fa5
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <vscale x 1 x bfloat> %head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
%vc = call <vscale x 1 x bfloat> @llvm.minimumnum.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %splat)
@@ -45,31 +95,75 @@ define <vscale x 1 x bfloat> @vfmin_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloa
}
define <vscale x 2 x bfloat> @vfmin_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb) {
-; CHECK-LABEL: vfmin_vv_nxv2bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
-; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
-; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT: vfmin.vv v9, v9, v10
-; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmin_vv_nxv2bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v10, v9
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v9, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFH-NEXT: vfmin.vv v9, v9, v10
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_vv_nxv2bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfmin.vv v9, v9, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_vv_nxv2bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v9, v9, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%vc = call <vscale x 2 x bfloat> @llvm.minimumnum.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb)
ret <vscale x 2 x bfloat> %vc
}
define <vscale x 2 x bfloat> @vfmin_vf_nxv2bf16(<vscale x 2 x bfloat> %va, bfloat %b) {
-; CHECK-LABEL: vfmin_vf_nxv2bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
-; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT: vfmin.vf v9, v9, fa5
-; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmin_vf_nxv2bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v9, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFH-NEXT: vfmin.vf v9, v9, fa5
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_vf_nxv2bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfmin.vf v9, v9, fa5
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_vf_nxv2bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT: vfmin.vf v9, v9, fa5
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <vscale x 2 x bfloat> %head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
%vc = call <vscale x 2 x bfloat> @llvm.minimumnum.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %splat)
@@ -77,31 +171,75 @@ define <vscale x 2 x bfloat> @vfmin_vf_nxv2bf16(<vscale x 2 x bfloat> %va, bfloa
}
define <vscale x 4 x bfloat> @vfmin_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb) {
-; CHECK-LABEL: vfmin_vv_nxv4bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfmin.vv v10, v12, v10
-; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmin_vv_nxv4bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v10, v9
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v12, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFH-NEXT: vfmin.vv v10, v12, v10
+; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v10
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_vv_nxv4bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v12, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfmin.vv v10, v12, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v10
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_vv_nxv4bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v10, v12, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT: ret
%vc = call <vscale x 4 x bfloat> @llvm.minimumnum.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb)
ret <vscale x 4 x bfloat> %vc
}
define <vscale x 4 x bfloat> @vfmin_vf_nxv4bf16(<vscale x 4 x bfloat> %va, bfloat %b) {
-; CHECK-LABEL: vfmin_vf_nxv4bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfmin.vf v10, v10, fa5
-; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmin_vf_nxv4bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v10, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFH-NEXT: vfmin.vf v10, v10, fa5
+; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v10
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_vf_nxv4bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfmin.vf v10, v10, fa5
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v10
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_vf_nxv4bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT: vfmin.vf v10, v10, fa5
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <vscale x 4 x bfloat> %head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
%vc = call <vscale x 4 x bfloat> @llvm.minimumnum.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %splat)
@@ -109,31 +247,75 @@ define <vscale x 4 x bfloat> @vfmin_vf_nxv4bf16(<vscale x 4 x bfloat> %va, bfloa
}
define <vscale x 8 x bfloat> @vfmin_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb) {
-; CHECK-LABEL: vfmin_vv_nxv8bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
-; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vfmin.vv v12, v16, v12
-; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmin_vv_nxv8bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v12, v10
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v16, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFH-NEXT: vfmin.vv v12, v16, v12
+; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v12
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_vv_nxv8bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v12, v10
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v16, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vfmin.vv v12, v16, v12
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v12
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_vv_nxv8bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v10
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v12, v16, v12
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: ret
%vc = call <vscale x 8 x bfloat> @llvm.minimumnum.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb)
ret <vscale x 8 x bfloat> %vc
}
define <vscale x 8 x bfloat> @vfmin_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
-; CHECK-LABEL: vfmin_vf_nxv8bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vfmin.vf v12, v12, fa5
-; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmin_vf_nxv8bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v12, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFH-NEXT: vfmin.vf v12, v12, fa5
+; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v12
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_vf_nxv8bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v12, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vfmin.vf v12, v12, fa5
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v12
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_vf_nxv8bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vfmin.vf v12, v12, fa5
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
%vc = call <vscale x 8 x bfloat> @llvm.minimumnum.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %splat)
@@ -141,16 +323,38 @@ define <vscale x 8 x bfloat> @vfmin_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloa
}
define <vscale x 8 x bfloat> @vfmin_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloat %b) {
-; CHECK-LABEL: vfmin_fv_nxv8bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vfmin.vf v12, v12, fa5
-; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmin_fv_nxv8bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v12, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFH-NEXT: vfmin.vf v12, v12, fa5
+; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v12
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_fv_nxv8bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v12, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vfmin.vf v12, v12, fa5
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v12
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_fv_nxv8bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vfmin.vf v12, v12, fa5
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
%vc = call <vscale x 8 x bfloat> @llvm.minimumnum.nxv8bf16(<vscale x 8 x bfloat> %splat, <vscale x 8 x bfloat> %va)
@@ -158,31 +362,75 @@ define <vscale x 8 x bfloat> @vfmin_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloa
}
define <vscale x 16 x bfloat> @vfmin_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb) {
-; CHECK-LABEL: vfmin_vv_nxv16bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
-; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
-; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vfmin.vv v16, v24, v16
-; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmin_vv_nxv16bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v16, v12
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v24, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT: vfmin.vv v16, v24, v16
+; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v16
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_vv_nxv16bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v16, v12
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v24, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmin.vv v16, v24, v16
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v16
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_vv_nxv16bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v16, v24, v16
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT: ret
%vc = call <vscale x 16 x bfloat> @llvm.minimumnum.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb)
ret <vscale x 16 x bfloat> %vc
}
define <vscale x 16 x bfloat> @vfmin_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bfloat %b) {
-; CHECK-LABEL: vfmin_vf_nxv16bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
-; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vfmin.vf v16, v16, fa5
-; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmin_vf_nxv16bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v16, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT: vfmin.vf v16, v16, fa5
+; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v16
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_vf_nxv16bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v16, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmin.vf v16, v16, fa5
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v16
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_vf_nxv16bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vfmin.vf v16, v16, fa5
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <vscale x 16 x bfloat> %head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
%vc = call <vscale x 16 x bfloat> @llvm.minimumnum.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %splat)
@@ -190,78 +438,213 @@ define <vscale x 16 x bfloat> @vfmin_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bf
}
define <vscale x 32 x bfloat> @vfmin_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb) {
-; CHECK-LABEL: vfmin_vv_nxv32bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
-; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
-; CHECK-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vfmin.vv v0, v0, v8
-; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v0
-; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vfmin.vv v16, v16, v24
-; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add sp, sp, a0
-; CHECK-NEXT: .cfi_def_cfa sp, 16
-; CHECK-NEXT: addi sp, sp, 16
-; CHECK-NEXT: .cfi_def_cfa_offset 0
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmin_vv_nxv32bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: addi sp, sp, -16
+; ZVFH-NEXT: .cfi_def_cfa_offset 16
+; ZVFH-NEXT: csrr a0, vlenb
+; ZVFH-NEXT: slli a0, a0, 3
+; ZVFH-NEXT: sub sp, sp, a0
+; ZVFH-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v24, v16
+; ZVFH-NEXT: addi a0, sp, 16
+; ZVFH-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v0, v8
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v24, v20
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v16, v12
+; ZVFH-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFH-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT: vfmin.vv v0, v0, v8
+; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v0
+; ZVFH-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT: vfmin.vv v16, v16, v24
+; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v12, v16
+; ZVFH-NEXT: csrr a0, vlenb
+; ZVFH-NEXT: slli a0, a0, 3
+; ZVFH-NEXT: add sp, sp, a0
+; ZVFH-NEXT: .cfi_def_cfa sp, 16
+; ZVFH-NEXT: addi sp, sp, 16
+; ZVFH-NEXT: .cfi_def_cfa_offset 0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_vv_nxv32bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: addi sp, sp, -16
+; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: sub sp, sp, a0
+; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v24, v16
+; ZVFHMIN-NEXT: addi a0, sp, 16
+; ZVFHMIN-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v0, v8
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v24, v20
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v16, v12
+; ZVFHMIN-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmin.vv v0, v0, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v0
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmin.vv v16, v16, v24
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v12, v16
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: add sp, sp, a0
+; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16
+; ZVFHMIN-NEXT: addi sp, sp, 16
+; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_vv_nxv32bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: addi sp, sp, -16
+; ZVFBFA-NEXT: .cfi_def_cfa_offset 16
+; ZVFBFA-NEXT: csrr a0, vlenb
+; ZVFBFA-NEXT: slli a0, a0, 3
+; ZVFBFA-NEXT: sub sp, sp, a0
+; ZVFBFA-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v16
+; ZVFBFA-NEXT: addi a0, sp, 16
+; ZVFBFA-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFBFA-NEXT: vfwcvt.f.f.v v0, v8
+; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v20
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFBFA-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v0, v0, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v0
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v16, v16, v24
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v12, v16
+; ZVFBFA-NEXT: csrr a0, vlenb
+; ZVFBFA-NEXT: slli a0, a0, 3
+; ZVFBFA-NEXT: add sp, sp, a0
+; ZVFBFA-NEXT: .cfi_def_cfa sp, 16
+; ZVFBFA-NEXT: addi sp, sp, 16
+; ZVFBFA-NEXT: .cfi_def_cfa_offset 0
+; ZVFBFA-NEXT: ret
%vc = call <vscale x 32 x bfloat> @llvm.minimumnum.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb)
ret <vscale x 32 x bfloat> %vc
}
define <vscale x 32 x bfloat> @vfmin_vf_nxv32bf16(<vscale x 32 x bfloat> %va, bfloat %b) {
-; CHECK-LABEL: vfmin_vf_nxv32bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; CHECK-NEXT: fmv.x.h a0, fa0
-; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
-; CHECK-NEXT: addi a1, sp, 16
-; CHECK-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
-; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
-; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma
-; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vfmin.vv v0, v8, v0
-; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v0
-; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vfmin.vv v16, v24, v16
-; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add sp, sp, a0
-; CHECK-NEXT: .cfi_def_cfa sp, 16
-; CHECK-NEXT: addi sp, sp, 16
-; CHECK-NEXT: .cfi_def_cfa_offset 0
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmin_vf_nxv32bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: addi sp, sp, -16
+; ZVFH-NEXT: .cfi_def_cfa_offset 16
+; ZVFH-NEXT: csrr a0, vlenb
+; ZVFH-NEXT: slli a0, a0, 3
+; ZVFH-NEXT: sub sp, sp, a0
+; ZVFH-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFH-NEXT: fmv.x.h a0, fa0
+; ZVFH-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v16, v8
+; ZVFH-NEXT: addi a1, sp, 16
+; ZVFH-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v24, v12
+; ZVFH-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; ZVFH-NEXT: vmv.v.x v8, a0
+; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v0, v8
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v16, v12
+; ZVFH-NEXT: addi a0, sp, 16
+; ZVFH-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFH-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT: vfmin.vv v0, v8, v0
+; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v0
+; ZVFH-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT: vfmin.vv v16, v24, v16
+; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v12, v16
+; ZVFH-NEXT: csrr a0, vlenb
+; ZVFH-NEXT: slli a0, a0, 3
+; ZVFH-NEXT: add sp, sp, a0
+; ZVFH-NEXT: .cfi_def_cfa sp, 16
+; ZVFH-NEXT: addi sp, sp, 16
+; ZVFH-NEXT: .cfi_def_cfa_offset 0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_vf_nxv32bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: addi sp, sp, -16
+; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: sub sp, sp, a0
+; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v16, v8
+; ZVFHMIN-NEXT: addi a1, sp, 16
+; ZVFHMIN-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v24, v12
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v8, a0
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v0, v8
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v16, v12
+; ZVFHMIN-NEXT: addi a0, sp, 16
+; ZVFHMIN-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmin.vv v0, v8, v0
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v0
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmin.vv v16, v24, v16
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v12, v16
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: add sp, sp, a0
+; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16
+; ZVFHMIN-NEXT: addi sp, sp, 16
+; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_vf_nxv32bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: addi sp, sp, -16
+; ZVFBFA-NEXT: .cfi_def_cfa_offset 16
+; ZVFBFA-NEXT: csrr a0, vlenb
+; ZVFBFA-NEXT: slli a0, a0, 3
+; ZVFBFA-NEXT: sub sp, sp, a0
+; ZVFBFA-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m8, ta, ma
+; ZVFBFA-NEXT: vfmv.v.f v16, fa0
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v8
+; ZVFBFA-NEXT: addi a0, sp, 16
+; ZVFBFA-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFBFA-NEXT: vfwcvt.f.f.v v0, v12
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v16
+; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v20
+; ZVFBFA-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v16, v16, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v16, v0, v24
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v12, v16
+; ZVFBFA-NEXT: csrr a0, vlenb
+; ZVFBFA-NEXT: slli a0, a0, 3
+; ZVFBFA-NEXT: add sp, sp, a0
+; ZVFBFA-NEXT: .cfi_def_cfa sp, 16
+; ZVFBFA-NEXT: addi sp, sp, 16
+; ZVFBFA-NEXT: .cfi_def_cfa_offset 0
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <vscale x 32 x bfloat> %head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer
%vc = call <vscale x 32 x bfloat> @llvm.minimumnum.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %splat)
@@ -285,6 +668,17 @@ define <vscale x 1 x half> @vfmin_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_vv_nxv1f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v9, v9, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%vc = call <vscale x 1 x half> @llvm.minimumnum.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb)
ret <vscale x 1 x half> %vc
}
@@ -306,6 +700,17 @@ define <vscale x 1 x half> @vfmin_vf_nxv1f16(<vscale x 1 x half> %va, half %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_vf_nxv1f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fcvt.s.h fa5, fa0
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vfmin.vf v9, v9, fa5
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 1 x half> poison, half %b, i32 0
%splat = shufflevector <vscale x 1 x half> %head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
%vc = call <vscale x 1 x half> @llvm.minimumnum.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %splat)
@@ -329,6 +734,17 @@ define <vscale x 2 x half> @vfmin_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_vv_nxv2f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v9, v9, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%vc = call <vscale x 2 x half> @llvm.minimumnum.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb)
ret <vscale x 2 x half> %vc
}
@@ -350,6 +766,17 @@ define <vscale x 2 x half> @vfmin_vf_nxv2f16(<vscale x 2 x half> %va, half %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_vf_nxv2f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fcvt.s.h fa5, fa0
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT: vfmin.vf v9, v9, fa5
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 2 x half> poison, half %b, i32 0
%splat = shufflevector <vscale x 2 x half> %head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
%vc = call <vscale x 2 x half> @llvm.minimumnum.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %splat)
@@ -373,6 +800,17 @@ define <vscale x 4 x half> @vfmin_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_vv_nxv4f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v10, v12, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT: ret
%vc = call <vscale x 4 x half> @llvm.minimumnum.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb)
ret <vscale x 4 x half> %vc
}
@@ -394,6 +832,17 @@ define <vscale x 4 x half> @vfmin_vf_nxv4f16(<vscale x 4 x half> %va, half %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_vf_nxv4f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fcvt.s.h fa5, fa0
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT: vfmin.vf v10, v10, fa5
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 4 x half> poison, half %b, i32 0
%splat = shufflevector <vscale x 4 x half> %head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
%vc = call <vscale x 4 x half> @llvm.minimumnum.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %splat)
@@ -417,6 +866,17 @@ define <vscale x 8 x half> @vfmin_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_vv_nxv8f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v10
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v12, v16, v12
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: ret
%vc = call <vscale x 8 x half> @llvm.minimumnum.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb)
ret <vscale x 8 x half> %vc
}
@@ -438,6 +898,17 @@ define <vscale x 8 x half> @vfmin_vf_nxv8f16(<vscale x 8 x half> %va, half %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_vf_nxv8f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fcvt.s.h fa5, fa0
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vfmin.vf v12, v12, fa5
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 8 x half> poison, half %b, i32 0
%splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
%vc = call <vscale x 8 x half> @llvm.minimumnum.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %splat)
@@ -461,6 +932,17 @@ define <vscale x 8 x half> @vfmin_fv_nxv8f16(<vscale x 8 x half> %va, half %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_fv_nxv8f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fcvt.s.h fa5, fa0
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vfmin.vf v12, v12, fa5
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 8 x half> poison, half %b, i32 0
%splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
%vc = call <vscale x 8 x half> @llvm.minimumnum.nxv8f16(<vscale x 8 x half> %splat, <vscale x 8 x half> %va)
@@ -484,6 +966,17 @@ define <vscale x 16 x half> @vfmin_vv_nxv16f16(<vscale x 16 x half> %va, <vscale
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_vv_nxv16f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v16, v24, v16
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT: ret
%vc = call <vscale x 16 x half> @llvm.minimumnum.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb)
ret <vscale x 16 x half> %vc
}
@@ -505,6 +998,17 @@ define <vscale x 16 x half> @vfmin_vf_nxv16f16(<vscale x 16 x half> %va, half %b
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_vf_nxv16f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fcvt.s.h fa5, fa0
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vfmin.vf v16, v16, fa5
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 16 x half> poison, half %b, i32 0
%splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
%vc = call <vscale x 16 x half> @llvm.minimumnum.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %splat)
@@ -549,6 +1053,38 @@ define <vscale x 32 x half> @vfmin_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
; ZVFHMIN-NEXT: addi sp, sp, 16
; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_vv_nxv32f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: addi sp, sp, -16
+; ZVFBFA-NEXT: .cfi_def_cfa_offset 16
+; ZVFBFA-NEXT: csrr a0, vlenb
+; ZVFBFA-NEXT: slli a0, a0, 3
+; ZVFBFA-NEXT: sub sp, sp, a0
+; ZVFBFA-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v16
+; ZVFBFA-NEXT: addi a0, sp, 16
+; ZVFBFA-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFBFA-NEXT: vfwcvt.f.f.v v0, v8
+; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v20
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFBFA-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v0, v0, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v0
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v16, v16, v24
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v12, v16
+; ZVFBFA-NEXT: csrr a0, vlenb
+; ZVFBFA-NEXT: slli a0, a0, 3
+; ZVFBFA-NEXT: add sp, sp, a0
+; ZVFBFA-NEXT: .cfi_def_cfa sp, 16
+; ZVFBFA-NEXT: addi sp, sp, 16
+; ZVFBFA-NEXT: .cfi_def_cfa_offset 0
+; ZVFBFA-NEXT: ret
%vc = call <vscale x 32 x half> @llvm.minimumnum.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb)
ret <vscale x 32 x half> %vc
}
@@ -596,6 +1132,43 @@ define <vscale x 32 x half> @vfmin_vf_nxv32f16(<vscale x 32 x half> %va, half %b
; ZVFHMIN-NEXT: addi sp, sp, 16
; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_vf_nxv32f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: addi sp, sp, -16
+; ZVFBFA-NEXT: .cfi_def_cfa_offset 16
+; ZVFBFA-NEXT: csrr a0, vlenb
+; ZVFBFA-NEXT: slli a0, a0, 3
+; ZVFBFA-NEXT: sub sp, sp, a0
+; ZVFBFA-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFBFA-NEXT: fmv.x.h a0, fa0
+; ZVFBFA-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFBFA-NEXT: addi a1, sp, 16
+; ZVFBFA-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v12
+; ZVFBFA-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; ZVFBFA-NEXT: vmv.v.x v8, a0
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v0, v8
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFBFA-NEXT: addi a0, sp, 16
+; ZVFBFA-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v0, v8, v0
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v0
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v16, v24, v16
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v12, v16
+; ZVFBFA-NEXT: csrr a0, vlenb
+; ZVFBFA-NEXT: slli a0, a0, 3
+; ZVFBFA-NEXT: add sp, sp, a0
+; ZVFBFA-NEXT: .cfi_def_cfa sp, 16
+; ZVFBFA-NEXT: addi sp, sp, 16
+; ZVFBFA-NEXT: .cfi_def_cfa_offset 0
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 32 x half> poison, half %b, i32 0
%splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
%vc = call <vscale x 32 x half> @llvm.minimumnum.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %splat)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmax-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmax-sdnode.ll
index 63bfe1dfad5fc..e228f8459f108 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmax-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmax-sdnode.ll
@@ -11,33 +11,83 @@
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
; RUN: --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v \
+; RUN: -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFBFA
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v \
+; RUN: -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFBFA
define <vscale x 1 x bfloat> @vfmax_nxv1bf16_vv(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b) {
-; CHECK-LABEL: vfmax_nxv1bf16_vv:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
-; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
-; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT: vfmax.vv v9, v9, v10
-; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmax_nxv1bf16_vv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v10, v9
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v9, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFH-NEXT: vfmax.vv v9, v9, v10
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_nxv1bf16_vv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfmax.vv v9, v9, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_nxv1bf16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v9, v9, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%v = call <vscale x 1 x bfloat> @llvm.maxnum.nxv1bf16(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b)
ret <vscale x 1 x bfloat> %v
}
define <vscale x 1 x bfloat> @vfmax_nxv1bf16_vf(<vscale x 1 x bfloat> %a, bfloat %b) {
-; CHECK-LABEL: vfmax_nxv1bf16_vf:
-; CHECK: # %bb.0:
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
-; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT: vfmax.vf v9, v9, fa5
-; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmax_nxv1bf16_vf:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v9, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFH-NEXT: vfmax.vf v9, v9, fa5
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_nxv1bf16_vf:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfmax.vf v9, v9, fa5
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_nxv1bf16_vf:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vfmax.vf v9, v9, fa5
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <vscale x 1 x bfloat> %head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
%v = call <vscale x 1 x bfloat> @llvm.maxnum.nxv1bf16(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %splat)
@@ -45,31 +95,75 @@ define <vscale x 1 x bfloat> @vfmax_nxv1bf16_vf(<vscale x 1 x bfloat> %a, bfloat
}
define <vscale x 2 x bfloat> @vfmax_nxv2bf16_vv(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) {
-; CHECK-LABEL: vfmax_nxv2bf16_vv:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
-; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
-; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT: vfmax.vv v9, v9, v10
-; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmax_nxv2bf16_vv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v10, v9
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v9, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFH-NEXT: vfmax.vv v9, v9, v10
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_nxv2bf16_vv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfmax.vv v9, v9, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_nxv2bf16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v9, v9, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%v = call <vscale x 2 x bfloat> @llvm.maxnum.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b)
ret <vscale x 2 x bfloat> %v
}
define <vscale x 2 x bfloat> @vfmax_nxv2bf16_vf(<vscale x 2 x bfloat> %a, bfloat %b) {
-; CHECK-LABEL: vfmax_nxv2bf16_vf:
-; CHECK: # %bb.0:
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
-; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT: vfmax.vf v9, v9, fa5
-; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmax_nxv2bf16_vf:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v9, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFH-NEXT: vfmax.vf v9, v9, fa5
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_nxv2bf16_vf:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfmax.vf v9, v9, fa5
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_nxv2bf16_vf:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT: vfmax.vf v9, v9, fa5
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <vscale x 2 x bfloat> %head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
%v = call <vscale x 2 x bfloat> @llvm.maxnum.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %splat)
@@ -77,31 +171,75 @@ define <vscale x 2 x bfloat> @vfmax_nxv2bf16_vf(<vscale x 2 x bfloat> %a, bfloat
}
define <vscale x 4 x bfloat> @vfmax_nxv4bf16_vv(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) {
-; CHECK-LABEL: vfmax_nxv4bf16_vv:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfmax.vv v10, v12, v10
-; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmax_nxv4bf16_vv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v10, v9
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v12, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFH-NEXT: vfmax.vv v10, v12, v10
+; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v10
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_nxv4bf16_vv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v12, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfmax.vv v10, v12, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v10
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_nxv4bf16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v10, v12, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT: ret
%v = call <vscale x 4 x bfloat> @llvm.maxnum.nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b)
ret <vscale x 4 x bfloat> %v
}
define <vscale x 4 x bfloat> @vfmax_nxv4bf16_vf(<vscale x 4 x bfloat> %a, bfloat %b) {
-; CHECK-LABEL: vfmax_nxv4bf16_vf:
-; CHECK: # %bb.0:
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfmax.vf v10, v10, fa5
-; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmax_nxv4bf16_vf:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v10, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFH-NEXT: vfmax.vf v10, v10, fa5
+; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v10
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_nxv4bf16_vf:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfmax.vf v10, v10, fa5
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v10
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_nxv4bf16_vf:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT: vfmax.vf v10, v10, fa5
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <vscale x 4 x bfloat> %head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
%v = call <vscale x 4 x bfloat> @llvm.maxnum.nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %splat)
@@ -109,31 +247,75 @@ define <vscale x 4 x bfloat> @vfmax_nxv4bf16_vf(<vscale x 4 x bfloat> %a, bfloat
}
define <vscale x 8 x bfloat> @vfmax_nxv8bf16_vv(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) {
-; CHECK-LABEL: vfmax_nxv8bf16_vv:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
-; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vfmax.vv v12, v16, v12
-; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmax_nxv8bf16_vv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v12, v10
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v16, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFH-NEXT: vfmax.vv v12, v16, v12
+; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v12
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_nxv8bf16_vv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v12, v10
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v16, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vfmax.vv v12, v16, v12
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v12
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_nxv8bf16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v10
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v12, v16, v12
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: ret
%v = call <vscale x 8 x bfloat> @llvm.maxnum.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b)
ret <vscale x 8 x bfloat> %v
}
define <vscale x 8 x bfloat> @vfmax_nxv8bf16_vf(<vscale x 8 x bfloat> %a, bfloat %b) {
-; CHECK-LABEL: vfmax_nxv8bf16_vf:
-; CHECK: # %bb.0:
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vfmax.vf v12, v12, fa5
-; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmax_nxv8bf16_vf:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v12, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFH-NEXT: vfmax.vf v12, v12, fa5
+; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v12
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_nxv8bf16_vf:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v12, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vfmax.vf v12, v12, fa5
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v12
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_nxv8bf16_vf:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vfmax.vf v12, v12, fa5
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
%v = call <vscale x 8 x bfloat> @llvm.maxnum.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %splat)
@@ -141,31 +323,75 @@ define <vscale x 8 x bfloat> @vfmax_nxv8bf16_vf(<vscale x 8 x bfloat> %a, bfloat
}
define <vscale x 16 x bfloat> @vfmax_nxv16bf16_vv(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %b) {
-; CHECK-LABEL: vfmax_nxv16bf16_vv:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
-; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
-; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vfmax.vv v16, v24, v16
-; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmax_nxv16bf16_vv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v16, v12
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v24, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT: vfmax.vv v16, v24, v16
+; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v16
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_nxv16bf16_vv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v16, v12
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v24, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmax.vv v16, v24, v16
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v16
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_nxv16bf16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v16, v24, v16
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT: ret
%v = call <vscale x 16 x bfloat> @llvm.maxnum.nxv16bf16(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %b)
ret <vscale x 16 x bfloat> %v
}
define <vscale x 16 x bfloat> @vfmax_nxv16bf16_vf(<vscale x 16 x bfloat> %a, bfloat %b) {
-; CHECK-LABEL: vfmax_nxv16bf16_vf:
-; CHECK: # %bb.0:
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
-; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vfmax.vf v16, v16, fa5
-; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmax_nxv16bf16_vf:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v16, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT: vfmax.vf v16, v16, fa5
+; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v16
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_nxv16bf16_vf:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v16, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmax.vf v16, v16, fa5
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v16
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_nxv16bf16_vf:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vfmax.vf v16, v16, fa5
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <vscale x 16 x bfloat> %head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
%v = call <vscale x 16 x bfloat> @llvm.maxnum.nxv16bf16(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %splat)
@@ -173,78 +399,213 @@ define <vscale x 16 x bfloat> @vfmax_nxv16bf16_vf(<vscale x 16 x bfloat> %a, bfl
}
define <vscale x 32 x bfloat> @vfmax_nxv32bf16_vv(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b) {
-; CHECK-LABEL: vfmax_nxv32bf16_vv:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
-; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
-; CHECK-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vfmax.vv v0, v0, v8
-; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v0
-; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vfmax.vv v16, v16, v24
-; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add sp, sp, a0
-; CHECK-NEXT: .cfi_def_cfa sp, 16
-; CHECK-NEXT: addi sp, sp, 16
-; CHECK-NEXT: .cfi_def_cfa_offset 0
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmax_nxv32bf16_vv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: addi sp, sp, -16
+; ZVFH-NEXT: .cfi_def_cfa_offset 16
+; ZVFH-NEXT: csrr a0, vlenb
+; ZVFH-NEXT: slli a0, a0, 3
+; ZVFH-NEXT: sub sp, sp, a0
+; ZVFH-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v24, v16
+; ZVFH-NEXT: addi a0, sp, 16
+; ZVFH-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v0, v8
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v24, v20
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v16, v12
+; ZVFH-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFH-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT: vfmax.vv v0, v0, v8
+; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v0
+; ZVFH-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT: vfmax.vv v16, v16, v24
+; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v12, v16
+; ZVFH-NEXT: csrr a0, vlenb
+; ZVFH-NEXT: slli a0, a0, 3
+; ZVFH-NEXT: add sp, sp, a0
+; ZVFH-NEXT: .cfi_def_cfa sp, 16
+; ZVFH-NEXT: addi sp, sp, 16
+; ZVFH-NEXT: .cfi_def_cfa_offset 0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_nxv32bf16_vv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: addi sp, sp, -16
+; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: sub sp, sp, a0
+; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v24, v16
+; ZVFHMIN-NEXT: addi a0, sp, 16
+; ZVFHMIN-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v0, v8
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v24, v20
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v16, v12
+; ZVFHMIN-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmax.vv v0, v0, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v0
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmax.vv v16, v16, v24
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v12, v16
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: add sp, sp, a0
+; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16
+; ZVFHMIN-NEXT: addi sp, sp, 16
+; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_nxv32bf16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: addi sp, sp, -16
+; ZVFBFA-NEXT: .cfi_def_cfa_offset 16
+; ZVFBFA-NEXT: csrr a0, vlenb
+; ZVFBFA-NEXT: slli a0, a0, 3
+; ZVFBFA-NEXT: sub sp, sp, a0
+; ZVFBFA-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v16
+; ZVFBFA-NEXT: addi a0, sp, 16
+; ZVFBFA-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFBFA-NEXT: vfwcvt.f.f.v v0, v8
+; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v20
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFBFA-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v0, v0, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v0
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v16, v16, v24
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v12, v16
+; ZVFBFA-NEXT: csrr a0, vlenb
+; ZVFBFA-NEXT: slli a0, a0, 3
+; ZVFBFA-NEXT: add sp, sp, a0
+; ZVFBFA-NEXT: .cfi_def_cfa sp, 16
+; ZVFBFA-NEXT: addi sp, sp, 16
+; ZVFBFA-NEXT: .cfi_def_cfa_offset 0
+; ZVFBFA-NEXT: ret
%v = call <vscale x 32 x bfloat> @llvm.maxnum.nxv32bf16(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b)
ret <vscale x 32 x bfloat> %v
}
define <vscale x 32 x bfloat> @vfmax_nxv32bf16_vf(<vscale x 32 x bfloat> %a, bfloat %b) {
-; CHECK-LABEL: vfmax_nxv32bf16_vf:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; CHECK-NEXT: fmv.x.h a0, fa0
-; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
-; CHECK-NEXT: addi a1, sp, 16
-; CHECK-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
-; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
-; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma
-; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vfmax.vv v0, v8, v0
-; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v0
-; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vfmax.vv v16, v24, v16
-; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add sp, sp, a0
-; CHECK-NEXT: .cfi_def_cfa sp, 16
-; CHECK-NEXT: addi sp, sp, 16
-; CHECK-NEXT: .cfi_def_cfa_offset 0
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmax_nxv32bf16_vf:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: addi sp, sp, -16
+; ZVFH-NEXT: .cfi_def_cfa_offset 16
+; ZVFH-NEXT: csrr a0, vlenb
+; ZVFH-NEXT: slli a0, a0, 3
+; ZVFH-NEXT: sub sp, sp, a0
+; ZVFH-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFH-NEXT: fmv.x.h a0, fa0
+; ZVFH-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v16, v8
+; ZVFH-NEXT: addi a1, sp, 16
+; ZVFH-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v24, v12
+; ZVFH-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; ZVFH-NEXT: vmv.v.x v8, a0
+; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v0, v8
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v16, v12
+; ZVFH-NEXT: addi a0, sp, 16
+; ZVFH-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFH-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT: vfmax.vv v0, v8, v0
+; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v0
+; ZVFH-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT: vfmax.vv v16, v24, v16
+; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v12, v16
+; ZVFH-NEXT: csrr a0, vlenb
+; ZVFH-NEXT: slli a0, a0, 3
+; ZVFH-NEXT: add sp, sp, a0
+; ZVFH-NEXT: .cfi_def_cfa sp, 16
+; ZVFH-NEXT: addi sp, sp, 16
+; ZVFH-NEXT: .cfi_def_cfa_offset 0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmax_nxv32bf16_vf:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: addi sp, sp, -16
+; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: sub sp, sp, a0
+; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v16, v8
+; ZVFHMIN-NEXT: addi a1, sp, 16
+; ZVFHMIN-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v24, v12
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v8, a0
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v0, v8
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v16, v12
+; ZVFHMIN-NEXT: addi a0, sp, 16
+; ZVFHMIN-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmax.vv v0, v8, v0
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v0
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmax.vv v16, v24, v16
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v12, v16
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: add sp, sp, a0
+; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16
+; ZVFHMIN-NEXT: addi sp, sp, 16
+; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_nxv32bf16_vf:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: addi sp, sp, -16
+; ZVFBFA-NEXT: .cfi_def_cfa_offset 16
+; ZVFBFA-NEXT: csrr a0, vlenb
+; ZVFBFA-NEXT: slli a0, a0, 3
+; ZVFBFA-NEXT: sub sp, sp, a0
+; ZVFBFA-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m8, ta, ma
+; ZVFBFA-NEXT: vfmv.v.f v16, fa0
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v8
+; ZVFBFA-NEXT: addi a0, sp, 16
+; ZVFBFA-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFBFA-NEXT: vfwcvt.f.f.v v0, v12
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v16
+; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v20
+; ZVFBFA-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v16, v16, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v16, v0, v24
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v12, v16
+; ZVFBFA-NEXT: csrr a0, vlenb
+; ZVFBFA-NEXT: slli a0, a0, 3
+; ZVFBFA-NEXT: add sp, sp, a0
+; ZVFBFA-NEXT: .cfi_def_cfa sp, 16
+; ZVFBFA-NEXT: addi sp, sp, 16
+; ZVFBFA-NEXT: .cfi_def_cfa_offset 0
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <vscale x 32 x bfloat> %head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer
%v = call <vscale x 32 x bfloat> @llvm.maxnum.nxv32bf16(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %splat)
@@ -268,6 +629,17 @@ define <vscale x 1 x half> @vfmax_nxv1f16_vv(<vscale x 1 x half> %a, <vscale x 1
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_nxv1f16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v9, v9, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%v = call <vscale x 1 x half> @llvm.maxnum.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x half> %b)
ret <vscale x 1 x half> %v
}
@@ -289,6 +661,17 @@ define <vscale x 1 x half> @vfmax_nxv1f16_vf(<vscale x 1 x half> %a, half %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_nxv1f16_vf:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fcvt.s.h fa5, fa0
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vfmax.vf v9, v9, fa5
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 1 x half> poison, half %b, i32 0
%splat = shufflevector <vscale x 1 x half> %head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
%v = call <vscale x 1 x half> @llvm.maxnum.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x half> %splat)
@@ -312,6 +695,17 @@ define <vscale x 2 x half> @vfmax_nxv2f16_vv(<vscale x 2 x half> %a, <vscale x 2
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_nxv2f16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v9, v9, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%v = call <vscale x 2 x half> @llvm.maxnum.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b)
ret <vscale x 2 x half> %v
}
@@ -333,6 +727,17 @@ define <vscale x 2 x half> @vfmax_nxv2f16_vf(<vscale x 2 x half> %a, half %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_nxv2f16_vf:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fcvt.s.h fa5, fa0
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT: vfmax.vf v9, v9, fa5
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 2 x half> poison, half %b, i32 0
%splat = shufflevector <vscale x 2 x half> %head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
%v = call <vscale x 2 x half> @llvm.maxnum.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %splat)
@@ -356,6 +761,17 @@ define <vscale x 4 x half> @vfmax_nxv4f16_vv(<vscale x 4 x half> %a, <vscale x 4
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_nxv4f16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v10, v12, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT: ret
%v = call <vscale x 4 x half> @llvm.maxnum.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b)
ret <vscale x 4 x half> %v
}
@@ -377,6 +793,17 @@ define <vscale x 4 x half> @vfmax_nxv4f16_vf(<vscale x 4 x half> %a, half %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_nxv4f16_vf:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fcvt.s.h fa5, fa0
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT: vfmax.vf v10, v10, fa5
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 4 x half> poison, half %b, i32 0
%splat = shufflevector <vscale x 4 x half> %head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
%v = call <vscale x 4 x half> @llvm.maxnum.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %splat)
@@ -400,6 +827,17 @@ define <vscale x 8 x half> @vfmax_nxv8f16_vv(<vscale x 8 x half> %a, <vscale x 8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_nxv8f16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v10
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v12, v16, v12
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: ret
%v = call <vscale x 8 x half> @llvm.maxnum.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b)
ret <vscale x 8 x half> %v
}
@@ -421,6 +859,17 @@ define <vscale x 8 x half> @vfmax_nxv8f16_vf(<vscale x 8 x half> %a, half %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_nxv8f16_vf:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fcvt.s.h fa5, fa0
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vfmax.vf v12, v12, fa5
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 8 x half> poison, half %b, i32 0
%splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
%v = call <vscale x 8 x half> @llvm.maxnum.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %splat)
@@ -444,6 +893,17 @@ define <vscale x 16 x half> @vfmax_nxv16f16_vv(<vscale x 16 x half> %a, <vscale
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_nxv16f16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v16, v24, v16
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT: ret
%v = call <vscale x 16 x half> @llvm.maxnum.nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x half> %b)
ret <vscale x 16 x half> %v
}
@@ -465,6 +925,17 @@ define <vscale x 16 x half> @vfmax_nxv16f16_vf(<vscale x 16 x half> %a, half %b)
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_nxv16f16_vf:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fcvt.s.h fa5, fa0
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vfmax.vf v16, v16, fa5
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 16 x half> poison, half %b, i32 0
%splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
%v = call <vscale x 16 x half> @llvm.maxnum.nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x half> %splat)
@@ -509,6 +980,38 @@ define <vscale x 32 x half> @vfmax_nxv32f16_vv(<vscale x 32 x half> %a, <vscale
; ZVFHMIN-NEXT: addi sp, sp, 16
; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_nxv32f16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: addi sp, sp, -16
+; ZVFBFA-NEXT: .cfi_def_cfa_offset 16
+; ZVFBFA-NEXT: csrr a0, vlenb
+; ZVFBFA-NEXT: slli a0, a0, 3
+; ZVFBFA-NEXT: sub sp, sp, a0
+; ZVFBFA-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v16
+; ZVFBFA-NEXT: addi a0, sp, 16
+; ZVFBFA-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFBFA-NEXT: vfwcvt.f.f.v v0, v8
+; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v20
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFBFA-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v0, v0, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v0
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v16, v16, v24
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v12, v16
+; ZVFBFA-NEXT: csrr a0, vlenb
+; ZVFBFA-NEXT: slli a0, a0, 3
+; ZVFBFA-NEXT: add sp, sp, a0
+; ZVFBFA-NEXT: .cfi_def_cfa sp, 16
+; ZVFBFA-NEXT: addi sp, sp, 16
+; ZVFBFA-NEXT: .cfi_def_cfa_offset 0
+; ZVFBFA-NEXT: ret
%v = call <vscale x 32 x half> @llvm.maxnum.nxv32f16(<vscale x 32 x half> %a, <vscale x 32 x half> %b)
ret <vscale x 32 x half> %v
}
@@ -556,6 +1059,43 @@ define <vscale x 32 x half> @vfmax_nxv32f16_vf(<vscale x 32 x half> %a, half %b)
; ZVFHMIN-NEXT: addi sp, sp, 16
; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmax_nxv32f16_vf:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: addi sp, sp, -16
+; ZVFBFA-NEXT: .cfi_def_cfa_offset 16
+; ZVFBFA-NEXT: csrr a0, vlenb
+; ZVFBFA-NEXT: slli a0, a0, 3
+; ZVFBFA-NEXT: sub sp, sp, a0
+; ZVFBFA-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFBFA-NEXT: fmv.x.h a0, fa0
+; ZVFBFA-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFBFA-NEXT: addi a1, sp, 16
+; ZVFBFA-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v12
+; ZVFBFA-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; ZVFBFA-NEXT: vmv.v.x v8, a0
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v0, v8
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFBFA-NEXT: addi a0, sp, 16
+; ZVFBFA-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v0, v8, v0
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v0
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v16, v24, v16
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v12, v16
+; ZVFBFA-NEXT: csrr a0, vlenb
+; ZVFBFA-NEXT: slli a0, a0, 3
+; ZVFBFA-NEXT: add sp, sp, a0
+; ZVFBFA-NEXT: .cfi_def_cfa sp, 16
+; ZVFBFA-NEXT: addi sp, sp, 16
+; ZVFBFA-NEXT: .cfi_def_cfa_offset 0
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 32 x half> poison, half %b, i32 0
%splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
%v = call <vscale x 32 x half> @llvm.maxnum.nxv32f16(<vscale x 32 x half> %a, <vscale x 32 x half> %splat)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmin-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmin-sdnode.ll
index bb435c9d0114f..b284d87ab97d4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmin-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmin-sdnode.ll
@@ -11,33 +11,83 @@
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
; RUN: --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v \
+; RUN: -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFBFA
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v \
+; RUN: -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFBFA
define <vscale x 1 x bfloat> @vfmin_nxv1bf16_vv(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b) {
-; CHECK-LABEL: vfmin_nxv1bf16_vv:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
-; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
-; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT: vfmin.vv v9, v9, v10
-; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmin_nxv1bf16_vv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v10, v9
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v9, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFH-NEXT: vfmin.vv v9, v9, v10
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_nxv1bf16_vv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfmin.vv v9, v9, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_nxv1bf16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v9, v9, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%v = call <vscale x 1 x bfloat> @llvm.minnum.nxv1bf16(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b)
ret <vscale x 1 x bfloat> %v
}
define <vscale x 1 x bfloat> @vfmin_nxv1bf16_vf(<vscale x 1 x bfloat> %a, bfloat %b) {
-; CHECK-LABEL: vfmin_nxv1bf16_vf:
-; CHECK: # %bb.0:
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
-; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT: vfmin.vf v9, v9, fa5
-; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmin_nxv1bf16_vf:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v9, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFH-NEXT: vfmin.vf v9, v9, fa5
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_nxv1bf16_vf:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfmin.vf v9, v9, fa5
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_nxv1bf16_vf:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vfmin.vf v9, v9, fa5
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <vscale x 1 x bfloat> %head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
%v = call <vscale x 1 x bfloat> @llvm.minnum.nxv1bf16(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %splat)
@@ -45,31 +95,75 @@ define <vscale x 1 x bfloat> @vfmin_nxv1bf16_vf(<vscale x 1 x bfloat> %a, bfloat
}
define <vscale x 2 x bfloat> @vfmin_nxv2bf16_vv(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) {
-; CHECK-LABEL: vfmin_nxv2bf16_vv:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
-; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
-; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT: vfmin.vv v9, v9, v10
-; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmin_nxv2bf16_vv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v10, v9
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v9, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFH-NEXT: vfmin.vv v9, v9, v10
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_nxv2bf16_vv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfmin.vv v9, v9, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_nxv2bf16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v9, v9, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%v = call <vscale x 2 x bfloat> @llvm.minnum.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b)
ret <vscale x 2 x bfloat> %v
}
define <vscale x 2 x bfloat> @vfmin_nxv2bf16_vf(<vscale x 2 x bfloat> %a, bfloat %b) {
-; CHECK-LABEL: vfmin_nxv2bf16_vf:
-; CHECK: # %bb.0:
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
-; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT: vfmin.vf v9, v9, fa5
-; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmin_nxv2bf16_vf:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v9, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFH-NEXT: vfmin.vf v9, v9, fa5
+; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_nxv2bf16_vf:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfmin.vf v9, v9, fa5
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_nxv2bf16_vf:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT: vfmin.vf v9, v9, fa5
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <vscale x 2 x bfloat> %head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
%v = call <vscale x 2 x bfloat> @llvm.minnum.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %splat)
@@ -77,31 +171,75 @@ define <vscale x 2 x bfloat> @vfmin_nxv2bf16_vf(<vscale x 2 x bfloat> %a, bfloat
}
define <vscale x 4 x bfloat> @vfmin_nxv4bf16_vv(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) {
-; CHECK-LABEL: vfmin_nxv4bf16_vv:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfmin.vv v10, v12, v10
-; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmin_nxv4bf16_vv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v10, v9
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v12, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFH-NEXT: vfmin.vv v10, v12, v10
+; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v10
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_nxv4bf16_vv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v12, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfmin.vv v10, v12, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v10
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_nxv4bf16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v10, v12, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT: ret
%v = call <vscale x 4 x bfloat> @llvm.minnum.nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b)
ret <vscale x 4 x bfloat> %v
}
define <vscale x 4 x bfloat> @vfmin_nxv4bf16_vf(<vscale x 4 x bfloat> %a, bfloat %b) {
-; CHECK-LABEL: vfmin_nxv4bf16_vf:
-; CHECK: # %bb.0:
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfmin.vf v10, v10, fa5
-; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmin_nxv4bf16_vf:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v10, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFH-NEXT: vfmin.vf v10, v10, fa5
+; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v10
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_nxv4bf16_vf:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v10, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfmin.vf v10, v10, fa5
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v10
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_nxv4bf16_vf:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT: vfmin.vf v10, v10, fa5
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <vscale x 4 x bfloat> %head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
%v = call <vscale x 4 x bfloat> @llvm.minnum.nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %splat)
@@ -109,31 +247,75 @@ define <vscale x 4 x bfloat> @vfmin_nxv4bf16_vf(<vscale x 4 x bfloat> %a, bfloat
}
define <vscale x 8 x bfloat> @vfmin_nxv8bf16_vv(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) {
-; CHECK-LABEL: vfmin_nxv8bf16_vv:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
-; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vfmin.vv v12, v16, v12
-; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmin_nxv8bf16_vv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v12, v10
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v16, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFH-NEXT: vfmin.vv v12, v16, v12
+; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v12
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_nxv8bf16_vv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v12, v10
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v16, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vfmin.vv v12, v16, v12
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v12
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_nxv8bf16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v10
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v12, v16, v12
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: ret
%v = call <vscale x 8 x bfloat> @llvm.minnum.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b)
ret <vscale x 8 x bfloat> %v
}
define <vscale x 8 x bfloat> @vfmin_nxv8bf16_vf(<vscale x 8 x bfloat> %a, bfloat %b) {
-; CHECK-LABEL: vfmin_nxv8bf16_vf:
-; CHECK: # %bb.0:
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
-; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vfmin.vf v12, v12, fa5
-; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmin_nxv8bf16_vf:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v12, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFH-NEXT: vfmin.vf v12, v12, fa5
+; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v12
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_nxv8bf16_vf:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v12, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vfmin.vf v12, v12, fa5
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v12
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_nxv8bf16_vf:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vfmin.vf v12, v12, fa5
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
%v = call <vscale x 8 x bfloat> @llvm.minnum.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %splat)
@@ -141,31 +323,75 @@ define <vscale x 8 x bfloat> @vfmin_nxv8bf16_vf(<vscale x 8 x bfloat> %a, bfloat
}
define <vscale x 16 x bfloat> @vfmin_nxv16bf16_vv(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %b) {
-; CHECK-LABEL: vfmin_nxv16bf16_vv:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
-; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
-; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vfmin.vv v16, v24, v16
-; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmin_nxv16bf16_vv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v16, v12
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v24, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT: vfmin.vv v16, v24, v16
+; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v16
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_nxv16bf16_vv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v16, v12
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v24, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmin.vv v16, v24, v16
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v16
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_nxv16bf16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v16, v24, v16
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT: ret
%v = call <vscale x 16 x bfloat> @llvm.minnum.nxv16bf16(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %b)
ret <vscale x 16 x bfloat> %v
}
define <vscale x 16 x bfloat> @vfmin_nxv16bf16_vf(<vscale x 16 x bfloat> %a, bfloat %b) {
-; CHECK-LABEL: vfmin_nxv16bf16_vf:
-; CHECK: # %bb.0:
-; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
-; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
-; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vfmin.vf v16, v16, fa5
-; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmin_nxv16bf16_vf:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v16, v8
+; ZVFH-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT: vfmin.vf v16, v16, fa5
+; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v16
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_nxv16bf16_vf:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v16, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmin.vf v16, v16, fa5
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v16
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_nxv16bf16_vf:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fcvt.s.bf16 fa5, fa0
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vfmin.vf v16, v16, fa5
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <vscale x 16 x bfloat> %head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
%v = call <vscale x 16 x bfloat> @llvm.minnum.nxv16bf16(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %splat)
@@ -173,78 +399,213 @@ define <vscale x 16 x bfloat> @vfmin_nxv16bf16_vf(<vscale x 16 x bfloat> %a, bfl
}
define <vscale x 32 x bfloat> @vfmin_nxv32bf16_vv(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b) {
-; CHECK-LABEL: vfmin_nxv32bf16_vv:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
-; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
-; CHECK-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vfmin.vv v0, v0, v8
-; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v0
-; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vfmin.vv v16, v16, v24
-; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add sp, sp, a0
-; CHECK-NEXT: .cfi_def_cfa sp, 16
-; CHECK-NEXT: addi sp, sp, 16
-; CHECK-NEXT: .cfi_def_cfa_offset 0
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmin_nxv32bf16_vv:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: addi sp, sp, -16
+; ZVFH-NEXT: .cfi_def_cfa_offset 16
+; ZVFH-NEXT: csrr a0, vlenb
+; ZVFH-NEXT: slli a0, a0, 3
+; ZVFH-NEXT: sub sp, sp, a0
+; ZVFH-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v24, v16
+; ZVFH-NEXT: addi a0, sp, 16
+; ZVFH-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v0, v8
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v24, v20
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v16, v12
+; ZVFH-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFH-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT: vfmin.vv v0, v0, v8
+; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v0
+; ZVFH-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT: vfmin.vv v16, v16, v24
+; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v12, v16
+; ZVFH-NEXT: csrr a0, vlenb
+; ZVFH-NEXT: slli a0, a0, 3
+; ZVFH-NEXT: add sp, sp, a0
+; ZVFH-NEXT: .cfi_def_cfa sp, 16
+; ZVFH-NEXT: addi sp, sp, 16
+; ZVFH-NEXT: .cfi_def_cfa_offset 0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_nxv32bf16_vv:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: addi sp, sp, -16
+; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: sub sp, sp, a0
+; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v24, v16
+; ZVFHMIN-NEXT: addi a0, sp, 16
+; ZVFHMIN-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v0, v8
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v24, v20
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v16, v12
+; ZVFHMIN-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmin.vv v0, v0, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v0
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmin.vv v16, v16, v24
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v12, v16
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: add sp, sp, a0
+; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16
+; ZVFHMIN-NEXT: addi sp, sp, 16
+; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_nxv32bf16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: addi sp, sp, -16
+; ZVFBFA-NEXT: .cfi_def_cfa_offset 16
+; ZVFBFA-NEXT: csrr a0, vlenb
+; ZVFBFA-NEXT: slli a0, a0, 3
+; ZVFBFA-NEXT: sub sp, sp, a0
+; ZVFBFA-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v16
+; ZVFBFA-NEXT: addi a0, sp, 16
+; ZVFBFA-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFBFA-NEXT: vfwcvt.f.f.v v0, v8
+; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v20
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFBFA-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v0, v0, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v0
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v16, v16, v24
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v12, v16
+; ZVFBFA-NEXT: csrr a0, vlenb
+; ZVFBFA-NEXT: slli a0, a0, 3
+; ZVFBFA-NEXT: add sp, sp, a0
+; ZVFBFA-NEXT: .cfi_def_cfa sp, 16
+; ZVFBFA-NEXT: addi sp, sp, 16
+; ZVFBFA-NEXT: .cfi_def_cfa_offset 0
+; ZVFBFA-NEXT: ret
%v = call <vscale x 32 x bfloat> @llvm.minnum.nxv32bf16(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b)
ret <vscale x 32 x bfloat> %v
}
define <vscale x 32 x bfloat> @vfmin_nxv32bf16_vf(<vscale x 32 x bfloat> %a, bfloat %b) {
-; CHECK-LABEL: vfmin_nxv32bf16_vf:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; CHECK-NEXT: fmv.x.h a0, fa0
-; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
-; CHECK-NEXT: addi a1, sp, 16
-; CHECK-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
-; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12
-; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma
-; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v8
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vfmin.vv v0, v8, v0
-; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v8, v0
-; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vfmin.vv v16, v24, v16
-; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfncvtbf16.f.f.w v12, v16
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add sp, sp, a0
-; CHECK-NEXT: .cfi_def_cfa sp, 16
-; CHECK-NEXT: addi sp, sp, 16
-; CHECK-NEXT: .cfi_def_cfa_offset 0
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfmin_nxv32bf16_vf:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: addi sp, sp, -16
+; ZVFH-NEXT: .cfi_def_cfa_offset 16
+; ZVFH-NEXT: csrr a0, vlenb
+; ZVFH-NEXT: slli a0, a0, 3
+; ZVFH-NEXT: sub sp, sp, a0
+; ZVFH-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFH-NEXT: fmv.x.h a0, fa0
+; ZVFH-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v16, v8
+; ZVFH-NEXT: addi a1, sp, 16
+; ZVFH-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v24, v12
+; ZVFH-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; ZVFH-NEXT: vmv.v.x v8, a0
+; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v0, v8
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v16, v12
+; ZVFH-NEXT: addi a0, sp, 16
+; ZVFH-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFH-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT: vfmin.vv v0, v8, v0
+; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v8, v0
+; ZVFH-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFH-NEXT: vfmin.vv v16, v24, v16
+; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfncvtbf16.f.f.w v12, v16
+; ZVFH-NEXT: csrr a0, vlenb
+; ZVFH-NEXT: slli a0, a0, 3
+; ZVFH-NEXT: add sp, sp, a0
+; ZVFH-NEXT: .cfi_def_cfa sp, 16
+; ZVFH-NEXT: addi sp, sp, 16
+; ZVFH-NEXT: .cfi_def_cfa_offset 0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfmin_nxv32bf16_vf:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: addi sp, sp, -16
+; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: sub sp, sp, a0
+; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v16, v8
+; ZVFHMIN-NEXT: addi a1, sp, 16
+; ZVFHMIN-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v24, v12
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v8, a0
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v0, v8
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v16, v12
+; ZVFHMIN-NEXT: addi a0, sp, 16
+; ZVFHMIN-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmin.vv v0, v8, v0
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v0
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmin.vv v16, v24, v16
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v12, v16
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: add sp, sp, a0
+; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16
+; ZVFHMIN-NEXT: addi sp, sp, 16
+; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_nxv32bf16_vf:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: addi sp, sp, -16
+; ZVFBFA-NEXT: .cfi_def_cfa_offset 16
+; ZVFBFA-NEXT: csrr a0, vlenb
+; ZVFBFA-NEXT: slli a0, a0, 3
+; ZVFBFA-NEXT: sub sp, sp, a0
+; ZVFBFA-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m8, ta, ma
+; ZVFBFA-NEXT: vfmv.v.f v16, fa0
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v8
+; ZVFBFA-NEXT: addi a0, sp, 16
+; ZVFBFA-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFBFA-NEXT: vfwcvt.f.f.v v0, v12
+; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v16
+; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v20
+; ZVFBFA-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v16, v16, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v16, v0, v24
+; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v12, v16
+; ZVFBFA-NEXT: csrr a0, vlenb
+; ZVFBFA-NEXT: slli a0, a0, 3
+; ZVFBFA-NEXT: add sp, sp, a0
+; ZVFBFA-NEXT: .cfi_def_cfa sp, 16
+; ZVFBFA-NEXT: addi sp, sp, 16
+; ZVFBFA-NEXT: .cfi_def_cfa_offset 0
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <vscale x 32 x bfloat> %head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer
%v = call <vscale x 32 x bfloat> @llvm.minnum.nxv32bf16(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %splat)
@@ -268,6 +629,17 @@ define <vscale x 1 x half> @vfmin_nxv1f16_vv(<vscale x 1 x half> %a, <vscale x 1
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_nxv1f16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v9, v9, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%v = call <vscale x 1 x half> @llvm.minnum.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x half> %b)
ret <vscale x 1 x half> %v
}
@@ -289,6 +661,17 @@ define <vscale x 1 x half> @vfmin_nxv1f16_vf(<vscale x 1 x half> %a, half %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_nxv1f16_vf:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fcvt.s.h fa5, fa0
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vfmin.vf v9, v9, fa5
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 1 x half> poison, half %b, i32 0
%splat = shufflevector <vscale x 1 x half> %head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
%v = call <vscale x 1 x half> @llvm.minnum.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x half> %splat)
@@ -312,6 +695,17 @@ define <vscale x 2 x half> @vfmin_nxv2f16_vv(<vscale x 2 x half> %a, <vscale x 2
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_nxv2f16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v9, v9, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%v = call <vscale x 2 x half> @llvm.minnum.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b)
ret <vscale x 2 x half> %v
}
@@ -333,6 +727,17 @@ define <vscale x 2 x half> @vfmin_nxv2f16_vf(<vscale x 2 x half> %a, half %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_nxv2f16_vf:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fcvt.s.h fa5, fa0
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFBFA-NEXT: vfmin.vf v9, v9, fa5
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 2 x half> poison, half %b, i32 0
%splat = shufflevector <vscale x 2 x half> %head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
%v = call <vscale x 2 x half> @llvm.minnum.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %splat)
@@ -356,6 +761,17 @@ define <vscale x 4 x half> @vfmin_nxv4f16_vv(<vscale x 4 x half> %a, <vscale x 4
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_nxv4f16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v10, v12, v10
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT: ret
%v = call <vscale x 4 x half> @llvm.minnum.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b)
ret <vscale x 4 x half> %v
}
@@ -377,6 +793,17 @@ define <vscale x 4 x half> @vfmin_nxv4f16_vf(<vscale x 4 x half> %a, half %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_nxv4f16_vf:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fcvt.s.h fa5, fa0
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFBFA-NEXT: vfmin.vf v10, v10, fa5
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 4 x half> poison, half %b, i32 0
%splat = shufflevector <vscale x 4 x half> %head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
%v = call <vscale x 4 x half> @llvm.minnum.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %splat)
@@ -400,6 +827,17 @@ define <vscale x 8 x half> @vfmin_nxv8f16_vv(<vscale x 8 x half> %a, <vscale x 8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_nxv8f16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v10
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v12, v16, v12
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: ret
%v = call <vscale x 8 x half> @llvm.minnum.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b)
ret <vscale x 8 x half> %v
}
@@ -421,6 +859,17 @@ define <vscale x 8 x half> @vfmin_nxv8f16_vf(<vscale x 8 x half> %a, half %b) {
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_nxv8f16_vf:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fcvt.s.h fa5, fa0
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vfmin.vf v12, v12, fa5
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 8 x half> poison, half %b, i32 0
%splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
%v = call <vscale x 8 x half> @llvm.minnum.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %splat)
@@ -444,6 +893,17 @@ define <vscale x 16 x half> @vfmin_nxv16f16_vv(<vscale x 16 x half> %a, <vscale
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_nxv16f16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v16, v24, v16
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT: ret
%v = call <vscale x 16 x half> @llvm.minnum.nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x half> %b)
ret <vscale x 16 x half> %v
}
@@ -465,6 +925,17 @@ define <vscale x 16 x half> @vfmin_nxv16f16_vf(<vscale x 16 x half> %a, half %b)
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_nxv16f16_vf:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fcvt.s.h fa5, fa0
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vfmin.vf v16, v16, fa5
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 16 x half> poison, half %b, i32 0
%splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
%v = call <vscale x 16 x half> @llvm.minnum.nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x half> %splat)
@@ -509,6 +980,38 @@ define <vscale x 32 x half> @vfmin_nxv32f16_vv(<vscale x 32 x half> %a, <vscale
; ZVFHMIN-NEXT: addi sp, sp, 16
; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_nxv32f16_vv:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: addi sp, sp, -16
+; ZVFBFA-NEXT: .cfi_def_cfa_offset 16
+; ZVFBFA-NEXT: csrr a0, vlenb
+; ZVFBFA-NEXT: slli a0, a0, 3
+; ZVFBFA-NEXT: sub sp, sp, a0
+; ZVFBFA-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v16
+; ZVFBFA-NEXT: addi a0, sp, 16
+; ZVFBFA-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; ZVFBFA-NEXT: vfwcvt.f.f.v v0, v8
+; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v20
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFBFA-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v0, v0, v8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v0
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v16, v16, v24
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v12, v16
+; ZVFBFA-NEXT: csrr a0, vlenb
+; ZVFBFA-NEXT: slli a0, a0, 3
+; ZVFBFA-NEXT: add sp, sp, a0
+; ZVFBFA-NEXT: .cfi_def_cfa sp, 16
+; ZVFBFA-NEXT: addi sp, sp, 16
+; ZVFBFA-NEXT: .cfi_def_cfa_offset 0
+; ZVFBFA-NEXT: ret
%v = call <vscale x 32 x half> @llvm.minnum.nxv32f16(<vscale x 32 x half> %a, <vscale x 32 x half> %b)
ret <vscale x 32 x half> %v
}
@@ -556,6 +1059,43 @@ define <vscale x 32 x half> @vfmin_nxv32f16_vf(<vscale x 32 x half> %a, half %b)
; ZVFHMIN-NEXT: addi sp, sp, 16
; ZVFHMIN-NEXT: .cfi_def_cfa_offset 0
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfmin_nxv32f16_vf:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: addi sp, sp, -16
+; ZVFBFA-NEXT: .cfi_def_cfa_offset 16
+; ZVFBFA-NEXT: csrr a0, vlenb
+; ZVFBFA-NEXT: slli a0, a0, 3
+; ZVFBFA-NEXT: sub sp, sp, a0
+; ZVFBFA-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; ZVFBFA-NEXT: fmv.x.h a0, fa0
+; ZVFBFA-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFBFA-NEXT: addi a1, sp, 16
+; ZVFBFA-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v12
+; ZVFBFA-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; ZVFBFA-NEXT: vmv.v.x v8, a0
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vfwcvt.f.f.v v0, v8
+; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFBFA-NEXT: addi a0, sp, 16
+; ZVFBFA-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v0, v8, v0
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v8, v0
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v16, v24, v16
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v12, v16
+; ZVFBFA-NEXT: csrr a0, vlenb
+; ZVFBFA-NEXT: slli a0, a0, 3
+; ZVFBFA-NEXT: add sp, sp, a0
+; ZVFBFA-NEXT: .cfi_def_cfa sp, 16
+; ZVFBFA-NEXT: addi sp, sp, 16
+; ZVFBFA-NEXT: .cfi_def_cfa_offset 0
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 32 x half> poison, half %b, i32 0
%splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
%v = call <vscale x 32 x half> @llvm.minnum.nxv32f16(<vscale x 32 x half> %a, <vscale x 32 x half> %splat)
>From ec23fa25be43a6bbb73afa6be8c77961600a7264 Mon Sep 17 00:00:00 2001
From: Brandon Wu <songwu0813 at gmail.com>
Date: Thu, 11 Dec 2025 01:59:10 -0800
Subject: [PATCH 2/2] [RISCV][llvm] Support fminimum, fmaximum, fminnum,
fmaxnum, fminimumnum, fmaximumnum codegen for zvfbfa
This patch adds support for both scalable vectors and fixed-length vectors.
It also enables the fsetcc pattern match for zvfbfa so that fminimum and
fmaximum work correctly.
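For illustration, a minimal IR sketch (not one of the tests added by this
patch; the function name is made up) of the kind of input that should now
select a native bf16 instruction:

  declare <vscale x 4 x bfloat> @llvm.minnum.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x bfloat>)

  define <vscale x 4 x bfloat> @minnum_example(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) {
    %v = call <vscale x 4 x bfloat> @llvm.minnum.nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b)
    ret <vscale x 4 x bfloat> %v
  }

With something like -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v this is
expected to lower to a single vfmin.vv under an e16alt vtype, rather than
widening to f32 and narrowing back; the updated CHECK lines in the test
diffs below show the same pattern.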
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 18 +--
.../Target/RISCV/RISCVInstrInfoVSDPatterns.td | 9 +-
.../Target/RISCV/RISCVInstrInfoVVLPatterns.td | 23 ++-
.../RISCV/rvv/fixed-vectors-fmaximum.ll | 64 +++-----
.../RISCV/rvv/fixed-vectors-fmaximumnum.ll | 35 +----
.../RISCV/rvv/fixed-vectors-fminimum.ll | 64 +++-----
.../RISCV/rvv/fixed-vectors-fminimumnum.ll | 35 +----
.../CodeGen/RISCV/rvv/fixed-vectors-vfmax.ll | 92 ++----------
.../CodeGen/RISCV/rvv/fixed-vectors-vfmin.ll | 92 ++----------
.../test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll | 140 ++++--------------
.../CodeGen/RISCV/rvv/fmaximumnum-sdnode.ll | 137 ++---------------
.../test/CodeGen/RISCV/rvv/fminimum-sdnode.ll | 140 ++++--------------
.../CodeGen/RISCV/rvv/fminimumnum-sdnode.ll | 137 ++---------------
llvm/test/CodeGen/RISCV/rvv/vfmax-sdnode.ll | 130 ++--------------
llvm/test/CodeGen/RISCV/rvv/vfmin-sdnode.ll | 130 ++--------------
15 files changed, 209 insertions(+), 1037 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 2c0a02ae396c7..9cd6817c1b7a7 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -91,8 +91,10 @@ static cl::opt<bool>
static const unsigned ZvfbfaVPOps[] = {
ISD::VP_FNEG, ISD::VP_FABS, ISD::VP_FCOPYSIGN};
static const unsigned ZvfbfaOps[] = {
- ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN, ISD::SPLAT_VECTOR,
- ISD::FADD, ISD::FSUB, ISD::FMUL};
+ ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN, ISD::SPLAT_VECTOR,
+ ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FMINNUM,
+ ISD::FMAXNUM, ISD::FMINIMUMNUM, ISD::FMAXIMUMNUM, ISD::FMINIMUM,
+ ISD::FMAXIMUM};
RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
const RISCVSubtarget &STI)
@@ -1087,11 +1089,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::VECREDUCE_FMAXIMUM};
// TODO: Make more of these ops legal.
- static const unsigned ZvfbfaPromoteOps[] = {ISD::FMINNUM,
- ISD::FMAXNUM,
- ISD::FMINIMUMNUM,
- ISD::FMAXIMUMNUM,
- ISD::FDIV,
+ static const unsigned ZvfbfaPromoteOps[] = {ISD::FDIV,
ISD::FMA,
ISD::FSQRT,
ISD::FCEIL,
@@ -1103,8 +1101,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::FNEARBYINT,
ISD::IS_FPCLASS,
ISD::SETCC,
- ISD::FMAXIMUM,
- ISD::FMINIMUM,
ISD::STRICT_FADD,
ISD::STRICT_FSUB,
ISD::STRICT_FMUL,
@@ -1297,6 +1293,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::VECTOR_INTERLEAVE, ISD::VECTOR_REVERSE,
ISD::VECTOR_SPLICE, ISD::VECTOR_COMPRESS},
VT, Custom);
+ setOperationAction(
+ {ISD::FMINNUM, ISD::FMAXNUM, ISD::FMAXIMUMNUM, ISD::FMINIMUMNUM}, VT,
+ Legal);
+ setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, VT, Custom);
setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
index 14ad7ca0eb35a..b3cc33d31761d 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -201,12 +201,15 @@ class VPatBinarySDNode_VF_RM<SDPatternOperator vop,
multiclass VPatBinaryFPSDNode_VV_VF<SDPatternOperator vop, string instruction_name,
bit isSEWAware = 0> {
- foreach vti = AllFloatVectors in {
+ foreach vti = AllFloatAndBF16Vectors in {
let Predicates = GetVTypePredicates<vti>.Predicates in {
- def : VPatBinarySDNode_VV<vop, instruction_name,
+ def : VPatBinarySDNode_VV<vop, instruction_name #
+ !if(!eq(vti.Scalar, bf16), "_ALT", ""),
vti.Vector, vti.Vector, vti.Log2SEW,
vti.LMul, vti.AVL, vti.RegClass, isSEWAware>;
- def : VPatBinarySDNode_VF<vop, instruction_name#"_V"#vti.ScalarSuffix,
+ def : VPatBinarySDNode_VF<vop, instruction_name#
+ !if(!eq(vti.Scalar, bf16), "_ALT", "")#
+ "_V"#vti.ScalarSuffix,
vti.Vector, vti.Vector, vti.Scalar,
vti.Log2SEW, vti.LMul, vti.AVL, vti.RegClass,
vti.ScalarRegClass, isSEWAware>;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index 9273ce094eb0a..4c41667560a98 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -1043,13 +1043,16 @@ class VPatBinaryVL_VF_RM<SDPatternOperator vop,
multiclass VPatBinaryFPVL_VV_VF<SDPatternOperator vop, string instruction_name,
bit isSEWAware = 0> {
- foreach vti = AllFloatVectors in {
+ foreach vti = AllFloatAndBF16Vectors in {
let Predicates = GetVTypePredicates<vti>.Predicates in {
- def : VPatBinaryVL_V<vop, instruction_name, "VV",
+ def : VPatBinaryVL_V<vop, instruction_name#
+ !if(!eq(vti.Scalar, bf16), "_ALT", ""), "VV",
vti.Vector, vti.Vector, vti.Vector, vti.Mask,
vti.Log2SEW, vti.LMul, vti.RegClass, vti.RegClass,
vti.RegClass, isSEWAware>;
- def : VPatBinaryVL_VF<vop, instruction_name#"_V"#vti.ScalarSuffix,
+ def : VPatBinaryVL_VF<vop, instruction_name#
+ !if(!eq(vti.Scalar, bf16), "_ALT", "")#"_V"#
+ vti.ScalarSuffix,
vti.Vector, vti.Vector, vti.Vector, vti.Mask,
vti.Log2SEW, vti.LMul, vti.RegClass, vti.RegClass,
vti.ScalarRegClass, isSEWAware>;
@@ -1199,7 +1202,7 @@ multiclass VPatIntegerSetCCVL_VI_Swappable<VTypeInfo vti, string instruction_nam
multiclass VPatFPSetCCVL_VV_VF_FV<SDPatternOperator vop, CondCode cc,
string inst_name,
string swapped_op_inst_name> {
- foreach fvti = AllFloatVectors in {
+ foreach fvti = AllFloatAndBF16Vectors in {
let Predicates = GetVTypePredicates<fvti>.Predicates in {
def : Pat<(fvti.Mask (vop (fvti.Vector fvti.RegClass:$rs1),
fvti.RegClass:$rs2,
@@ -1207,7 +1210,9 @@ multiclass VPatFPSetCCVL_VV_VF_FV<SDPatternOperator vop, CondCode cc,
VR:$passthru,
(fvti.Mask VMV0:$vm),
VLOpFrag)),
- (!cast<Instruction>(inst_name#"_VV_"#fvti.LMul.MX#"_MASK")
+ (!cast<Instruction>(inst_name#
+ !if(!eq(fvti.Scalar, bf16), "_ALT", "")#
+ "_VV_"#fvti.LMul.MX#"_MASK")
VR:$passthru, fvti.RegClass:$rs1,
fvti.RegClass:$rs2, (fvti.Mask VMV0:$vm),
GPR:$vl, fvti.Log2SEW, TA_MU)>;
@@ -1217,7 +1222,9 @@ multiclass VPatFPSetCCVL_VV_VF_FV<SDPatternOperator vop, CondCode cc,
VR:$passthru,
(fvti.Mask VMV0:$vm),
VLOpFrag)),
- (!cast<Instruction>(inst_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX#"_MASK")
+ (!cast<Instruction>(inst_name#
+ !if(!eq(fvti.Scalar, bf16), "_ALT", "")#
+ "_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX#"_MASK")
VR:$passthru, fvti.RegClass:$rs1,
fvti.ScalarRegClass:$rs2, (fvti.Mask VMV0:$vm),
GPR:$vl, fvti.Log2SEW, TA_MU)>;
@@ -1227,7 +1234,9 @@ multiclass VPatFPSetCCVL_VV_VF_FV<SDPatternOperator vop, CondCode cc,
VR:$passthru,
(fvti.Mask VMV0:$vm),
VLOpFrag)),
- (!cast<Instruction>(swapped_op_inst_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX#"_MASK")
+ (!cast<Instruction>(swapped_op_inst_name#
+ !if(!eq(fvti.Scalar, bf16), "_ALT", "")#
+ "_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX#"_MASK")
VR:$passthru, fvti.RegClass:$rs1,
fvti.ScalarRegClass:$rs2, (fvti.Mask VMV0:$vm),
GPR:$vl, fvti.Log2SEW, TA_MU)>;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll
index 299361374f2f4..c3d6b1fac50dd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll
@@ -50,18 +50,11 @@ define <2 x bfloat> @vfmax_v2bf16_vv(<2 x bfloat> %a, <2 x bfloat> %b) {
; ZVFBFA-LABEL: vfmax_v2bf16_vv:
; ZVFBFA: # %bb.0:
; ZVFBFA-NEXT: vsetivli zero, 2, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFBFA-NEXT: vmfeq.vv v0, v10, v10
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFBFA-NEXT: vmerge.vvm v9, v10, v8, v0
; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8
-; ZVFBFA-NEXT: vmerge.vvm v8, v8, v10, v0
-; ZVFBFA-NEXT: vfmax.vv v9, v8, v9
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: vmerge.vvm v10, v8, v9, v0
+; ZVFBFA-NEXT: vmfeq.vv v0, v9, v9
+; ZVFBFA-NEXT: vmerge.vvm v8, v9, v8, v0
+; ZVFBFA-NEXT: vfmax.vv v8, v8, v10
; ZVFBFA-NEXT: ret
%v = call <2 x bfloat> @llvm.maximum.v2bf16(<2 x bfloat> %a, <2 x bfloat> %b)
ret <2 x bfloat> %v
@@ -105,18 +98,11 @@ define <4 x bfloat> @vfmax_v4bf16_vv(<4 x bfloat> %a, <4 x bfloat> %b) {
; ZVFBFA-LABEL: vfmax_v4bf16_vv:
; ZVFBFA: # %bb.0:
; ZVFBFA-NEXT: vsetivli zero, 4, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFBFA-NEXT: vmfeq.vv v0, v10, v10
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFBFA-NEXT: vmerge.vvm v9, v10, v8, v0
; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8
-; ZVFBFA-NEXT: vmerge.vvm v8, v8, v10, v0
-; ZVFBFA-NEXT: vfmax.vv v9, v8, v9
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: vmerge.vvm v10, v8, v9, v0
+; ZVFBFA-NEXT: vmfeq.vv v0, v9, v9
+; ZVFBFA-NEXT: vmerge.vvm v8, v9, v8, v0
+; ZVFBFA-NEXT: vfmax.vv v8, v8, v10
; ZVFBFA-NEXT: ret
%v = call <4 x bfloat> @llvm.maximum.v4bf16(<4 x bfloat> %a, <4 x bfloat> %b)
ret <4 x bfloat> %v
@@ -160,18 +146,11 @@ define <8 x bfloat> @vfmax_v8bf16_vv(<8 x bfloat> %a, <8 x bfloat> %b) {
; ZVFBFA-LABEL: vfmax_v8bf16_vv:
; ZVFBFA: # %bb.0:
; ZVFBFA-NEXT: vsetivli zero, 8, e16alt, m1, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFBFA-NEXT: vmfeq.vv v0, v10, v10
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v9
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFBFA-NEXT: vmerge.vvm v8, v10, v12, v0
-; ZVFBFA-NEXT: vmfeq.vv v0, v12, v12
-; ZVFBFA-NEXT: vmerge.vvm v10, v12, v10, v0
-; ZVFBFA-NEXT: vfmax.vv v10, v10, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT: vmerge.vvm v10, v8, v9, v0
+; ZVFBFA-NEXT: vmfeq.vv v0, v9, v9
+; ZVFBFA-NEXT: vmerge.vvm v8, v9, v8, v0
+; ZVFBFA-NEXT: vfmax.vv v8, v8, v10
; ZVFBFA-NEXT: ret
%v = call <8 x bfloat> @llvm.maximum.v8bf16(<8 x bfloat> %a, <8 x bfloat> %b)
ret <8 x bfloat> %v
@@ -215,18 +194,11 @@ define <16 x bfloat> @vfmax_v16bf16_vv(<16 x bfloat> %a, <16 x bfloat> %b) {
; ZVFBFA-LABEL: vfmax_v16bf16_vv:
; ZVFBFA: # %bb.0:
; ZVFBFA-NEXT: vsetivli zero, 16, e16alt, m2, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFBFA-NEXT: vmfeq.vv v0, v12, v12
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v10
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFBFA-NEXT: vmerge.vvm v8, v12, v16, v0
-; ZVFBFA-NEXT: vmfeq.vv v0, v16, v16
-; ZVFBFA-NEXT: vmerge.vvm v12, v16, v12, v0
-; ZVFBFA-NEXT: vfmax.vv v12, v12, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT: vmerge.vvm v12, v8, v10, v0
+; ZVFBFA-NEXT: vmfeq.vv v0, v10, v10
+; ZVFBFA-NEXT: vmerge.vvm v8, v10, v8, v0
+; ZVFBFA-NEXT: vfmax.vv v8, v8, v12
; ZVFBFA-NEXT: ret
%v = call <16 x bfloat> @llvm.maximum.v16bf16(<16 x bfloat> %a, <16 x bfloat> %b)
ret <16 x bfloat> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximumnum.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximumnum.ll
index 1a5e5af61b8ef..52c26900de5e5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximumnum.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximumnum.ll
@@ -377,12 +377,7 @@ define <2 x bfloat> @max_v2bf16(<2 x bfloat> %a, <2 x bfloat> %b) {
; ZVFBFA-LABEL: max_v2bf16:
; ZVFBFA: # %bb.0: # %entry
; ZVFBFA-NEXT: vsetivli zero, 2, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFBFA-NEXT: vfmax.vv v9, v9, v10
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: vfmax.vv v8, v8, v9
; ZVFBFA-NEXT: ret
entry:
%c = call <2 x bfloat> @llvm.maximumnum.v2bf16(<2 x bfloat> %a, <2 x bfloat> %b)
@@ -563,12 +558,7 @@ define <4 x bfloat> @max_v4bf16(<4 x bfloat> %a, <4 x bfloat> %b) {
; ZVFBFA-LABEL: max_v4bf16:
; ZVFBFA: # %bb.0: # %entry
; ZVFBFA-NEXT: vsetivli zero, 4, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFBFA-NEXT: vfmax.vv v9, v9, v10
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: vfmax.vv v8, v8, v9
; ZVFBFA-NEXT: ret
entry:
%c = call <4 x bfloat> @llvm.maximumnum.v4bf16(<4 x bfloat> %a, <4 x bfloat> %b)
@@ -869,12 +859,7 @@ define <8 x bfloat> @max_v8bf16(<8 x bfloat> %a, <8 x bfloat> %b) {
; ZVFBFA-LABEL: max_v8bf16:
; ZVFBFA: # %bb.0: # %entry
; ZVFBFA-NEXT: vsetivli zero, 8, e16alt, m1, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFBFA-NEXT: vfmax.vv v10, v12, v10
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT: vfmax.vv v8, v8, v9
; ZVFBFA-NEXT: ret
entry:
%c = call <8 x bfloat> @llvm.maximumnum.v8bf16(<8 x bfloat> %a, <8 x bfloat> %b)
@@ -1239,12 +1224,7 @@ define <9 x bfloat> @max_v9bf16(<9 x bfloat> %a, <9 x bfloat> %b) {
; ZVFBFA-LABEL: max_v9bf16:
; ZVFBFA: # %bb.0: # %entry
; ZVFBFA-NEXT: vsetivli zero, 16, e16alt, m2, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v10
-; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFBFA-NEXT: vfmax.vv v12, v16, v12
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: vfmax.vv v8, v8, v10
; ZVFBFA-NEXT: ret
entry:
%c = call <9 x bfloat> @llvm.maximumnum.v9bf16(<9 x bfloat> %a, <9 x bfloat> %b)
@@ -1725,12 +1705,7 @@ define <16 x bfloat> @max_v16bf16(<16 x bfloat> %a, <16 x bfloat> %b) {
; ZVFBFA-LABEL: max_v16bf16:
; ZVFBFA: # %bb.0: # %entry
; ZVFBFA-NEXT: vsetivli zero, 16, e16alt, m2, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v10
-; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFBFA-NEXT: vfmax.vv v12, v16, v12
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: vfmax.vv v8, v8, v10
; ZVFBFA-NEXT: ret
entry:
%c = call <16 x bfloat> @llvm.maximumnum.v16bf16(<16 x bfloat> %a, <16 x bfloat> %b)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll
index bcd86e5237918..9a25beeec8c72 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll
@@ -50,18 +50,11 @@ define <2 x bfloat> @vfmin_v2bf16_vv(<2 x bfloat> %a, <2 x bfloat> %b) {
; ZVFBFA-LABEL: vfmin_v2bf16_vv:
; ZVFBFA: # %bb.0:
; ZVFBFA-NEXT: vsetivli zero, 2, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFBFA-NEXT: vmfeq.vv v0, v10, v10
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFBFA-NEXT: vmerge.vvm v9, v10, v8, v0
; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8
-; ZVFBFA-NEXT: vmerge.vvm v8, v8, v10, v0
-; ZVFBFA-NEXT: vfmin.vv v9, v8, v9
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: vmerge.vvm v10, v8, v9, v0
+; ZVFBFA-NEXT: vmfeq.vv v0, v9, v9
+; ZVFBFA-NEXT: vmerge.vvm v8, v9, v8, v0
+; ZVFBFA-NEXT: vfmin.vv v8, v8, v10
; ZVFBFA-NEXT: ret
%v = call <2 x bfloat> @llvm.minimum.v2bf16(<2 x bfloat> %a, <2 x bfloat> %b)
ret <2 x bfloat> %v
@@ -105,18 +98,11 @@ define <4 x bfloat> @vfmin_v4bf16_vv(<4 x bfloat> %a, <4 x bfloat> %b) {
; ZVFBFA-LABEL: vfmin_v4bf16_vv:
; ZVFBFA: # %bb.0:
; ZVFBFA-NEXT: vsetivli zero, 4, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFBFA-NEXT: vmfeq.vv v0, v10, v10
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFBFA-NEXT: vmerge.vvm v9, v10, v8, v0
; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8
-; ZVFBFA-NEXT: vmerge.vvm v8, v8, v10, v0
-; ZVFBFA-NEXT: vfmin.vv v9, v8, v9
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: vmerge.vvm v10, v8, v9, v0
+; ZVFBFA-NEXT: vmfeq.vv v0, v9, v9
+; ZVFBFA-NEXT: vmerge.vvm v8, v9, v8, v0
+; ZVFBFA-NEXT: vfmin.vv v8, v8, v10
; ZVFBFA-NEXT: ret
%v = call <4 x bfloat> @llvm.minimum.v4bf16(<4 x bfloat> %a, <4 x bfloat> %b)
ret <4 x bfloat> %v
@@ -160,18 +146,11 @@ define <8 x bfloat> @vfmin_v8bf16_vv(<8 x bfloat> %a, <8 x bfloat> %b) {
; ZVFBFA-LABEL: vfmin_v8bf16_vv:
; ZVFBFA: # %bb.0:
; ZVFBFA-NEXT: vsetivli zero, 8, e16alt, m1, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFBFA-NEXT: vmfeq.vv v0, v10, v10
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v9
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFBFA-NEXT: vmerge.vvm v8, v10, v12, v0
-; ZVFBFA-NEXT: vmfeq.vv v0, v12, v12
-; ZVFBFA-NEXT: vmerge.vvm v10, v12, v10, v0
-; ZVFBFA-NEXT: vfmin.vv v10, v10, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT: vmerge.vvm v10, v8, v9, v0
+; ZVFBFA-NEXT: vmfeq.vv v0, v9, v9
+; ZVFBFA-NEXT: vmerge.vvm v8, v9, v8, v0
+; ZVFBFA-NEXT: vfmin.vv v8, v8, v10
; ZVFBFA-NEXT: ret
%v = call <8 x bfloat> @llvm.minimum.v8bf16(<8 x bfloat> %a, <8 x bfloat> %b)
ret <8 x bfloat> %v
@@ -215,18 +194,11 @@ define <16 x bfloat> @vfmin_v16bf16_vv(<16 x bfloat> %a, <16 x bfloat> %b) {
; ZVFBFA-LABEL: vfmin_v16bf16_vv:
; ZVFBFA: # %bb.0:
; ZVFBFA-NEXT: vsetivli zero, 16, e16alt, m2, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFBFA-NEXT: vmfeq.vv v0, v12, v12
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v10
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFBFA-NEXT: vmerge.vvm v8, v12, v16, v0
-; ZVFBFA-NEXT: vmfeq.vv v0, v16, v16
-; ZVFBFA-NEXT: vmerge.vvm v12, v16, v12, v0
-; ZVFBFA-NEXT: vfmin.vv v12, v12, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT: vmerge.vvm v12, v8, v10, v0
+; ZVFBFA-NEXT: vmfeq.vv v0, v10, v10
+; ZVFBFA-NEXT: vmerge.vvm v8, v10, v8, v0
+; ZVFBFA-NEXT: vfmin.vv v8, v8, v12
; ZVFBFA-NEXT: ret
%v = call <16 x bfloat> @llvm.minimum.v16bf16(<16 x bfloat> %a, <16 x bfloat> %b)
ret <16 x bfloat> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimumnum.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimumnum.ll
index eed806da19601..39f557b767410 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimumnum.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimumnum.ll
@@ -377,12 +377,7 @@ define <2 x bfloat> @min_v2bf16(<2 x bfloat> %a, <2 x bfloat> %b) {
; ZVFBFA-LABEL: min_v2bf16:
; ZVFBFA: # %bb.0: # %entry
; ZVFBFA-NEXT: vsetivli zero, 2, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFBFA-NEXT: vfmin.vv v9, v9, v10
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: vfmin.vv v8, v8, v9
; ZVFBFA-NEXT: ret
entry:
%c = call <2 x bfloat> @llvm.minimumnum.v2bf16(<2 x bfloat> %a, <2 x bfloat> %b)
@@ -563,12 +558,7 @@ define <4 x bfloat> @min_v4bf16(<4 x bfloat> %a, <4 x bfloat> %b) {
; ZVFBFA-LABEL: min_v4bf16:
; ZVFBFA: # %bb.0: # %entry
; ZVFBFA-NEXT: vsetivli zero, 4, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFBFA-NEXT: vfmin.vv v9, v9, v10
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: vfmin.vv v8, v8, v9
; ZVFBFA-NEXT: ret
entry:
%c = call <4 x bfloat> @llvm.minimumnum.v4bf16(<4 x bfloat> %a, <4 x bfloat> %b)
@@ -869,12 +859,7 @@ define <8 x bfloat> @min_v8bf16(<8 x bfloat> %a, <8 x bfloat> %b) {
; ZVFBFA-LABEL: min_v8bf16:
; ZVFBFA: # %bb.0: # %entry
; ZVFBFA-NEXT: vsetivli zero, 8, e16alt, m1, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFBFA-NEXT: vfmin.vv v10, v12, v10
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT: vfmin.vv v8, v8, v9
; ZVFBFA-NEXT: ret
entry:
%c = call <8 x bfloat> @llvm.minimumnum.v8bf16(<8 x bfloat> %a, <8 x bfloat> %b)
@@ -1239,12 +1224,7 @@ define <9 x bfloat> @min_v9bf16(<9 x bfloat> %a, <9 x bfloat> %b) {
; ZVFBFA-LABEL: min_v9bf16:
; ZVFBFA: # %bb.0: # %entry
; ZVFBFA-NEXT: vsetivli zero, 16, e16alt, m2, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v10
-; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFBFA-NEXT: vfmin.vv v12, v16, v12
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: vfmin.vv v8, v8, v10
; ZVFBFA-NEXT: ret
entry:
%c = call <9 x bfloat> @llvm.minimumnum.v9bf16(<9 x bfloat> %a, <9 x bfloat> %b)
@@ -1725,12 +1705,7 @@ define <16 x bfloat> @min_v16bf16(<16 x bfloat> %a, <16 x bfloat> %b) {
; ZVFBFA-LABEL: min_v16bf16:
; ZVFBFA: # %bb.0: # %entry
; ZVFBFA-NEXT: vsetivli zero, 16, e16alt, m2, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v10
-; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFBFA-NEXT: vfmin.vv v12, v16, v12
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: vfmin.vv v8, v8, v10
; ZVFBFA-NEXT: ret
entry:
%c = call <16 x bfloat> @llvm.minimumnum.v16bf16(<16 x bfloat> %a, <16 x bfloat> %b)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax.ll
index 179d88cecc7f8..bdf78211988b6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax.ll
@@ -38,12 +38,7 @@ define <2 x bfloat> @vfmax_v2bf16_vv(<2 x bfloat> %a, <2 x bfloat> %b) {
; ZVFBFA-LABEL: vfmax_v2bf16_vv:
; ZVFBFA: # %bb.0:
; ZVFBFA-NEXT: vsetivli zero, 2, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFBFA-NEXT: vfmax.vv v9, v9, v10
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: vfmax.vv v8, v8, v9
; ZVFBFA-NEXT: ret
%v = call <2 x bfloat> @llvm.maxnum.v2bf16(<2 x bfloat> %a, <2 x bfloat> %b)
ret <2 x bfloat> %v
@@ -79,13 +74,7 @@ define <2 x bfloat> @vfmax_v2bf16_vf(<2 x bfloat> %a, bfloat %b) {
; ZVFBFA-LABEL: vfmax_v2bf16_vf:
; ZVFBFA: # %bb.0:
; ZVFBFA-NEXT: vsetivli zero, 2, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT: vfmv.v.f v9, fa0
-; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFBFA-NEXT: vfmax.vv v9, v10, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: vfmax.vf v8, v8, fa0
; ZVFBFA-NEXT: ret
%head = insertelement <2 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <2 x bfloat> %head, <2 x bfloat> poison, <2 x i32> zeroinitializer
@@ -123,13 +112,7 @@ define <2 x bfloat> @vfmax_v2bf16_fv(<2 x bfloat> %a, bfloat %b) {
; ZVFBFA-LABEL: vfmax_v2bf16_fv:
; ZVFBFA: # %bb.0:
; ZVFBFA-NEXT: vsetivli zero, 2, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT: vfmv.v.f v9, fa0
-; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFBFA-NEXT: vfmax.vv v9, v8, v10
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: vfmax.vf v8, v8, fa0
; ZVFBFA-NEXT: ret
%head = insertelement <2 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <2 x bfloat> %head, <2 x bfloat> poison, <2 x i32> zeroinitializer
@@ -163,12 +146,7 @@ define <4 x bfloat> @vfmax_v4bf16_vv(<4 x bfloat> %a, <4 x bfloat> %b) {
; ZVFBFA-LABEL: vfmax_v4bf16_vv:
; ZVFBFA: # %bb.0:
; ZVFBFA-NEXT: vsetivli zero, 4, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFBFA-NEXT: vfmax.vv v9, v9, v10
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: vfmax.vv v8, v8, v9
; ZVFBFA-NEXT: ret
%v = call <4 x bfloat> @llvm.maxnum.v4bf16(<4 x bfloat> %a, <4 x bfloat> %b)
ret <4 x bfloat> %v
@@ -204,13 +182,7 @@ define <4 x bfloat> @vfmax_v4bf16_vf(<4 x bfloat> %a, bfloat %b) {
; ZVFBFA-LABEL: vfmax_v4bf16_vf:
; ZVFBFA: # %bb.0:
; ZVFBFA-NEXT: vsetivli zero, 4, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT: vfmv.v.f v9, fa0
-; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFBFA-NEXT: vfmax.vv v9, v10, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: vfmax.vf v8, v8, fa0
; ZVFBFA-NEXT: ret
%head = insertelement <4 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <4 x bfloat> %head, <4 x bfloat> poison, <4 x i32> zeroinitializer
@@ -248,13 +220,7 @@ define <4 x bfloat> @vfmax_v4bf16_fv(<4 x bfloat> %a, bfloat %b) {
; ZVFBFA-LABEL: vfmax_v4bf16_fv:
; ZVFBFA: # %bb.0:
; ZVFBFA-NEXT: vsetivli zero, 4, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT: vfmv.v.f v9, fa0
-; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFBFA-NEXT: vfmax.vv v9, v8, v10
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: vfmax.vf v8, v8, fa0
; ZVFBFA-NEXT: ret
%head = insertelement <4 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <4 x bfloat> %head, <4 x bfloat> poison, <4 x i32> zeroinitializer
@@ -288,12 +254,7 @@ define <8 x bfloat> @vfmax_v8bf16_vv(<8 x bfloat> %a, <8 x bfloat> %b) {
; ZVFBFA-LABEL: vfmax_v8bf16_vv:
; ZVFBFA: # %bb.0:
; ZVFBFA-NEXT: vsetivli zero, 8, e16alt, m1, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFBFA-NEXT: vfmax.vv v10, v12, v10
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT: vfmax.vv v8, v8, v9
; ZVFBFA-NEXT: ret
%v = call <8 x bfloat> @llvm.maxnum.v8bf16(<8 x bfloat> %a, <8 x bfloat> %b)
ret <8 x bfloat> %v
@@ -329,13 +290,7 @@ define <8 x bfloat> @vfmax_v8bf16_vf(<8 x bfloat> %a, bfloat %b) {
; ZVFBFA-LABEL: vfmax_v8bf16_vf:
; ZVFBFA: # %bb.0:
; ZVFBFA-NEXT: vsetivli zero, 8, e16alt, m1, ta, ma
-; ZVFBFA-NEXT: vfmv.v.f v12, fa0
-; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v12
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFBFA-NEXT: vfmax.vv v10, v10, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT: vfmax.vf v8, v8, fa0
; ZVFBFA-NEXT: ret
%head = insertelement <8 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <8 x bfloat> %head, <8 x bfloat> poison, <8 x i32> zeroinitializer
@@ -373,13 +328,7 @@ define <8 x bfloat> @vfmax_v8bf16_fv(<8 x bfloat> %a, bfloat %b) {
; ZVFBFA-LABEL: vfmax_v8bf16_fv:
; ZVFBFA: # %bb.0:
; ZVFBFA-NEXT: vsetivli zero, 8, e16alt, m1, ta, ma
-; ZVFBFA-NEXT: vfmv.v.f v12, fa0
-; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v12
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFBFA-NEXT: vfmax.vv v10, v8, v10
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT: vfmax.vf v8, v8, fa0
; ZVFBFA-NEXT: ret
%head = insertelement <8 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <8 x bfloat> %head, <8 x bfloat> poison, <8 x i32> zeroinitializer
@@ -413,12 +362,7 @@ define <16 x bfloat> @vfmax_v16bf16_vv(<16 x bfloat> %a, <16 x bfloat> %b) {
; ZVFBFA-LABEL: vfmax_v16bf16_vv:
; ZVFBFA: # %bb.0:
; ZVFBFA-NEXT: vsetivli zero, 16, e16alt, m2, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v10
-; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFBFA-NEXT: vfmax.vv v12, v16, v12
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: vfmax.vv v8, v8, v10
; ZVFBFA-NEXT: ret
%v = call <16 x bfloat> @llvm.maxnum.v16bf16(<16 x bfloat> %a, <16 x bfloat> %b)
ret <16 x bfloat> %v
@@ -454,13 +398,7 @@ define <16 x bfloat> @vfmax_v16bf16_vf(<16 x bfloat> %a, bfloat %b) {
; ZVFBFA-LABEL: vfmax_v16bf16_vf:
; ZVFBFA: # %bb.0:
; ZVFBFA-NEXT: vsetivli zero, 16, e16alt, m2, ta, ma
-; ZVFBFA-NEXT: vfmv.v.f v16, fa0
-; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v16
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFBFA-NEXT: vfmax.vv v12, v12, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: vfmax.vf v8, v8, fa0
; ZVFBFA-NEXT: ret
%head = insertelement <16 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <16 x bfloat> %head, <16 x bfloat> poison, <16 x i32> zeroinitializer
@@ -498,13 +436,7 @@ define <16 x bfloat> @vfmax_v16bf16_fv(<16 x bfloat> %a, bfloat %b) {
; ZVFBFA-LABEL: vfmax_v16bf16_fv:
; ZVFBFA: # %bb.0:
; ZVFBFA-NEXT: vsetivli zero, 16, e16alt, m2, ta, ma
-; ZVFBFA-NEXT: vfmv.v.f v16, fa0
-; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v16
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFBFA-NEXT: vfmax.vv v12, v8, v12
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: vfmax.vf v8, v8, fa0
; ZVFBFA-NEXT: ret
%head = insertelement <16 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <16 x bfloat> %head, <16 x bfloat> poison, <16 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin.ll
index 2228292d9e66b..3283dd35abf82 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin.ll
@@ -38,12 +38,7 @@ define <2 x bfloat> @vfmin_v2bf16_vv(<2 x bfloat> %a, <2 x bfloat> %b) {
; ZVFBFA-LABEL: vfmin_v2bf16_vv:
; ZVFBFA: # %bb.0:
; ZVFBFA-NEXT: vsetivli zero, 2, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFBFA-NEXT: vfmin.vv v9, v9, v10
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: vfmin.vv v8, v8, v9
; ZVFBFA-NEXT: ret
%v = call <2 x bfloat> @llvm.minnum.v2bf16(<2 x bfloat> %a, <2 x bfloat> %b)
ret <2 x bfloat> %v
@@ -79,13 +74,7 @@ define <2 x bfloat> @vfmin_v2bf16_vf(<2 x bfloat> %a, bfloat %b) {
; ZVFBFA-LABEL: vfmin_v2bf16_vf:
; ZVFBFA: # %bb.0:
; ZVFBFA-NEXT: vsetivli zero, 2, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT: vfmv.v.f v9, fa0
-; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFBFA-NEXT: vfmin.vv v9, v10, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: vfmin.vf v8, v8, fa0
; ZVFBFA-NEXT: ret
%head = insertelement <2 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <2 x bfloat> %head, <2 x bfloat> poison, <2 x i32> zeroinitializer
@@ -123,13 +112,7 @@ define <2 x bfloat> @vfmin_v2bf16_fv(<2 x bfloat> %a, bfloat %b) {
; ZVFBFA-LABEL: vfmin_v2bf16_fv:
; ZVFBFA: # %bb.0:
; ZVFBFA-NEXT: vsetivli zero, 2, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT: vfmv.v.f v9, fa0
-; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFBFA-NEXT: vfmin.vv v9, v8, v10
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: vfmin.vf v8, v8, fa0
; ZVFBFA-NEXT: ret
%head = insertelement <2 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <2 x bfloat> %head, <2 x bfloat> poison, <2 x i32> zeroinitializer
@@ -163,12 +146,7 @@ define <4 x bfloat> @vfmin_v4bf16_vv(<4 x bfloat> %a, <4 x bfloat> %b) {
; ZVFBFA-LABEL: vfmin_v4bf16_vv:
; ZVFBFA: # %bb.0:
; ZVFBFA-NEXT: vsetivli zero, 4, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFBFA-NEXT: vfmin.vv v9, v9, v10
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: vfmin.vv v8, v8, v9
; ZVFBFA-NEXT: ret
%v = call <4 x bfloat> @llvm.minnum.v4bf16(<4 x bfloat> %a, <4 x bfloat> %b)
ret <4 x bfloat> %v
@@ -204,13 +182,7 @@ define <4 x bfloat> @vfmin_v4bf16_vf(<4 x bfloat> %a, bfloat %b) {
; ZVFBFA-LABEL: vfmin_v4bf16_vf:
; ZVFBFA: # %bb.0:
; ZVFBFA-NEXT: vsetivli zero, 4, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT: vfmv.v.f v9, fa0
-; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFBFA-NEXT: vfmin.vv v9, v10, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: vfmin.vf v8, v8, fa0
; ZVFBFA-NEXT: ret
%head = insertelement <4 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <4 x bfloat> %head, <4 x bfloat> poison, <4 x i32> zeroinitializer
@@ -248,13 +220,7 @@ define <4 x bfloat> @vfmin_v4bf16_fv(<4 x bfloat> %a, bfloat %b) {
; ZVFBFA-LABEL: vfmin_v4bf16_fv:
; ZVFBFA: # %bb.0:
; ZVFBFA-NEXT: vsetivli zero, 4, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT: vfmv.v.f v9, fa0
-; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFBFA-NEXT: vfmin.vv v9, v8, v10
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: vfmin.vf v8, v8, fa0
; ZVFBFA-NEXT: ret
%head = insertelement <4 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <4 x bfloat> %head, <4 x bfloat> poison, <4 x i32> zeroinitializer
@@ -288,12 +254,7 @@ define <8 x bfloat> @vfmin_v8bf16_vv(<8 x bfloat> %a, <8 x bfloat> %b) {
; ZVFBFA-LABEL: vfmin_v8bf16_vv:
; ZVFBFA: # %bb.0:
; ZVFBFA-NEXT: vsetivli zero, 8, e16alt, m1, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFBFA-NEXT: vfmin.vv v10, v12, v10
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT: vfmin.vv v8, v8, v9
; ZVFBFA-NEXT: ret
%v = call <8 x bfloat> @llvm.minnum.v8bf16(<8 x bfloat> %a, <8 x bfloat> %b)
ret <8 x bfloat> %v
@@ -329,13 +290,7 @@ define <8 x bfloat> @vfmin_v8bf16_vf(<8 x bfloat> %a, bfloat %b) {
; ZVFBFA-LABEL: vfmin_v8bf16_vf:
; ZVFBFA: # %bb.0:
; ZVFBFA-NEXT: vsetivli zero, 8, e16alt, m1, ta, ma
-; ZVFBFA-NEXT: vfmv.v.f v12, fa0
-; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v12
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFBFA-NEXT: vfmin.vv v10, v10, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT: vfmin.vf v8, v8, fa0
; ZVFBFA-NEXT: ret
%head = insertelement <8 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <8 x bfloat> %head, <8 x bfloat> poison, <8 x i32> zeroinitializer
@@ -373,13 +328,7 @@ define <8 x bfloat> @vfmin_v8bf16_fv(<8 x bfloat> %a, bfloat %b) {
; ZVFBFA-LABEL: vfmin_v8bf16_fv:
; ZVFBFA: # %bb.0:
; ZVFBFA-NEXT: vsetivli zero, 8, e16alt, m1, ta, ma
-; ZVFBFA-NEXT: vfmv.v.f v12, fa0
-; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v12
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFBFA-NEXT: vfmin.vv v10, v8, v10
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT: vfmin.vf v8, v8, fa0
; ZVFBFA-NEXT: ret
%head = insertelement <8 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <8 x bfloat> %head, <8 x bfloat> poison, <8 x i32> zeroinitializer
@@ -413,12 +362,7 @@ define <16 x bfloat> @vfmin_v16bf16_vv(<16 x bfloat> %a, <16 x bfloat> %b) {
; ZVFBFA-LABEL: vfmin_v16bf16_vv:
; ZVFBFA: # %bb.0:
; ZVFBFA-NEXT: vsetivli zero, 16, e16alt, m2, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v10
-; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFBFA-NEXT: vfmin.vv v12, v16, v12
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: vfmin.vv v8, v8, v10
; ZVFBFA-NEXT: ret
%v = call <16 x bfloat> @llvm.minnum.v16bf16(<16 x bfloat> %a, <16 x bfloat> %b)
ret <16 x bfloat> %v
@@ -454,13 +398,7 @@ define <16 x bfloat> @vfmin_v16bf16_vf(<16 x bfloat> %a, bfloat %b) {
; ZVFBFA-LABEL: vfmin_v16bf16_vf:
; ZVFBFA: # %bb.0:
; ZVFBFA-NEXT: vsetivli zero, 16, e16alt, m2, ta, ma
-; ZVFBFA-NEXT: vfmv.v.f v16, fa0
-; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v16
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFBFA-NEXT: vfmin.vv v12, v12, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: vfmin.vf v8, v8, fa0
; ZVFBFA-NEXT: ret
%head = insertelement <16 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <16 x bfloat> %head, <16 x bfloat> poison, <16 x i32> zeroinitializer
@@ -498,13 +436,7 @@ define <16 x bfloat> @vfmin_v16bf16_fv(<16 x bfloat> %a, bfloat %b) {
; ZVFBFA-LABEL: vfmin_v16bf16_fv:
; ZVFBFA: # %bb.0:
; ZVFBFA-NEXT: vsetivli zero, 16, e16alt, m2, ta, ma
-; ZVFBFA-NEXT: vfmv.v.f v16, fa0
-; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v16
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFBFA-NEXT: vfmin.vv v12, v8, v12
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: vfmin.vf v8, v8, fa0
; ZVFBFA-NEXT: ret
%head = insertelement <16 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <16 x bfloat> %head, <16 x bfloat> poison, <16 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll
index 9a3d27becd644..dcf6dee8f736c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll
@@ -54,18 +54,11 @@ define <vscale x 1 x bfloat> @vfmax_nxv1bf16_vv(<vscale x 1 x bfloat> %a, <vscal
; ZVFBFA-LABEL: vfmax_nxv1bf16_vv:
; ZVFBFA: # %bb.0:
; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFBFA-NEXT: vmfeq.vv v0, v10, v10
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFBFA-NEXT: vmerge.vvm v9, v10, v8, v0
; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8
-; ZVFBFA-NEXT: vmerge.vvm v8, v8, v10, v0
-; ZVFBFA-NEXT: vfmax.vv v9, v8, v9
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: vmerge.vvm v10, v8, v9, v0
+; ZVFBFA-NEXT: vmfeq.vv v0, v9, v9
+; ZVFBFA-NEXT: vmerge.vvm v8, v9, v8, v0
+; ZVFBFA-NEXT: vfmax.vv v8, v8, v10
; ZVFBFA-NEXT: ret
%v = call <vscale x 1 x bfloat> @llvm.maximum.nxv1bf16(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b)
ret <vscale x 1 x bfloat> %v
@@ -109,18 +102,11 @@ define <vscale x 2 x bfloat> @vfmax_nxv2bf16_vv(<vscale x 2 x bfloat> %a, <vscal
; ZVFBFA-LABEL: vfmax_nxv2bf16_vv:
; ZVFBFA: # %bb.0:
; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFBFA-NEXT: vmfeq.vv v0, v10, v10
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFBFA-NEXT: vmerge.vvm v9, v10, v8, v0
; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8
-; ZVFBFA-NEXT: vmerge.vvm v8, v8, v10, v0
-; ZVFBFA-NEXT: vfmax.vv v9, v8, v9
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: vmerge.vvm v10, v8, v9, v0
+; ZVFBFA-NEXT: vmfeq.vv v0, v9, v9
+; ZVFBFA-NEXT: vmerge.vvm v8, v9, v8, v0
+; ZVFBFA-NEXT: vfmax.vv v8, v8, v10
; ZVFBFA-NEXT: ret
%v = call <vscale x 2 x bfloat> @llvm.maximum.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b)
ret <vscale x 2 x bfloat> %v
@@ -164,18 +150,11 @@ define <vscale x 4 x bfloat> @vfmax_nxv4bf16_vv(<vscale x 4 x bfloat> %a, <vscal
; ZVFBFA-LABEL: vfmax_nxv4bf16_vv:
; ZVFBFA: # %bb.0:
; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFBFA-NEXT: vmfeq.vv v0, v10, v10
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v9
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFBFA-NEXT: vmerge.vvm v8, v10, v12, v0
-; ZVFBFA-NEXT: vmfeq.vv v0, v12, v12
-; ZVFBFA-NEXT: vmerge.vvm v10, v12, v10, v0
-; ZVFBFA-NEXT: vfmax.vv v10, v10, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT: vmerge.vvm v10, v8, v9, v0
+; ZVFBFA-NEXT: vmfeq.vv v0, v9, v9
+; ZVFBFA-NEXT: vmerge.vvm v8, v9, v8, v0
+; ZVFBFA-NEXT: vfmax.vv v8, v8, v10
; ZVFBFA-NEXT: ret
%v = call <vscale x 4 x bfloat> @llvm.maximum.nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b)
ret <vscale x 4 x bfloat> %v
@@ -219,18 +198,11 @@ define <vscale x 8 x bfloat> @vfmax_nxv8bf16_vv(<vscale x 8 x bfloat> %a, <vscal
; ZVFBFA-LABEL: vfmax_nxv8bf16_vv:
; ZVFBFA: # %bb.0:
; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFBFA-NEXT: vmfeq.vv v0, v12, v12
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v10
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFBFA-NEXT: vmerge.vvm v8, v12, v16, v0
-; ZVFBFA-NEXT: vmfeq.vv v0, v16, v16
-; ZVFBFA-NEXT: vmerge.vvm v12, v16, v12, v0
-; ZVFBFA-NEXT: vfmax.vv v12, v12, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT: vmerge.vvm v12, v8, v10, v0
+; ZVFBFA-NEXT: vmfeq.vv v0, v10, v10
+; ZVFBFA-NEXT: vmerge.vvm v8, v10, v8, v0
+; ZVFBFA-NEXT: vfmax.vv v8, v8, v12
; ZVFBFA-NEXT: ret
%v = call <vscale x 8 x bfloat> @llvm.maximum.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b)
ret <vscale x 8 x bfloat> %v
@@ -272,17 +244,11 @@ define <vscale x 16 x bfloat> @vfmax_nxv16bf16_vv(<vscale x 16 x bfloat> %a, <vs
; ZVFBFA-LABEL: vfmax_nxv16bf16_vv:
; ZVFBFA: # %bb.0:
; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v12
-; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFBFA-NEXT: vmfeq.vv v0, v24, v24
-; ZVFBFA-NEXT: vmfeq.vv v7, v16, v16
-; ZVFBFA-NEXT: vmerge.vvm v8, v24, v16, v0
-; ZVFBFA-NEXT: vmv1r.v v0, v7
-; ZVFBFA-NEXT: vmerge.vvm v16, v16, v24, v0
-; ZVFBFA-NEXT: vfmax.vv v16, v16, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT: vmerge.vvm v16, v8, v12, v0
+; ZVFBFA-NEXT: vmfeq.vv v0, v12, v12
+; ZVFBFA-NEXT: vmerge.vvm v8, v12, v8, v0
+; ZVFBFA-NEXT: vfmax.vv v8, v8, v16
; ZVFBFA-NEXT: ret
%v = call <vscale x 16 x bfloat> @llvm.maximum.nxv16bf16(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %b)
ret <vscale x 16 x bfloat> %v
@@ -417,65 +383,13 @@ define <vscale x 32 x bfloat> @vfmax_nxv32bf16_vv(<vscale x 32 x bfloat> %a, <vs
;
; ZVFBFA-LABEL: vfmax_nxv32bf16_vv:
; ZVFBFA: # %bb.0:
-; ZVFBFA-NEXT: addi sp, sp, -16
-; ZVFBFA-NEXT: csrr a0, vlenb
-; ZVFBFA-NEXT: slli a0, a0, 4
-; ZVFBFA-NEXT: sub sp, sp, a0
-; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT: vmv8r.v v0, v16
-; ZVFBFA-NEXT: vmv8r.v v24, v8
-; ZVFBFA-NEXT: addi a0, sp, 16
-; ZVFBFA-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
-; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v0
-; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v24
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m8, ta, ma
; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8
-; ZVFBFA-NEXT: vmfeq.vv v3, v16, v16
+; ZVFBFA-NEXT: vmfeq.vv v7, v16, v16
; ZVFBFA-NEXT: vmerge.vvm v24, v8, v16, v0
-; ZVFBFA-NEXT: csrr a0, vlenb
-; ZVFBFA-NEXT: slli a0, a0, 3
-; ZVFBFA-NEXT: add a0, sp, a0
-; ZVFBFA-NEXT: addi a0, a0, 16
-; ZVFBFA-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
-; ZVFBFA-NEXT: vmv1r.v v0, v3
-; ZVFBFA-NEXT: vmerge.vvm v8, v16, v8, v0
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v4
-; ZVFBFA-NEXT: csrr a0, vlenb
-; ZVFBFA-NEXT: slli a0, a0, 3
-; ZVFBFA-NEXT: add a0, sp, a0
-; ZVFBFA-NEXT: addi a0, a0, 16
-; ZVFBFA-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFBFA-NEXT: vfmax.vv v8, v8, v16
-; ZVFBFA-NEXT: csrr a0, vlenb
-; ZVFBFA-NEXT: slli a0, a0, 3
-; ZVFBFA-NEXT: add a0, sp, a0
-; ZVFBFA-NEXT: addi a0, a0, 16
-; ZVFBFA-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
-; ZVFBFA-NEXT: addi a0, sp, 16
-; ZVFBFA-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v20
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8
-; ZVFBFA-NEXT: vmfeq.vv v7, v24, v24
-; ZVFBFA-NEXT: vmerge.vvm v16, v8, v24, v0
; ZVFBFA-NEXT: vmv1r.v v0, v7
-; ZVFBFA-NEXT: vmerge.vvm v8, v24, v8, v0
-; ZVFBFA-NEXT: vfmax.vv v16, v8, v16
-; ZVFBFA-NEXT: csrr a0, vlenb
-; ZVFBFA-NEXT: slli a0, a0, 3
-; ZVFBFA-NEXT: add a0, sp, a0
-; ZVFBFA-NEXT: addi a0, a0, 16
-; ZVFBFA-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v24
-; ZVFBFA-NEXT: vfncvt.f.f.w v12, v16
-; ZVFBFA-NEXT: csrr a0, vlenb
-; ZVFBFA-NEXT: slli a0, a0, 4
-; ZVFBFA-NEXT: add sp, sp, a0
-; ZVFBFA-NEXT: addi sp, sp, 16
+; ZVFBFA-NEXT: vmerge.vvm v8, v16, v8, v0
+; ZVFBFA-NEXT: vfmax.vv v8, v8, v24
; ZVFBFA-NEXT: ret
%v = call <vscale x 32 x bfloat> @llvm.maximum.nxv32bf16(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b)
ret <vscale x 32 x bfloat> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/fmaximumnum-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fmaximumnum-sdnode.ll
index 20f4a4d939ce1..ed9ce8cefd538 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fmaximumnum-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fmaximumnum-sdnode.ll
@@ -44,12 +44,7 @@ define <vscale x 1 x bfloat> @vfmax_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vsca
; ZVFBFA-LABEL: vfmax_vv_nxv1bf16:
; ZVFBFA: # %bb.0:
; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFBFA-NEXT: vfmax.vv v9, v9, v10
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: vfmax.vv v8, v8, v9
; ZVFBFA-NEXT: ret
%vc = call <vscale x 1 x bfloat> @llvm.maximumnum.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb)
ret <vscale x 1 x bfloat> %vc
@@ -80,13 +75,8 @@ define <vscale x 1 x bfloat> @vfmax_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloa
;
; ZVFBFA-LABEL: vfmax_vf_nxv1bf16:
; ZVFBFA: # %bb.0:
-; ZVFBFA-NEXT: fcvt.s.bf16 fa5, fa0
; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFBFA-NEXT: vfmax.vf v9, v9, fa5
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: vfmax.vf v8, v8, fa0
; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <vscale x 1 x bfloat> %head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
@@ -120,12 +110,7 @@ define <vscale x 2 x bfloat> @vfmax_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vsca
; ZVFBFA-LABEL: vfmax_vv_nxv2bf16:
; ZVFBFA: # %bb.0:
; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFBFA-NEXT: vfmax.vv v9, v9, v10
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: vfmax.vv v8, v8, v9
; ZVFBFA-NEXT: ret
%vc = call <vscale x 2 x bfloat> @llvm.maximumnum.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb)
ret <vscale x 2 x bfloat> %vc
@@ -156,13 +141,8 @@ define <vscale x 2 x bfloat> @vfmax_vf_nxv2bf16(<vscale x 2 x bfloat> %va, bfloa
;
; ZVFBFA-LABEL: vfmax_vf_nxv2bf16:
; ZVFBFA: # %bb.0:
-; ZVFBFA-NEXT: fcvt.s.bf16 fa5, fa0
; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFBFA-NEXT: vfmax.vf v9, v9, fa5
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: vfmax.vf v8, v8, fa0
; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <vscale x 2 x bfloat> %head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
@@ -196,12 +176,7 @@ define <vscale x 4 x bfloat> @vfmax_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vsca
; ZVFBFA-LABEL: vfmax_vv_nxv4bf16:
; ZVFBFA: # %bb.0:
; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFBFA-NEXT: vfmax.vv v10, v12, v10
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT: vfmax.vv v8, v8, v9
; ZVFBFA-NEXT: ret
%vc = call <vscale x 4 x bfloat> @llvm.maximumnum.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb)
ret <vscale x 4 x bfloat> %vc
@@ -232,13 +207,8 @@ define <vscale x 4 x bfloat> @vfmax_vf_nxv4bf16(<vscale x 4 x bfloat> %va, bfloa
;
; ZVFBFA-LABEL: vfmax_vf_nxv4bf16:
; ZVFBFA: # %bb.0:
-; ZVFBFA-NEXT: fcvt.s.bf16 fa5, fa0
; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFBFA-NEXT: vfmax.vf v10, v10, fa5
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT: vfmax.vf v8, v8, fa0
; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <vscale x 4 x bfloat> %head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
@@ -272,12 +242,7 @@ define <vscale x 8 x bfloat> @vfmax_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vsca
; ZVFBFA-LABEL: vfmax_vv_nxv8bf16:
; ZVFBFA: # %bb.0:
; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v10
-; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFBFA-NEXT: vfmax.vv v12, v16, v12
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: vfmax.vv v8, v8, v10
; ZVFBFA-NEXT: ret
%vc = call <vscale x 8 x bfloat> @llvm.maximumnum.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb)
ret <vscale x 8 x bfloat> %vc
@@ -308,13 +273,8 @@ define <vscale x 8 x bfloat> @vfmax_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloa
;
; ZVFBFA-LABEL: vfmax_vf_nxv8bf16:
; ZVFBFA: # %bb.0:
-; ZVFBFA-NEXT: fcvt.s.bf16 fa5, fa0
; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFBFA-NEXT: vfmax.vf v12, v12, fa5
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: vfmax.vf v8, v8, fa0
; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -347,13 +307,8 @@ define <vscale x 8 x bfloat> @vfmax_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloa
;
; ZVFBFA-LABEL: vfmax_fv_nxv8bf16:
; ZVFBFA: # %bb.0:
-; ZVFBFA-NEXT: fcvt.s.bf16 fa5, fa0
; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFBFA-NEXT: vfmax.vf v12, v12, fa5
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: vfmax.vf v8, v8, fa0
; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -387,12 +342,7 @@ define <vscale x 16 x bfloat> @vfmax_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <v
; ZVFBFA-LABEL: vfmax_vv_nxv16bf16:
; ZVFBFA: # %bb.0:
; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v12
-; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFBFA-NEXT: vfmax.vv v16, v24, v16
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT: vfmax.vv v8, v8, v12
; ZVFBFA-NEXT: ret
%vc = call <vscale x 16 x bfloat> @llvm.maximumnum.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb)
ret <vscale x 16 x bfloat> %vc
@@ -423,13 +373,8 @@ define <vscale x 16 x bfloat> @vfmax_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bf
;
; ZVFBFA-LABEL: vfmax_vf_nxv16bf16:
; ZVFBFA: # %bb.0:
-; ZVFBFA-NEXT: fcvt.s.bf16 fa5, fa0
; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFBFA-NEXT: vfmax.vf v16, v16, fa5
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT: vfmax.vf v8, v8, fa0
; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <vscale x 16 x bfloat> %head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
@@ -504,34 +449,8 @@ define <vscale x 32 x bfloat> @vfmax_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <v
;
; ZVFBFA-LABEL: vfmax_vv_nxv32bf16:
; ZVFBFA: # %bb.0:
-; ZVFBFA-NEXT: addi sp, sp, -16
-; ZVFBFA-NEXT: .cfi_def_cfa_offset 16
-; ZVFBFA-NEXT: csrr a0, vlenb
-; ZVFBFA-NEXT: slli a0, a0, 3
-; ZVFBFA-NEXT: sub sp, sp, a0
-; ZVFBFA-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v16
-; ZVFBFA-NEXT: addi a0, sp, 16
-; ZVFBFA-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
-; ZVFBFA-NEXT: vfwcvt.f.f.v v0, v8
-; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v20
-; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v12
-; ZVFBFA-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFBFA-NEXT: vfmax.vv v0, v0, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v0
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFBFA-NEXT: vfmax.vv v16, v16, v24
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v12, v16
-; ZVFBFA-NEXT: csrr a0, vlenb
-; ZVFBFA-NEXT: slli a0, a0, 3
-; ZVFBFA-NEXT: add sp, sp, a0
-; ZVFBFA-NEXT: .cfi_def_cfa sp, 16
-; ZVFBFA-NEXT: addi sp, sp, 16
-; ZVFBFA-NEXT: .cfi_def_cfa_offset 0
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m8, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v8, v8, v16
; ZVFBFA-NEXT: ret
%vc = call <vscale x 32 x bfloat> @llvm.maximumnum.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb)
ret <vscale x 32 x bfloat> %vc
@@ -614,36 +533,8 @@ define <vscale x 32 x bfloat> @vfmax_vf_nxv32bf16(<vscale x 32 x bfloat> %va, bf
;
; ZVFBFA-LABEL: vfmax_vf_nxv32bf16:
; ZVFBFA: # %bb.0:
-; ZVFBFA-NEXT: addi sp, sp, -16
-; ZVFBFA-NEXT: .cfi_def_cfa_offset 16
-; ZVFBFA-NEXT: csrr a0, vlenb
-; ZVFBFA-NEXT: slli a0, a0, 3
-; ZVFBFA-NEXT: sub sp, sp, a0
-; ZVFBFA-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m8, ta, ma
-; ZVFBFA-NEXT: vfmv.v.f v16, fa0
-; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v8
-; ZVFBFA-NEXT: addi a0, sp, 16
-; ZVFBFA-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
-; ZVFBFA-NEXT: vfwcvt.f.f.v v0, v12
-; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v16
-; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v20
-; ZVFBFA-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFBFA-NEXT: vfmax.vv v16, v16, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v16
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFBFA-NEXT: vfmax.vv v16, v0, v24
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v12, v16
-; ZVFBFA-NEXT: csrr a0, vlenb
-; ZVFBFA-NEXT: slli a0, a0, 3
-; ZVFBFA-NEXT: add sp, sp, a0
-; ZVFBFA-NEXT: .cfi_def_cfa sp, 16
-; ZVFBFA-NEXT: addi sp, sp, 16
-; ZVFBFA-NEXT: .cfi_def_cfa_offset 0
+; ZVFBFA-NEXT: vfmax.vf v8, v8, fa0
; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <vscale x 32 x bfloat> %head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll
index bb0025d85ab1d..8473edc0ce003 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll
@@ -54,18 +54,11 @@ define <vscale x 1 x bfloat> @vfmin_nxv1bf16_vv(<vscale x 1 x bfloat> %a, <vscal
; ZVFBFA-LABEL: vfmin_nxv1bf16_vv:
; ZVFBFA: # %bb.0:
; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFBFA-NEXT: vmfeq.vv v0, v10, v10
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFBFA-NEXT: vmerge.vvm v9, v10, v8, v0
; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8
-; ZVFBFA-NEXT: vmerge.vvm v8, v8, v10, v0
-; ZVFBFA-NEXT: vfmin.vv v9, v8, v9
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: vmerge.vvm v10, v8, v9, v0
+; ZVFBFA-NEXT: vmfeq.vv v0, v9, v9
+; ZVFBFA-NEXT: vmerge.vvm v8, v9, v8, v0
+; ZVFBFA-NEXT: vfmin.vv v8, v8, v10
; ZVFBFA-NEXT: ret
%v = call <vscale x 1 x bfloat> @llvm.minimum.nxv1bf16(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b)
ret <vscale x 1 x bfloat> %v
@@ -109,18 +102,11 @@ define <vscale x 2 x bfloat> @vfmin_nxv2bf16_vv(<vscale x 2 x bfloat> %a, <vscal
; ZVFBFA-LABEL: vfmin_nxv2bf16_vv:
; ZVFBFA: # %bb.0:
; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFBFA-NEXT: vmfeq.vv v0, v10, v10
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFBFA-NEXT: vmerge.vvm v9, v10, v8, v0
; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8
-; ZVFBFA-NEXT: vmerge.vvm v8, v8, v10, v0
-; ZVFBFA-NEXT: vfmin.vv v9, v8, v9
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: vmerge.vvm v10, v8, v9, v0
+; ZVFBFA-NEXT: vmfeq.vv v0, v9, v9
+; ZVFBFA-NEXT: vmerge.vvm v8, v9, v8, v0
+; ZVFBFA-NEXT: vfmin.vv v8, v8, v10
; ZVFBFA-NEXT: ret
%v = call <vscale x 2 x bfloat> @llvm.minimum.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b)
ret <vscale x 2 x bfloat> %v
@@ -164,18 +150,11 @@ define <vscale x 4 x bfloat> @vfmin_nxv4bf16_vv(<vscale x 4 x bfloat> %a, <vscal
; ZVFBFA-LABEL: vfmin_nxv4bf16_vv:
; ZVFBFA: # %bb.0:
; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFBFA-NEXT: vmfeq.vv v0, v10, v10
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v9
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFBFA-NEXT: vmerge.vvm v8, v10, v12, v0
-; ZVFBFA-NEXT: vmfeq.vv v0, v12, v12
-; ZVFBFA-NEXT: vmerge.vvm v10, v12, v10, v0
-; ZVFBFA-NEXT: vfmin.vv v10, v10, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT: vmerge.vvm v10, v8, v9, v0
+; ZVFBFA-NEXT: vmfeq.vv v0, v9, v9
+; ZVFBFA-NEXT: vmerge.vvm v8, v9, v8, v0
+; ZVFBFA-NEXT: vfmin.vv v8, v8, v10
; ZVFBFA-NEXT: ret
%v = call <vscale x 4 x bfloat> @llvm.minimum.nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b)
ret <vscale x 4 x bfloat> %v
@@ -219,18 +198,11 @@ define <vscale x 8 x bfloat> @vfmin_nxv8bf16_vv(<vscale x 8 x bfloat> %a, <vscal
; ZVFBFA-LABEL: vfmin_nxv8bf16_vv:
; ZVFBFA: # %bb.0:
; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFBFA-NEXT: vmfeq.vv v0, v12, v12
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v10
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFBFA-NEXT: vmerge.vvm v8, v12, v16, v0
-; ZVFBFA-NEXT: vmfeq.vv v0, v16, v16
-; ZVFBFA-NEXT: vmerge.vvm v12, v16, v12, v0
-; ZVFBFA-NEXT: vfmin.vv v12, v12, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT: vmerge.vvm v12, v8, v10, v0
+; ZVFBFA-NEXT: vmfeq.vv v0, v10, v10
+; ZVFBFA-NEXT: vmerge.vvm v8, v10, v8, v0
+; ZVFBFA-NEXT: vfmin.vv v8, v8, v12
; ZVFBFA-NEXT: ret
%v = call <vscale x 8 x bfloat> @llvm.minimum.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b)
ret <vscale x 8 x bfloat> %v
@@ -272,17 +244,11 @@ define <vscale x 16 x bfloat> @vfmin_nxv16bf16_vv(<vscale x 16 x bfloat> %a, <vs
; ZVFBFA-LABEL: vfmin_nxv16bf16_vv:
; ZVFBFA: # %bb.0:
; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v12
-; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFBFA-NEXT: vmfeq.vv v0, v24, v24
-; ZVFBFA-NEXT: vmfeq.vv v7, v16, v16
-; ZVFBFA-NEXT: vmerge.vvm v8, v24, v16, v0
-; ZVFBFA-NEXT: vmv1r.v v0, v7
-; ZVFBFA-NEXT: vmerge.vvm v16, v16, v24, v0
-; ZVFBFA-NEXT: vfmin.vv v16, v16, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8
+; ZVFBFA-NEXT: vmerge.vvm v16, v8, v12, v0
+; ZVFBFA-NEXT: vmfeq.vv v0, v12, v12
+; ZVFBFA-NEXT: vmerge.vvm v8, v12, v8, v0
+; ZVFBFA-NEXT: vfmin.vv v8, v8, v16
; ZVFBFA-NEXT: ret
%v = call <vscale x 16 x bfloat> @llvm.minimum.nxv16bf16(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %b)
ret <vscale x 16 x bfloat> %v
@@ -417,65 +383,13 @@ define <vscale x 32 x bfloat> @vfmin_nxv32bf16_vv(<vscale x 32 x bfloat> %a, <vs
;
; ZVFBFA-LABEL: vfmin_nxv32bf16_vv:
; ZVFBFA: # %bb.0:
-; ZVFBFA-NEXT: addi sp, sp, -16
-; ZVFBFA-NEXT: csrr a0, vlenb
-; ZVFBFA-NEXT: slli a0, a0, 4
-; ZVFBFA-NEXT: sub sp, sp, a0
-; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT: vmv8r.v v0, v16
-; ZVFBFA-NEXT: vmv8r.v v24, v8
-; ZVFBFA-NEXT: addi a0, sp, 16
-; ZVFBFA-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
-; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v0
-; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v24
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m8, ta, ma
; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8
-; ZVFBFA-NEXT: vmfeq.vv v3, v16, v16
+; ZVFBFA-NEXT: vmfeq.vv v7, v16, v16
; ZVFBFA-NEXT: vmerge.vvm v24, v8, v16, v0
-; ZVFBFA-NEXT: csrr a0, vlenb
-; ZVFBFA-NEXT: slli a0, a0, 3
-; ZVFBFA-NEXT: add a0, sp, a0
-; ZVFBFA-NEXT: addi a0, a0, 16
-; ZVFBFA-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
-; ZVFBFA-NEXT: vmv1r.v v0, v3
-; ZVFBFA-NEXT: vmerge.vvm v8, v16, v8, v0
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v4
-; ZVFBFA-NEXT: csrr a0, vlenb
-; ZVFBFA-NEXT: slli a0, a0, 3
-; ZVFBFA-NEXT: add a0, sp, a0
-; ZVFBFA-NEXT: addi a0, a0, 16
-; ZVFBFA-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFBFA-NEXT: vfmin.vv v8, v8, v16
-; ZVFBFA-NEXT: csrr a0, vlenb
-; ZVFBFA-NEXT: slli a0, a0, 3
-; ZVFBFA-NEXT: add a0, sp, a0
-; ZVFBFA-NEXT: addi a0, a0, 16
-; ZVFBFA-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
-; ZVFBFA-NEXT: addi a0, sp, 16
-; ZVFBFA-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v20
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFBFA-NEXT: vmfeq.vv v0, v8, v8
-; ZVFBFA-NEXT: vmfeq.vv v7, v24, v24
-; ZVFBFA-NEXT: vmerge.vvm v16, v8, v24, v0
; ZVFBFA-NEXT: vmv1r.v v0, v7
-; ZVFBFA-NEXT: vmerge.vvm v8, v24, v8, v0
-; ZVFBFA-NEXT: vfmin.vv v16, v8, v16
-; ZVFBFA-NEXT: csrr a0, vlenb
-; ZVFBFA-NEXT: slli a0, a0, 3
-; ZVFBFA-NEXT: add a0, sp, a0
-; ZVFBFA-NEXT: addi a0, a0, 16
-; ZVFBFA-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v24
-; ZVFBFA-NEXT: vfncvt.f.f.w v12, v16
-; ZVFBFA-NEXT: csrr a0, vlenb
-; ZVFBFA-NEXT: slli a0, a0, 4
-; ZVFBFA-NEXT: add sp, sp, a0
-; ZVFBFA-NEXT: addi sp, sp, 16
+; ZVFBFA-NEXT: vmerge.vvm v8, v16, v8, v0
+; ZVFBFA-NEXT: vfmin.vv v8, v8, v24
; ZVFBFA-NEXT: ret
%v = call <vscale x 32 x bfloat> @llvm.minimum.nxv32bf16(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b)
ret <vscale x 32 x bfloat> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/fminimumnum-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fminimumnum-sdnode.ll
index f2106256c7937..8360d5787d9b3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fminimumnum-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fminimumnum-sdnode.ll
@@ -44,12 +44,7 @@ define <vscale x 1 x bfloat> @vfmin_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vsca
; ZVFBFA-LABEL: vfmin_vv_nxv1bf16:
; ZVFBFA: # %bb.0:
; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFBFA-NEXT: vfmin.vv v9, v9, v10
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: vfmin.vv v8, v8, v9
; ZVFBFA-NEXT: ret
%vc = call <vscale x 1 x bfloat> @llvm.minimumnum.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb)
ret <vscale x 1 x bfloat> %vc
@@ -80,13 +75,8 @@ define <vscale x 1 x bfloat> @vfmin_vf_nxv1bf16(<vscale x 1 x bfloat> %va, bfloa
;
; ZVFBFA-LABEL: vfmin_vf_nxv1bf16:
; ZVFBFA: # %bb.0:
-; ZVFBFA-NEXT: fcvt.s.bf16 fa5, fa0
; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFBFA-NEXT: vfmin.vf v9, v9, fa5
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: vfmin.vf v8, v8, fa0
; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <vscale x 1 x bfloat> %head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
@@ -120,12 +110,7 @@ define <vscale x 2 x bfloat> @vfmin_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vsca
; ZVFBFA-LABEL: vfmin_vv_nxv2bf16:
; ZVFBFA: # %bb.0:
; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFBFA-NEXT: vfmin.vv v9, v9, v10
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: vfmin.vv v8, v8, v9
; ZVFBFA-NEXT: ret
%vc = call <vscale x 2 x bfloat> @llvm.minimumnum.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb)
ret <vscale x 2 x bfloat> %vc
@@ -156,13 +141,8 @@ define <vscale x 2 x bfloat> @vfmin_vf_nxv2bf16(<vscale x 2 x bfloat> %va, bfloa
;
; ZVFBFA-LABEL: vfmin_vf_nxv2bf16:
; ZVFBFA: # %bb.0:
-; ZVFBFA-NEXT: fcvt.s.bf16 fa5, fa0
; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFBFA-NEXT: vfmin.vf v9, v9, fa5
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: vfmin.vf v8, v8, fa0
; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <vscale x 2 x bfloat> %head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
@@ -196,12 +176,7 @@ define <vscale x 4 x bfloat> @vfmin_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vsca
; ZVFBFA-LABEL: vfmin_vv_nxv4bf16:
; ZVFBFA: # %bb.0:
; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFBFA-NEXT: vfmin.vv v10, v12, v10
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT: vfmin.vv v8, v8, v9
; ZVFBFA-NEXT: ret
%vc = call <vscale x 4 x bfloat> @llvm.minimumnum.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb)
ret <vscale x 4 x bfloat> %vc
@@ -232,13 +207,8 @@ define <vscale x 4 x bfloat> @vfmin_vf_nxv4bf16(<vscale x 4 x bfloat> %va, bfloa
;
; ZVFBFA-LABEL: vfmin_vf_nxv4bf16:
; ZVFBFA: # %bb.0:
-; ZVFBFA-NEXT: fcvt.s.bf16 fa5, fa0
; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFBFA-NEXT: vfmin.vf v10, v10, fa5
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT: vfmin.vf v8, v8, fa0
; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <vscale x 4 x bfloat> %head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
@@ -272,12 +242,7 @@ define <vscale x 8 x bfloat> @vfmin_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vsca
; ZVFBFA-LABEL: vfmin_vv_nxv8bf16:
; ZVFBFA: # %bb.0:
; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v10
-; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFBFA-NEXT: vfmin.vv v12, v16, v12
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: vfmin.vv v8, v8, v10
; ZVFBFA-NEXT: ret
%vc = call <vscale x 8 x bfloat> @llvm.minimumnum.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb)
ret <vscale x 8 x bfloat> %vc
@@ -308,13 +273,8 @@ define <vscale x 8 x bfloat> @vfmin_vf_nxv8bf16(<vscale x 8 x bfloat> %va, bfloa
;
; ZVFBFA-LABEL: vfmin_vf_nxv8bf16:
; ZVFBFA: # %bb.0:
-; ZVFBFA-NEXT: fcvt.s.bf16 fa5, fa0
; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFBFA-NEXT: vfmin.vf v12, v12, fa5
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: vfmin.vf v8, v8, fa0
; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -347,13 +307,8 @@ define <vscale x 8 x bfloat> @vfmin_fv_nxv8bf16(<vscale x 8 x bfloat> %va, bfloa
;
; ZVFBFA-LABEL: vfmin_fv_nxv8bf16:
; ZVFBFA: # %bb.0:
-; ZVFBFA-NEXT: fcvt.s.bf16 fa5, fa0
; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFBFA-NEXT: vfmin.vf v12, v12, fa5
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: vfmin.vf v8, v8, fa0
; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -387,12 +342,7 @@ define <vscale x 16 x bfloat> @vfmin_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <v
; ZVFBFA-LABEL: vfmin_vv_nxv16bf16:
; ZVFBFA: # %bb.0:
; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v12
-; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFBFA-NEXT: vfmin.vv v16, v24, v16
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT: vfmin.vv v8, v8, v12
; ZVFBFA-NEXT: ret
%vc = call <vscale x 16 x bfloat> @llvm.minimumnum.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb)
ret <vscale x 16 x bfloat> %vc
@@ -423,13 +373,8 @@ define <vscale x 16 x bfloat> @vfmin_vf_nxv16bf16(<vscale x 16 x bfloat> %va, bf
;
; ZVFBFA-LABEL: vfmin_vf_nxv16bf16:
; ZVFBFA: # %bb.0:
-; ZVFBFA-NEXT: fcvt.s.bf16 fa5, fa0
; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFBFA-NEXT: vfmin.vf v16, v16, fa5
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT: vfmin.vf v8, v8, fa0
; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <vscale x 16 x bfloat> %head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
@@ -504,34 +449,8 @@ define <vscale x 32 x bfloat> @vfmin_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <v
;
; ZVFBFA-LABEL: vfmin_vv_nxv32bf16:
; ZVFBFA: # %bb.0:
-; ZVFBFA-NEXT: addi sp, sp, -16
-; ZVFBFA-NEXT: .cfi_def_cfa_offset 16
-; ZVFBFA-NEXT: csrr a0, vlenb
-; ZVFBFA-NEXT: slli a0, a0, 3
-; ZVFBFA-NEXT: sub sp, sp, a0
-; ZVFBFA-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v16
-; ZVFBFA-NEXT: addi a0, sp, 16
-; ZVFBFA-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
-; ZVFBFA-NEXT: vfwcvt.f.f.v v0, v8
-; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v20
-; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v12
-; ZVFBFA-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFBFA-NEXT: vfmin.vv v0, v0, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v0
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFBFA-NEXT: vfmin.vv v16, v16, v24
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v12, v16
-; ZVFBFA-NEXT: csrr a0, vlenb
-; ZVFBFA-NEXT: slli a0, a0, 3
-; ZVFBFA-NEXT: add sp, sp, a0
-; ZVFBFA-NEXT: .cfi_def_cfa sp, 16
-; ZVFBFA-NEXT: addi sp, sp, 16
-; ZVFBFA-NEXT: .cfi_def_cfa_offset 0
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m8, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v8, v8, v16
; ZVFBFA-NEXT: ret
%vc = call <vscale x 32 x bfloat> @llvm.minimumnum.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb)
ret <vscale x 32 x bfloat> %vc
@@ -614,36 +533,8 @@ define <vscale x 32 x bfloat> @vfmin_vf_nxv32bf16(<vscale x 32 x bfloat> %va, bf
;
; ZVFBFA-LABEL: vfmin_vf_nxv32bf16:
; ZVFBFA: # %bb.0:
-; ZVFBFA-NEXT: addi sp, sp, -16
-; ZVFBFA-NEXT: .cfi_def_cfa_offset 16
-; ZVFBFA-NEXT: csrr a0, vlenb
-; ZVFBFA-NEXT: slli a0, a0, 3
-; ZVFBFA-NEXT: sub sp, sp, a0
-; ZVFBFA-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m8, ta, ma
-; ZVFBFA-NEXT: vfmv.v.f v16, fa0
-; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v8
-; ZVFBFA-NEXT: addi a0, sp, 16
-; ZVFBFA-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
-; ZVFBFA-NEXT: vfwcvt.f.f.v v0, v12
-; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v16
-; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v20
-; ZVFBFA-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFBFA-NEXT: vfmin.vv v16, v16, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v16
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFBFA-NEXT: vfmin.vv v16, v0, v24
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v12, v16
-; ZVFBFA-NEXT: csrr a0, vlenb
-; ZVFBFA-NEXT: slli a0, a0, 3
-; ZVFBFA-NEXT: add sp, sp, a0
-; ZVFBFA-NEXT: .cfi_def_cfa sp, 16
-; ZVFBFA-NEXT: addi sp, sp, 16
-; ZVFBFA-NEXT: .cfi_def_cfa_offset 0
+; ZVFBFA-NEXT: vfmin.vf v8, v8, fa0
; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <vscale x 32 x bfloat> %head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmax-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmax-sdnode.ll
index e228f8459f108..f0ca715f925c1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmax-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmax-sdnode.ll
@@ -44,12 +44,7 @@ define <vscale x 1 x bfloat> @vfmax_nxv1bf16_vv(<vscale x 1 x bfloat> %a, <vscal
; ZVFBFA-LABEL: vfmax_nxv1bf16_vv:
; ZVFBFA: # %bb.0:
; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFBFA-NEXT: vfmax.vv v9, v9, v10
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: vfmax.vv v8, v8, v9
; ZVFBFA-NEXT: ret
%v = call <vscale x 1 x bfloat> @llvm.maxnum.nxv1bf16(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b)
ret <vscale x 1 x bfloat> %v
@@ -80,13 +75,8 @@ define <vscale x 1 x bfloat> @vfmax_nxv1bf16_vf(<vscale x 1 x bfloat> %a, bfloat
;
; ZVFBFA-LABEL: vfmax_nxv1bf16_vf:
; ZVFBFA: # %bb.0:
-; ZVFBFA-NEXT: fcvt.s.bf16 fa5, fa0
; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFBFA-NEXT: vfmax.vf v9, v9, fa5
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: vfmax.vf v8, v8, fa0
; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <vscale x 1 x bfloat> %head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
@@ -120,12 +110,7 @@ define <vscale x 2 x bfloat> @vfmax_nxv2bf16_vv(<vscale x 2 x bfloat> %a, <vscal
; ZVFBFA-LABEL: vfmax_nxv2bf16_vv:
; ZVFBFA: # %bb.0:
; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFBFA-NEXT: vfmax.vv v9, v9, v10
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: vfmax.vv v8, v8, v9
; ZVFBFA-NEXT: ret
%v = call <vscale x 2 x bfloat> @llvm.maxnum.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b)
ret <vscale x 2 x bfloat> %v
@@ -156,13 +141,8 @@ define <vscale x 2 x bfloat> @vfmax_nxv2bf16_vf(<vscale x 2 x bfloat> %a, bfloat
;
; ZVFBFA-LABEL: vfmax_nxv2bf16_vf:
; ZVFBFA: # %bb.0:
-; ZVFBFA-NEXT: fcvt.s.bf16 fa5, fa0
; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFBFA-NEXT: vfmax.vf v9, v9, fa5
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: vfmax.vf v8, v8, fa0
; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <vscale x 2 x bfloat> %head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
@@ -196,12 +176,7 @@ define <vscale x 4 x bfloat> @vfmax_nxv4bf16_vv(<vscale x 4 x bfloat> %a, <vscal
; ZVFBFA-LABEL: vfmax_nxv4bf16_vv:
; ZVFBFA: # %bb.0:
; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFBFA-NEXT: vfmax.vv v10, v12, v10
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT: vfmax.vv v8, v8, v9
; ZVFBFA-NEXT: ret
%v = call <vscale x 4 x bfloat> @llvm.maxnum.nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b)
ret <vscale x 4 x bfloat> %v
@@ -232,13 +207,8 @@ define <vscale x 4 x bfloat> @vfmax_nxv4bf16_vf(<vscale x 4 x bfloat> %a, bfloat
;
; ZVFBFA-LABEL: vfmax_nxv4bf16_vf:
; ZVFBFA: # %bb.0:
-; ZVFBFA-NEXT: fcvt.s.bf16 fa5, fa0
; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFBFA-NEXT: vfmax.vf v10, v10, fa5
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT: vfmax.vf v8, v8, fa0
; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <vscale x 4 x bfloat> %head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
@@ -272,12 +242,7 @@ define <vscale x 8 x bfloat> @vfmax_nxv8bf16_vv(<vscale x 8 x bfloat> %a, <vscal
; ZVFBFA-LABEL: vfmax_nxv8bf16_vv:
; ZVFBFA: # %bb.0:
; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v10
-; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFBFA-NEXT: vfmax.vv v12, v16, v12
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: vfmax.vv v8, v8, v10
; ZVFBFA-NEXT: ret
%v = call <vscale x 8 x bfloat> @llvm.maxnum.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b)
ret <vscale x 8 x bfloat> %v
@@ -308,13 +273,8 @@ define <vscale x 8 x bfloat> @vfmax_nxv8bf16_vf(<vscale x 8 x bfloat> %a, bfloat
;
; ZVFBFA-LABEL: vfmax_nxv8bf16_vf:
; ZVFBFA: # %bb.0:
-; ZVFBFA-NEXT: fcvt.s.bf16 fa5, fa0
; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFBFA-NEXT: vfmax.vf v12, v12, fa5
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: vfmax.vf v8, v8, fa0
; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -348,12 +308,7 @@ define <vscale x 16 x bfloat> @vfmax_nxv16bf16_vv(<vscale x 16 x bfloat> %a, <vs
; ZVFBFA-LABEL: vfmax_nxv16bf16_vv:
; ZVFBFA: # %bb.0:
; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v12
-; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFBFA-NEXT: vfmax.vv v16, v24, v16
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT: vfmax.vv v8, v8, v12
; ZVFBFA-NEXT: ret
%v = call <vscale x 16 x bfloat> @llvm.maxnum.nxv16bf16(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %b)
ret <vscale x 16 x bfloat> %v
@@ -384,13 +339,8 @@ define <vscale x 16 x bfloat> @vfmax_nxv16bf16_vf(<vscale x 16 x bfloat> %a, bfl
;
; ZVFBFA-LABEL: vfmax_nxv16bf16_vf:
; ZVFBFA: # %bb.0:
-; ZVFBFA-NEXT: fcvt.s.bf16 fa5, fa0
; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFBFA-NEXT: vfmax.vf v16, v16, fa5
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT: vfmax.vf v8, v8, fa0
; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <vscale x 16 x bfloat> %head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
@@ -465,34 +415,8 @@ define <vscale x 32 x bfloat> @vfmax_nxv32bf16_vv(<vscale x 32 x bfloat> %a, <vs
;
; ZVFBFA-LABEL: vfmax_nxv32bf16_vv:
; ZVFBFA: # %bb.0:
-; ZVFBFA-NEXT: addi sp, sp, -16
-; ZVFBFA-NEXT: .cfi_def_cfa_offset 16
-; ZVFBFA-NEXT: csrr a0, vlenb
-; ZVFBFA-NEXT: slli a0, a0, 3
-; ZVFBFA-NEXT: sub sp, sp, a0
-; ZVFBFA-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v16
-; ZVFBFA-NEXT: addi a0, sp, 16
-; ZVFBFA-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
-; ZVFBFA-NEXT: vfwcvt.f.f.v v0, v8
-; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v20
-; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v12
-; ZVFBFA-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFBFA-NEXT: vfmax.vv v0, v0, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v0
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFBFA-NEXT: vfmax.vv v16, v16, v24
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v12, v16
-; ZVFBFA-NEXT: csrr a0, vlenb
-; ZVFBFA-NEXT: slli a0, a0, 3
-; ZVFBFA-NEXT: add sp, sp, a0
-; ZVFBFA-NEXT: .cfi_def_cfa sp, 16
-; ZVFBFA-NEXT: addi sp, sp, 16
-; ZVFBFA-NEXT: .cfi_def_cfa_offset 0
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m8, ta, ma
+; ZVFBFA-NEXT: vfmax.vv v8, v8, v16
; ZVFBFA-NEXT: ret
%v = call <vscale x 32 x bfloat> @llvm.maxnum.nxv32bf16(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b)
ret <vscale x 32 x bfloat> %v
@@ -575,36 +499,8 @@ define <vscale x 32 x bfloat> @vfmax_nxv32bf16_vf(<vscale x 32 x bfloat> %a, bfl
;
; ZVFBFA-LABEL: vfmax_nxv32bf16_vf:
; ZVFBFA: # %bb.0:
-; ZVFBFA-NEXT: addi sp, sp, -16
-; ZVFBFA-NEXT: .cfi_def_cfa_offset 16
-; ZVFBFA-NEXT: csrr a0, vlenb
-; ZVFBFA-NEXT: slli a0, a0, 3
-; ZVFBFA-NEXT: sub sp, sp, a0
-; ZVFBFA-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m8, ta, ma
-; ZVFBFA-NEXT: vfmv.v.f v16, fa0
-; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v8
-; ZVFBFA-NEXT: addi a0, sp, 16
-; ZVFBFA-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
-; ZVFBFA-NEXT: vfwcvt.f.f.v v0, v12
-; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v16
-; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v20
-; ZVFBFA-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFBFA-NEXT: vfmax.vv v16, v16, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v16
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFBFA-NEXT: vfmax.vv v16, v0, v24
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v12, v16
-; ZVFBFA-NEXT: csrr a0, vlenb
-; ZVFBFA-NEXT: slli a0, a0, 3
-; ZVFBFA-NEXT: add sp, sp, a0
-; ZVFBFA-NEXT: .cfi_def_cfa sp, 16
-; ZVFBFA-NEXT: addi sp, sp, 16
-; ZVFBFA-NEXT: .cfi_def_cfa_offset 0
+; ZVFBFA-NEXT: vfmax.vf v8, v8, fa0
; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <vscale x 32 x bfloat> %head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmin-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmin-sdnode.ll
index b284d87ab97d4..89d67f710c7a4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmin-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmin-sdnode.ll
@@ -44,12 +44,7 @@ define <vscale x 1 x bfloat> @vfmin_nxv1bf16_vv(<vscale x 1 x bfloat> %a, <vscal
; ZVFBFA-LABEL: vfmin_nxv1bf16_vv:
; ZVFBFA: # %bb.0:
; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFBFA-NEXT: vfmin.vv v9, v9, v10
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: vfmin.vv v8, v8, v9
; ZVFBFA-NEXT: ret
%v = call <vscale x 1 x bfloat> @llvm.minnum.nxv1bf16(<vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b)
ret <vscale x 1 x bfloat> %v
@@ -80,13 +75,8 @@ define <vscale x 1 x bfloat> @vfmin_nxv1bf16_vf(<vscale x 1 x bfloat> %a, bfloat
;
; ZVFBFA-LABEL: vfmin_nxv1bf16_vf:
; ZVFBFA: # %bb.0:
-; ZVFBFA-NEXT: fcvt.s.bf16 fa5, fa0
; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFBFA-NEXT: vfmin.vf v9, v9, fa5
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: vfmin.vf v8, v8, fa0
; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <vscale x 1 x bfloat> %head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
@@ -120,12 +110,7 @@ define <vscale x 2 x bfloat> @vfmin_nxv2bf16_vv(<vscale x 2 x bfloat> %a, <vscal
; ZVFBFA-LABEL: vfmin_nxv2bf16_vv:
; ZVFBFA: # %bb.0:
; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFBFA-NEXT: vfmin.vv v9, v9, v10
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: vfmin.vv v8, v8, v9
; ZVFBFA-NEXT: ret
%v = call <vscale x 2 x bfloat> @llvm.minnum.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b)
ret <vscale x 2 x bfloat> %v
@@ -156,13 +141,8 @@ define <vscale x 2 x bfloat> @vfmin_nxv2bf16_vf(<vscale x 2 x bfloat> %a, bfloat
;
; ZVFBFA-LABEL: vfmin_nxv2bf16_vf:
; ZVFBFA: # %bb.0:
-; ZVFBFA-NEXT: fcvt.s.bf16 fa5, fa0
; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFBFA-NEXT: vfmin.vf v9, v9, fa5
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v9
+; ZVFBFA-NEXT: vfmin.vf v8, v8, fa0
; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <vscale x 2 x bfloat> %head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
@@ -196,12 +176,7 @@ define <vscale x 4 x bfloat> @vfmin_nxv4bf16_vv(<vscale x 4 x bfloat> %a, <vscal
; ZVFBFA-LABEL: vfmin_nxv4bf16_vv:
; ZVFBFA: # %bb.0:
; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFBFA-NEXT: vfmin.vv v10, v12, v10
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT: vfmin.vv v8, v8, v9
; ZVFBFA-NEXT: ret
%v = call <vscale x 4 x bfloat> @llvm.minnum.nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b)
ret <vscale x 4 x bfloat> %v
@@ -232,13 +207,8 @@ define <vscale x 4 x bfloat> @vfmin_nxv4bf16_vf(<vscale x 4 x bfloat> %a, bfloat
;
; ZVFBFA-LABEL: vfmin_nxv4bf16_vf:
; ZVFBFA: # %bb.0:
-; ZVFBFA-NEXT: fcvt.s.bf16 fa5, fa0
; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; ZVFBFA-NEXT: vfmin.vf v10, v10, fa5
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m1, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v10
+; ZVFBFA-NEXT: vfmin.vf v8, v8, fa0
; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <vscale x 4 x bfloat> %head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
@@ -272,12 +242,7 @@ define <vscale x 8 x bfloat> @vfmin_nxv8bf16_vv(<vscale x 8 x bfloat> %a, <vscal
; ZVFBFA-LABEL: vfmin_nxv8bf16_vv:
; ZVFBFA: # %bb.0:
; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v10
-; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFBFA-NEXT: vfmin.vv v12, v16, v12
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: vfmin.vv v8, v8, v10
; ZVFBFA-NEXT: ret
%v = call <vscale x 8 x bfloat> @llvm.minnum.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b)
ret <vscale x 8 x bfloat> %v
@@ -308,13 +273,8 @@ define <vscale x 8 x bfloat> @vfmin_nxv8bf16_vf(<vscale x 8 x bfloat> %a, bfloat
;
; ZVFBFA-LABEL: vfmin_nxv8bf16_vf:
; ZVFBFA: # %bb.0:
-; ZVFBFA-NEXT: fcvt.s.bf16 fa5, fa0
; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v12, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; ZVFBFA-NEXT: vfmin.vf v12, v12, fa5
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m2, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v12
+; ZVFBFA-NEXT: vfmin.vf v8, v8, fa0
; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <vscale x 8 x bfloat> %head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
@@ -348,12 +308,7 @@ define <vscale x 16 x bfloat> @vfmin_nxv16bf16_vv(<vscale x 16 x bfloat> %a, <vs
; ZVFBFA-LABEL: vfmin_nxv16bf16_vv:
; ZVFBFA: # %bb.0:
; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v12
-; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFBFA-NEXT: vfmin.vv v16, v24, v16
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT: vfmin.vv v8, v8, v12
; ZVFBFA-NEXT: ret
%v = call <vscale x 16 x bfloat> @llvm.minnum.nxv16bf16(<vscale x 16 x bfloat> %a, <vscale x 16 x bfloat> %b)
ret <vscale x 16 x bfloat> %v
@@ -384,13 +339,8 @@ define <vscale x 16 x bfloat> @vfmin_nxv16bf16_vf(<vscale x 16 x bfloat> %a, bfl
;
; ZVFBFA-LABEL: vfmin_nxv16bf16_vf:
; ZVFBFA: # %bb.0:
-; ZVFBFA-NEXT: fcvt.s.bf16 fa5, fa0
; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFBFA-NEXT: vfmin.vf v16, v16, fa5
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v16
+; ZVFBFA-NEXT: vfmin.vf v8, v8, fa0
; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <vscale x 16 x bfloat> %head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
@@ -465,34 +415,8 @@ define <vscale x 32 x bfloat> @vfmin_nxv32bf16_vv(<vscale x 32 x bfloat> %a, <vs
;
; ZVFBFA-LABEL: vfmin_nxv32bf16_vv:
; ZVFBFA: # %bb.0:
-; ZVFBFA-NEXT: addi sp, sp, -16
-; ZVFBFA-NEXT: .cfi_def_cfa_offset 16
-; ZVFBFA-NEXT: csrr a0, vlenb
-; ZVFBFA-NEXT: slli a0, a0, 3
-; ZVFBFA-NEXT: sub sp, sp, a0
-; ZVFBFA-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v16
-; ZVFBFA-NEXT: addi a0, sp, 16
-; ZVFBFA-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
-; ZVFBFA-NEXT: vfwcvt.f.f.v v0, v8
-; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v20
-; ZVFBFA-NEXT: vfwcvt.f.f.v v16, v12
-; ZVFBFA-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFBFA-NEXT: vfmin.vv v0, v0, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v0
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFBFA-NEXT: vfmin.vv v16, v16, v24
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v12, v16
-; ZVFBFA-NEXT: csrr a0, vlenb
-; ZVFBFA-NEXT: slli a0, a0, 3
-; ZVFBFA-NEXT: add sp, sp, a0
-; ZVFBFA-NEXT: .cfi_def_cfa sp, 16
-; ZVFBFA-NEXT: addi sp, sp, 16
-; ZVFBFA-NEXT: .cfi_def_cfa_offset 0
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m8, ta, ma
+; ZVFBFA-NEXT: vfmin.vv v8, v8, v16
; ZVFBFA-NEXT: ret
%v = call <vscale x 32 x bfloat> @llvm.minnum.nxv32bf16(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b)
ret <vscale x 32 x bfloat> %v
@@ -575,36 +499,8 @@ define <vscale x 32 x bfloat> @vfmin_nxv32bf16_vf(<vscale x 32 x bfloat> %a, bfl
;
; ZVFBFA-LABEL: vfmin_nxv32bf16_vf:
; ZVFBFA: # %bb.0:
-; ZVFBFA-NEXT: addi sp, sp, -16
-; ZVFBFA-NEXT: .cfi_def_cfa_offset 16
-; ZVFBFA-NEXT: csrr a0, vlenb
-; ZVFBFA-NEXT: slli a0, a0, 3
-; ZVFBFA-NEXT: sub sp, sp, a0
-; ZVFBFA-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m8, ta, ma
-; ZVFBFA-NEXT: vfmv.v.f v16, fa0
-; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v8
-; ZVFBFA-NEXT: addi a0, sp, 16
-; ZVFBFA-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
-; ZVFBFA-NEXT: vfwcvt.f.f.v v0, v12
-; ZVFBFA-NEXT: vfwcvt.f.f.v v8, v16
-; ZVFBFA-NEXT: vfwcvt.f.f.v v24, v20
-; ZVFBFA-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFBFA-NEXT: vfmin.vv v16, v16, v8
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v8, v16
-; ZVFBFA-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFBFA-NEXT: vfmin.vv v16, v0, v24
-; ZVFBFA-NEXT: vsetvli zero, zero, e16alt, m4, ta, ma
-; ZVFBFA-NEXT: vfncvt.f.f.w v12, v16
-; ZVFBFA-NEXT: csrr a0, vlenb
-; ZVFBFA-NEXT: slli a0, a0, 3
-; ZVFBFA-NEXT: add sp, sp, a0
-; ZVFBFA-NEXT: .cfi_def_cfa sp, 16
-; ZVFBFA-NEXT: addi sp, sp, 16
-; ZVFBFA-NEXT: .cfi_def_cfa_offset 0
+; ZVFBFA-NEXT: vfmin.vf v8, v8, fa0
; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 32 x bfloat> poison, bfloat %b, i32 0
%splat = shufflevector <vscale x 32 x bfloat> %head, <vscale x 32 x bfloat> poison, <vscale x 32 x i32> zeroinitializer