[llvm] [RISCV][RFC] Deduplicate LLVM IR in fixed vector tests. NFC (PR #111395)

Luke Lau via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 7 20:48:47 PDT 2024


https://github.com/lukel97 updated https://github.com/llvm/llvm-project/pull/111395

>From a39c36d0fcf1beb3cea942bafa303c71d6e78d01 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Mon, 7 Oct 2024 23:41:07 +0800
Subject: [PATCH 1/2] [RISCV][RFC] Deduplicate LLVM IR in fixed vector tests.
 NFC

I'm planning on adding fixed-length vector support for bf16, but getting test coverage requires copying and pasting a lot of tests. Even then, I noticed the test cases themselves aren't 100% consistent.

This PR tries out an idea: instead of having individual tests for each vector type we want to test, we write the test once and use multiple sed RUN lines that substitute in each type.
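
For illustration, a substituted test might look something like the sketch below. The ETYPE placeholder and the exact RUN lines are hypothetical and only show the shape of the approach, not necessarily what this PR uses:

; RUN: sed 's/ETYPE/half/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvfh -verify-machineinstrs | FileCheck %s --check-prefix=F16
; RUN: sed 's/ETYPE/float/g' %s | llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck %s --check-prefix=F32

define void @fadd_v4(ptr %x, ptr %y) {
  %a = load <4 x ETYPE>, ptr %x
  %b = load <4 x ETYPE>, ptr %y
  %c = fadd <4 x ETYPE> %a, %b
  store <4 x ETYPE> %c, ptr %x
  ret void
}

Note how the float RUN line doesn't need +zvfh, so each feature configuration only runs against the types it actually affects.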

The main advantage of this is that it makes testing new types much easier, e.g. this PR adds tests for illegal f32 and f64 vectors with just a few new RUN lines, and bf16 tests can be added similarly later.

It also means we don't need to run redundant -mattr configurations on types they don't affect, e.g. +zvfh on v4f32.

The same technique could later be applied not just to the fp tests but to other fixed-length and scalable vector tests as well.

However, it might be simpler to just generate these mechanically once and paste them in.
---
 .../RISCV/rvv/fixed-vectors-copysign.ll       |   99 +
 .../CodeGen/RISCV/rvv/fixed-vectors-fp.ll     | 8017 +++++++++--------
 2 files changed, 4438 insertions(+), 3678 deletions(-)
 create mode 100644 llvm/test/CodeGen/RISCV/rvv/fixed-vectors-copysign.ll

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-copysign.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-copysign.ll
new file mode 100644
index 00000000000000..5801b7482d38ec
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-copysign.ll
@@ -0,0 +1,99 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zvfhmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfhmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+
+; Single-width copysign tests are in fixed-vectors-fp.ll
+
+define void @copysign_neg_trunc_v4f16_v4f32(ptr %x, ptr %y) {
+; ZVFH-LABEL: copysign_neg_trunc_v4f16_v4f32:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFH-NEXT:    vle32.v v8, (a1)
+; ZVFH-NEXT:    vle16.v v9, (a0)
+; ZVFH-NEXT:    vfncvt.f.f.w v10, v8
+; ZVFH-NEXT:    vfsgnjn.vv v8, v9, v10
+; ZVFH-NEXT:    vse16.v v8, (a0)
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: copysign_neg_trunc_v4f16_v4f32:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-NEXT:    vle32.v v9, (a1)
+; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    addi a2, a1, -1
+; ZVFHMIN-NEXT:    vand.vx v8, v8, a2
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v9
+; ZVFHMIN-NEXT:    vxor.vx v9, v10, a1
+; ZVFHMIN-NEXT:    vand.vx v9, v9, a1
+; ZVFHMIN-NEXT:    vor.vv v8, v8, v9
+; ZVFHMIN-NEXT:    vse16.v v8, (a0)
+; ZVFHMIN-NEXT:    ret
+  %a = load <4 x half>, ptr %x
+  %b = load <4 x float>, ptr %y
+  %c = fneg <4 x float> %b
+  %d = fptrunc <4 x float> %c to <4 x half>
+  %e = call <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %d)
+  store <4 x half> %e, ptr %x
+  ret void
+}
+
+define void @copysign_neg_trunc_v3f16_v3f32(ptr %x, ptr %y) {
+; ZVFH-LABEL: copysign_neg_trunc_v3f16_v3f32:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 3, e32, m1, ta, ma
+; ZVFH-NEXT:    vle32.v v8, (a1)
+; ZVFH-NEXT:    vle16.v v9, (a0)
+; ZVFH-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFH-NEXT:    vfncvt.f.f.w v10, v8
+; ZVFH-NEXT:    vsetivli zero, 3, e16, mf2, ta, ma
+; ZVFH-NEXT:    vfsgnjn.vv v8, v9, v10
+; ZVFH-NEXT:    vse16.v v8, (a0)
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: copysign_neg_trunc_v3f16_v3f32:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 3, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-NEXT:    vle32.v v9, (a1)
+; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    addi a2, a1, -1
+; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vand.vx v8, v8, a2
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v9
+; ZVFHMIN-NEXT:    vxor.vx v9, v10, a1
+; ZVFHMIN-NEXT:    vand.vx v9, v9, a1
+; ZVFHMIN-NEXT:    vsetivli zero, 3, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vor.vv v8, v8, v9
+; ZVFHMIN-NEXT:    vse16.v v8, (a0)
+; ZVFHMIN-NEXT:    ret
+  %a = load <3 x half>, ptr %x
+  %b = load <3 x float>, ptr %y
+  %c = fneg <3 x float> %b
+  %d = fptrunc <3 x float> %c to <3 x half>
+  %e = call <3 x half> @llvm.copysign.v3f16(<3 x half> %a, <3 x half> %d)
+  store <3 x half> %e, ptr %x
+  ret void
+}
+
+define void @copysign_neg_ext_v2f64_v2f32(ptr %x, ptr %y) {
+; CHECK-LABEL: copysign_neg_ext_v2f64_v2f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a1)
+; CHECK-NEXT:    vle64.v v9, (a0)
+; CHECK-NEXT:    vfwcvt.f.f.v v10, v8
+; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vfsgnjn.vv v8, v9, v10
+; CHECK-NEXT:    vse64.v v8, (a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x double>, ptr %x
+  %b = load <2 x float>, ptr %y
+  %c = fneg <2 x float> %b
+  %d = fpext <2 x float> %c to <2 x double>
+  %e = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %d)
+  store <2 x double> %e, ptr %x
+  ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
index 7ecf8af54c8dc0..7151341a342afa 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
@@ -1,3687 +1,4348 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zvfhmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfhmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-
-define void @fadd_v8f16(ptr %x, ptr %y) {
-; ZVFH-LABEL: fadd_v8f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vle16.v v9, (a1)
-; ZVFH-NEXT:    vfadd.vv v8, v8, v9
-; ZVFH-NEXT:    vse16.v v8, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: fadd_v8f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a1)
-; ZVFHMIN-NEXT:    vle16.v v9, (a0)
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v8, v12, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <8 x half>, ptr %x
-  %b = load <8 x half>, ptr %y
-  %c = fadd <8 x half> %a, %b
-  store <8 x half> %c, ptr %x
-  ret void
-}
-
-define void @fadd_v6f16(ptr %x, ptr %y) {
-; ZVFH-LABEL: fadd_v6f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vle16.v v9, (a1)
-; ZVFH-NEXT:    vfadd.vv v8, v8, v9
-; ZVFH-NEXT:    vse16.v v8, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: fadd_v6f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a1)
-; ZVFHMIN-NEXT:    vle16.v v9, (a0)
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v8, v12, v10
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <6 x half>, ptr %x
-  %b = load <6 x half>, ptr %y
-  %c = fadd <6 x half> %a, %b
-  store <6 x half> %c, ptr %x
-  ret void
-}
-
-define void @fadd_v4f32(ptr %x, ptr %y) {
-; CHECK-LABEL: fadd_v4f32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vle32.v v9, (a1)
-; CHECK-NEXT:    vfadd.vv v8, v8, v9
-; CHECK-NEXT:    vse32.v v8, (a0)
-; CHECK-NEXT:    ret
-  %a = load <4 x float>, ptr %x
-  %b = load <4 x float>, ptr %y
-  %c = fadd <4 x float> %a, %b
-  store <4 x float> %c, ptr %x
-  ret void
-}
-
-define void @fadd_v2f64(ptr %x, ptr %y) {
-; CHECK-LABEL: fadd_v2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vle64.v v9, (a1)
-; CHECK-NEXT:    vfadd.vv v8, v8, v9
-; CHECK-NEXT:    vse64.v v8, (a0)
-; CHECK-NEXT:    ret
-  %a = load <2 x double>, ptr %x
-  %b = load <2 x double>, ptr %y
-  %c = fadd <2 x double> %a, %b
-  store <2 x double> %c, ptr %x
-  ret void
-}
-
-define void @fsub_v8f16(ptr %x, ptr %y) {
-; ZVFH-LABEL: fsub_v8f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vle16.v v9, (a1)
-; ZVFH-NEXT:    vfsub.vv v8, v8, v9
-; ZVFH-NEXT:    vse16.v v8, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: fsub_v8f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a1)
-; ZVFHMIN-NEXT:    vle16.v v9, (a0)
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v8, v12, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <8 x half>, ptr %x
-  %b = load <8 x half>, ptr %y
-  %c = fsub <8 x half> %a, %b
-  store <8 x half> %c, ptr %x
-  ret void
-}
-
-define void @fsub_v6f16(ptr %x, ptr %y) {
-; ZVFH-LABEL: fsub_v6f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vle16.v v9, (a1)
-; ZVFH-NEXT:    vfsub.vv v8, v8, v9
-; ZVFH-NEXT:    vse16.v v8, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: fsub_v6f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a1)
-; ZVFHMIN-NEXT:    vle16.v v9, (a0)
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v8, v12, v10
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <6 x half>, ptr %x
-  %b = load <6 x half>, ptr %y
-  %c = fsub <6 x half> %a, %b
-  store <6 x half> %c, ptr %x
-  ret void
-}
-
-define void @fsub_v4f32(ptr %x, ptr %y) {
-; CHECK-LABEL: fsub_v4f32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vle32.v v9, (a1)
-; CHECK-NEXT:    vfsub.vv v8, v8, v9
-; CHECK-NEXT:    vse32.v v8, (a0)
-; CHECK-NEXT:    ret
-  %a = load <4 x float>, ptr %x
-  %b = load <4 x float>, ptr %y
-  %c = fsub <4 x float> %a, %b
-  store <4 x float> %c, ptr %x
-  ret void
-}
-
-define void @fsub_v2f64(ptr %x, ptr %y) {
-; CHECK-LABEL: fsub_v2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vle64.v v9, (a1)
-; CHECK-NEXT:    vfsub.vv v8, v8, v9
-; CHECK-NEXT:    vse64.v v8, (a0)
-; CHECK-NEXT:    ret
-  %a = load <2 x double>, ptr %x
-  %b = load <2 x double>, ptr %y
-  %c = fsub <2 x double> %a, %b
-  store <2 x double> %c, ptr %x
-  ret void
-}
-
-define void @fmul_v8f16(ptr %x, ptr %y) {
-; ZVFH-LABEL: fmul_v8f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vle16.v v9, (a1)
-; ZVFH-NEXT:    vfmul.vv v8, v8, v9
-; ZVFH-NEXT:    vse16.v v8, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: fmul_v8f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a1)
-; ZVFHMIN-NEXT:    vle16.v v9, (a0)
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v8, v12, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <8 x half>, ptr %x
-  %b = load <8 x half>, ptr %y
-  %c = fmul <8 x half> %a, %b
-  store <8 x half> %c, ptr %x
-  ret void
-}
-
-define void @fmul_v6f16(ptr %x, ptr %y) {
-; ZVFH-LABEL: fmul_v6f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vle16.v v9, (a1)
-; ZVFH-NEXT:    vfmul.vv v8, v8, v9
-; ZVFH-NEXT:    vse16.v v8, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: fmul_v6f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a1)
-; ZVFHMIN-NEXT:    vle16.v v9, (a0)
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v8, v12, v10
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <6 x half>, ptr %x
-  %b = load <6 x half>, ptr %y
-  %c = fmul <6 x half> %a, %b
-  store <6 x half> %c, ptr %x
-  ret void
-}
-
-define void @fmul_v4f32(ptr %x, ptr %y) {
-; CHECK-LABEL: fmul_v4f32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vle32.v v9, (a1)
-; CHECK-NEXT:    vfmul.vv v8, v8, v9
-; CHECK-NEXT:    vse32.v v8, (a0)
-; CHECK-NEXT:    ret
-  %a = load <4 x float>, ptr %x
-  %b = load <4 x float>, ptr %y
-  %c = fmul <4 x float> %a, %b
-  store <4 x float> %c, ptr %x
-  ret void
-}
-
-define void @fmul_v2f64(ptr %x, ptr %y) {
-; CHECK-LABEL: fmul_v2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vle64.v v9, (a1)
-; CHECK-NEXT:    vfmul.vv v8, v8, v9
-; CHECK-NEXT:    vse64.v v8, (a0)
-; CHECK-NEXT:    ret
-  %a = load <2 x double>, ptr %x
-  %b = load <2 x double>, ptr %y
-  %c = fmul <2 x double> %a, %b
-  store <2 x double> %c, ptr %x
-  ret void
-}
-
-define void @fdiv_v8f16(ptr %x, ptr %y) {
-; ZVFH-LABEL: fdiv_v8f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vle16.v v9, (a1)
-; ZVFH-NEXT:    vfdiv.vv v8, v8, v9
-; ZVFH-NEXT:    vse16.v v8, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: fdiv_v8f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a1)
-; ZVFHMIN-NEXT:    vle16.v v9, (a0)
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v8, v12, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <8 x half>, ptr %x
-  %b = load <8 x half>, ptr %y
-  %c = fdiv <8 x half> %a, %b
-  store <8 x half> %c, ptr %x
-  ret void
-}
-
-define void @fdiv_v6f16(ptr %x, ptr %y) {
-; ZVFH-LABEL: fdiv_v6f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vle16.v v9, (a1)
-; ZVFH-NEXT:    vfdiv.vv v8, v8, v9
-; ZVFH-NEXT:    vse16.v v8, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: fdiv_v6f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a1)
-; ZVFHMIN-NEXT:    vle16.v v9, (a0)
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v8, v12, v10
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <6 x half>, ptr %x
-  %b = load <6 x half>, ptr %y
-  %c = fdiv <6 x half> %a, %b
-  store <6 x half> %c, ptr %x
-  ret void
-}
-
-define void @fdiv_v4f32(ptr %x, ptr %y) {
-; CHECK-LABEL: fdiv_v4f32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vle32.v v9, (a1)
-; CHECK-NEXT:    vfdiv.vv v8, v8, v9
-; CHECK-NEXT:    vse32.v v8, (a0)
-; CHECK-NEXT:    ret
-  %a = load <4 x float>, ptr %x
-  %b = load <4 x float>, ptr %y
-  %c = fdiv <4 x float> %a, %b
-  store <4 x float> %c, ptr %x
-  ret void
-}
-
-define void @fdiv_v2f64(ptr %x, ptr %y) {
-; CHECK-LABEL: fdiv_v2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vle64.v v9, (a1)
-; CHECK-NEXT:    vfdiv.vv v8, v8, v9
-; CHECK-NEXT:    vse64.v v8, (a0)
-; CHECK-NEXT:    ret
-  %a = load <2 x double>, ptr %x
-  %b = load <2 x double>, ptr %y
-  %c = fdiv <2 x double> %a, %b
-  store <2 x double> %c, ptr %x
-  ret void
-}
-
-define void @fneg_v8f16(ptr %x) {
-; ZVFH-LABEL: fneg_v8f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vfneg.v v8, v8
-; ZVFH-NEXT:    vse16.v v8, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: fneg_v8f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vxor.vx v8, v8, a1
-; ZVFHMIN-NEXT:    vse16.v v8, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <8 x half>, ptr %x
-  %b = fneg <8 x half> %a
-  store <8 x half> %b, ptr %x
-  ret void
-}
-
-define void @fneg_v6f16(ptr %x) {
-; ZVFH-LABEL: fneg_v6f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vfneg.v v8, v8
-; ZVFH-NEXT:    vse16.v v8, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: fneg_v6f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vxor.vx v8, v8, a1
-; ZVFHMIN-NEXT:    vse16.v v8, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <6 x half>, ptr %x
-  %b = fneg <6 x half> %a
-  store <6 x half> %b, ptr %x
-  ret void
-}
-
-define void @fneg_v4f32(ptr %x) {
-; CHECK-LABEL: fneg_v4f32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vfneg.v v8, v8
-; CHECK-NEXT:    vse32.v v8, (a0)
-; CHECK-NEXT:    ret
-  %a = load <4 x float>, ptr %x
-  %b = fneg <4 x float> %a
-  store <4 x float> %b, ptr %x
-  ret void
-}
-
-define void @fneg_v2f64(ptr %x) {
-; CHECK-LABEL: fneg_v2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vfneg.v v8, v8
-; CHECK-NEXT:    vse64.v v8, (a0)
-; CHECK-NEXT:    ret
-  %a = load <2 x double>, ptr %x
-  %b = fneg <2 x double> %a
-  store <2 x double> %b, ptr %x
-  ret void
-}
-
-define void @fabs_v8f16(ptr %x) {
-; ZVFH-LABEL: fabs_v8f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vfabs.v v8, v8
-; ZVFH-NEXT:    vse16.v v8, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: fabs_v8f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    addi a1, a1, -1
-; ZVFHMIN-NEXT:    vand.vx v8, v8, a1
-; ZVFHMIN-NEXT:    vse16.v v8, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <8 x half>, ptr %x
-  %b = call <8 x half> @llvm.fabs.v8f16(<8 x half> %a)
-  store <8 x half> %b, ptr %x
-  ret void
-}
-declare <8 x half> @llvm.fabs.v8f16(<8 x half>)
-
-define void @fabs_v6f16(ptr %x) {
-; ZVFH-LABEL: fabs_v6f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vfabs.v v8, v8
-; ZVFH-NEXT:    vse16.v v8, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: fabs_v6f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    addi a1, a1, -1
-; ZVFHMIN-NEXT:    vand.vx v8, v8, a1
-; ZVFHMIN-NEXT:    vse16.v v8, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <6 x half>, ptr %x
-  %b = call <6 x half> @llvm.fabs.v6f16(<6 x half> %a)
-  store <6 x half> %b, ptr %x
-  ret void
-}
-declare <6 x half> @llvm.fabs.v6f16(<6 x half>)
-
-define void @fabs_v4f32(ptr %x) {
-; CHECK-LABEL: fabs_v4f32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vfabs.v v8, v8
-; CHECK-NEXT:    vse32.v v8, (a0)
-; CHECK-NEXT:    ret
-  %a = load <4 x float>, ptr %x
-  %b = call <4 x float> @llvm.fabs.v4f32(<4 x float> %a)
-  store <4 x float> %b, ptr %x
-  ret void
-}
-declare <4 x float> @llvm.fabs.v4f32(<4 x float>)
-
-define void @fabs_v2f64(ptr %x) {
-; CHECK-LABEL: fabs_v2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vfabs.v v8, v8
-; CHECK-NEXT:    vse64.v v8, (a0)
-; CHECK-NEXT:    ret
-  %a = load <2 x double>, ptr %x
-  %b = call <2 x double> @llvm.fabs.v2f64(<2 x double> %a)
-  store <2 x double> %b, ptr %x
-  ret void
-}
-declare <2 x double> @llvm.fabs.v2f64(<2 x double>)
-
-define void @copysign_v8f16(ptr %x, ptr %y) {
-; ZVFH-LABEL: copysign_v8f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vle16.v v9, (a1)
-; ZVFH-NEXT:    vfsgnj.vv v8, v8, v9
-; ZVFH-NEXT:    vse16.v v8, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: copysign_v8f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a1)
-; ZVFHMIN-NEXT:    vle16.v v9, (a0)
-; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vand.vx v8, v8, a1
-; ZVFHMIN-NEXT:    addi a1, a1, -1
-; ZVFHMIN-NEXT:    vand.vx v9, v9, a1
-; ZVFHMIN-NEXT:    vor.vv v8, v9, v8
-; ZVFHMIN-NEXT:    vse16.v v8, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <8 x half>, ptr %x
-  %b = load <8 x half>, ptr %y
-  %c = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %b)
-  store <8 x half> %c, ptr %x
-  ret void
-}
-declare <8 x half> @llvm.copysign.v8f16(<8 x half>, <8 x half>)
-
-define void @copysign_v6f16(ptr %x, ptr %y) {
-; ZVFH-LABEL: copysign_v6f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vle16.v v9, (a1)
-; ZVFH-NEXT:    vfsgnj.vv v8, v8, v9
-; ZVFH-NEXT:    vse16.v v8, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: copysign_v6f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a1)
-; ZVFHMIN-NEXT:    vle16.v v9, (a0)
-; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vand.vx v8, v8, a1
-; ZVFHMIN-NEXT:    addi a1, a1, -1
-; ZVFHMIN-NEXT:    vand.vx v9, v9, a1
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vor.vv v8, v9, v8
-; ZVFHMIN-NEXT:    vse16.v v8, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <6 x half>, ptr %x
-  %b = load <6 x half>, ptr %y
-  %c = call <6 x half> @llvm.copysign.v6f16(<6 x half> %a, <6 x half> %b)
-  store <6 x half> %c, ptr %x
-  ret void
-}
-declare <6 x half> @llvm.copysign.v6f16(<6 x half>, <6 x half>)
-
-define void @copysign_v4f32(ptr %x, ptr %y) {
-; CHECK-LABEL: copysign_v4f32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vle32.v v9, (a1)
-; CHECK-NEXT:    vfsgnj.vv v8, v8, v9
-; CHECK-NEXT:    vse32.v v8, (a0)
-; CHECK-NEXT:    ret
-  %a = load <4 x float>, ptr %x
-  %b = load <4 x float>, ptr %y
-  %c = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %b)
-  store <4 x float> %c, ptr %x
-  ret void
-}
-declare <4 x float> @llvm.copysign.v4f32(<4 x float>, <4 x float>)
-
-define void @copysign_v2f64(ptr %x, ptr %y) {
-; CHECK-LABEL: copysign_v2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vle64.v v9, (a1)
-; CHECK-NEXT:    vfsgnj.vv v8, v8, v9
-; CHECK-NEXT:    vse64.v v8, (a0)
-; CHECK-NEXT:    ret
-  %a = load <2 x double>, ptr %x
-  %b = load <2 x double>, ptr %y
-  %c = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %b)
-  store <2 x double> %c, ptr %x
-  ret void
-}
-declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>)
-
-define void @copysign_vf_v8f16(ptr %x, half %y) {
-; ZVFH-LABEL: copysign_vf_v8f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vfsgnj.vf v8, v8, fa0
-; ZVFH-NEXT:    vse16.v v8, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: copysign_vf_v8f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-NEXT:    vmv.v.x v9, a1
-; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    addi a2, a1, -1
-; ZVFHMIN-NEXT:    vand.vx v8, v8, a2
-; ZVFHMIN-NEXT:    vand.vx v9, v9, a1
-; ZVFHMIN-NEXT:    vor.vv v8, v8, v9
-; ZVFHMIN-NEXT:    vse16.v v8, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <8 x half>, ptr %x
-  %b = insertelement <8 x half> poison, half %y, i32 0
-  %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
-  %d = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %c)
-  store <8 x half> %d, ptr %x
-  ret void
-}
-
-define void @copysign_vf_v6f16(ptr %x, half %y) {
-; ZVFH-LABEL: copysign_vf_v6f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vfsgnj.vf v8, v8, fa0
-; ZVFH-NEXT:    vse16.v v8, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: copysign_vf_v6f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v9, a1
-; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    addi a2, a1, -1
-; ZVFHMIN-NEXT:    vand.vx v8, v8, a2
-; ZVFHMIN-NEXT:    vand.vx v9, v9, a1
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vor.vv v8, v8, v9
-; ZVFHMIN-NEXT:    vse16.v v8, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <6 x half>, ptr %x
-  %b = insertelement <6 x half> poison, half %y, i32 0
-  %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
-  %d = call <6 x half> @llvm.copysign.v6f16(<6 x half> %a, <6 x half> %c)
-  store <6 x half> %d, ptr %x
-  ret void
-}
-
-define void @copysign_vf_v4f32(ptr %x, float %y) {
-; CHECK-LABEL: copysign_vf_v4f32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vfsgnj.vf v8, v8, fa0
-; CHECK-NEXT:    vse32.v v8, (a0)
-; CHECK-NEXT:    ret
-  %a = load <4 x float>, ptr %x
-  %b = insertelement <4 x float> poison, float %y, i32 0
-  %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
-  %d = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %c)
-  store <4 x float> %d, ptr %x
-  ret void
-}
-
-define void @copysign_vf_v2f64(ptr %x, double %y) {
-; CHECK-LABEL: copysign_vf_v2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vfsgnj.vf v8, v8, fa0
-; CHECK-NEXT:    vse64.v v8, (a0)
-; CHECK-NEXT:    ret
-  %a = load <2 x double>, ptr %x
-  %b = insertelement <2 x double> poison, double %y, i32 0
-  %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer
-  %d = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %c)
-  store <2 x double> %d, ptr %x
-  ret void
-}
-
-define void @copysign_neg_v8f16(ptr %x, ptr %y) {
-; ZVFH-LABEL: copysign_neg_v8f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vle16.v v9, (a1)
-; ZVFH-NEXT:    vfsgnjn.vv v8, v8, v9
-; ZVFH-NEXT:    vse16.v v8, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: copysign_neg_v8f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a1)
-; ZVFHMIN-NEXT:    vle16.v v9, (a0)
-; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vxor.vx v8, v8, a1
-; ZVFHMIN-NEXT:    addi a2, a1, -1
-; ZVFHMIN-NEXT:    vand.vx v9, v9, a2
-; ZVFHMIN-NEXT:    vand.vx v8, v8, a1
-; ZVFHMIN-NEXT:    vor.vv v8, v9, v8
-; ZVFHMIN-NEXT:    vse16.v v8, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <8 x half>, ptr %x
-  %b = load <8 x half>, ptr %y
-  %c = fneg <8 x half> %b
-  %d = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %c)
-  store <8 x half> %d, ptr %x
-  ret void
-}
-
-define void @copysign_neg_v6f16(ptr %x, ptr %y) {
-; ZVFH-LABEL: copysign_neg_v6f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vle16.v v9, (a1)
-; ZVFH-NEXT:    vfsgnjn.vv v8, v8, v9
-; ZVFH-NEXT:    vse16.v v8, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: copysign_neg_v6f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a1)
-; ZVFHMIN-NEXT:    vle16.v v9, (a0)
-; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vxor.vx v8, v8, a1
-; ZVFHMIN-NEXT:    addi a2, a1, -1
-; ZVFHMIN-NEXT:    vand.vx v9, v9, a2
-; ZVFHMIN-NEXT:    vand.vx v8, v8, a1
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vor.vv v8, v9, v8
-; ZVFHMIN-NEXT:    vse16.v v8, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <6 x half>, ptr %x
-  %b = load <6 x half>, ptr %y
-  %c = fneg <6 x half> %b
-  %d = call <6 x half> @llvm.copysign.v6f16(<6 x half> %a, <6 x half> %c)
-  store <6 x half> %d, ptr %x
-  ret void
-}
-
-define void @copysign_neg_v4f32(ptr %x, ptr %y) {
-; CHECK-LABEL: copysign_neg_v4f32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vle32.v v9, (a1)
-; CHECK-NEXT:    vfsgnjn.vv v8, v8, v9
-; CHECK-NEXT:    vse32.v v8, (a0)
-; CHECK-NEXT:    ret
-  %a = load <4 x float>, ptr %x
-  %b = load <4 x float>, ptr %y
-  %c = fneg <4 x float> %b
-  %d = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %c)
-  store <4 x float> %d, ptr %x
-  ret void
-}
-
-define void @copysign_neg_v2f64(ptr %x, ptr %y) {
-; CHECK-LABEL: copysign_neg_v2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vle64.v v9, (a1)
-; CHECK-NEXT:    vfsgnjn.vv v8, v8, v9
-; CHECK-NEXT:    vse64.v v8, (a0)
-; CHECK-NEXT:    ret
-  %a = load <2 x double>, ptr %x
-  %b = load <2 x double>, ptr %y
-  %c = fneg <2 x double> %b
-  %d = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %c)
-  store <2 x double> %d, ptr %x
-  ret void
-}
-
-define void @copysign_neg_trunc_v4f16_v4f32(ptr %x, ptr %y) {
-; ZVFH-LABEL: copysign_neg_trunc_v4f16_v4f32:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFH-NEXT:    vle32.v v8, (a1)
-; ZVFH-NEXT:    vle16.v v9, (a0)
-; ZVFH-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFH-NEXT:    vfsgnjn.vv v8, v9, v10
-; ZVFH-NEXT:    vse16.v v8, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: copysign_neg_trunc_v4f16_v4f32:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-NEXT:    vle32.v v9, (a1)
-; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    addi a2, a1, -1
-; ZVFHMIN-NEXT:    vand.vx v8, v8, a2
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMIN-NEXT:    vxor.vx v9, v10, a1
-; ZVFHMIN-NEXT:    vand.vx v9, v9, a1
-; ZVFHMIN-NEXT:    vor.vv v8, v8, v9
-; ZVFHMIN-NEXT:    vse16.v v8, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <4 x half>, ptr %x
-  %b = load <4 x float>, ptr %y
-  %c = fneg <4 x float> %b
-  %d = fptrunc <4 x float> %c to <4 x half>
-  %e = call <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %d)
-  store <4 x half> %e, ptr %x
-  ret void
-}
-declare <4 x half> @llvm.copysign.v4f16(<4 x half>, <4 x half>)
-
-define void @copysign_neg_trunc_v3f16_v3f32(ptr %x, ptr %y) {
-; ZVFH-LABEL: copysign_neg_trunc_v3f16_v3f32:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 3, e32, m1, ta, ma
-; ZVFH-NEXT:    vle32.v v8, (a1)
-; ZVFH-NEXT:    vle16.v v9, (a0)
-; ZVFH-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFH-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFH-NEXT:    vsetivli zero, 3, e16, mf2, ta, ma
-; ZVFH-NEXT:    vfsgnjn.vv v8, v9, v10
-; ZVFH-NEXT:    vse16.v v8, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: copysign_neg_trunc_v3f16_v3f32:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 3, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-NEXT:    vle32.v v9, (a1)
-; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    addi a2, a1, -1
-; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vand.vx v8, v8, a2
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v9
-; ZVFHMIN-NEXT:    vxor.vx v9, v10, a1
-; ZVFHMIN-NEXT:    vand.vx v9, v9, a1
-; ZVFHMIN-NEXT:    vsetivli zero, 3, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vor.vv v8, v8, v9
-; ZVFHMIN-NEXT:    vse16.v v8, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <3 x half>, ptr %x
-  %b = load <3 x float>, ptr %y
-  %c = fneg <3 x float> %b
-  %d = fptrunc <3 x float> %c to <3 x half>
-  %e = call <3 x half> @llvm.copysign.v3f16(<3 x half> %a, <3 x half> %d)
-  store <3 x half> %e, ptr %x
-  ret void
-}
-declare <3 x half> @llvm.copysign.v3f16(<3 x half>, <3 x half>)
-
-define void @copysign_neg_ext_v2f64_v2f32(ptr %x, ptr %y) {
-; CHECK-LABEL: copysign_neg_ext_v2f64_v2f32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a1)
-; CHECK-NEXT:    vle64.v v9, (a0)
-; CHECK-NEXT:    vfwcvt.f.f.v v10, v8
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vfsgnjn.vv v8, v9, v10
-; CHECK-NEXT:    vse64.v v8, (a0)
-; CHECK-NEXT:    ret
-  %a = load <2 x double>, ptr %x
-  %b = load <2 x float>, ptr %y
-  %c = fneg <2 x float> %b
-  %d = fpext <2 x float> %c to <2 x double>
-  %e = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %d)
-  store <2 x double> %e, ptr %x
-  ret void
-}
-
-define void @sqrt_v8f16(ptr %x) {
-; ZVFH-LABEL: sqrt_v8f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vfsqrt.v v8, v8
-; ZVFH-NEXT:    vse16.v v8, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: sqrt_v8f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfsqrt.v v8, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <8 x half>, ptr %x
-  %b = call <8 x half> @llvm.sqrt.v8f16(<8 x half> %a)
-  store <8 x half> %b, ptr %x
-  ret void
-}
-declare <8 x half> @llvm.sqrt.v8f16(<8 x half>)
-
-define void @sqrt_v6f16(ptr %x) {
-; ZVFH-LABEL: sqrt_v6f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vfsqrt.v v8, v8
-; ZVFH-NEXT:    vse16.v v8, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: sqrt_v6f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfsqrt.v v8, v10
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <6 x half>, ptr %x
-  %b = call <6 x half> @llvm.sqrt.v6f16(<6 x half> %a)
-  store <6 x half> %b, ptr %x
-  ret void
-}
-declare <6 x half> @llvm.sqrt.v6f16(<6 x half>)
-
-define void @sqrt_v4f32(ptr %x) {
-; CHECK-LABEL: sqrt_v4f32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vfsqrt.v v8, v8
-; CHECK-NEXT:    vse32.v v8, (a0)
-; CHECK-NEXT:    ret
-  %a = load <4 x float>, ptr %x
-  %b = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
-  store <4 x float> %b, ptr %x
-  ret void
-}
-declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
-
-define void @sqrt_v2f64(ptr %x) {
-; CHECK-LABEL: sqrt_v2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vfsqrt.v v8, v8
-; CHECK-NEXT:    vse64.v v8, (a0)
-; CHECK-NEXT:    ret
-  %a = load <2 x double>, ptr %x
-  %b = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %a)
-  store <2 x double> %b, ptr %x
-  ret void
-}
-declare <2 x double> @llvm.sqrt.v2f64(<2 x double>)
-
-define void @fma_v8f16(ptr %x, ptr %y, ptr %z) {
-; ZVFH-LABEL: fma_v8f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vle16.v v9, (a1)
-; ZVFH-NEXT:    vle16.v v10, (a2)
-; ZVFH-NEXT:    vfmacc.vv v10, v8, v9
-; ZVFH-NEXT:    vse16.v v10, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: fma_v8f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a2)
-; ZVFHMIN-NEXT:    vle16.v v9, (a0)
-; ZVFHMIN-NEXT:    vle16.v v10, (a1)
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v8, v14, v12
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <8 x half>, ptr %x
-  %b = load <8 x half>, ptr %y
-  %c = load <8 x half>, ptr %z
-  %d = call <8 x half> @llvm.fma.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c)
-  store <8 x half> %d, ptr %x
-  ret void
-}
-declare <8 x half> @llvm.fma.v8f16(<8 x half>, <8 x half>, <8 x half>)
-
-define void @fma_v6f16(ptr %x, ptr %y, ptr %z) {
-; ZVFH-LABEL: fma_v6f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vle16.v v9, (a1)
-; ZVFH-NEXT:    vle16.v v10, (a2)
-; ZVFH-NEXT:    vfmacc.vv v10, v8, v9
-; ZVFH-NEXT:    vse16.v v10, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: fma_v6f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a2)
-; ZVFHMIN-NEXT:    vle16.v v9, (a0)
-; ZVFHMIN-NEXT:    vle16.v v10, (a1)
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v8, v14, v12
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <6 x half>, ptr %x
-  %b = load <6 x half>, ptr %y
-  %c = load <6 x half>, ptr %z
-  %d = call <6 x half> @llvm.fma.v6f16(<6 x half> %a, <6 x half> %b, <6 x half> %c)
-  store <6 x half> %d, ptr %x
-  ret void
-}
-declare <6 x half> @llvm.fma.v6f16(<6 x half>, <6 x half>, <6 x half>)
-
-define void @fma_v4f32(ptr %x, ptr %y, ptr %z) {
-; CHECK-LABEL: fma_v4f32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vle32.v v9, (a1)
-; CHECK-NEXT:    vle32.v v10, (a2)
-; CHECK-NEXT:    vfmacc.vv v10, v8, v9
-; CHECK-NEXT:    vse32.v v10, (a0)
-; CHECK-NEXT:    ret
-  %a = load <4 x float>, ptr %x
-  %b = load <4 x float>, ptr %y
-  %c = load <4 x float>, ptr %z
-  %d = call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
-  store <4 x float> %d, ptr %x
-  ret void
-}
-declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
-
-define void @fma_v2f64(ptr %x, ptr %y, ptr %z) {
-; CHECK-LABEL: fma_v2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vle64.v v9, (a1)
-; CHECK-NEXT:    vle64.v v10, (a2)
-; CHECK-NEXT:    vfmacc.vv v10, v8, v9
-; CHECK-NEXT:    vse64.v v10, (a0)
-; CHECK-NEXT:    ret
-  %a = load <2 x double>, ptr %x
-  %b = load <2 x double>, ptr %y
-  %c = load <2 x double>, ptr %z
-  %d = call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c)
-  store <2 x double> %d, ptr %x
-  ret void
-}
-declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
-
-define void @fmsub_v8f16(ptr %x, ptr %y, ptr %z) {
-; ZVFH-LABEL: fmsub_v8f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vle16.v v9, (a1)
-; ZVFH-NEXT:    vle16.v v10, (a2)
-; ZVFH-NEXT:    vfmsac.vv v10, v8, v9
-; ZVFH-NEXT:    vse16.v v10, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: fmsub_v8f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a2)
-; ZVFHMIN-NEXT:    vle16.v v9, (a0)
-; ZVFHMIN-NEXT:    vle16.v v10, (a1)
-; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vxor.vx v8, v8, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v8, v14, v12
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <8 x half>, ptr %x
-  %b = load <8 x half>, ptr %y
-  %c = load <8 x half>, ptr %z
-  %neg = fneg <8 x half> %c
-  %d = call <8 x half> @llvm.fma.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %neg)
-  store <8 x half> %d, ptr %x
-  ret void
-}
-
-define void @fmsub_v6f16(ptr %x, ptr %y, ptr %z) {
-; ZVFH-LABEL: fmsub_v6f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vle16.v v9, (a1)
-; ZVFH-NEXT:    vle16.v v10, (a2)
-; ZVFH-NEXT:    vfmsac.vv v10, v8, v9
-; ZVFH-NEXT:    vse16.v v10, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: fmsub_v6f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a2)
-; ZVFHMIN-NEXT:    vle16.v v9, (a0)
-; ZVFHMIN-NEXT:    vle16.v v10, (a1)
-; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vxor.vx v8, v8, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v8, v14, v12
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <6 x half>, ptr %x
-  %b = load <6 x half>, ptr %y
-  %c = load <6 x half>, ptr %z
-  %neg = fneg <6 x half> %c
-  %d = call <6 x half> @llvm.fma.v6f16(<6 x half> %a, <6 x half> %b, <6 x half> %neg)
-  store <6 x half> %d, ptr %x
-  ret void
-}
-
-define void @fnmsub_v4f32(ptr %x, ptr %y, ptr %z) {
-; CHECK-LABEL: fnmsub_v4f32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vle32.v v9, (a1)
-; CHECK-NEXT:    vle32.v v10, (a2)
-; CHECK-NEXT:    vfnmsac.vv v10, v8, v9
-; CHECK-NEXT:    vse32.v v10, (a0)
-; CHECK-NEXT:    ret
-  %a = load <4 x float>, ptr %x
-  %b = load <4 x float>, ptr %y
-  %c = load <4 x float>, ptr %z
-  %neg = fneg <4 x float> %a
-  %d = call <4 x float> @llvm.fma.v4f32(<4 x float> %neg, <4 x float> %b, <4 x float> %c)
-  store <4 x float> %d, ptr %x
-  ret void
-}
-
-define void @fnmadd_v2f64(ptr %x, ptr %y, ptr %z) {
-; CHECK-LABEL: fnmadd_v2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vle64.v v9, (a1)
-; CHECK-NEXT:    vle64.v v10, (a2)
-; CHECK-NEXT:    vfnmacc.vv v10, v8, v9
-; CHECK-NEXT:    vse64.v v10, (a0)
-; CHECK-NEXT:    ret
-  %a = load <2 x double>, ptr %x
-  %b = load <2 x double>, ptr %y
-  %c = load <2 x double>, ptr %z
-  %neg = fneg <2 x double> %b
-  %neg2 = fneg <2 x double> %c
-  %d = call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %neg, <2 x double> %neg2)
-  store <2 x double> %d, ptr %x
-  ret void
-}
-
-define void @fadd_v16f16(ptr %x, ptr %y) {
-; ZVFH-LABEL: fadd_v16f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vle16.v v10, (a1)
-; ZVFH-NEXT:    vfadd.vv v8, v8, v10
-; ZVFH-NEXT:    vse16.v v8, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: fadd_v16f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a1)
-; ZVFHMIN-NEXT:    vle16.v v10, (a0)
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v8, v16, v12
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v8
-; ZVFHMIN-NEXT:    vse16.v v12, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <16 x half>, ptr %x
-  %b = load <16 x half>, ptr %y
-  %c = fadd <16 x half> %a, %b
-  store <16 x half> %c, ptr %x
-  ret void
-}
-
-define void @fadd_v8f32(ptr %x, ptr %y) {
-; CHECK-LABEL: fadd_v8f32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vle32.v v10, (a1)
-; CHECK-NEXT:    vfadd.vv v8, v8, v10
-; CHECK-NEXT:    vse32.v v8, (a0)
-; CHECK-NEXT:    ret
-  %a = load <8 x float>, ptr %x
-  %b = load <8 x float>, ptr %y
-  %c = fadd <8 x float> %a, %b
-  store <8 x float> %c, ptr %x
-  ret void
-}
-
-define void @fadd_v4f64(ptr %x, ptr %y) {
-; CHECK-LABEL: fadd_v4f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vle64.v v10, (a1)
-; CHECK-NEXT:    vfadd.vv v8, v8, v10
-; CHECK-NEXT:    vse64.v v8, (a0)
-; CHECK-NEXT:    ret
-  %a = load <4 x double>, ptr %x
-  %b = load <4 x double>, ptr %y
-  %c = fadd <4 x double> %a, %b
-  store <4 x double> %c, ptr %x
-  ret void
-}
-
-define void @fsub_v16f16(ptr %x, ptr %y) {
-; ZVFH-LABEL: fsub_v16f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vle16.v v10, (a1)
-; ZVFH-NEXT:    vfsub.vv v8, v8, v10
-; ZVFH-NEXT:    vse16.v v8, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: fsub_v16f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a1)
-; ZVFHMIN-NEXT:    vle16.v v10, (a0)
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v8, v16, v12
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v8
-; ZVFHMIN-NEXT:    vse16.v v12, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <16 x half>, ptr %x
-  %b = load <16 x half>, ptr %y
-  %c = fsub <16 x half> %a, %b
-  store <16 x half> %c, ptr %x
-  ret void
-}
-
-define void @fsub_v8f32(ptr %x, ptr %y) {
-; CHECK-LABEL: fsub_v8f32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vle32.v v10, (a1)
-; CHECK-NEXT:    vfsub.vv v8, v8, v10
-; CHECK-NEXT:    vse32.v v8, (a0)
-; CHECK-NEXT:    ret
-  %a = load <8 x float>, ptr %x
-  %b = load <8 x float>, ptr %y
-  %c = fsub <8 x float> %a, %b
-  store <8 x float> %c, ptr %x
-  ret void
-}
-
-define void @fsub_v4f64(ptr %x, ptr %y) {
-; CHECK-LABEL: fsub_v4f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vle64.v v10, (a1)
-; CHECK-NEXT:    vfsub.vv v8, v8, v10
-; CHECK-NEXT:    vse64.v v8, (a0)
-; CHECK-NEXT:    ret
-  %a = load <4 x double>, ptr %x
-  %b = load <4 x double>, ptr %y
-  %c = fsub <4 x double> %a, %b
-  store <4 x double> %c, ptr %x
-  ret void
-}
-
-define void @fmul_v16f16(ptr %x, ptr %y) {
-; ZVFH-LABEL: fmul_v16f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vle16.v v10, (a1)
-; ZVFH-NEXT:    vfmul.vv v8, v8, v10
-; ZVFH-NEXT:    vse16.v v8, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: fmul_v16f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a1)
-; ZVFHMIN-NEXT:    vle16.v v10, (a0)
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v8, v16, v12
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v8
-; ZVFHMIN-NEXT:    vse16.v v12, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <16 x half>, ptr %x
-  %b = load <16 x half>, ptr %y
-  %c = fmul <16 x half> %a, %b
-  store <16 x half> %c, ptr %x
-  ret void
-}
-
-define void @fmul_v8f32(ptr %x, ptr %y) {
-; CHECK-LABEL: fmul_v8f32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vle32.v v10, (a1)
-; CHECK-NEXT:    vfmul.vv v8, v8, v10
-; CHECK-NEXT:    vse32.v v8, (a0)
-; CHECK-NEXT:    ret
-  %a = load <8 x float>, ptr %x
-  %b = load <8 x float>, ptr %y
-  %c = fmul <8 x float> %a, %b
-  store <8 x float> %c, ptr %x
-  ret void
-}
-
-define void @fmul_v4f64(ptr %x, ptr %y) {
-; CHECK-LABEL: fmul_v4f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vle64.v v10, (a1)
-; CHECK-NEXT:    vfmul.vv v8, v8, v10
-; CHECK-NEXT:    vse64.v v8, (a0)
-; CHECK-NEXT:    ret
-  %a = load <4 x double>, ptr %x
-  %b = load <4 x double>, ptr %y
-  %c = fmul <4 x double> %a, %b
-  store <4 x double> %c, ptr %x
-  ret void
-}
-
-define void @fdiv_v16f16(ptr %x, ptr %y) {
-; ZVFH-LABEL: fdiv_v16f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vle16.v v10, (a1)
-; ZVFH-NEXT:    vfdiv.vv v8, v8, v10
-; ZVFH-NEXT:    vse16.v v8, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: fdiv_v16f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a1)
-; ZVFHMIN-NEXT:    vle16.v v10, (a0)
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v8, v16, v12
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v8
-; ZVFHMIN-NEXT:    vse16.v v12, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <16 x half>, ptr %x
-  %b = load <16 x half>, ptr %y
-  %c = fdiv <16 x half> %a, %b
-  store <16 x half> %c, ptr %x
-  ret void
-}
-
-define void @fdiv_v8f32(ptr %x, ptr %y) {
-; CHECK-LABEL: fdiv_v8f32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vle32.v v10, (a1)
-; CHECK-NEXT:    vfdiv.vv v8, v8, v10
-; CHECK-NEXT:    vse32.v v8, (a0)
-; CHECK-NEXT:    ret
-  %a = load <8 x float>, ptr %x
-  %b = load <8 x float>, ptr %y
-  %c = fdiv <8 x float> %a, %b
-  store <8 x float> %c, ptr %x
-  ret void
-}
-
-define void @fdiv_v4f64(ptr %x, ptr %y) {
-; CHECK-LABEL: fdiv_v4f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vle64.v v10, (a1)
-; CHECK-NEXT:    vfdiv.vv v8, v8, v10
-; CHECK-NEXT:    vse64.v v8, (a0)
-; CHECK-NEXT:    ret
-  %a = load <4 x double>, ptr %x
-  %b = load <4 x double>, ptr %y
-  %c = fdiv <4 x double> %a, %b
-  store <4 x double> %c, ptr %x
-  ret void
-}
-
-define void @fneg_v16f16(ptr %x) {
-; ZVFH-LABEL: fneg_v16f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vfneg.v v8, v8
-; ZVFH-NEXT:    vse16.v v8, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: fneg_v16f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vxor.vx v8, v8, a1
-; ZVFHMIN-NEXT:    vse16.v v8, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <16 x half>, ptr %x
-  %b = fneg <16 x half> %a
-  store <16 x half> %b, ptr %x
-  ret void
-}
-
-define void @fneg_v8f32(ptr %x) {
-; CHECK-LABEL: fneg_v8f32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vfneg.v v8, v8
-; CHECK-NEXT:    vse32.v v8, (a0)
-; CHECK-NEXT:    ret
-  %a = load <8 x float>, ptr %x
-  %b = fneg <8 x float> %a
-  store <8 x float> %b, ptr %x
-  ret void
-}
-
-define void @fneg_v4f64(ptr %x) {
-; CHECK-LABEL: fneg_v4f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vfneg.v v8, v8
-; CHECK-NEXT:    vse64.v v8, (a0)
-; CHECK-NEXT:    ret
-  %a = load <4 x double>, ptr %x
-  %b = fneg <4 x double> %a
-  store <4 x double> %b, ptr %x
-  ret void
-}
-
-define void @fma_v16f16(ptr %x, ptr %y, ptr %z) {
-; ZVFH-LABEL: fma_v16f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vle16.v v10, (a1)
-; ZVFH-NEXT:    vle16.v v12, (a2)
-; ZVFH-NEXT:    vfmacc.vv v12, v8, v10
-; ZVFH-NEXT:    vse16.v v12, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: fma_v16f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a2)
-; ZVFHMIN-NEXT:    vle16.v v10, (a0)
-; ZVFHMIN-NEXT:    vle16.v v12, (a1)
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v8, v20, v16
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v8
-; ZVFHMIN-NEXT:    vse16.v v12, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <16 x half>, ptr %x
-  %b = load <16 x half>, ptr %y
-  %c = load <16 x half>, ptr %z
-  %d = call <16 x half> @llvm.fma.v16f16(<16 x half> %a, <16 x half> %b, <16 x half> %c)
-  store <16 x half> %d, ptr %x
-  ret void
-}
-declare <16 x half> @llvm.fma.v16f16(<16 x half>, <16 x half>, <16 x half>)
-
-define void @fma_v8f32(ptr %x, ptr %y, ptr %z) {
-; CHECK-LABEL: fma_v8f32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vle32.v v10, (a1)
-; CHECK-NEXT:    vle32.v v12, (a2)
-; CHECK-NEXT:    vfmacc.vv v12, v8, v10
-; CHECK-NEXT:    vse32.v v12, (a0)
-; CHECK-NEXT:    ret
-  %a = load <8 x float>, ptr %x
-  %b = load <8 x float>, ptr %y
-  %c = load <8 x float>, ptr %z
-  %d = call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c)
-  store <8 x float> %d, ptr %x
-  ret void
-}
-declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>)
-
-define void @fma_v4f64(ptr %x, ptr %y, ptr %z) {
-; CHECK-LABEL: fma_v4f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vle64.v v10, (a1)
-; CHECK-NEXT:    vle64.v v12, (a2)
-; CHECK-NEXT:    vfmacc.vv v12, v8, v10
-; CHECK-NEXT:    vse64.v v12, (a0)
-; CHECK-NEXT:    ret
-  %a = load <4 x double>, ptr %x
-  %b = load <4 x double>, ptr %y
-  %c = load <4 x double>, ptr %z
-  %d = call <4 x double> @llvm.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c)
-  store <4 x double> %d, ptr %x
-  ret void
-}
-declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>)
-
-define void @fadd_vf_v8f16(ptr %x, half %y) {
-; ZVFH-LABEL: fadd_vf_v8f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vfadd.vf v8, v8, fa0
-; ZVFH-NEXT:    vse16.v v8, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: fadd_vf_v8f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
-; ZVFHMIN-NEXT:    vmv.v.x v9, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v8, v10, v12
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <8 x half>, ptr %x
-  %b = insertelement <8 x half> poison, half %y, i32 0
-  %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
-  %d = fadd <8 x half> %a, %c
-  store <8 x half> %d, ptr %x
-  ret void
-}
-
-define void @fadd_vf_v6f16(ptr %x, half %y) {
-; ZVFH-LABEL: fadd_vf_v6f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vfadd.vf v8, v8, fa0
-; ZVFH-NEXT:    vse16.v v8, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: fadd_vf_v6f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v9, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v8, v10, v12
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <6 x half>, ptr %x
-  %b = insertelement <6 x half> poison, half %y, i32 0
-  %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
-  %d = fadd <6 x half> %a, %c
-  store <6 x half> %d, ptr %x
-  ret void
-}
-
-define void @fadd_vf_v4f32(ptr %x, float %y) {
-; CHECK-LABEL: fadd_vf_v4f32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vfadd.vf v8, v8, fa0
-; CHECK-NEXT:    vse32.v v8, (a0)
-; CHECK-NEXT:    ret
-  %a = load <4 x float>, ptr %x
-  %b = insertelement <4 x float> poison, float %y, i32 0
-  %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
-  %d = fadd <4 x float> %a, %c
-  store <4 x float> %d, ptr %x
-  ret void
-}
-
-define void @fadd_vf_v2f64(ptr %x, double %y) {
-; CHECK-LABEL: fadd_vf_v2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vfadd.vf v8, v8, fa0
-; CHECK-NEXT:    vse64.v v8, (a0)
-; CHECK-NEXT:    ret
-  %a = load <2 x double>, ptr %x
-  %b = insertelement <2 x double> poison, double %y, i32 0
-  %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer
-  %d = fadd <2 x double> %a, %c
-  store <2 x double> %d, ptr %x
-  ret void
-}
-
-define void @fadd_fv_v8f16(ptr %x, half %y) {
-; ZVFH-LABEL: fadd_fv_v8f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vfadd.vf v8, v8, fa0
-; ZVFH-NEXT:    vse16.v v8, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: fadd_fv_v8f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
-; ZVFHMIN-NEXT:    vmv.v.x v9, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v8, v12, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <8 x half>, ptr %x
-  %b = insertelement <8 x half> poison, half %y, i32 0
-  %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
-  %d = fadd <8 x half> %c, %a
-  store <8 x half> %d, ptr %x
-  ret void
-}
-
-define void @fadd_fv_v6f16(ptr %x, half %y) {
-; ZVFH-LABEL: fadd_fv_v6f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vfadd.vf v8, v8, fa0
-; ZVFH-NEXT:    vse16.v v8, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: fadd_fv_v6f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v9, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v8, v12, v10
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <6 x half>, ptr %x
-  %b = insertelement <6 x half> poison, half %y, i32 0
-  %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
-  %d = fadd <6 x half> %c, %a
-  store <6 x half> %d, ptr %x
-  ret void
-}
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: sed 's/TY/half/g;s/N/8/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvfh -verify-machineinstrs | FileCheck %s --check-prefix=V8F16ZVFH
+; RUN: sed 's/TY/half/g;s/N/8/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvfh -verify-machineinstrs | FileCheck %s --check-prefix=V8F16ZVFH
 
-define void @fadd_fv_v4f32(ptr %x, float %y) {
-; CHECK-LABEL: fadd_fv_v4f32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vfadd.vf v8, v8, fa0
-; CHECK-NEXT:    vse32.v v8, (a0)
-; CHECK-NEXT:    ret
-  %a = load <4 x float>, ptr %x
-  %b = insertelement <4 x float> poison, float %y, i32 0
-  %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
-  %d = fadd <4 x float> %c, %a
-  store <4 x float> %d, ptr %x
-  ret void
-}
-
-define void @fadd_fv_v2f64(ptr %x, double %y) {
-; CHECK-LABEL: fadd_fv_v2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vfadd.vf v8, v8, fa0
-; CHECK-NEXT:    vse64.v v8, (a0)
-; CHECK-NEXT:    ret
-  %a = load <2 x double>, ptr %x
-  %b = insertelement <2 x double> poison, double %y, i32 0
-  %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer
-  %d = fadd <2 x double> %c, %a
-  store <2 x double> %d, ptr %x
-  ret void
-}
-
-define void @fsub_vf_v8f16(ptr %x, half %y) {
-; ZVFH-LABEL: fsub_vf_v8f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vfsub.vf v8, v8, fa0
-; ZVFH-NEXT:    vse16.v v8, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: fsub_vf_v8f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
-; ZVFHMIN-NEXT:    vmv.v.x v9, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v8, v10, v12
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <8 x half>, ptr %x
-  %b = insertelement <8 x half> poison, half %y, i32 0
-  %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
-  %d = fsub <8 x half> %a, %c
-  store <8 x half> %d, ptr %x
-  ret void
-}
-
-define void @fsub_vf_v6f16(ptr %x, half %y) {
-; ZVFH-LABEL: fsub_vf_v6f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vfsub.vf v8, v8, fa0
-; ZVFH-NEXT:    vse16.v v8, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: fsub_vf_v6f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v9, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v8, v10, v12
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <6 x half>, ptr %x
-  %b = insertelement <6 x half> poison, half %y, i32 0
-  %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
-  %d = fsub <6 x half> %a, %c
-  store <6 x half> %d, ptr %x
-  ret void
-}
-
-define void @fsub_vf_v4f32(ptr %x, float %y) {
-; CHECK-LABEL: fsub_vf_v4f32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vfsub.vf v8, v8, fa0
-; CHECK-NEXT:    vse32.v v8, (a0)
-; CHECK-NEXT:    ret
-  %a = load <4 x float>, ptr %x
-  %b = insertelement <4 x float> poison, float %y, i32 0
-  %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
-  %d = fsub <4 x float> %a, %c
-  store <4 x float> %d, ptr %x
-  ret void
-}
-
-define void @fsub_vf_v2f64(ptr %x, double %y) {
-; CHECK-LABEL: fsub_vf_v2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vfsub.vf v8, v8, fa0
-; CHECK-NEXT:    vse64.v v8, (a0)
-; CHECK-NEXT:    ret
-  %a = load <2 x double>, ptr %x
-  %b = insertelement <2 x double> poison, double %y, i32 0
-  %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer
-  %d = fsub <2 x double> %a, %c
-  store <2 x double> %d, ptr %x
-  ret void
-}
-
-define void @fsub_fv_v8f16(ptr %x, half %y) {
-; ZVFH-LABEL: fsub_fv_v8f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vfrsub.vf v8, v8, fa0
-; ZVFH-NEXT:    vse16.v v8, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: fsub_fv_v8f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
-; ZVFHMIN-NEXT:    vmv.v.x v9, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v8, v12, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <8 x half>, ptr %x
-  %b = insertelement <8 x half> poison, half %y, i32 0
-  %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
-  %d = fsub <8 x half> %c, %a
-  store <8 x half> %d, ptr %x
-  ret void
-}
-
-define void @fsub_fv_v6f16(ptr %x, half %y) {
-; ZVFH-LABEL: fsub_fv_v6f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vfrsub.vf v8, v8, fa0
-; ZVFH-NEXT:    vse16.v v8, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: fsub_fv_v6f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v9, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v8, v12, v10
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <6 x half>, ptr %x
-  %b = insertelement <6 x half> poison, half %y, i32 0
-  %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
-  %d = fsub <6 x half> %c, %a
-  store <6 x half> %d, ptr %x
-  ret void
-}
-
-define void @fsub_fv_v4f32(ptr %x, float %y) {
-; CHECK-LABEL: fsub_fv_v4f32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vfrsub.vf v8, v8, fa0
-; CHECK-NEXT:    vse32.v v8, (a0)
-; CHECK-NEXT:    ret
-  %a = load <4 x float>, ptr %x
-  %b = insertelement <4 x float> poison, float %y, i32 0
-  %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
-  %d = fsub <4 x float> %c, %a
-  store <4 x float> %d, ptr %x
-  ret void
-}
-
-define void @fsub_fv_v2f64(ptr %x, double %y) {
-; CHECK-LABEL: fsub_fv_v2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vfrsub.vf v8, v8, fa0
-; CHECK-NEXT:    vse64.v v8, (a0)
-; CHECK-NEXT:    ret
-  %a = load <2 x double>, ptr %x
-  %b = insertelement <2 x double> poison, double %y, i32 0
-  %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer
-  %d = fsub <2 x double> %c, %a
-  store <2 x double> %d, ptr %x
-  ret void
-}
-
-define void @fmul_vf_v8f16(ptr %x, half %y) {
-; ZVFH-LABEL: fmul_vf_v8f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vfmul.vf v8, v8, fa0
-; ZVFH-NEXT:    vse16.v v8, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: fmul_vf_v8f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
-; ZVFHMIN-NEXT:    vmv.v.x v9, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v8, v10, v12
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <8 x half>, ptr %x
-  %b = insertelement <8 x half> poison, half %y, i32 0
-  %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
-  %d = fmul <8 x half> %a, %c
-  store <8 x half> %d, ptr %x
-  ret void
-}
-
-define void @fmul_vf_v6f16(ptr %x, half %y) {
-; ZVFH-LABEL: fmul_vf_v6f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vfmul.vf v8, v8, fa0
-; ZVFH-NEXT:    vse16.v v8, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: fmul_vf_v6f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v9, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v8, v10, v12
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <6 x half>, ptr %x
-  %b = insertelement <6 x half> poison, half %y, i32 0
-  %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
-  %d = fmul <6 x half> %a, %c
-  store <6 x half> %d, ptr %x
-  ret void
-}
-
-define void @fmul_vf_v4f32(ptr %x, float %y) {
-; CHECK-LABEL: fmul_vf_v4f32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vfmul.vf v8, v8, fa0
-; CHECK-NEXT:    vse32.v v8, (a0)
-; CHECK-NEXT:    ret
-  %a = load <4 x float>, ptr %x
-  %b = insertelement <4 x float> poison, float %y, i32 0
-  %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
-  %d = fmul <4 x float> %a, %c
-  store <4 x float> %d, ptr %x
-  ret void
-}
-
-define void @fmul_vf_v2f64(ptr %x, double %y) {
-; CHECK-LABEL: fmul_vf_v2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vfmul.vf v8, v8, fa0
-; CHECK-NEXT:    vse64.v v8, (a0)
-; CHECK-NEXT:    ret
-  %a = load <2 x double>, ptr %x
-  %b = insertelement <2 x double> poison, double %y, i32 0
-  %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer
-  %d = fmul <2 x double> %a, %c
-  store <2 x double> %d, ptr %x
-  ret void
-}
-
-define void @fmul_fv_v8f16(ptr %x, half %y) {
-; ZVFH-LABEL: fmul_fv_v8f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vfmul.vf v8, v8, fa0
-; ZVFH-NEXT:    vse16.v v8, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: fmul_fv_v8f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
-; ZVFHMIN-NEXT:    vmv.v.x v9, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v8, v12, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <8 x half>, ptr %x
-  %b = insertelement <8 x half> poison, half %y, i32 0
-  %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
-  %d = fmul <8 x half> %c, %a
-  store <8 x half> %d, ptr %x
-  ret void
-}
-
-define void @fmul_fv_v6f16(ptr %x, half %y) {
-; ZVFH-LABEL: fmul_fv_v6f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vfmul.vf v8, v8, fa0
-; ZVFH-NEXT:    vse16.v v8, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: fmul_fv_v6f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v9, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v8, v12, v10
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <6 x half>, ptr %x
-  %b = insertelement <6 x half> poison, half %y, i32 0
-  %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
-  %d = fmul <6 x half> %c, %a
-  store <6 x half> %d, ptr %x
-  ret void
-}
-
-define void @fmul_fv_v4f32(ptr %x, float %y) {
-; CHECK-LABEL: fmul_fv_v4f32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vfmul.vf v8, v8, fa0
-; CHECK-NEXT:    vse32.v v8, (a0)
-; CHECK-NEXT:    ret
-  %a = load <4 x float>, ptr %x
-  %b = insertelement <4 x float> poison, float %y, i32 0
-  %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
-  %d = fmul <4 x float> %c, %a
-  store <4 x float> %d, ptr %x
-  ret void
-}
-
-define void @fmul_fv_v2f64(ptr %x, double %y) {
-; CHECK-LABEL: fmul_fv_v2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vfmul.vf v8, v8, fa0
-; CHECK-NEXT:    vse64.v v8, (a0)
-; CHECK-NEXT:    ret
-  %a = load <2 x double>, ptr %x
-  %b = insertelement <2 x double> poison, double %y, i32 0
-  %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer
-  %d = fmul <2 x double> %c, %a
-  store <2 x double> %d, ptr %x
-  ret void
-}
-
-define void @fdiv_vf_v8f16(ptr %x, half %y) {
-; ZVFH-LABEL: fdiv_vf_v8f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vfdiv.vf v8, v8, fa0
-; ZVFH-NEXT:    vse16.v v8, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: fdiv_vf_v8f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
-; ZVFHMIN-NEXT:    vmv.v.x v9, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v8, v10, v12
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <8 x half>, ptr %x
-  %b = insertelement <8 x half> poison, half %y, i32 0
-  %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
-  %d = fdiv <8 x half> %a, %c
-  store <8 x half> %d, ptr %x
-  ret void
-}
-
-define void @fdiv_vf_v6f16(ptr %x, half %y) {
-; ZVFH-LABEL: fdiv_vf_v6f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vfdiv.vf v8, v8, fa0
-; ZVFH-NEXT:    vse16.v v8, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: fdiv_vf_v6f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v9, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v8, v10, v12
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <6 x half>, ptr %x
-  %b = insertelement <6 x half> poison, half %y, i32 0
-  %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
-  %d = fdiv <6 x half> %a, %c
-  store <6 x half> %d, ptr %x
-  ret void
-}
-
-define void @fdiv_vf_v4f32(ptr %x, float %y) {
-; CHECK-LABEL: fdiv_vf_v4f32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vfdiv.vf v8, v8, fa0
-; CHECK-NEXT:    vse32.v v8, (a0)
-; CHECK-NEXT:    ret
-  %a = load <4 x float>, ptr %x
-  %b = insertelement <4 x float> poison, float %y, i32 0
-  %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
-  %d = fdiv <4 x float> %a, %c
-  store <4 x float> %d, ptr %x
-  ret void
-}
-
-define void @fdiv_vf_v2f64(ptr %x, double %y) {
-; CHECK-LABEL: fdiv_vf_v2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vfdiv.vf v8, v8, fa0
-; CHECK-NEXT:    vse64.v v8, (a0)
-; CHECK-NEXT:    ret
-  %a = load <2 x double>, ptr %x
-  %b = insertelement <2 x double> poison, double %y, i32 0
-  %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer
-  %d = fdiv <2 x double> %a, %c
-  store <2 x double> %d, ptr %x
-  ret void
-}
-
-define void @fdiv_fv_v8f16(ptr %x, half %y) {
-; ZVFH-LABEL: fdiv_fv_v8f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vfrdiv.vf v8, v8, fa0
-; ZVFH-NEXT:    vse16.v v8, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: fdiv_fv_v8f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
-; ZVFHMIN-NEXT:    vmv.v.x v9, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v8, v12, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <8 x half>, ptr %x
-  %b = insertelement <8 x half> poison, half %y, i32 0
-  %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
-  %d = fdiv <8 x half> %c, %a
-  store <8 x half> %d, ptr %x
-  ret void
-}
-
-define void @fdiv_fv_v6f16(ptr %x, half %y) {
-; ZVFH-LABEL: fdiv_fv_v6f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vfrdiv.vf v8, v8, fa0
-; ZVFH-NEXT:    vse16.v v8, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: fdiv_fv_v6f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v9, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfdiv.vv v8, v12, v10
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <6 x half>, ptr %x
-  %b = insertelement <6 x half> poison, half %y, i32 0
-  %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
-  %d = fdiv <6 x half> %c, %a
-  store <6 x half> %d, ptr %x
-  ret void
-}
-
-define void @fdiv_fv_v4f32(ptr %x, float %y) {
-; CHECK-LABEL: fdiv_fv_v4f32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0
-; CHECK-NEXT:    vse32.v v8, (a0)
-; CHECK-NEXT:    ret
-  %a = load <4 x float>, ptr %x
-  %b = insertelement <4 x float> poison, float %y, i32 0
-  %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
-  %d = fdiv <4 x float> %c, %a
-  store <4 x float> %d, ptr %x
-  ret void
-}
-
-define void @fdiv_fv_v2f64(ptr %x, double %y) {
-; CHECK-LABEL: fdiv_fv_v2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0
-; CHECK-NEXT:    vse64.v v8, (a0)
-; CHECK-NEXT:    ret
-  %a = load <2 x double>, ptr %x
-  %b = insertelement <2 x double> poison, double %y, i32 0
-  %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer
-  %d = fdiv <2 x double> %c, %a
-  store <2 x double> %d, ptr %x
-  ret void
-}
-
-define void @fma_vf_v8f16(ptr %x, ptr %y, half %z) {
-; ZVFH-LABEL: fma_vf_v8f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vle16.v v9, (a1)
-; ZVFH-NEXT:    vfmacc.vf v9, fa0, v8
-; ZVFH-NEXT:    vse16.v v9, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: fma_vf_v8f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a1)
-; ZVFHMIN-NEXT:    vle16.v v9, (a0)
-; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
-; ZVFHMIN-NEXT:    vmv.v.x v10, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v8, v14, v12
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <8 x half>, ptr %x
-  %b = load <8 x half>, ptr %y
-  %c = insertelement <8 x half> poison, half %z, i32 0
-  %d = shufflevector <8 x half> %c, <8 x half> poison, <8 x i32> zeroinitializer
-  %e = call <8 x half> @llvm.fma.v8f16(<8 x half> %a, <8 x half> %d, <8 x half> %b)
-  store <8 x half> %e, ptr %x
-  ret void
-}
-
-define void @fma_vf_v6f16(ptr %x, ptr %y, half %z) {
-; ZVFH-LABEL: fma_vf_v6f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vle16.v v9, (a1)
-; ZVFH-NEXT:    vfmacc.vf v9, fa0, v8
-; ZVFH-NEXT:    vse16.v v9, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: fma_vf_v6f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a1)
-; ZVFHMIN-NEXT:    vle16.v v9, (a0)
-; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v8, v14, v12
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <6 x half>, ptr %x
-  %b = load <6 x half>, ptr %y
-  %c = insertelement <6 x half> poison, half %z, i32 0
-  %d = shufflevector <6 x half> %c, <6 x half> poison, <6 x i32> zeroinitializer
-  %e = call <6 x half> @llvm.fma.v6f16(<6 x half> %a, <6 x half> %d, <6 x half> %b)
-  store <6 x half> %e, ptr %x
-  ret void
-}
-
-define void @fma_vf_v4f32(ptr %x, ptr %y, float %z) {
-; CHECK-LABEL: fma_vf_v4f32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vle32.v v9, (a1)
-; CHECK-NEXT:    vfmacc.vf v9, fa0, v8
-; CHECK-NEXT:    vse32.v v9, (a0)
-; CHECK-NEXT:    ret
-  %a = load <4 x float>, ptr %x
-  %b = load <4 x float>, ptr %y
-  %c = insertelement <4 x float> poison, float %z, i32 0
-  %d = shufflevector <4 x float> %c, <4 x float> poison, <4 x i32> zeroinitializer
-  %e = call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %d, <4 x float> %b)
-  store <4 x float> %e, ptr %x
-  ret void
-}
-
-define void @fma_vf_v2f64(ptr %x, ptr %y, double %z) {
-; CHECK-LABEL: fma_vf_v2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vle64.v v9, (a1)
-; CHECK-NEXT:    vfmacc.vf v9, fa0, v8
-; CHECK-NEXT:    vse64.v v9, (a0)
-; CHECK-NEXT:    ret
-  %a = load <2 x double>, ptr %x
-  %b = load <2 x double>, ptr %y
-  %c = insertelement <2 x double> poison, double %z, i32 0
-  %d = shufflevector <2 x double> %c, <2 x double> poison, <2 x i32> zeroinitializer
-  %e = call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %d, <2 x double> %b)
-  store <2 x double> %e, ptr %x
-  ret void
-}
-
-define void @fma_fv_v8f16(ptr %x, ptr %y, half %z) {
-; ZVFH-LABEL: fma_fv_v8f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vle16.v v9, (a1)
-; ZVFH-NEXT:    vfmacc.vf v9, fa0, v8
-; ZVFH-NEXT:    vse16.v v9, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: fma_fv_v8f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a1)
-; ZVFHMIN-NEXT:    vle16.v v9, (a0)
-; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
-; ZVFHMIN-NEXT:    vmv.v.x v10, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v8, v14, v12
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <8 x half>, ptr %x
-  %b = load <8 x half>, ptr %y
-  %c = insertelement <8 x half> poison, half %z, i32 0
-  %d = shufflevector <8 x half> %c, <8 x half> poison, <8 x i32> zeroinitializer
-  %e = call <8 x half> @llvm.fma.v8f16(<8 x half> %d, <8 x half> %a, <8 x half> %b)
-  store <8 x half> %e, ptr %x
-  ret void
-}
-
-define void @fma_fv_v6f16(ptr %x, ptr %y, half %z) {
-; ZVFH-LABEL: fma_fv_v6f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vle16.v v9, (a1)
-; ZVFH-NEXT:    vfmacc.vf v9, fa0, v8
-; ZVFH-NEXT:    vse16.v v9, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: fma_fv_v6f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a1)
-; ZVFHMIN-NEXT:    vle16.v v9, (a0)
-; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v8, v14, v12
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <6 x half>, ptr %x
-  %b = load <6 x half>, ptr %y
-  %c = insertelement <6 x half> poison, half %z, i32 0
-  %d = shufflevector <6 x half> %c, <6 x half> poison, <6 x i32> zeroinitializer
-  %e = call <6 x half> @llvm.fma.v6f16(<6 x half> %d, <6 x half> %a, <6 x half> %b)
-  store <6 x half> %e, ptr %x
-  ret void
-}
-
-define void @fma_fv_v4f32(ptr %x, ptr %y, float %z) {
-; CHECK-LABEL: fma_fv_v4f32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vle32.v v9, (a1)
-; CHECK-NEXT:    vfmacc.vf v9, fa0, v8
-; CHECK-NEXT:    vse32.v v9, (a0)
-; CHECK-NEXT:    ret
-  %a = load <4 x float>, ptr %x
-  %b = load <4 x float>, ptr %y
-  %c = insertelement <4 x float> poison, float %z, i32 0
-  %d = shufflevector <4 x float> %c, <4 x float> poison, <4 x i32> zeroinitializer
-  %e = call <4 x float> @llvm.fma.v4f32(<4 x float> %d, <4 x float> %a, <4 x float> %b)
-  store <4 x float> %e, ptr %x
-  ret void
-}
-
-define void @fma_fv_v2f64(ptr %x, ptr %y, double %z) {
-; CHECK-LABEL: fma_fv_v2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vle64.v v9, (a1)
-; CHECK-NEXT:    vfmacc.vf v9, fa0, v8
-; CHECK-NEXT:    vse64.v v9, (a0)
-; CHECK-NEXT:    ret
-  %a = load <2 x double>, ptr %x
-  %b = load <2 x double>, ptr %y
-  %c = insertelement <2 x double> poison, double %z, i32 0
-  %d = shufflevector <2 x double> %c, <2 x double> poison, <2 x i32> zeroinitializer
-  %e = call <2 x double> @llvm.fma.v2f64(<2 x double> %d, <2 x double> %a, <2 x double> %b)
-  store <2 x double> %e, ptr %x
-  ret void
-}
-
-define void @fmsub_vf_v8f16(ptr %x, ptr %y, half %z) {
-; ZVFH-LABEL: fmsub_vf_v8f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vle16.v v9, (a1)
-; ZVFH-NEXT:    vfmsac.vf v9, fa0, v8
-; ZVFH-NEXT:    vse16.v v9, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: fmsub_vf_v8f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.w a2, fa0
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a1)
-; ZVFHMIN-NEXT:    vle16.v v9, (a0)
-; ZVFHMIN-NEXT:    vmv.v.x v10, a2
-; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vxor.vx v8, v8, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v8, v14, v12
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <8 x half>, ptr %x
-  %b = load <8 x half>, ptr %y
-  %c = insertelement <8 x half> poison, half %z, i32 0
-  %d = shufflevector <8 x half> %c, <8 x half> poison, <8 x i32> zeroinitializer
-  %neg = fneg <8 x half> %b
-  %e = call <8 x half> @llvm.fma.v8f16(<8 x half> %a, <8 x half> %d, <8 x half> %neg)
-  store <8 x half> %e, ptr %x
-  ret void
-}
-
-define void @fmsub_vf_v6f16(ptr %x, ptr %y, half %z) {
-; ZVFH-LABEL: fmsub_vf_v6f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vle16.v v9, (a1)
-; ZVFH-NEXT:    vfmsac.vf v9, fa0, v8
-; ZVFH-NEXT:    vse16.v v9, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: fmsub_vf_v6f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    fmv.x.w a2, fa0
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a1)
-; ZVFHMIN-NEXT:    vle16.v v9, (a0)
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a2
-; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vxor.vx v8, v8, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v8, v14, v12
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <6 x half>, ptr %x
-  %b = load <6 x half>, ptr %y
-  %c = insertelement <6 x half> poison, half %z, i32 0
-  %d = shufflevector <6 x half> %c, <6 x half> poison, <6 x i32> zeroinitializer
-  %neg = fneg <6 x half> %b
-  %e = call <6 x half> @llvm.fma.v6f16(<6 x half> %a, <6 x half> %d, <6 x half> %neg)
-  store <6 x half> %e, ptr %x
-  ret void
-}
-
-define void @fnmsub_vf_v4f32(ptr %x, ptr %y, float %z) {
-; CHECK-LABEL: fnmsub_vf_v4f32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vle32.v v9, (a1)
-; CHECK-NEXT:    vfnmsac.vf v9, fa0, v8
-; CHECK-NEXT:    vse32.v v9, (a0)
-; CHECK-NEXT:    ret
-  %a = load <4 x float>, ptr %x
-  %b = load <4 x float>, ptr %y
-  %c = insertelement <4 x float> poison, float %z, i32 0
-  %d = shufflevector <4 x float> %c, <4 x float> poison, <4 x i32> zeroinitializer
-  %neg = fneg <4 x float> %a
-  %e = call <4 x float> @llvm.fma.v4f32(<4 x float> %neg, <4 x float> %d, <4 x float> %b)
-  store <4 x float> %e, ptr %x
-  ret void
-}
+; RUN: sed 's/TY/half/g;s/N/8/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvfhmin -verify-machineinstrs | FileCheck %s --check-prefix=V8F16ZVFHMIN
+; RUN: sed 's/TY/half/g;s/N/8/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvfhmin -verify-machineinstrs | FileCheck %s --check-prefix=V8F16ZVFHMIN
 
-define void @fnmadd_vf_v2f64(ptr %x, ptr %y, double %z) {
-; CHECK-LABEL: fnmadd_vf_v2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vle64.v v9, (a1)
-; CHECK-NEXT:    vfnmacc.vf v9, fa0, v8
-; CHECK-NEXT:    vse64.v v9, (a0)
-; CHECK-NEXT:    ret
-  %a = load <2 x double>, ptr %x
-  %b = load <2 x double>, ptr %y
-  %c = insertelement <2 x double> poison, double %z, i32 0
-  %d = shufflevector <2 x double> %c, <2 x double> poison, <2 x i32> zeroinitializer
-  %neg = fneg <2 x double> %a
-  %neg2 = fneg <2 x double> %b
-  %e = call <2 x double> @llvm.fma.v2f64(<2 x double> %neg, <2 x double> %d, <2 x double> %neg2)
-  store <2 x double> %e, ptr %x
-  ret void
-}
+; RUN: sed 's/TY/half/g;s/N/6/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvfh -verify-machineinstrs | FileCheck %s --check-prefix=V6F16ZVFH
+; RUN: sed 's/TY/half/g;s/N/6/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvfh -verify-machineinstrs | FileCheck %s --check-prefix=V6F16ZVFH
 
-define void @fnmsub_fv_v4f32(ptr %x, ptr %y, float %z) {
-; CHECK-LABEL: fnmsub_fv_v4f32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vle32.v v9, (a1)
-; CHECK-NEXT:    vfnmsac.vf v9, fa0, v8
-; CHECK-NEXT:    vse32.v v9, (a0)
-; CHECK-NEXT:    ret
-  %a = load <4 x float>, ptr %x
-  %b = load <4 x float>, ptr %y
-  %c = insertelement <4 x float> poison, float %z, i32 0
-  %d = shufflevector <4 x float> %c, <4 x float> poison, <4 x i32> zeroinitializer
-  %neg = fneg <4 x float> %d
-  %e = call <4 x float> @llvm.fma.v4f32(<4 x float> %neg, <4 x float> %a, <4 x float> %b)
-  store <4 x float> %e, ptr %x
-  ret void
-}
+; RUN: sed 's/TY/half/g;s/N/6/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvfhmin -verify-machineinstrs | FileCheck %s --check-prefix=V6F16ZVFHMIN
+; RUN: sed 's/TY/half/g;s/N/6/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvfhmin -verify-machineinstrs | FileCheck %s --check-prefix=V6F16ZVFHMIN
 
-define void @fnmadd_fv_v2f64(ptr %x, ptr %y, double %z) {
-; CHECK-LABEL: fnmadd_fv_v2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vle64.v v9, (a1)
-; CHECK-NEXT:    vfnmacc.vf v9, fa0, v8
-; CHECK-NEXT:    vse64.v v9, (a0)
-; CHECK-NEXT:    ret
-  %a = load <2 x double>, ptr %x
-  %b = load <2 x double>, ptr %y
-  %c = insertelement <2 x double> poison, double %z, i32 0
-  %d = shufflevector <2 x double> %c, <2 x double> poison, <2 x i32> zeroinitializer
-  %neg = fneg <2 x double> %d
-  %neg2 = fneg <2 x double> %b
-  %e = call <2 x double> @llvm.fma.v2f64(<2 x double> %neg, <2 x double> %a, <2 x double> %neg2)
-  store <2 x double> %e, ptr %x
-  ret void
-}
+; RUN: sed 's/TY/float/g;s/N/4/g' %s | llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs | FileCheck %s --check-prefix=V4F32
+; RUN: sed 's/TY/float/g;s/N/4/g' %s | llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck %s --check-prefix=V4F32
 
-define void @trunc_v8f16(ptr %x) {
-; ZVFH-LABEL: trunc_v8f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    lui a1, %hi(.LCPI115_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI115_0)(a1)
-; ZVFH-NEXT:    vfabs.v v9, v8
-; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
-; ZVFH-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
-; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; ZVFH-NEXT:    vse16.v v8, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: trunc_v8f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v8, v10
-; ZVFHMIN-NEXT:    lui a1, 307200
-; ZVFHMIN-NEXT:    fmv.w.x fa5, a1
-; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
-; ZVFHMIN-NEXT:    vfcvt.rtz.x.f.v v8, v10, v0.t
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
-; ZVFHMIN-NEXT:    vse16.v v8, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <8 x half>, ptr %x
-  %b = call <8 x half> @llvm.trunc.v8f16(<8 x half> %a)
-  store <8 x half> %b, ptr %x
-  ret void
-}
-declare <8 x half> @llvm.trunc.v8f16(<8 x half>)
-
-define void @trunc_v6f16(ptr %x) {
-; ZVFH-LABEL: trunc_v6f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    lui a1, %hi(.LCPI116_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI116_0)(a1)
-; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFH-NEXT:    vfabs.v v9, v8
-; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
-; ZVFH-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
-; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; ZVFH-NEXT:    vse16.v v8, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: trunc_v6f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v8, v10
-; ZVFHMIN-NEXT:    lui a1, 307200
-; ZVFHMIN-NEXT:    fmv.w.x fa5, a1
-; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
-; ZVFHMIN-NEXT:    vfcvt.rtz.x.f.v v8, v10, v0.t
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
-; ZVFHMIN-NEXT:    vse16.v v8, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <6 x half>, ptr %x
-  %b = call <6 x half> @llvm.trunc.v6f16(<6 x half> %a)
-  store <6 x half> %b, ptr %x
-  ret void
-}
-declare <6 x half> @llvm.trunc.v6f16(<6 x half>)
-
-define void @trunc_v4f32(ptr %x) {
-; CHECK-LABEL: trunc_v4f32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vfabs.v v9, v8
-; CHECK-NEXT:    lui a1, 307200
-; CHECK-NEXT:    fmv.w.x fa5, a1
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    vse32.v v8, (a0)
-; CHECK-NEXT:    ret
-  %a = load <4 x float>, ptr %x
-  %b = call <4 x float> @llvm.trunc.v4f32(<4 x float> %a)
-  store <4 x float> %b, ptr %x
-  ret void
-}
-declare <4 x float> @llvm.trunc.v4f32(<4 x float>)
-
-define void @trunc_v2f64(ptr %x) {
-; CHECK-LABEL: trunc_v2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    lui a1, %hi(.LCPI118_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI118_0)(a1)
-; CHECK-NEXT:    vfabs.v v9, v8
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    vse64.v v8, (a0)
-; CHECK-NEXT:    ret
-  %a = load <2 x double>, ptr %x
-  %b = call <2 x double> @llvm.trunc.v2f64(<2 x double> %a)
-  store <2 x double> %b, ptr %x
-  ret void
-}
-declare <2 x double> @llvm.trunc.v2f64(<2 x double>)
-
-define void @ceil_v8f16(ptr %x) {
-; ZVFH-LABEL: ceil_v8f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    lui a1, %hi(.LCPI119_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI119_0)(a1)
-; ZVFH-NEXT:    vfabs.v v9, v8
-; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
-; ZVFH-NEXT:    fsrmi a1, 3
-; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; ZVFH-NEXT:    fsrm a1
-; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; ZVFH-NEXT:    vse16.v v8, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: ceil_v8f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v8, v10
-; ZVFHMIN-NEXT:    lui a1, 307200
-; ZVFHMIN-NEXT:    fmv.w.x fa5, a1
-; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
-; ZVFHMIN-NEXT:    fsrmi a1, 3
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
-; ZVFHMIN-NEXT:    fsrm a1
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
-; ZVFHMIN-NEXT:    vse16.v v8, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <8 x half>, ptr %x
-  %b = call <8 x half> @llvm.ceil.v8f16(<8 x half> %a)
-  store <8 x half> %b, ptr %x
-  ret void
-}
-declare <8 x half> @llvm.ceil.v8f16(<8 x half>)
-
-define void @ceil_v6f16(ptr %x) {
-; ZVFH-LABEL: ceil_v6f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    lui a1, %hi(.LCPI120_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI120_0)(a1)
-; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFH-NEXT:    vfabs.v v9, v8
-; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
-; ZVFH-NEXT:    fsrmi a1, 3
-; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; ZVFH-NEXT:    fsrm a1
-; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; ZVFH-NEXT:    vse16.v v8, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: ceil_v6f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v8, v10
-; ZVFHMIN-NEXT:    lui a1, 307200
-; ZVFHMIN-NEXT:    fmv.w.x fa5, a1
-; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
-; ZVFHMIN-NEXT:    fsrmi a1, 3
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
-; ZVFHMIN-NEXT:    fsrm a1
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
-; ZVFHMIN-NEXT:    vse16.v v8, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <6 x half>, ptr %x
-  %b = call <6 x half> @llvm.ceil.v6f16(<6 x half> %a)
-  store <6 x half> %b, ptr %x
-  ret void
-}
-declare <6 x half> @llvm.ceil.v6f16(<6 x half>)
-
-define void @ceil_v4f32(ptr %x) {
-; CHECK-LABEL: ceil_v4f32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vfabs.v v9, v8
-; CHECK-NEXT:    lui a1, 307200
-; CHECK-NEXT:    fmv.w.x fa5, a1
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5
-; CHECK-NEXT:    fsrmi a1, 3
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    vse32.v v8, (a0)
-; CHECK-NEXT:    ret
-  %a = load <4 x float>, ptr %x
-  %b = call <4 x float> @llvm.ceil.v4f32(<4 x float> %a)
-  store <4 x float> %b, ptr %x
-  ret void
-}
-declare <4 x float> @llvm.ceil.v4f32(<4 x float>)
-
-define void @ceil_v2f64(ptr %x) {
-; CHECK-LABEL: ceil_v2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    lui a1, %hi(.LCPI122_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI122_0)(a1)
-; CHECK-NEXT:    vfabs.v v9, v8
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5
-; CHECK-NEXT:    fsrmi a1, 3
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    vse64.v v8, (a0)
-; CHECK-NEXT:    ret
-  %a = load <2 x double>, ptr %x
-  %b = call <2 x double> @llvm.ceil.v2f64(<2 x double> %a)
-  store <2 x double> %b, ptr %x
-  ret void
-}
-declare <2 x double> @llvm.ceil.v2f64(<2 x double>)
-
-define void @floor_v8f16(ptr %x) {
-; ZVFH-LABEL: floor_v8f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    lui a1, %hi(.LCPI123_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI123_0)(a1)
-; ZVFH-NEXT:    vfabs.v v9, v8
-; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
-; ZVFH-NEXT:    fsrmi a1, 2
-; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; ZVFH-NEXT:    fsrm a1
-; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; ZVFH-NEXT:    vse16.v v8, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: floor_v8f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v8, v10
-; ZVFHMIN-NEXT:    lui a1, 307200
-; ZVFHMIN-NEXT:    fmv.w.x fa5, a1
-; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
-; ZVFHMIN-NEXT:    fsrmi a1, 2
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
-; ZVFHMIN-NEXT:    fsrm a1
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
-; ZVFHMIN-NEXT:    vse16.v v8, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <8 x half>, ptr %x
-  %b = call <8 x half> @llvm.floor.v8f16(<8 x half> %a)
-  store <8 x half> %b, ptr %x
-  ret void
-}
-declare <8 x half> @llvm.floor.v8f16(<8 x half>)
-
-define void @floor_v6f16(ptr %x) {
-; ZVFH-LABEL: floor_v6f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    lui a1, %hi(.LCPI124_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI124_0)(a1)
-; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFH-NEXT:    vfabs.v v9, v8
-; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
-; ZVFH-NEXT:    fsrmi a1, 2
-; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; ZVFH-NEXT:    fsrm a1
-; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; ZVFH-NEXT:    vse16.v v8, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: floor_v6f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v8, v10
-; ZVFHMIN-NEXT:    lui a1, 307200
-; ZVFHMIN-NEXT:    fmv.w.x fa5, a1
-; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
-; ZVFHMIN-NEXT:    fsrmi a1, 2
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
-; ZVFHMIN-NEXT:    fsrm a1
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
-; ZVFHMIN-NEXT:    vse16.v v8, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <6 x half>, ptr %x
-  %b = call <6 x half> @llvm.floor.v6f16(<6 x half> %a)
-  store <6 x half> %b, ptr %x
-  ret void
-}
-declare <6 x half> @llvm.floor.v6f16(<6 x half>)
-
-define void @floor_v4f32(ptr %x) {
-; CHECK-LABEL: floor_v4f32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vfabs.v v9, v8
-; CHECK-NEXT:    lui a1, 307200
-; CHECK-NEXT:    fmv.w.x fa5, a1
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5
-; CHECK-NEXT:    fsrmi a1, 2
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    vse32.v v8, (a0)
-; CHECK-NEXT:    ret
-  %a = load <4 x float>, ptr %x
-  %b = call <4 x float> @llvm.floor.v4f32(<4 x float> %a)
-  store <4 x float> %b, ptr %x
-  ret void
-}
-declare <4 x float> @llvm.floor.v4f32(<4 x float>)
-
-define void @floor_v2f64(ptr %x) {
-; CHECK-LABEL: floor_v2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    lui a1, %hi(.LCPI126_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI126_0)(a1)
-; CHECK-NEXT:    vfabs.v v9, v8
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5
-; CHECK-NEXT:    fsrmi a1, 2
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    vse64.v v8, (a0)
-; CHECK-NEXT:    ret
-  %a = load <2 x double>, ptr %x
-  %b = call <2 x double> @llvm.floor.v2f64(<2 x double> %a)
-  store <2 x double> %b, ptr %x
-  ret void
-}
-declare <2 x double> @llvm.floor.v2f64(<2 x double>)
-
-define void @round_v8f16(ptr %x) {
-; ZVFH-LABEL: round_v8f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    lui a1, %hi(.LCPI127_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI127_0)(a1)
-; ZVFH-NEXT:    vfabs.v v9, v8
-; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
-; ZVFH-NEXT:    fsrmi a1, 4
-; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; ZVFH-NEXT:    fsrm a1
-; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; ZVFH-NEXT:    vse16.v v8, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: round_v8f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v8, v10
-; ZVFHMIN-NEXT:    lui a1, 307200
-; ZVFHMIN-NEXT:    fmv.w.x fa5, a1
-; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
-; ZVFHMIN-NEXT:    fsrmi a1, 4
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
-; ZVFHMIN-NEXT:    fsrm a1
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
-; ZVFHMIN-NEXT:    vse16.v v8, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <8 x half>, ptr %x
-  %b = call <8 x half> @llvm.round.v8f16(<8 x half> %a)
-  store <8 x half> %b, ptr %x
-  ret void
-}
-declare <8 x half> @llvm.round.v8f16(<8 x half>)
-
-define void @round_v6f16(ptr %x) {
-; ZVFH-LABEL: round_v6f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    lui a1, %hi(.LCPI128_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI128_0)(a1)
-; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFH-NEXT:    vfabs.v v9, v8
-; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
-; ZVFH-NEXT:    fsrmi a1, 4
-; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; ZVFH-NEXT:    fsrm a1
-; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; ZVFH-NEXT:    vse16.v v8, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: round_v6f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v8, v10
-; ZVFHMIN-NEXT:    lui a1, 307200
-; ZVFHMIN-NEXT:    fmv.w.x fa5, a1
-; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
-; ZVFHMIN-NEXT:    fsrmi a1, 4
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
-; ZVFHMIN-NEXT:    fsrm a1
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
-; ZVFHMIN-NEXT:    vse16.v v8, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <6 x half>, ptr %x
-  %b = call <6 x half> @llvm.round.v6f16(<6 x half> %a)
-  store <6 x half> %b, ptr %x
-  ret void
-}
-declare <6 x half> @llvm.round.v6f16(<6 x half>)
-
-define void @round_v4f32(ptr %x) {
-; CHECK-LABEL: round_v4f32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vfabs.v v9, v8
-; CHECK-NEXT:    lui a1, 307200
-; CHECK-NEXT:    fmv.w.x fa5, a1
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5
-; CHECK-NEXT:    fsrmi a1, 4
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    vse32.v v8, (a0)
-; CHECK-NEXT:    ret
-  %a = load <4 x float>, ptr %x
-  %b = call <4 x float> @llvm.round.v4f32(<4 x float> %a)
-  store <4 x float> %b, ptr %x
-  ret void
-}
-declare <4 x float> @llvm.round.v4f32(<4 x float>)
-
-define void @round_v2f64(ptr %x) {
-; CHECK-LABEL: round_v2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    lui a1, %hi(.LCPI130_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI130_0)(a1)
-; CHECK-NEXT:    vfabs.v v9, v8
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5
-; CHECK-NEXT:    fsrmi a1, 4
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    fsrm a1
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    vse64.v v8, (a0)
-; CHECK-NEXT:    ret
-  %a = load <2 x double>, ptr %x
-  %b = call <2 x double> @llvm.round.v2f64(<2 x double> %a)
-  store <2 x double> %b, ptr %x
-  ret void
-}
-declare <2 x double> @llvm.round.v2f64(<2 x double>)
-
-define void @rint_v8f16(ptr %x) {
-; ZVFH-LABEL: rint_v8f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    lui a1, %hi(.LCPI131_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI131_0)(a1)
-; ZVFH-NEXT:    vfabs.v v9, v8
-; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
-; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; ZVFH-NEXT:    vse16.v v8, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: rint_v8f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v8, v10
-; ZVFHMIN-NEXT:    lui a1, 307200
-; ZVFHMIN-NEXT:    fmv.w.x fa5, a1
-; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
-; ZVFHMIN-NEXT:    vse16.v v8, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <8 x half>, ptr %x
-  %b = call <8 x half> @llvm.rint.v8f16(<8 x half> %a)
-  store <8 x half> %b, ptr %x
-  ret void
-}
-declare <8 x half> @llvm.rint.v8f16(<8 x half>)
-
-define void @rint_v4f32(ptr %x) {
-; CHECK-LABEL: rint_v4f32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vfabs.v v9, v8
-; CHECK-NEXT:    lui a1, 307200
-; CHECK-NEXT:    fmv.w.x fa5, a1
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    vse32.v v8, (a0)
-; CHECK-NEXT:    ret
-  %a = load <4 x float>, ptr %x
-  %b = call <4 x float> @llvm.rint.v4f32(<4 x float> %a)
-  store <4 x float> %b, ptr %x
-  ret void
-}
-declare <4 x float> @llvm.rint.v4f32(<4 x float>)
-
-define void @rint_v2f64(ptr %x) {
-; CHECK-LABEL: rint_v2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    lui a1, %hi(.LCPI133_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI133_0)(a1)
-; CHECK-NEXT:    vfabs.v v9, v8
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    vse64.v v8, (a0)
-; CHECK-NEXT:    ret
-  %a = load <2 x double>, ptr %x
-  %b = call <2 x double> @llvm.rint.v2f64(<2 x double> %a)
-  store <2 x double> %b, ptr %x
-  ret void
-}
-declare <2 x double> @llvm.rint.v2f64(<2 x double>)
-
-define void @nearbyint_v8f16(ptr %x) {
-; ZVFH-LABEL: nearbyint_v8f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    lui a1, %hi(.LCPI134_0)
-; ZVFH-NEXT:    flh fa5, %lo(.LCPI134_0)(a1)
-; ZVFH-NEXT:    vfabs.v v9, v8
-; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
-; ZVFH-NEXT:    frflags a1
-; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; ZVFH-NEXT:    fsflags a1
-; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; ZVFH-NEXT:    vse16.v v8, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: nearbyint_v8f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfabs.v v8, v10
-; ZVFHMIN-NEXT:    lui a1, 307200
-; ZVFHMIN-NEXT:    fmv.w.x fa5, a1
-; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
-; ZVFHMIN-NEXT:    frflags a1
-; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
-; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
-; ZVFHMIN-NEXT:    fsflags a1
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
-; ZVFHMIN-NEXT:    vse16.v v8, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <8 x half>, ptr %x
-  %b = call <8 x half> @llvm.nearbyint.v8f16(<8 x half> %a)
-  store <8 x half> %b, ptr %x
-  ret void
-}
-declare <8 x half> @llvm.nearbyint.v8f16(<8 x half>)
-
-define void @nearbyint_v4f32(ptr %x) {
-; CHECK-LABEL: nearbyint_v4f32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vfabs.v v9, v8
-; CHECK-NEXT:    lui a1, 307200
-; CHECK-NEXT:    fmv.w.x fa5, a1
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5
-; CHECK-NEXT:    frflags a1
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    fsflags a1
-; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    vse32.v v8, (a0)
-; CHECK-NEXT:    ret
-  %a = load <4 x float>, ptr %x
-  %b = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %a)
-  store <4 x float> %b, ptr %x
-  ret void
-}
-declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>)
-
-define void @nearbyint_v2f64(ptr %x) {
-; CHECK-LABEL: nearbyint_v2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    lui a1, %hi(.LCPI136_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI136_0)(a1)
-; CHECK-NEXT:    vfabs.v v9, v8
-; CHECK-NEXT:    vmflt.vf v0, v9, fa5
-; CHECK-NEXT:    frflags a1
-; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
-; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT:    fsflags a1
-; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
-; CHECK-NEXT:    vse64.v v8, (a0)
-; CHECK-NEXT:    ret
-  %a = load <2 x double>, ptr %x
-  %b = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %a)
-  store <2 x double> %b, ptr %x
-  ret void
-}
-declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>)
-
-define void @fmuladd_v8f16(ptr %x, ptr %y, ptr %z) {
-; ZVFH-LABEL: fmuladd_v8f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vle16.v v9, (a1)
-; ZVFH-NEXT:    vle16.v v10, (a2)
-; ZVFH-NEXT:    vfmacc.vv v10, v8, v9
-; ZVFH-NEXT:    vse16.v v10, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: fmuladd_v8f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a1)
-; ZVFHMIN-NEXT:    vle16.v v9, (a0)
-; ZVFHMIN-NEXT:    vle16.v v10, (a2)
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v8, v14, v12
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v11, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v8, v8, v12
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <8 x half>, ptr %x
-  %b = load <8 x half>, ptr %y
-  %c = load <8 x half>, ptr %z
-  %d = call <8 x half> @llvm.fmuladd.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c)
-  store <8 x half> %d, ptr %x
-  ret void
-}
-declare <8 x half> @llvm.fmuladd.v8f16(<8 x half>, <8 x half>, <8 x half>)
-
-define void @fmuladd_v6f16(ptr %x, ptr %y, ptr %z) {
-; ZVFH-LABEL: fmuladd_v6f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vle16.v v9, (a1)
-; ZVFH-NEXT:    vle16.v v10, (a2)
-; ZVFH-NEXT:    vfmacc.vv v10, v8, v9
-; ZVFH-NEXT:    vse16.v v10, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: fmuladd_v6f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a1)
-; ZVFHMIN-NEXT:    vle16.v v9, (a0)
-; ZVFHMIN-NEXT:    vle16.v v10, (a2)
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v8, v14, v12
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v11, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfadd.vv v8, v8, v12
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <6 x half>, ptr %x
-  %b = load <6 x half>, ptr %y
-  %c = load <6 x half>, ptr %z
-  %d = call <6 x half> @llvm.fmuladd.v6f16(<6 x half> %a, <6 x half> %b, <6 x half> %c)
-  store <6 x half> %d, ptr %x
-  ret void
-}
-declare <6 x half> @llvm.fmuladd.v6f16(<6 x half>, <6 x half>, <6 x half>)
-
-define void @fmuladd_v4f32(ptr %x, ptr %y, ptr %z) {
-; CHECK-LABEL: fmuladd_v4f32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vle32.v v9, (a1)
-; CHECK-NEXT:    vle32.v v10, (a2)
-; CHECK-NEXT:    vfmacc.vv v10, v8, v9
-; CHECK-NEXT:    vse32.v v10, (a0)
-; CHECK-NEXT:    ret
-  %a = load <4 x float>, ptr %x
-  %b = load <4 x float>, ptr %y
-  %c = load <4 x float>, ptr %z
-  %d = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
-  store <4 x float> %d, ptr %x
-  ret void
-}
-declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>)
-
-define void @fmuladd_v2f64(ptr %x, ptr %y, ptr %z) {
-; CHECK-LABEL: fmuladd_v2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vle64.v v9, (a1)
-; CHECK-NEXT:    vle64.v v10, (a2)
-; CHECK-NEXT:    vfmacc.vv v10, v8, v9
-; CHECK-NEXT:    vse64.v v10, (a0)
-; CHECK-NEXT:    ret
-  %a = load <2 x double>, ptr %x
-  %b = load <2 x double>, ptr %y
-  %c = load <2 x double>, ptr %z
-  %d = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c)
-  store <2 x double> %d, ptr %x
-  ret void
-}
-declare <2 x double> @llvm.fmuladd.v2f64(<2 x double>, <2 x double>, <2 x double>)
-
-define void @fmsub_fmuladd_v8f16(ptr %x, ptr %y, ptr %z) {
-; ZVFH-LABEL: fmsub_fmuladd_v8f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vle16.v v9, (a1)
-; ZVFH-NEXT:    vle16.v v10, (a2)
-; ZVFH-NEXT:    vfmsac.vv v10, v8, v9
-; ZVFH-NEXT:    vse16.v v10, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: fmsub_fmuladd_v8f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a1)
-; ZVFHMIN-NEXT:    vle16.v v9, (a0)
-; ZVFHMIN-NEXT:    vle16.v v10, (a2)
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v8, v14, v12
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v11, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v8, v8, v12
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <8 x half>, ptr %x
-  %b = load <8 x half>, ptr %y
-  %c = load <8 x half>, ptr %z
-  %neg = fneg <8 x half> %c
-  %d = call <8 x half> @llvm.fmuladd.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %neg)
-  store <8 x half> %d, ptr %x
-  ret void
-}
+; RUN: sed 's/TY/float/g;s/N/6/g' %s | llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs | FileCheck %s --check-prefix=V6F32
+; RUN: sed 's/TY/float/g;s/N/6/g' %s | llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck %s --check-prefix=V6F32
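+; <6 x float> exercises a non-power-of-two element count; the V6F32 checks
+; below use a VL of 8 at LMUL=2, i.e. the operations are widened to the
+; containing <8 x float> type.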
 
-define void @fmsub_fmuladd_v6f16(ptr %x, ptr %y, ptr %z) {
-; ZVFH-LABEL: fmsub_fmuladd_v6f16:
-; ZVFH:       # %bb.0:
-; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFH-NEXT:    vle16.v v8, (a0)
-; ZVFH-NEXT:    vle16.v v9, (a1)
-; ZVFH-NEXT:    vle16.v v10, (a2)
-; ZVFH-NEXT:    vfmsac.vv v10, v8, v9
-; ZVFH-NEXT:    vse16.v v10, (a0)
-; ZVFH-NEXT:    ret
-;
-; ZVFHMIN-LABEL: fmsub_fmuladd_v6f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a1)
-; ZVFHMIN-NEXT:    vle16.v v9, (a0)
-; ZVFHMIN-NEXT:    vle16.v v10, (a2)
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfmul.vv v8, v14, v12
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v11, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
-; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; ZVFHMIN-NEXT:    vfsub.vv v8, v8, v12
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
-; ZVFHMIN-NEXT:    vse16.v v10, (a0)
-; ZVFHMIN-NEXT:    ret
-  %a = load <6 x half>, ptr %x
-  %b = load <6 x half>, ptr %y
-  %c = load <6 x half>, ptr %z
-  %neg = fneg <6 x half> %c
-  %d = call <6 x half> @llvm.fmuladd.v6f16(<6 x half> %a, <6 x half> %b, <6 x half> %neg)
-  store <6 x half> %d, ptr %x
-  ret void
-}
+; RUN: sed 's/TY/double/g;s/N/2/g' %s | llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs | FileCheck %s --check-prefix=V2F64
+; RUN: sed 's/TY/double/g;s/N/2/g' %s | llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck %s --check-prefix=V2F64
 
-define void @fnmsub_fmuladd_v4f32(ptr %x, ptr %y, ptr %z) {
-; CHECK-LABEL: fnmsub_fmuladd_v4f32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vle32.v v9, (a1)
-; CHECK-NEXT:    vle32.v v10, (a2)
-; CHECK-NEXT:    vfnmsac.vv v10, v8, v9
-; CHECK-NEXT:    vse32.v v10, (a0)
-; CHECK-NEXT:    ret
-  %a = load <4 x float>, ptr %x
-  %b = load <4 x float>, ptr %y
-  %c = load <4 x float>, ptr %z
-  %neg = fneg <4 x float> %a
-  %d = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %neg, <4 x float> %b, <4 x float> %c)
-  store <4 x float> %d, ptr %x
-  ret void
-}
+; RUN: sed 's/TY/double/g;s/N/3/g' %s | llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs | FileCheck %s --check-prefix=V3F64
+; RUN: sed 's/TY/double/g;s/N/3/g' %s | llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck %s --check-prefix=V3F64
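+
+; The RUN lines above pipe this file through sed to fill in the TY (element
+; type) and N (element count) placeholders before handing the IR to llc;
+; FileCheck still takes its check lines from the unsubstituted file. Under
+; the V3F64 substitution, for example,
+;   define <N x TY> @fadd_vv(<N x TY> %a, <N x TY> %b)
+; becomes
+;   define <3 x double> @fadd_vv(<3 x double> %a, <3 x double> %b)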
 
-define void @fnmadd_fmuladd_v2f64(ptr %x, ptr %y, ptr %z) {
-; CHECK-LABEL: fnmadd_fmuladd_v2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    vle64.v v9, (a1)
-; CHECK-NEXT:    vle64.v v10, (a2)
-; CHECK-NEXT:    vfnmacc.vv v10, v8, v9
-; CHECK-NEXT:    vse64.v v10, (a0)
-; CHECK-NEXT:    ret
-  %a = load <2 x double>, ptr %x
-  %b = load <2 x double>, ptr %y
-  %c = load <2 x double>, ptr %z
-  %neg = fneg <2 x double> %b
-  %neg2 = fneg <2 x double> %c
-  %d = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> %a, <2 x double> %neg, <2 x double> %neg2)
-  store <2 x double> %d, ptr %x
-  ret void
+define <N x TY> @fadd_vv(<N x TY> %a, <N x TY> %b) {
+; V8F16ZVFH-LABEL: fadd_vv:
+; V8F16ZVFH:       # %bb.0:
+; V8F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFH-NEXT:    vfadd.vv v8, v8, v9
+; V8F16ZVFH-NEXT:    ret
+;
+; V8F16ZVFHMIN-LABEL: fadd_vv:
+; V8F16ZVFHMIN:       # %bb.0:
+; V8F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfadd.vv v10, v12, v10
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V8F16ZVFHMIN-NEXT:    ret
+;
+; V6F16ZVFH-LABEL: fadd_vv:
+; V6F16ZVFH:       # %bb.0:
+; V6F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFH-NEXT:    vfadd.vv v8, v8, v9
+; V6F16ZVFH-NEXT:    ret
+;
+; V6F16ZVFHMIN-LABEL: fadd_vv:
+; V6F16ZVFHMIN:       # %bb.0:
+; V6F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfadd.vv v10, v12, v10
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V6F16ZVFHMIN-NEXT:    ret
+;
+; V4F32-LABEL: fadd_vv:
+; V4F32:       # %bb.0:
+; V4F32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; V4F32-NEXT:    vfadd.vv v8, v8, v9
+; V4F32-NEXT:    ret
+;
+; V6F32-LABEL: fadd_vv:
+; V6F32:       # %bb.0:
+; V6F32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; V6F32-NEXT:    vfadd.vv v8, v8, v10
+; V6F32-NEXT:    ret
+;
+; V2F64-LABEL: fadd_vv:
+; V2F64:       # %bb.0:
+; V2F64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; V2F64-NEXT:    vfadd.vv v8, v8, v9
+; V2F64-NEXT:    ret
+;
+; V3F64-LABEL: fadd_vv:
+; V3F64:       # %bb.0:
+; V3F64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; V3F64-NEXT:    vfadd.vv v8, v8, v10
+; V3F64-NEXT:    ret
+  %c = fadd <N x TY> %a, %b
+  ret <N x TY> %c
+}
+
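+; The _vf and _fv variants splat the scalar operand. For legal types this
+; folds into the .vf form of the instruction (a reversed form such as
+; vfrsub.vf or vfrdiv.vf when the scalar is the first operand of a
+; non-commutative op); the zvfhmin configurations splat the value through
+; vmv.v.x instead.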
+define <N x TY> @fadd_vf(<N x TY> %a, TY %b) {
+; V8F16ZVFH-LABEL: fadd_vf:
+; V8F16ZVFH:       # %bb.0:
+; V8F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFH-NEXT:    vfadd.vf v8, v8, fa0
+; V8F16ZVFH-NEXT:    ret
+;
+; V8F16ZVFHMIN-LABEL: fadd_vf:
+; V8F16ZVFHMIN:       # %bb.0:
+; V8F16ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; V8F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vmv.v.x v9, a0
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfadd.vv v10, v10, v12
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V8F16ZVFHMIN-NEXT:    ret
+;
+; V6F16ZVFH-LABEL: fadd_vf:
+; V6F16ZVFH:       # %bb.0:
+; V6F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFH-NEXT:    vfadd.vf v8, v8, fa0
+; V6F16ZVFH-NEXT:    ret
+;
+; V6F16ZVFHMIN-LABEL: fadd_vf:
+; V6F16ZVFHMIN:       # %bb.0:
+; V6F16ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; V6F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vmv.v.x v9, a0
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfadd.vv v10, v10, v12
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V6F16ZVFHMIN-NEXT:    ret
+;
+; V4F32-LABEL: fadd_vf:
+; V4F32:       # %bb.0:
+; V4F32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; V4F32-NEXT:    vfadd.vf v8, v8, fa0
+; V4F32-NEXT:    ret
+;
+; V6F32-LABEL: fadd_vf:
+; V6F32:       # %bb.0:
+; V6F32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; V6F32-NEXT:    vfadd.vf v8, v8, fa0
+; V6F32-NEXT:    ret
+;
+; V2F64-LABEL: fadd_vf:
+; V2F64:       # %bb.0:
+; V2F64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; V2F64-NEXT:    vfadd.vf v8, v8, fa0
+; V2F64-NEXT:    ret
+;
+; V3F64-LABEL: fadd_vf:
+; V3F64:       # %bb.0:
+; V3F64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; V3F64-NEXT:    vfadd.vf v8, v8, fa0
+; V3F64-NEXT:    ret
+  %b.head = insertelement <N x TY> poison, TY %b, i32 0
+  %b.splat = shufflevector <N x TY> %b.head, <N x TY> poison, <N x i32> zeroinitializer
+  %c = fadd <N x TY> %a, %b.splat
+  ret <N x TY> %c
+}
+
+define <N x TY> @fadd_fv(<N x TY> %a, TY %b) {
+; V8F16ZVFH-LABEL: fadd_fv:
+; V8F16ZVFH:       # %bb.0:
+; V8F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFH-NEXT:    vfadd.vf v8, v8, fa0
+; V8F16ZVFH-NEXT:    ret
+;
+; V8F16ZVFHMIN-LABEL: fadd_fv:
+; V8F16ZVFHMIN:       # %bb.0:
+; V8F16ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; V8F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vmv.v.x v9, a0
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfadd.vv v10, v12, v10
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V8F16ZVFHMIN-NEXT:    ret
+;
+; V6F16ZVFH-LABEL: fadd_fv:
+; V6F16ZVFH:       # %bb.0:
+; V6F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFH-NEXT:    vfadd.vf v8, v8, fa0
+; V6F16ZVFH-NEXT:    ret
+;
+; V6F16ZVFHMIN-LABEL: fadd_fv:
+; V6F16ZVFHMIN:       # %bb.0:
+; V6F16ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; V6F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vmv.v.x v9, a0
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfadd.vv v10, v12, v10
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V6F16ZVFHMIN-NEXT:    ret
+;
+; V4F32-LABEL: fadd_fv:
+; V4F32:       # %bb.0:
+; V4F32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; V4F32-NEXT:    vfadd.vf v8, v8, fa0
+; V4F32-NEXT:    ret
+;
+; V6F32-LABEL: fadd_fv:
+; V6F32:       # %bb.0:
+; V6F32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; V6F32-NEXT:    vfadd.vf v8, v8, fa0
+; V6F32-NEXT:    ret
+;
+; V2F64-LABEL: fadd_fv:
+; V2F64:       # %bb.0:
+; V2F64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; V2F64-NEXT:    vfadd.vf v8, v8, fa0
+; V2F64-NEXT:    ret
+;
+; V3F64-LABEL: fadd_fv:
+; V3F64:       # %bb.0:
+; V3F64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; V3F64-NEXT:    vfadd.vf v8, v8, fa0
+; V3F64-NEXT:    ret
+  %b.head = insertelement <N x TY> poison, TY %b, i32 0
+  %b.splat = shufflevector <N x TY> %b.head, <N x TY> poison, <N x i32> zeroinitializer
+  %c = fadd <N x TY> %b.splat, %a
+  ret <N x TY> %c
+}
+
+define <N x TY> @fsub_vv(<N x TY> %a, <N x TY> %b) {
+; V8F16ZVFH-LABEL: fsub_vv:
+; V8F16ZVFH:       # %bb.0:
+; V8F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFH-NEXT:    vfsub.vv v8, v8, v9
+; V8F16ZVFH-NEXT:    ret
+;
+; V8F16ZVFHMIN-LABEL: fsub_vv:
+; V8F16ZVFHMIN:       # %bb.0:
+; V8F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfsub.vv v10, v12, v10
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V8F16ZVFHMIN-NEXT:    ret
+;
+; V6F16ZVFH-LABEL: fsub_vv:
+; V6F16ZVFH:       # %bb.0:
+; V6F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFH-NEXT:    vfsub.vv v8, v8, v9
+; V6F16ZVFH-NEXT:    ret
+;
+; V6F16ZVFHMIN-LABEL: fsub_vv:
+; V6F16ZVFHMIN:       # %bb.0:
+; V6F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfsub.vv v10, v12, v10
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V6F16ZVFHMIN-NEXT:    ret
+;
+; V4F32-LABEL: fsub_vv:
+; V4F32:       # %bb.0:
+; V4F32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; V4F32-NEXT:    vfsub.vv v8, v8, v9
+; V4F32-NEXT:    ret
+;
+; V6F32-LABEL: fsub_vv:
+; V6F32:       # %bb.0:
+; V6F32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; V6F32-NEXT:    vfsub.vv v8, v8, v10
+; V6F32-NEXT:    ret
+;
+; V2F64-LABEL: fsub_vv:
+; V2F64:       # %bb.0:
+; V2F64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; V2F64-NEXT:    vfsub.vv v8, v8, v9
+; V2F64-NEXT:    ret
+;
+; V3F64-LABEL: fsub_vv:
+; V3F64:       # %bb.0:
+; V3F64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; V3F64-NEXT:    vfsub.vv v8, v8, v10
+; V3F64-NEXT:    ret
+  %c = fsub <N x TY> %a, %b
+  ret <N x TY> %c
+}
+
+define <N x TY> @fsub_vf(<N x TY> %a, TY %b) {
+; V8F16ZVFH-LABEL: fsub_vf:
+; V8F16ZVFH:       # %bb.0:
+; V8F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFH-NEXT:    vfsub.vf v8, v8, fa0
+; V8F16ZVFH-NEXT:    ret
+;
+; V8F16ZVFHMIN-LABEL: fsub_vf:
+; V8F16ZVFHMIN:       # %bb.0:
+; V8F16ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; V8F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vmv.v.x v9, a0
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfsub.vv v10, v10, v12
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V8F16ZVFHMIN-NEXT:    ret
+;
+; V6F16ZVFH-LABEL: fsub_vf:
+; V6F16ZVFH:       # %bb.0:
+; V6F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFH-NEXT:    vfsub.vf v8, v8, fa0
+; V6F16ZVFH-NEXT:    ret
+;
+; V6F16ZVFHMIN-LABEL: fsub_vf:
+; V6F16ZVFHMIN:       # %bb.0:
+; V6F16ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; V6F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vmv.v.x v9, a0
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfsub.vv v10, v10, v12
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V6F16ZVFHMIN-NEXT:    ret
+;
+; V4F32-LABEL: fsub_vf:
+; V4F32:       # %bb.0:
+; V4F32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; V4F32-NEXT:    vfsub.vf v8, v8, fa0
+; V4F32-NEXT:    ret
+;
+; V6F32-LABEL: fsub_vf:
+; V6F32:       # %bb.0:
+; V6F32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; V6F32-NEXT:    vfsub.vf v8, v8, fa0
+; V6F32-NEXT:    ret
+;
+; V2F64-LABEL: fsub_vf:
+; V2F64:       # %bb.0:
+; V2F64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; V2F64-NEXT:    vfsub.vf v8, v8, fa0
+; V2F64-NEXT:    ret
+;
+; V3F64-LABEL: fsub_vf:
+; V3F64:       # %bb.0:
+; V3F64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; V3F64-NEXT:    vfsub.vf v8, v8, fa0
+; V3F64-NEXT:    ret
+  %b.head = insertelement <N x TY> poison, TY %b, i32 0
+  %b.splat = shufflevector <N x TY> %b.head, <N x TY> poison, <N x i32> zeroinitializer
+  %c = fsub <N x TY> %a, %b.splat
+  ret <N x TY> %c
+}
+
+define <N x TY> @fsub_fv(<N x TY> %a, TY %b) {
+; V8F16ZVFH-LABEL: fsub_fv:
+; V8F16ZVFH:       # %bb.0:
+; V8F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFH-NEXT:    vfrsub.vf v8, v8, fa0
+; V8F16ZVFH-NEXT:    ret
+;
+; V8F16ZVFHMIN-LABEL: fsub_fv:
+; V8F16ZVFHMIN:       # %bb.0:
+; V8F16ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; V8F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vmv.v.x v9, a0
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfsub.vv v10, v12, v10
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V8F16ZVFHMIN-NEXT:    ret
+;
+; V6F16ZVFH-LABEL: fsub_fv:
+; V6F16ZVFH:       # %bb.0:
+; V6F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFH-NEXT:    vfrsub.vf v8, v8, fa0
+; V6F16ZVFH-NEXT:    ret
+;
+; V6F16ZVFHMIN-LABEL: fsub_fv:
+; V6F16ZVFHMIN:       # %bb.0:
+; V6F16ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; V6F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vmv.v.x v9, a0
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfsub.vv v10, v12, v10
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V6F16ZVFHMIN-NEXT:    ret
+;
+; V4F32-LABEL: fsub_fv:
+; V4F32:       # %bb.0:
+; V4F32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; V4F32-NEXT:    vfrsub.vf v8, v8, fa0
+; V4F32-NEXT:    ret
+;
+; V6F32-LABEL: fsub_fv:
+; V6F32:       # %bb.0:
+; V6F32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; V6F32-NEXT:    vfrsub.vf v8, v8, fa0
+; V6F32-NEXT:    ret
+;
+; V2F64-LABEL: fsub_fv:
+; V2F64:       # %bb.0:
+; V2F64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; V2F64-NEXT:    vfrsub.vf v8, v8, fa0
+; V2F64-NEXT:    ret
+;
+; V3F64-LABEL: fsub_fv:
+; V3F64:       # %bb.0:
+; V3F64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; V3F64-NEXT:    vfrsub.vf v8, v8, fa0
+; V3F64-NEXT:    ret
+  %b.head = insertelement <N x TY> poison, TY %b, i32 0
+  %b.splat = shufflevector <N x TY> %b.head, <N x TY> poison, <N x i32> zeroinitializer
+  %c = fsub <N x TY> %b.splat, %a
+  ret <N x TY> %c
+}
+
+define <N x TY> @fmul_vv(<N x TY> %a, <N x TY> %b) {
+; V8F16ZVFH-LABEL: fmul_vv:
+; V8F16ZVFH:       # %bb.0:
+; V8F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFH-NEXT:    vfmul.vv v8, v8, v9
+; V8F16ZVFH-NEXT:    ret
+;
+; V8F16ZVFHMIN-LABEL: fmul_vv:
+; V8F16ZVFHMIN:       # %bb.0:
+; V8F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfmul.vv v10, v12, v10
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V8F16ZVFHMIN-NEXT:    ret
+;
+; V6F16ZVFH-LABEL: fmul_vv:
+; V6F16ZVFH:       # %bb.0:
+; V6F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFH-NEXT:    vfmul.vv v8, v8, v9
+; V6F16ZVFH-NEXT:    ret
+;
+; V6F16ZVFHMIN-LABEL: fmul_vv:
+; V6F16ZVFHMIN:       # %bb.0:
+; V6F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfmul.vv v10, v12, v10
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V6F16ZVFHMIN-NEXT:    ret
+;
+; V4F32-LABEL: fmul_vv:
+; V4F32:       # %bb.0:
+; V4F32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; V4F32-NEXT:    vfmul.vv v8, v8, v9
+; V4F32-NEXT:    ret
+;
+; V6F32-LABEL: fmul_vv:
+; V6F32:       # %bb.0:
+; V6F32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; V6F32-NEXT:    vfmul.vv v8, v8, v10
+; V6F32-NEXT:    ret
+;
+; V2F64-LABEL: fmul_vv:
+; V2F64:       # %bb.0:
+; V2F64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; V2F64-NEXT:    vfmul.vv v8, v8, v9
+; V2F64-NEXT:    ret
+;
+; V3F64-LABEL: fmul_vv:
+; V3F64:       # %bb.0:
+; V3F64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; V3F64-NEXT:    vfmul.vv v8, v8, v10
+; V3F64-NEXT:    ret
+  %c = fmul <N x TY> %a, %b
+  ret <N x TY> %c
+}
+
+define <N x TY> @fmul_vf(<N x TY> %a, TY %b) {
+; V8F16ZVFH-LABEL: fmul_vf:
+; V8F16ZVFH:       # %bb.0:
+; V8F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFH-NEXT:    vfmul.vf v8, v8, fa0
+; V8F16ZVFH-NEXT:    ret
+;
+; V8F16ZVFHMIN-LABEL: fmul_vf:
+; V8F16ZVFHMIN:       # %bb.0:
+; V8F16ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; V8F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vmv.v.x v9, a0
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfmul.vv v10, v10, v12
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V8F16ZVFHMIN-NEXT:    ret
+;
+; V6F16ZVFH-LABEL: fmul_vf:
+; V6F16ZVFH:       # %bb.0:
+; V6F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFH-NEXT:    vfmul.vf v8, v8, fa0
+; V6F16ZVFH-NEXT:    ret
+;
+; V6F16ZVFHMIN-LABEL: fmul_vf:
+; V6F16ZVFHMIN:       # %bb.0:
+; V6F16ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; V6F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vmv.v.x v9, a0
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfmul.vv v10, v10, v12
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V6F16ZVFHMIN-NEXT:    ret
+;
+; V4F32-LABEL: fmul_vf:
+; V4F32:       # %bb.0:
+; V4F32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; V4F32-NEXT:    vfmul.vf v8, v8, fa0
+; V4F32-NEXT:    ret
+;
+; V6F32-LABEL: fmul_vf:
+; V6F32:       # %bb.0:
+; V6F32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; V6F32-NEXT:    vfmul.vf v8, v8, fa0
+; V6F32-NEXT:    ret
+;
+; V2F64-LABEL: fmul_vf:
+; V2F64:       # %bb.0:
+; V2F64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; V2F64-NEXT:    vfmul.vf v8, v8, fa0
+; V2F64-NEXT:    ret
+;
+; V3F64-LABEL: fmul_vf:
+; V3F64:       # %bb.0:
+; V3F64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; V3F64-NEXT:    vfmul.vf v8, v8, fa0
+; V3F64-NEXT:    ret
+  %b.head = insertelement <N x TY> poison, TY %b, i32 0
+  %b.splat = shufflevector <N x TY> %b.head, <N x TY> poison, <N x i32> zeroinitializer
+  %c = fmul <N x TY> %a, %b.splat
+  ret <N x TY> %c
+}
+
+define <N x TY> @fmul_fv(<N x TY> %a, TY %b) {
+; V8F16ZVFH-LABEL: fmul_fv:
+; V8F16ZVFH:       # %bb.0:
+; V8F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFH-NEXT:    vfmul.vf v8, v8, fa0
+; V8F16ZVFH-NEXT:    ret
+;
+; V8F16ZVFHMIN-LABEL: fmul_fv:
+; V8F16ZVFHMIN:       # %bb.0:
+; V8F16ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; V8F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vmv.v.x v9, a0
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfmul.vv v10, v12, v10
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V8F16ZVFHMIN-NEXT:    ret
+;
+; V6F16ZVFH-LABEL: fmul_fv:
+; V6F16ZVFH:       # %bb.0:
+; V6F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFH-NEXT:    vfmul.vf v8, v8, fa0
+; V6F16ZVFH-NEXT:    ret
+;
+; V6F16ZVFHMIN-LABEL: fmul_fv:
+; V6F16ZVFHMIN:       # %bb.0:
+; V6F16ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; V6F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vmv.v.x v9, a0
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfmul.vv v10, v12, v10
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V6F16ZVFHMIN-NEXT:    ret
+;
+; V4F32-LABEL: fmul_fv:
+; V4F32:       # %bb.0:
+; V4F32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; V4F32-NEXT:    vfmul.vf v8, v8, fa0
+; V4F32-NEXT:    ret
+;
+; V6F32-LABEL: fmul_fv:
+; V6F32:       # %bb.0:
+; V6F32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; V6F32-NEXT:    vfmul.vf v8, v8, fa0
+; V6F32-NEXT:    ret
+;
+; V2F64-LABEL: fmul_fv:
+; V2F64:       # %bb.0:
+; V2F64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; V2F64-NEXT:    vfmul.vf v8, v8, fa0
+; V2F64-NEXT:    ret
+;
+; V3F64-LABEL: fmul_fv:
+; V3F64:       # %bb.0:
+; V3F64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; V3F64-NEXT:    vfmul.vf v8, v8, fa0
+; V3F64-NEXT:    ret
+  %b.head = insertelement <N x TY> poison, TY %b, i32 0
+  %b.splat = shufflevector <N x TY> %b.head, <N x TY> poison, <N x i32> zeroinitializer
+  %c = fmul <N x TY> %b.splat, %a
+  ret <N x TY> %c
+}
+
+define <N x TY> @fdiv_vv(<N x TY> %a, <N x TY> %b) {
+; V8F16ZVFH-LABEL: fdiv_vv:
+; V8F16ZVFH:       # %bb.0:
+; V8F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFH-NEXT:    vfdiv.vv v8, v8, v9
+; V8F16ZVFH-NEXT:    ret
+;
+; V8F16ZVFHMIN-LABEL: fdiv_vv:
+; V8F16ZVFHMIN:       # %bb.0:
+; V8F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfdiv.vv v10, v12, v10
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V8F16ZVFHMIN-NEXT:    ret
+;
+; V6F16ZVFH-LABEL: fdiv_vv:
+; V6F16ZVFH:       # %bb.0:
+; V6F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFH-NEXT:    vfdiv.vv v8, v8, v9
+; V6F16ZVFH-NEXT:    ret
+;
+; V6F16ZVFHMIN-LABEL: fdiv_vv:
+; V6F16ZVFHMIN:       # %bb.0:
+; V6F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfdiv.vv v10, v12, v10
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V6F16ZVFHMIN-NEXT:    ret
+;
+; V4F32-LABEL: fdiv_vv:
+; V4F32:       # %bb.0:
+; V4F32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; V4F32-NEXT:    vfdiv.vv v8, v8, v9
+; V4F32-NEXT:    ret
+;
+; V6F32-LABEL: fdiv_vv:
+; V6F32:       # %bb.0:
+; V6F32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; V6F32-NEXT:    vfdiv.vv v8, v8, v10
+; V6F32-NEXT:    ret
+;
+; V2F64-LABEL: fdiv_vv:
+; V2F64:       # %bb.0:
+; V2F64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; V2F64-NEXT:    vfdiv.vv v8, v8, v9
+; V2F64-NEXT:    ret
+;
+; V3F64-LABEL: fdiv_vv:
+; V3F64:       # %bb.0:
+; V3F64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; V3F64-NEXT:    vfdiv.vv v8, v8, v10
+; V3F64-NEXT:    ret
+  %c = fdiv <N x TY> %a, %b
+  ret <N x TY> %c
+}
+
+define <N x TY> @fdiv_vf(<N x TY> %a, TY %b) {
+; V8F16ZVFH-LABEL: fdiv_vf:
+; V8F16ZVFH:       # %bb.0:
+; V8F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFH-NEXT:    vfdiv.vf v8, v8, fa0
+; V8F16ZVFH-NEXT:    ret
+;
+; V8F16ZVFHMIN-LABEL: fdiv_vf:
+; V8F16ZVFHMIN:       # %bb.0:
+; V8F16ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; V8F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vmv.v.x v9, a0
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfdiv.vv v10, v10, v12
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V8F16ZVFHMIN-NEXT:    ret
+;
+; V6F16ZVFH-LABEL: fdiv_vf:
+; V6F16ZVFH:       # %bb.0:
+; V6F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFH-NEXT:    vfdiv.vf v8, v8, fa0
+; V6F16ZVFH-NEXT:    ret
+;
+; V6F16ZVFHMIN-LABEL: fdiv_vf:
+; V6F16ZVFHMIN:       # %bb.0:
+; V6F16ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; V6F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vmv.v.x v9, a0
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfdiv.vv v10, v10, v12
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V6F16ZVFHMIN-NEXT:    ret
+;
+; V4F32-LABEL: fdiv_vf:
+; V4F32:       # %bb.0:
+; V4F32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; V4F32-NEXT:    vfdiv.vf v8, v8, fa0
+; V4F32-NEXT:    ret
+;
+; V6F32-LABEL: fdiv_vf:
+; V6F32:       # %bb.0:
+; V6F32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; V6F32-NEXT:    vfdiv.vf v8, v8, fa0
+; V6F32-NEXT:    ret
+;
+; V2F64-LABEL: fdiv_vf:
+; V2F64:       # %bb.0:
+; V2F64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; V2F64-NEXT:    vfdiv.vf v8, v8, fa0
+; V2F64-NEXT:    ret
+;
+; V3F64-LABEL: fdiv_vf:
+; V3F64:       # %bb.0:
+; V3F64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; V3F64-NEXT:    vfdiv.vf v8, v8, fa0
+; V3F64-NEXT:    ret
+  %b.head = insertelement <N x TY> poison, TY %b, i32 0
+  %b.splat = shufflevector <N x TY> %b.head, <N x TY> poison, <N x i32> zeroinitializer
+  %c = fdiv <N x TY> %a, %b.splat
+  ret <N x TY> %c
+}
+
+define <N x TY> @fdiv_fv(<N x TY> %a, TY %b) {
+; V8F16ZVFH-LABEL: fdiv_fv:
+; V8F16ZVFH:       # %bb.0:
+; V8F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFH-NEXT:    vfrdiv.vf v8, v8, fa0
+; V8F16ZVFH-NEXT:    ret
+;
+; V8F16ZVFHMIN-LABEL: fdiv_fv:
+; V8F16ZVFHMIN:       # %bb.0:
+; V8F16ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; V8F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vmv.v.x v9, a0
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfdiv.vv v10, v12, v10
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V8F16ZVFHMIN-NEXT:    ret
+;
+; V6F16ZVFH-LABEL: fdiv_fv:
+; V6F16ZVFH:       # %bb.0:
+; V6F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFH-NEXT:    vfrdiv.vf v8, v8, fa0
+; V6F16ZVFH-NEXT:    ret
+;
+; V6F16ZVFHMIN-LABEL: fdiv_fv:
+; V6F16ZVFHMIN:       # %bb.0:
+; V6F16ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; V6F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vmv.v.x v9, a0
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfdiv.vv v10, v12, v10
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V6F16ZVFHMIN-NEXT:    ret
+;
+; V4F32-LABEL: fdiv_fv:
+; V4F32:       # %bb.0:
+; V4F32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; V4F32-NEXT:    vfrdiv.vf v8, v8, fa0
+; V4F32-NEXT:    ret
+;
+; V6F32-LABEL: fdiv_fv:
+; V6F32:       # %bb.0:
+; V6F32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; V6F32-NEXT:    vfrdiv.vf v8, v8, fa0
+; V6F32-NEXT:    ret
+;
+; V2F64-LABEL: fdiv_fv:
+; V2F64:       # %bb.0:
+; V2F64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; V2F64-NEXT:    vfrdiv.vf v8, v8, fa0
+; V2F64-NEXT:    ret
+;
+; V3F64-LABEL: fdiv_fv:
+; V3F64:       # %bb.0:
+; V3F64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; V3F64-NEXT:    vfrdiv.vf v8, v8, fa0
+; V3F64-NEXT:    ret
+  %b.head = insertelement <N x TY> poison, TY %b, i32 0
+  %b.splat = shufflevector <N x TY> %b.head, <N x TY> poison, <N x i32> zeroinitializer
+  %c = fdiv <N x TY> %b.splat, %a
+  ret <N x TY> %c
+}
+
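+; Without zvfh the f16 sign-bit operations are lowered with integer ops:
+; fneg flips bit 15 with vxor (lui a0, 8 materializes 0x8000), fabs clears
+; it with vand against 0x7fff, and copysign recombines the two masks with
+; vand/vor.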
+define <N x TY> @fneg(<N x TY> %a) {
+; V8F16ZVFH-LABEL: fneg:
+; V8F16ZVFH:       # %bb.0:
+; V8F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFH-NEXT:    vfneg.v v8, v8
+; V8F16ZVFH-NEXT:    ret
+;
+; V8F16ZVFHMIN-LABEL: fneg:
+; V8F16ZVFHMIN:       # %bb.0:
+; V8F16ZVFHMIN-NEXT:    lui a0, 8
+; V8F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vxor.vx v8, v8, a0
+; V8F16ZVFHMIN-NEXT:    ret
+;
+; V6F16ZVFH-LABEL: fneg:
+; V6F16ZVFH:       # %bb.0:
+; V6F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFH-NEXT:    vfneg.v v8, v8
+; V6F16ZVFH-NEXT:    ret
+;
+; V6F16ZVFHMIN-LABEL: fneg:
+; V6F16ZVFHMIN:       # %bb.0:
+; V6F16ZVFHMIN-NEXT:    lui a0, 8
+; V6F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vxor.vx v8, v8, a0
+; V6F16ZVFHMIN-NEXT:    ret
+;
+; V4F32-LABEL: fneg:
+; V4F32:       # %bb.0:
+; V4F32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; V4F32-NEXT:    vfneg.v v8, v8
+; V4F32-NEXT:    ret
+;
+; V6F32-LABEL: fneg:
+; V6F32:       # %bb.0:
+; V6F32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; V6F32-NEXT:    vfneg.v v8, v8
+; V6F32-NEXT:    ret
+;
+; V2F64-LABEL: fneg:
+; V2F64:       # %bb.0:
+; V2F64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; V2F64-NEXT:    vfneg.v v8, v8
+; V2F64-NEXT:    ret
+;
+; V3F64-LABEL: fneg:
+; V3F64:       # %bb.0:
+; V3F64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; V3F64-NEXT:    vfneg.v v8, v8
+; V3F64-NEXT:    ret
+  %b = fneg <N x TY> %a
+  ret <N x TY> %b
+}
+
+define <N x TY> @fabs(<N x TY> %a) {
+; V8F16ZVFH-LABEL: fabs:
+; V8F16ZVFH:       # %bb.0:
+; V8F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFH-NEXT:    vfabs.v v8, v8
+; V8F16ZVFH-NEXT:    ret
+;
+; V8F16ZVFHMIN-LABEL: fabs:
+; V8F16ZVFHMIN:       # %bb.0:
+; V8F16ZVFHMIN-NEXT:    lui a0, 8
+; V8F16ZVFHMIN-NEXT:    addi a0, a0, -1
+; V8F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vand.vx v8, v8, a0
+; V8F16ZVFHMIN-NEXT:    ret
+;
+; V6F16ZVFH-LABEL: fabs:
+; V6F16ZVFH:       # %bb.0:
+; V6F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFH-NEXT:    vfabs.v v8, v8
+; V6F16ZVFH-NEXT:    ret
+;
+; V6F16ZVFHMIN-LABEL: fabs:
+; V6F16ZVFHMIN:       # %bb.0:
+; V6F16ZVFHMIN-NEXT:    lui a0, 8
+; V6F16ZVFHMIN-NEXT:    addi a0, a0, -1
+; V6F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vand.vx v8, v8, a0
+; V6F16ZVFHMIN-NEXT:    ret
+;
+; V4F32-LABEL: fabs:
+; V4F32:       # %bb.0:
+; V4F32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; V4F32-NEXT:    vfabs.v v8, v8
+; V4F32-NEXT:    ret
+;
+; V6F32-LABEL: fabs:
+; V6F32:       # %bb.0:
+; V6F32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; V6F32-NEXT:    vfabs.v v8, v8
+; V6F32-NEXT:    ret
+;
+; V2F64-LABEL: fabs:
+; V2F64:       # %bb.0:
+; V2F64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; V2F64-NEXT:    vfabs.v v8, v8
+; V2F64-NEXT:    ret
+;
+; V3F64-LABEL: fabs:
+; V3F64:       # %bb.0:
+; V3F64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; V3F64-NEXT:    vfabs.v v8, v8
+; V3F64-NEXT:    ret
+  %b = call <N x TY> @llvm.fabs(<N x TY> %a)
+  ret <N x TY> %b
+}
+
+define <N x TY> @copysign_vv(<N x TY> %a, <N x TY> %b) {
+; V8F16ZVFH-LABEL: copysign_vv:
+; V8F16ZVFH:       # %bb.0:
+; V8F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFH-NEXT:    vfsgnj.vv v8, v8, v9
+; V8F16ZVFH-NEXT:    ret
+;
+; V8F16ZVFHMIN-LABEL: copysign_vv:
+; V8F16ZVFHMIN:       # %bb.0:
+; V8F16ZVFHMIN-NEXT:    lui a0, 8
+; V8F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vand.vx v9, v9, a0
+; V8F16ZVFHMIN-NEXT:    addi a0, a0, -1
+; V8F16ZVFHMIN-NEXT:    vand.vx v8, v8, a0
+; V8F16ZVFHMIN-NEXT:    vor.vv v8, v8, v9
+; V8F16ZVFHMIN-NEXT:    ret
+;
+; V6F16ZVFH-LABEL: copysign_vv:
+; V6F16ZVFH:       # %bb.0:
+; V6F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFH-NEXT:    vfsgnj.vv v8, v8, v9
+; V6F16ZVFH-NEXT:    ret
+;
+; V6F16ZVFHMIN-LABEL: copysign_vv:
+; V6F16ZVFHMIN:       # %bb.0:
+; V6F16ZVFHMIN-NEXT:    lui a0, 8
+; V6F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vand.vx v9, v9, a0
+; V6F16ZVFHMIN-NEXT:    addi a0, a0, -1
+; V6F16ZVFHMIN-NEXT:    vand.vx v8, v8, a0
+; V6F16ZVFHMIN-NEXT:    vor.vv v8, v8, v9
+; V6F16ZVFHMIN-NEXT:    ret
+;
+; V4F32-LABEL: copysign_vv:
+; V4F32:       # %bb.0:
+; V4F32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; V4F32-NEXT:    vfsgnj.vv v8, v8, v9
+; V4F32-NEXT:    ret
+;
+; V6F32-LABEL: copysign_vv:
+; V6F32:       # %bb.0:
+; V6F32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; V6F32-NEXT:    vfsgnj.vv v8, v8, v10
+; V6F32-NEXT:    ret
+;
+; V2F64-LABEL: copysign_vv:
+; V2F64:       # %bb.0:
+; V2F64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; V2F64-NEXT:    vfsgnj.vv v8, v8, v9
+; V2F64-NEXT:    ret
+;
+; V3F64-LABEL: copysign_vv:
+; V3F64:       # %bb.0:
+; V3F64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; V3F64-NEXT:    vfsgnj.vv v8, v8, v10
+; V3F64-NEXT:    ret
+  %c = call <N x TY> @llvm.copysign(<N x TY> %a, <N x TY> %b)
+  ret <N x TY> %c
+}
+
+define <N x TY> @copysign_vf(<N x TY> %a, TY %b) {
+; V8F16ZVFH-LABEL: copysign_vf:
+; V8F16ZVFH:       # %bb.0:
+; V8F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFH-NEXT:    vfsgnj.vf v8, v8, fa0
+; V8F16ZVFH-NEXT:    ret
+;
+; V8F16ZVFHMIN-LABEL: copysign_vf:
+; V8F16ZVFHMIN:       # %bb.0:
+; V8F16ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; V8F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vmv.v.x v9, a0
+; V8F16ZVFHMIN-NEXT:    lui a0, 8
+; V8F16ZVFHMIN-NEXT:    addi a1, a0, -1
+; V8F16ZVFHMIN-NEXT:    vand.vx v8, v8, a1
+; V8F16ZVFHMIN-NEXT:    vand.vx v9, v9, a0
+; V8F16ZVFHMIN-NEXT:    vor.vv v8, v8, v9
+; V8F16ZVFHMIN-NEXT:    ret
+;
+; V6F16ZVFH-LABEL: copysign_vf:
+; V6F16ZVFH:       # %bb.0:
+; V6F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFH-NEXT:    vfsgnj.vf v8, v8, fa0
+; V6F16ZVFH-NEXT:    ret
+;
+; V6F16ZVFHMIN-LABEL: copysign_vf:
+; V6F16ZVFHMIN:       # %bb.0:
+; V6F16ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; V6F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vmv.v.x v9, a0
+; V6F16ZVFHMIN-NEXT:    lui a0, 8
+; V6F16ZVFHMIN-NEXT:    addi a1, a0, -1
+; V6F16ZVFHMIN-NEXT:    vand.vx v8, v8, a1
+; V6F16ZVFHMIN-NEXT:    vand.vx v9, v9, a0
+; V6F16ZVFHMIN-NEXT:    vor.vv v8, v8, v9
+; V6F16ZVFHMIN-NEXT:    ret
+;
+; V4F32-LABEL: copysign_vf:
+; V4F32:       # %bb.0:
+; V4F32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; V4F32-NEXT:    vfsgnj.vf v8, v8, fa0
+; V4F32-NEXT:    ret
+;
+; V6F32-LABEL: copysign_vf:
+; V6F32:       # %bb.0:
+; V6F32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; V6F32-NEXT:    vfsgnj.vf v8, v8, fa0
+; V6F32-NEXT:    ret
+;
+; V2F64-LABEL: copysign_vf:
+; V2F64:       # %bb.0:
+; V2F64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; V2F64-NEXT:    vfsgnj.vf v8, v8, fa0
+; V2F64-NEXT:    ret
+;
+; V3F64-LABEL: copysign_vf:
+; V3F64:       # %bb.0:
+; V3F64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; V3F64-NEXT:    vfsgnj.vf v8, v8, fa0
+; V3F64-NEXT:    ret
+  %b.head = insertelement <N x TY> poison, TY %b, i32 0
+  %b.splat = shufflevector <N x TY> %b.head, <N x TY> poison, <N x i32> zeroinitializer
+  %c = call <N x TY> @llvm.copysign(<N x TY> %a, <N x TY> %b.splat)
+  ret <N x TY> %c
+}
+
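+; copysign of a negated operand folds the fneg into vfsgnjn under zvfh.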
+define <N x TY> @copysign_neg(<N x TY> %a, <N x TY> %b) {
+; V8F16ZVFH-LABEL: copysign_neg:
+; V8F16ZVFH:       # %bb.0:
+; V8F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFH-NEXT:    vfsgnjn.vv v8, v8, v9
+; V8F16ZVFH-NEXT:    ret
+;
+; V8F16ZVFHMIN-LABEL: copysign_neg:
+; V8F16ZVFHMIN:       # %bb.0:
+; V8F16ZVFHMIN-NEXT:    lui a0, 8
+; V8F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vxor.vx v9, v9, a0
+; V8F16ZVFHMIN-NEXT:    vand.vx v9, v9, a0
+; V8F16ZVFHMIN-NEXT:    addi a0, a0, -1
+; V8F16ZVFHMIN-NEXT:    vand.vx v8, v8, a0
+; V8F16ZVFHMIN-NEXT:    vor.vv v8, v8, v9
+; V8F16ZVFHMIN-NEXT:    ret
+;
+; V6F16ZVFH-LABEL: copysign_neg:
+; V6F16ZVFH:       # %bb.0:
+; V6F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFH-NEXT:    vfsgnjn.vv v8, v8, v9
+; V6F16ZVFH-NEXT:    ret
+;
+; V6F16ZVFHMIN-LABEL: copysign_neg:
+; V6F16ZVFHMIN:       # %bb.0:
+; V6F16ZVFHMIN-NEXT:    lui a0, 8
+; V6F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vxor.vx v9, v9, a0
+; V6F16ZVFHMIN-NEXT:    vand.vx v9, v9, a0
+; V6F16ZVFHMIN-NEXT:    addi a0, a0, -1
+; V6F16ZVFHMIN-NEXT:    vand.vx v8, v8, a0
+; V6F16ZVFHMIN-NEXT:    vor.vv v8, v8, v9
+; V6F16ZVFHMIN-NEXT:    ret
+;
+; V4F32-LABEL: copysign_neg:
+; V4F32:       # %bb.0:
+; V4F32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; V4F32-NEXT:    vfsgnjn.vv v8, v8, v9
+; V4F32-NEXT:    ret
+;
+; V6F32-LABEL: copysign_neg:
+; V6F32:       # %bb.0:
+; V6F32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; V6F32-NEXT:    vfsgnjn.vv v8, v8, v10
+; V6F32-NEXT:    ret
+;
+; V2F64-LABEL: copysign_neg:
+; V2F64:       # %bb.0:
+; V2F64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; V2F64-NEXT:    vfsgnjn.vv v8, v8, v9
+; V2F64-NEXT:    ret
+;
+; V3F64-LABEL: copysign_neg:
+; V3F64:       # %bb.0:
+; V3F64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; V3F64-NEXT:    vfsgnjn.vv v8, v8, v10
+; V3F64-NEXT:    ret
+  %c = fneg <N x TY> %b
+  %d = call <N x TY> @llvm.copysign(<N x TY> %a, <N x TY> %c)
+  ret <N x TY> %d
+}
+
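+; zvfhmin has no f16 vfsqrt.v, so the f16 case widens to f32, takes the
+; square root, and narrows the result back.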
+define <N x TY> @sqrt(<N x TY> %a) {
+; V8F16ZVFH-LABEL: sqrt:
+; V8F16ZVFH:       # %bb.0:
+; V8F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFH-NEXT:    vfsqrt.v v8, v8
+; V8F16ZVFH-NEXT:    ret
+;
+; V8F16ZVFHMIN-LABEL: sqrt:
+; V8F16ZVFHMIN:       # %bb.0:
+; V8F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfsqrt.v v10, v10
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V8F16ZVFHMIN-NEXT:    ret
+;
+; V6F16ZVFH-LABEL: sqrt:
+; V6F16ZVFH:       # %bb.0:
+; V6F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFH-NEXT:    vfsqrt.v v8, v8
+; V6F16ZVFH-NEXT:    ret
+;
+; V6F16ZVFHMIN-LABEL: sqrt:
+; V6F16ZVFHMIN:       # %bb.0:
+; V6F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfsqrt.v v10, v10
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V6F16ZVFHMIN-NEXT:    ret
+;
+; V4F32-LABEL: sqrt:
+; V4F32:       # %bb.0:
+; V4F32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; V4F32-NEXT:    vfsqrt.v v8, v8
+; V4F32-NEXT:    ret
+;
+; V6F32-LABEL: sqrt:
+; V6F32:       # %bb.0:
+; V6F32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; V6F32-NEXT:    vfsqrt.v v8, v8
+; V6F32-NEXT:    ret
+;
+; V2F64-LABEL: sqrt:
+; V2F64:       # %bb.0:
+; V2F64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; V2F64-NEXT:    vfsqrt.v v8, v8
+; V2F64-NEXT:    ret
+;
+; V3F64-LABEL: sqrt:
+; V3F64:       # %bb.0:
+; V3F64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; V3F64-NEXT:    vfsqrt.v v8, v8
+; V3F64-NEXT:    ret
+  %b = call <N x TY> @llvm.sqrt(<N x TY> %a)
+  ret <N x TY> %b
+}
+
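+; FMA tests. vfmacc accumulates into the destination: the addend %a sits in
+; v8, so fma(%c, %b, %a) selects vfmacc rather than vfmadd.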
+define <N x TY> @vfmacc_vv(<N x TY> %a, <N x TY> %b, <N x TY> %c) {
+; V8F16ZVFH-LABEL: vfmacc_vv:
+; V8F16ZVFH:       # %bb.0:
+; V8F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFH-NEXT:    vfmacc.vv v8, v10, v9
+; V8F16ZVFH-NEXT:    ret
+;
+; V8F16ZVFHMIN-LABEL: vfmacc_vv:
+; V8F16ZVFHMIN:       # %bb.0:
+; V8F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v10
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfmadd.vv v10, v14, v12
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V8F16ZVFHMIN-NEXT:    ret
+;
+; V6F16ZVFH-LABEL: vfmacc_vv:
+; V6F16ZVFH:       # %bb.0:
+; V6F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFH-NEXT:    vfmacc.vv v8, v10, v9
+; V6F16ZVFH-NEXT:    ret
+;
+; V6F16ZVFHMIN-LABEL: vfmacc_vv:
+; V6F16ZVFHMIN:       # %bb.0:
+; V6F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v10
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfmadd.vv v10, v14, v12
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V6F16ZVFHMIN-NEXT:    ret
+;
+; V4F32-LABEL: vfmacc_vv:
+; V4F32:       # %bb.0:
+; V4F32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; V4F32-NEXT:    vfmacc.vv v8, v10, v9
+; V4F32-NEXT:    ret
+;
+; V6F32-LABEL: vfmacc_vv:
+; V6F32:       # %bb.0:
+; V6F32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; V6F32-NEXT:    vfmacc.vv v8, v12, v10
+; V6F32-NEXT:    ret
+;
+; V2F64-LABEL: vfmacc_vv:
+; V2F64:       # %bb.0:
+; V2F64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; V2F64-NEXT:    vfmacc.vv v8, v10, v9
+; V2F64-NEXT:    ret
+;
+; V3F64-LABEL: vfmacc_vv:
+; V3F64:       # %bb.0:
+; V3F64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; V3F64-NEXT:    vfmacc.vv v8, v12, v10
+; V3F64-NEXT:    ret
+  %d = call <N x TY> @llvm.fma(<N x TY> %c, <N x TY> %b, <N x TY> %a)
+  ret <N x TY> %d
+}
+
+define <N x TY> @vfmacc_vf(<N x TY> %a, TY %b, <N x TY> %c) {
+; V8F16ZVFH-LABEL: vfmacc_vf:
+; V8F16ZVFH:       # %bb.0:
+; V8F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFH-NEXT:    vfmacc.vf v8, fa0, v9
+; V8F16ZVFH-NEXT:    ret
+;
+; V8F16ZVFHMIN-LABEL: vfmacc_vf:
+; V8F16ZVFHMIN:       # %bb.0:
+; V8F16ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; V8F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfmadd.vv v16, v14, v12
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; V8F16ZVFHMIN-NEXT:    ret
+;
+; V6F16ZVFH-LABEL: vfmacc_vf:
+; V6F16ZVFH:       # %bb.0:
+; V6F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFH-NEXT:    vfmacc.vf v8, fa0, v9
+; V6F16ZVFH-NEXT:    ret
+;
+; V6F16ZVFHMIN-LABEL: vfmacc_vf:
+; V6F16ZVFHMIN:       # %bb.0:
+; V6F16ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; V6F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfmadd.vv v16, v14, v12
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; V6F16ZVFHMIN-NEXT:    ret
+;
+; V4F32-LABEL: vfmacc_vf:
+; V4F32:       # %bb.0:
+; V4F32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; V4F32-NEXT:    vfmacc.vf v8, fa0, v9
+; V4F32-NEXT:    ret
+;
+; V6F32-LABEL: vfmacc_vf:
+; V6F32:       # %bb.0:
+; V6F32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; V6F32-NEXT:    vfmacc.vf v8, fa0, v10
+; V6F32-NEXT:    ret
+;
+; V2F64-LABEL: vfmacc_vf:
+; V2F64:       # %bb.0:
+; V2F64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; V2F64-NEXT:    vfmacc.vf v8, fa0, v9
+; V2F64-NEXT:    ret
+;
+; V3F64-LABEL: vfmacc_vf:
+; V3F64:       # %bb.0:
+; V3F64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; V3F64-NEXT:    vfmacc.vf v8, fa0, v10
+; V3F64-NEXT:    ret
+  %b.head = insertelement <N x TY> poison, TY %b, i32 0
+  %b.splat = shufflevector <N x TY> %b.head, <N x TY> poison, <N x i32> zeroinitializer
+  %d = call <N x TY> @llvm.fma(<N x TY> %c, <N x TY> %b.splat, <N x TY> %a)
+  ret <N x TY> %d
+}
+
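+; vfnmacc negates both the addend and one multiplicand. zvfhmin performs the
+; negations as vxor.vx with the sign bit (lui a0, 8, i.e. 0x8000) before
+; widening to f32.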
+define <N x TY> @vfnmacc_vv(<N x TY> %a, <N x TY> %b, <N x TY> %c) {
+; V8F16ZVFH-LABEL: vfnmacc_vv:
+; V8F16ZVFH:       # %bb.0:
+; V8F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFH-NEXT:    vfnmacc.vv v8, v10, v9
+; V8F16ZVFH-NEXT:    ret
+;
+; V8F16ZVFHMIN-LABEL: vfnmacc_vv:
+; V8F16ZVFHMIN:       # %bb.0:
+; V8F16ZVFHMIN-NEXT:    lui a0, 8
+; V8F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vxor.vx v8, v8, a0
+; V8F16ZVFHMIN-NEXT:    vxor.vx v10, v10, a0
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v10
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfmadd.vv v10, v14, v12
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V8F16ZVFHMIN-NEXT:    ret
+;
+; V6F16ZVFH-LABEL: vfnmacc_vv:
+; V6F16ZVFH:       # %bb.0:
+; V6F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFH-NEXT:    vfnmacc.vv v8, v10, v9
+; V6F16ZVFH-NEXT:    ret
+;
+; V6F16ZVFHMIN-LABEL: vfnmacc_vv:
+; V6F16ZVFHMIN:       # %bb.0:
+; V6F16ZVFHMIN-NEXT:    lui a0, 8
+; V6F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vxor.vx v8, v8, a0
+; V6F16ZVFHMIN-NEXT:    vxor.vx v10, v10, a0
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v10
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfmadd.vv v10, v14, v12
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V6F16ZVFHMIN-NEXT:    ret
+;
+; V4F32-LABEL: vfnmacc_vv:
+; V4F32:       # %bb.0:
+; V4F32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; V4F32-NEXT:    vfnmacc.vv v8, v10, v9
+; V4F32-NEXT:    ret
+;
+; V6F32-LABEL: vfnmacc_vv:
+; V6F32:       # %bb.0:
+; V6F32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; V6F32-NEXT:    vfnmacc.vv v8, v12, v10
+; V6F32-NEXT:    ret
+;
+; V2F64-LABEL: vfnmacc_vv:
+; V2F64:       # %bb.0:
+; V2F64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; V2F64-NEXT:    vfnmacc.vv v8, v10, v9
+; V2F64-NEXT:    ret
+;
+; V3F64-LABEL: vfnmacc_vv:
+; V3F64:       # %bb.0:
+; V3F64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; V3F64-NEXT:    vfnmacc.vv v8, v12, v10
+; V3F64-NEXT:    ret
+  %a.neg = fneg <N x TY> %a
+  %c.neg = fneg <N x TY> %c
+  %d = call <N x TY> @llvm.fma(<N x TY> %c.neg, <N x TY> %b, <N x TY> %a.neg)
+  ret <N x TY> %d
+}
+
+define <N x TY> @vfnmacc_vf(<N x TY> %a, TY %b, <N x TY> %c) {
+; V8F16ZVFH-LABEL: vfnmacc_vf:
+; V8F16ZVFH:       # %bb.0:
+; V8F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFH-NEXT:    vfnmacc.vf v8, fa0, v9
+; V8F16ZVFH-NEXT:    ret
+;
+; V8F16ZVFHMIN-LABEL: vfnmacc_vf:
+; V8F16ZVFHMIN:       # %bb.0:
+; V8F16ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; V8F16ZVFHMIN-NEXT:    lui a1, 8
+; V8F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vxor.vx v8, v8, a1
+; V8F16ZVFHMIN-NEXT:    vxor.vx v9, v9, a1
+; V8F16ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfmadd.vv v16, v14, v12
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; V8F16ZVFHMIN-NEXT:    ret
+;
+; V6F16ZVFH-LABEL: vfnmacc_vf:
+; V6F16ZVFH:       # %bb.0:
+; V6F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFH-NEXT:    vfnmacc.vf v8, fa0, v9
+; V6F16ZVFH-NEXT:    ret
+;
+; V6F16ZVFHMIN-LABEL: vfnmacc_vf:
+; V6F16ZVFHMIN:       # %bb.0:
+; V6F16ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; V6F16ZVFHMIN-NEXT:    lui a1, 8
+; V6F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vxor.vx v8, v8, a1
+; V6F16ZVFHMIN-NEXT:    vxor.vx v9, v9, a1
+; V6F16ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfmadd.vv v16, v14, v12
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; V6F16ZVFHMIN-NEXT:    ret
+;
+; V4F32-LABEL: vfnmacc_vf:
+; V4F32:       # %bb.0:
+; V4F32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; V4F32-NEXT:    vfnmacc.vf v8, fa0, v9
+; V4F32-NEXT:    ret
+;
+; V6F32-LABEL: vfnmacc_vf:
+; V6F32:       # %bb.0:
+; V6F32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; V6F32-NEXT:    vfnmacc.vf v8, fa0, v10
+; V6F32-NEXT:    ret
+;
+; V2F64-LABEL: vfnmacc_vf:
+; V2F64:       # %bb.0:
+; V2F64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; V2F64-NEXT:    vfnmacc.vf v8, fa0, v9
+; V2F64-NEXT:    ret
+;
+; V3F64-LABEL: vfnmacc_vf:
+; V3F64:       # %bb.0:
+; V3F64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; V3F64-NEXT:    vfnmacc.vf v8, fa0, v10
+; V3F64-NEXT:    ret
+  %a.neg = fneg <N x TY> %a
+  %c.neg = fneg <N x TY> %c
+  %b.head = insertelement <N x TY> poison, TY %b, i32 0
+  %b.splat = shufflevector <N x TY> %b.head, <N x TY> poison, <N x i32> zeroinitializer
+  %d = call <N x TY> @llvm.fma(<N x TY> %c.neg, <N x TY> %b.splat, <N x TY> %a.neg)
+  ret <N x TY> %d
+}
+
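+; vfmsac negates only the addend.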
+define <N x TY> @vfmsac_vv(<N x TY> %a, <N x TY> %b, <N x TY> %c) {
+; V8F16ZVFH-LABEL: vfmsac_vv:
+; V8F16ZVFH:       # %bb.0:
+; V8F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFH-NEXT:    vfmsac.vv v8, v10, v9
+; V8F16ZVFH-NEXT:    ret
+;
+; V8F16ZVFHMIN-LABEL: vfmsac_vv:
+; V8F16ZVFHMIN:       # %bb.0:
+; V8F16ZVFHMIN-NEXT:    lui a0, 8
+; V8F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vxor.vx v8, v8, a0
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v10
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfmadd.vv v10, v14, v12
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V8F16ZVFHMIN-NEXT:    ret
+;
+; V6F16ZVFH-LABEL: vfmsac_vv:
+; V6F16ZVFH:       # %bb.0:
+; V6F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFH-NEXT:    vfmsac.vv v8, v10, v9
+; V6F16ZVFH-NEXT:    ret
+;
+; V6F16ZVFHMIN-LABEL: vfmsac_vv:
+; V6F16ZVFHMIN:       # %bb.0:
+; V6F16ZVFHMIN-NEXT:    lui a0, 8
+; V6F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vxor.vx v8, v8, a0
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v10
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfmadd.vv v10, v14, v12
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V6F16ZVFHMIN-NEXT:    ret
+;
+; V4F32-LABEL: vfmsac_vv:
+; V4F32:       # %bb.0:
+; V4F32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; V4F32-NEXT:    vfmsac.vv v8, v10, v9
+; V4F32-NEXT:    ret
+;
+; V6F32-LABEL: vfmsac_vv:
+; V6F32:       # %bb.0:
+; V6F32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; V6F32-NEXT:    vfmsac.vv v8, v12, v10
+; V6F32-NEXT:    ret
+;
+; V2F64-LABEL: vfmsac_vv:
+; V2F64:       # %bb.0:
+; V2F64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; V2F64-NEXT:    vfmsac.vv v8, v10, v9
+; V2F64-NEXT:    ret
+;
+; V3F64-LABEL: vfmsac_vv:
+; V3F64:       # %bb.0:
+; V3F64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; V3F64-NEXT:    vfmsac.vv v8, v12, v10
+; V3F64-NEXT:    ret
+  %a.neg = fneg <N x TY> %a
+  %d = call <N x TY> @llvm.fma(<N x TY> %c, <N x TY> %b, <N x TY> %a.neg)
+  ret <N x TY> %d
+}
+
+define <N x TY> @vfmsac_vf(<N x TY> %a, TY %b, <N x TY> %c) {
+; V8F16ZVFH-LABEL: vfmsac_vf:
+; V8F16ZVFH:       # %bb.0:
+; V8F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFH-NEXT:    vfmsac.vf v8, fa0, v9
+; V8F16ZVFH-NEXT:    ret
+;
+; V8F16ZVFHMIN-LABEL: vfmsac_vf:
+; V8F16ZVFHMIN:       # %bb.0:
+; V8F16ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; V8F16ZVFHMIN-NEXT:    lui a1, 8
+; V8F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vxor.vx v8, v8, a1
+; V8F16ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfmadd.vv v16, v14, v12
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; V8F16ZVFHMIN-NEXT:    ret
+;
+; V6F16ZVFH-LABEL: vfmsac_vf:
+; V6F16ZVFH:       # %bb.0:
+; V6F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFH-NEXT:    vfmsac.vf v8, fa0, v9
+; V6F16ZVFH-NEXT:    ret
+;
+; V6F16ZVFHMIN-LABEL: vfmsac_vf:
+; V6F16ZVFHMIN:       # %bb.0:
+; V6F16ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; V6F16ZVFHMIN-NEXT:    lui a1, 8
+; V6F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vxor.vx v8, v8, a1
+; V6F16ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfmadd.vv v16, v14, v12
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; V6F16ZVFHMIN-NEXT:    ret
+;
+; V4F32-LABEL: vfmsac_vf:
+; V4F32:       # %bb.0:
+; V4F32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; V4F32-NEXT:    vfmsac.vf v8, fa0, v9
+; V4F32-NEXT:    ret
+;
+; V6F32-LABEL: vfmsac_vf:
+; V6F32:       # %bb.0:
+; V6F32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; V6F32-NEXT:    vfmsac.vf v8, fa0, v10
+; V6F32-NEXT:    ret
+;
+; V2F64-LABEL: vfmsac_vf:
+; V2F64:       # %bb.0:
+; V2F64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; V2F64-NEXT:    vfmsac.vf v8, fa0, v9
+; V2F64-NEXT:    ret
+;
+; V3F64-LABEL: vfmsac_vf:
+; V3F64:       # %bb.0:
+; V3F64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; V3F64-NEXT:    vfmsac.vf v8, fa0, v10
+; V3F64-NEXT:    ret
+  %a.neg = fneg <N x TY> %a
+  %b.head = insertelement <N x TY> poison, TY %b, i32 0
+  %b.splat = shufflevector <N x TY> %b.head, <N x TY> poison, <N x i32> zeroinitializer
+  %d = call <N x TY> @llvm.fma(<N x TY> %c, <N x TY> %b.splat, <N x TY> %a.neg)
+  ret <N x TY> %d
+}
+
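+; vfnmsac negates only the multiplicand.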
+define <N x TY> @vfnmsac_vv(<N x TY> %a, <N x TY> %b, <N x TY> %c) {
+; V8F16ZVFH-LABEL: vfnmsac_vv:
+; V8F16ZVFH:       # %bb.0:
+; V8F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFH-NEXT:    vfnmsac.vv v8, v10, v9
+; V8F16ZVFH-NEXT:    ret
+;
+; V8F16ZVFHMIN-LABEL: vfnmsac_vv:
+; V8F16ZVFHMIN:       # %bb.0:
+; V8F16ZVFHMIN-NEXT:    lui a0, 8
+; V8F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vxor.vx v10, v10, a0
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfmadd.vv v14, v12, v10
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v14
+; V8F16ZVFHMIN-NEXT:    ret
+;
+; V6F16ZVFH-LABEL: vfnmsac_vv:
+; V6F16ZVFH:       # %bb.0:
+; V6F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFH-NEXT:    vfnmsac.vv v8, v10, v9
+; V6F16ZVFH-NEXT:    ret
+;
+; V6F16ZVFHMIN-LABEL: vfnmsac_vv:
+; V6F16ZVFHMIN:       # %bb.0:
+; V6F16ZVFHMIN-NEXT:    lui a0, 8
+; V6F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vxor.vx v10, v10, a0
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfmadd.vv v14, v12, v10
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v14
+; V6F16ZVFHMIN-NEXT:    ret
+;
+; V4F32-LABEL: vfnmsac_vv:
+; V4F32:       # %bb.0:
+; V4F32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; V4F32-NEXT:    vfnmsac.vv v8, v10, v9
+; V4F32-NEXT:    ret
+;
+; V6F32-LABEL: vfnmsac_vv:
+; V6F32:       # %bb.0:
+; V6F32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; V6F32-NEXT:    vfnmsac.vv v8, v12, v10
+; V6F32-NEXT:    ret
+;
+; V2F64-LABEL: vfnmsac_vv:
+; V2F64:       # %bb.0:
+; V2F64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; V2F64-NEXT:    vfnmsac.vv v8, v10, v9
+; V2F64-NEXT:    ret
+;
+; V3F64-LABEL: vfnmsac_vv:
+; V3F64:       # %bb.0:
+; V3F64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; V3F64-NEXT:    vfnmsac.vv v8, v12, v10
+; V3F64-NEXT:    ret
+  %c.neg = fneg <N x TY> %c
+  %d = call <N x TY> @llvm.fma(<N x TY> %c.neg, <N x TY> %b, <N x TY> %a)
+  ret <N x TY> %d
+}
+
+define <N x TY> @vfnmsac_vf(<N x TY> %a, TY %b, <N x TY> %c) {
+; V8F16ZVFH-LABEL: vfnmsac_vf:
+; V8F16ZVFH:       # %bb.0:
+; V8F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFH-NEXT:    vfnmsac.vf v8, fa0, v9
+; V8F16ZVFH-NEXT:    ret
+;
+; V8F16ZVFHMIN-LABEL: vfnmsac_vf:
+; V8F16ZVFHMIN:       # %bb.0:
+; V8F16ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; V8F16ZVFHMIN-NEXT:    lui a1, 8
+; V8F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vxor.vx v9, v9, a1
+; V8F16ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfmadd.vv v16, v12, v14
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; V8F16ZVFHMIN-NEXT:    ret
+;
+; V6F16ZVFH-LABEL: vfnmsac_vf:
+; V6F16ZVFH:       # %bb.0:
+; V6F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFH-NEXT:    vfnmsac.vf v8, fa0, v9
+; V6F16ZVFH-NEXT:    ret
+;
+; V6F16ZVFHMIN-LABEL: vfnmsac_vf:
+; V6F16ZVFHMIN:       # %bb.0:
+; V6F16ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; V6F16ZVFHMIN-NEXT:    lui a1, 8
+; V6F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vxor.vx v9, v9, a1
+; V6F16ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfmadd.vv v16, v12, v14
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; V6F16ZVFHMIN-NEXT:    ret
+;
+; V4F32-LABEL: vfnmsac_vf:
+; V4F32:       # %bb.0:
+; V4F32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; V4F32-NEXT:    vfnmsac.vf v8, fa0, v9
+; V4F32-NEXT:    ret
+;
+; V6F32-LABEL: vfnmsac_vf:
+; V6F32:       # %bb.0:
+; V6F32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; V6F32-NEXT:    vfnmsac.vf v8, fa0, v10
+; V6F32-NEXT:    ret
+;
+; V2F64-LABEL: vfnmsac_vf:
+; V2F64:       # %bb.0:
+; V2F64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; V2F64-NEXT:    vfnmsac.vf v8, fa0, v9
+; V2F64-NEXT:    ret
+;
+; V3F64-LABEL: vfnmsac_vf:
+; V3F64:       # %bb.0:
+; V3F64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; V3F64-NEXT:    vfnmsac.vf v8, fa0, v10
+; V3F64-NEXT:    ret
+  %c.neg = fneg <N x TY> %c
+  %b.head = insertelement <N x TY> poison, TY %b, i32 0
+  %b.splat = shufflevector <N x TY> %b.head, <N x TY> poison, <N x i32> zeroinitializer
+  %d = call <N x TY> @llvm.fma(<N x TY> %c.neg, <N x TY> %b.splat, <N x TY> %a)
+  ret <N x TY> %d
+}
+
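+; vfmadd keeps a multiplicand in the destination register: fma(%a, %b, %c)
+; with %a in v8 selects vfmadd rather than vfmacc.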
+define <N x TY> @vfmadd_vv(<N x TY> %a, <N x TY> %b, <N x TY> %c) {
+; V8F16ZVFH-LABEL: vfmadd_vv:
+; V8F16ZVFH:       # %bb.0:
+; V8F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFH-NEXT:    vfmadd.vv v8, v9, v10
+; V8F16ZVFH-NEXT:    ret
+;
+; V8F16ZVFHMIN-LABEL: vfmadd_vv:
+; V8F16ZVFHMIN:       # %bb.0:
+; V8F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfmadd.vv v14, v10, v12
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v14
+; V8F16ZVFHMIN-NEXT:    ret
+;
+; V6F16ZVFH-LABEL: vfmadd_vv:
+; V6F16ZVFH:       # %bb.0:
+; V6F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFH-NEXT:    vfmadd.vv v8, v9, v10
+; V6F16ZVFH-NEXT:    ret
+;
+; V6F16ZVFHMIN-LABEL: vfmadd_vv:
+; V6F16ZVFHMIN:       # %bb.0:
+; V6F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfmadd.vv v14, v10, v12
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v14
+; V6F16ZVFHMIN-NEXT:    ret
+;
+; V4F32-LABEL: vfmadd_vv:
+; V4F32:       # %bb.0:
+; V4F32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; V4F32-NEXT:    vfmadd.vv v8, v9, v10
+; V4F32-NEXT:    ret
+;
+; V6F32-LABEL: vfmadd_vv:
+; V6F32:       # %bb.0:
+; V6F32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; V6F32-NEXT:    vfmadd.vv v8, v10, v12
+; V6F32-NEXT:    ret
+;
+; V2F64-LABEL: vfmadd_vv:
+; V2F64:       # %bb.0:
+; V2F64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; V2F64-NEXT:    vfmadd.vv v8, v9, v10
+; V2F64-NEXT:    ret
+;
+; V3F64-LABEL: vfmadd_vv:
+; V3F64:       # %bb.0:
+; V3F64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; V3F64-NEXT:    vfmadd.vv v8, v10, v12
+; V3F64-NEXT:    ret
+  %d = call <N x TY> @llvm.fma(<N x TY> %a, <N x TY> %b, <N x TY> %c)
+  ret <N x TY> %d
+}
+
+define <N x TY> @vfmadd_vf(<N x TY> %a, TY %b, <N x TY> %c) {
+; V8F16ZVFH-LABEL: vfmadd_vf:
+; V8F16ZVFH:       # %bb.0:
+; V8F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFH-NEXT:    vfmadd.vf v8, fa0, v9
+; V8F16ZVFH-NEXT:    ret
+;
+; V8F16ZVFHMIN-LABEL: vfmadd_vf:
+; V8F16ZVFHMIN:       # %bb.0:
+; V8F16ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; V8F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfmadd.vv v16, v14, v12
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; V8F16ZVFHMIN-NEXT:    ret
+;
+; V6F16ZVFH-LABEL: vfmadd_vf:
+; V6F16ZVFH:       # %bb.0:
+; V6F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFH-NEXT:    vfmadd.vf v8, fa0, v9
+; V6F16ZVFH-NEXT:    ret
+;
+; V6F16ZVFHMIN-LABEL: vfmadd_vf:
+; V6F16ZVFHMIN:       # %bb.0:
+; V6F16ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; V6F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfmadd.vv v16, v14, v12
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; V6F16ZVFHMIN-NEXT:    ret
+;
+; V4F32-LABEL: vfmadd_vf:
+; V4F32:       # %bb.0:
+; V4F32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; V4F32-NEXT:    vfmadd.vf v8, fa0, v9
+; V4F32-NEXT:    ret
+;
+; V6F32-LABEL: vfmadd_vf:
+; V6F32:       # %bb.0:
+; V6F32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; V6F32-NEXT:    vfmadd.vf v8, fa0, v10
+; V6F32-NEXT:    ret
+;
+; V2F64-LABEL: vfmadd_vf:
+; V2F64:       # %bb.0:
+; V2F64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; V2F64-NEXT:    vfmadd.vf v8, fa0, v9
+; V2F64-NEXT:    ret
+;
+; V3F64-LABEL: vfmadd_vf:
+; V3F64:       # %bb.0:
+; V3F64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; V3F64-NEXT:    vfmadd.vf v8, fa0, v10
+; V3F64-NEXT:    ret
+  %b.head = insertelement <N x TY> poison, TY %b, i32 0
+  %b.splat = shufflevector <N x TY> %b.head, <N x TY> poison, <N x i32> zeroinitializer
+  %d = call <N x TY> @llvm.fma(<N x TY> %a, <N x TY> %b.splat, <N x TY> %c)
+  ret <N x TY> %d
+}
+
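+; vfnmadd negates both the destination multiplicand and the addend.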
+define <N x TY> @vfnmadd_vv(<N x TY> %a, <N x TY> %b, <N x TY> %c) {
+; V8F16ZVFH-LABEL: vfnmadd_vv:
+; V8F16ZVFH:       # %bb.0:
+; V8F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFH-NEXT:    vfnmadd.vv v8, v9, v10
+; V8F16ZVFH-NEXT:    ret
+;
+; V8F16ZVFHMIN-LABEL: vfnmadd_vv:
+; V8F16ZVFHMIN:       # %bb.0:
+; V8F16ZVFHMIN-NEXT:    lui a0, 8
+; V8F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vxor.vx v8, v8, a0
+; V8F16ZVFHMIN-NEXT:    vxor.vx v10, v10, a0
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfmadd.vv v14, v10, v12
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v14
+; V8F16ZVFHMIN-NEXT:    ret
+;
+; V6F16ZVFH-LABEL: vfnmadd_vv:
+; V6F16ZVFH:       # %bb.0:
+; V6F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFH-NEXT:    vfnmadd.vv v8, v9, v10
+; V6F16ZVFH-NEXT:    ret
+;
+; V6F16ZVFHMIN-LABEL: vfnmadd_vv:
+; V6F16ZVFHMIN:       # %bb.0:
+; V6F16ZVFHMIN-NEXT:    lui a0, 8
+; V6F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vxor.vx v8, v8, a0
+; V6F16ZVFHMIN-NEXT:    vxor.vx v10, v10, a0
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfmadd.vv v14, v10, v12
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v14
+; V6F16ZVFHMIN-NEXT:    ret
+;
+; V4F32-LABEL: vfnmadd_vv:
+; V4F32:       # %bb.0:
+; V4F32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; V4F32-NEXT:    vfnmadd.vv v8, v9, v10
+; V4F32-NEXT:    ret
+;
+; V6F32-LABEL: vfnmadd_vv:
+; V6F32:       # %bb.0:
+; V6F32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; V6F32-NEXT:    vfnmadd.vv v8, v10, v12
+; V6F32-NEXT:    ret
+;
+; V2F64-LABEL: vfnmadd_vv:
+; V2F64:       # %bb.0:
+; V2F64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; V2F64-NEXT:    vfnmadd.vv v8, v9, v10
+; V2F64-NEXT:    ret
+;
+; V3F64-LABEL: vfnmadd_vv:
+; V3F64:       # %bb.0:
+; V3F64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; V3F64-NEXT:    vfnmadd.vv v8, v10, v12
+; V3F64-NEXT:    ret
+  %a.neg = fneg <N x TY> %a
+  %c.neg = fneg <N x TY> %c
+  %d = call <N x TY> @llvm.fma(<N x TY> %a.neg, <N x TY> %b, <N x TY> %c.neg)
+  ret <N x TY> %d
+}
+
+define <N x TY> @vfnmadd_vf(<N x TY> %a, TY %b, <N x TY> %c) {
+; V8F16ZVFH-LABEL: vfnmadd_vf:
+; V8F16ZVFH:       # %bb.0:
+; V8F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFH-NEXT:    vfnmadd.vf v8, fa0, v9
+; V8F16ZVFH-NEXT:    ret
+;
+; V8F16ZVFHMIN-LABEL: vfnmadd_vf:
+; V8F16ZVFHMIN:       # %bb.0:
+; V8F16ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; V8F16ZVFHMIN-NEXT:    lui a1, 8
+; V8F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vxor.vx v8, v8, a1
+; V8F16ZVFHMIN-NEXT:    vxor.vx v9, v9, a1
+; V8F16ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfmadd.vv v16, v14, v12
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; V8F16ZVFHMIN-NEXT:    ret
+;
+; V6F16ZVFH-LABEL: vfnmadd_vf:
+; V6F16ZVFH:       # %bb.0:
+; V6F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFH-NEXT:    vfnmadd.vf v8, fa0, v9
+; V6F16ZVFH-NEXT:    ret
+;
+; V6F16ZVFHMIN-LABEL: vfnmadd_vf:
+; V6F16ZVFHMIN:       # %bb.0:
+; V6F16ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; V6F16ZVFHMIN-NEXT:    lui a1, 8
+; V6F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vxor.vx v8, v8, a1
+; V6F16ZVFHMIN-NEXT:    vxor.vx v9, v9, a1
+; V6F16ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfmadd.vv v16, v14, v12
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; V6F16ZVFHMIN-NEXT:    ret
+;
+; V4F32-LABEL: vfnmadd_vf:
+; V4F32:       # %bb.0:
+; V4F32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; V4F32-NEXT:    vfnmadd.vf v8, fa0, v9
+; V4F32-NEXT:    ret
+;
+; V6F32-LABEL: vfnmadd_vf:
+; V6F32:       # %bb.0:
+; V6F32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; V6F32-NEXT:    vfnmadd.vf v8, fa0, v10
+; V6F32-NEXT:    ret
+;
+; V2F64-LABEL: vfnmadd_vf:
+; V2F64:       # %bb.0:
+; V2F64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; V2F64-NEXT:    vfnmadd.vf v8, fa0, v9
+; V2F64-NEXT:    ret
+;
+; V3F64-LABEL: vfnmadd_vf:
+; V3F64:       # %bb.0:
+; V3F64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; V3F64-NEXT:    vfnmadd.vf v8, fa0, v10
+; V3F64-NEXT:    ret
+  %a.neg = fneg <N x TY> %a
+  %c.neg = fneg <N x TY> %c
+  %b.head = insertelement <N x TY> poison, TY %b, i32 0
+  %b.splat = shufflevector <N x TY> %b.head, <N x TY> poison, <N x i32> zeroinitializer
+  %d = call <N x TY> @llvm.fma(<N x TY> %a.neg, <N x TY> %b.splat, <N x TY> %c.neg)
+  ret <N x TY> %d
+}
+
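+; vfmsub negates only the addend.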
+define <N x TY> @vfmsub_vv(<N x TY> %a, <N x TY> %b, <N x TY> %c) {
+; V8F16ZVFH-LABEL: vfmsub_vv:
+; V8F16ZVFH:       # %bb.0:
+; V8F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFH-NEXT:    vfmsub.vv v8, v9, v10
+; V8F16ZVFH-NEXT:    ret
+;
+; V8F16ZVFHMIN-LABEL: vfmsub_vv:
+; V8F16ZVFHMIN:       # %bb.0:
+; V8F16ZVFHMIN-NEXT:    lui a0, 8
+; V8F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vxor.vx v10, v10, a0
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfmadd.vv v14, v10, v12
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v14
+; V8F16ZVFHMIN-NEXT:    ret
+;
+; V6F16ZVFH-LABEL: vfmsub_vv:
+; V6F16ZVFH:       # %bb.0:
+; V6F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFH-NEXT:    vfmsub.vv v8, v9, v10
+; V6F16ZVFH-NEXT:    ret
+;
+; V6F16ZVFHMIN-LABEL: vfmsub_vv:
+; V6F16ZVFHMIN:       # %bb.0:
+; V6F16ZVFHMIN-NEXT:    lui a0, 8
+; V6F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vxor.vx v10, v10, a0
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfmadd.vv v14, v10, v12
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v14
+; V6F16ZVFHMIN-NEXT:    ret
+;
+; V4F32-LABEL: vfmsub_vv:
+; V4F32:       # %bb.0:
+; V4F32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; V4F32-NEXT:    vfmsub.vv v8, v9, v10
+; V4F32-NEXT:    ret
+;
+; V6F32-LABEL: vfmsub_vv:
+; V6F32:       # %bb.0:
+; V6F32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; V6F32-NEXT:    vfmsub.vv v8, v10, v12
+; V6F32-NEXT:    ret
+;
+; V2F64-LABEL: vfmsub_vv:
+; V2F64:       # %bb.0:
+; V2F64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; V2F64-NEXT:    vfmsub.vv v8, v9, v10
+; V2F64-NEXT:    ret
+;
+; V3F64-LABEL: vfmsub_vv:
+; V3F64:       # %bb.0:
+; V3F64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; V3F64-NEXT:    vfmsub.vv v8, v10, v12
+; V3F64-NEXT:    ret
+  %c.neg = fneg <N x TY> %c
+  %d = call <N x TY> @llvm.fma(<N x TY> %a, <N x TY> %b, <N x TY> %c.neg)
+  ret <N x TY> %d
+}
+
+define <N x TY> @vfmsub_vf(<N x TY> %a, TY %b, <N x TY> %c) {
+; V8F16ZVFH-LABEL: vfmsub_vf:
+; V8F16ZVFH:       # %bb.0:
+; V8F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFH-NEXT:    vfmsub.vf v8, fa0, v9
+; V8F16ZVFH-NEXT:    ret
+;
+; V8F16ZVFHMIN-LABEL: vfmsub_vf:
+; V8F16ZVFHMIN:       # %bb.0:
+; V8F16ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; V8F16ZVFHMIN-NEXT:    lui a1, 8
+; V8F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vxor.vx v9, v9, a1
+; V8F16ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfmadd.vv v16, v14, v12
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; V8F16ZVFHMIN-NEXT:    ret
+;
+; V6F16ZVFH-LABEL: vfmsub_vf:
+; V6F16ZVFH:       # %bb.0:
+; V6F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFH-NEXT:    vfmsub.vf v8, fa0, v9
+; V6F16ZVFH-NEXT:    ret
+;
+; V6F16ZVFHMIN-LABEL: vfmsub_vf:
+; V6F16ZVFHMIN:       # %bb.0:
+; V6F16ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; V6F16ZVFHMIN-NEXT:    lui a1, 8
+; V6F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vxor.vx v9, v9, a1
+; V6F16ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfmadd.vv v16, v14, v12
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; V6F16ZVFHMIN-NEXT:    ret
+;
+; V4F32-LABEL: vfmsub_vf:
+; V4F32:       # %bb.0:
+; V4F32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; V4F32-NEXT:    vfmsub.vf v8, fa0, v9
+; V4F32-NEXT:    ret
+;
+; V6F32-LABEL: vfmsub_vf:
+; V6F32:       # %bb.0:
+; V6F32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; V6F32-NEXT:    vfmsub.vf v8, fa0, v10
+; V6F32-NEXT:    ret
+;
+; V2F64-LABEL: vfmsub_vf:
+; V2F64:       # %bb.0:
+; V2F64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; V2F64-NEXT:    vfmsub.vf v8, fa0, v9
+; V2F64-NEXT:    ret
+;
+; V3F64-LABEL: vfmsub_vf:
+; V3F64:       # %bb.0:
+; V3F64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; V3F64-NEXT:    vfmsub.vf v8, fa0, v10
+; V3F64-NEXT:    ret
+  %c.neg = fneg <N x TY> %c
+  %b.head = insertelement <N x TY> poison, TY %b, i32 0
+  %b.splat = shufflevector <N x TY> %b.head, <N x TY> poison, <N x i32> zeroinitializer
+  %d = call <N x TY> @llvm.fma(<N x TY> %a, <N x TY> %b.splat, <N x TY> %c.neg)
+  ret <N x TY> %d
+}
+
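+; vfnmsub negates only the destination multiplicand.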
+define <N x TY> @vfnmsub_vv(<N x TY> %a, <N x TY> %b, <N x TY> %c) {
+; V8F16ZVFH-LABEL: vfnmsub_vv:
+; V8F16ZVFH:       # %bb.0:
+; V8F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFH-NEXT:    vfnmsub.vv v8, v9, v10
+; V8F16ZVFH-NEXT:    ret
+;
+; V8F16ZVFHMIN-LABEL: vfnmsub_vv:
+; V8F16ZVFHMIN:       # %bb.0:
+; V8F16ZVFHMIN-NEXT:    lui a0, 8
+; V8F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vxor.vx v8, v8, a0
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v10
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfmadd.vv v10, v12, v14
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V8F16ZVFHMIN-NEXT:    ret
+;
+; V6F16ZVFH-LABEL: vfnmsub_vv:
+; V6F16ZVFH:       # %bb.0:
+; V6F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFH-NEXT:    vfnmsub.vv v8, v9, v10
+; V6F16ZVFH-NEXT:    ret
+;
+; V6F16ZVFHMIN-LABEL: vfnmsub_vv:
+; V6F16ZVFHMIN:       # %bb.0:
+; V6F16ZVFHMIN-NEXT:    lui a0, 8
+; V6F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vxor.vx v8, v8, a0
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v10
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfmadd.vv v10, v12, v14
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V6F16ZVFHMIN-NEXT:    ret
+;
+; V4F32-LABEL: vfnmsub_vv:
+; V4F32:       # %bb.0:
+; V4F32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; V4F32-NEXT:    vfnmsub.vv v8, v9, v10
+; V4F32-NEXT:    ret
+;
+; V6F32-LABEL: vfnmsub_vv:
+; V6F32:       # %bb.0:
+; V6F32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; V6F32-NEXT:    vfnmsub.vv v8, v10, v12
+; V6F32-NEXT:    ret
+;
+; V2F64-LABEL: vfnmsub_vv:
+; V2F64:       # %bb.0:
+; V2F64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; V2F64-NEXT:    vfnmsub.vv v8, v9, v10
+; V2F64-NEXT:    ret
+;
+; V3F64-LABEL: vfnmsub_vv:
+; V3F64:       # %bb.0:
+; V3F64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; V3F64-NEXT:    vfnmsub.vv v8, v10, v12
+; V3F64-NEXT:    ret
+  %a.neg = fneg <N x TY> %a
+  %d = call <N x TY> @llvm.fma(<N x TY> %a.neg, <N x TY> %b, <N x TY> %c)
+  ret <N x TY> %d
+}
+
+define <N x TY> @vfnmsub_vf(<N x TY> %a, TY %b, <N x TY> %c) {
+; V8F16ZVFH-LABEL: vfnmsub_vf:
+; V8F16ZVFH:       # %bb.0:
+; V8F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFH-NEXT:    vfnmsub.vf v8, fa0, v9
+; V8F16ZVFH-NEXT:    ret
+;
+; V8F16ZVFHMIN-LABEL: vfnmsub_vf:
+; V8F16ZVFHMIN:       # %bb.0:
+; V8F16ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; V8F16ZVFHMIN-NEXT:    lui a1, 8
+; V8F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vxor.vx v8, v8, a1
+; V8F16ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfmadd.vv v16, v12, v14
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; V8F16ZVFHMIN-NEXT:    ret
+;
+; V6F16ZVFH-LABEL: vfnmsub_vf:
+; V6F16ZVFH:       # %bb.0:
+; V6F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFH-NEXT:    vfnmsub.vf v8, fa0, v9
+; V6F16ZVFH-NEXT:    ret
+;
+; V6F16ZVFHMIN-LABEL: vfnmsub_vf:
+; V6F16ZVFHMIN:       # %bb.0:
+; V6F16ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; V6F16ZVFHMIN-NEXT:    lui a1, 8
+; V6F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vxor.vx v8, v8, a1
+; V6F16ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfmadd.vv v16, v12, v14
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
+; V6F16ZVFHMIN-NEXT:    ret
+;
+; V4F32-LABEL: vfnmsub_vf:
+; V4F32:       # %bb.0:
+; V4F32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; V4F32-NEXT:    vfnmsub.vf v8, fa0, v9
+; V4F32-NEXT:    ret
+;
+; V6F32-LABEL: vfnmsub_vf:
+; V6F32:       # %bb.0:
+; V6F32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; V6F32-NEXT:    vfnmsub.vf v8, fa0, v10
+; V6F32-NEXT:    ret
+;
+; V2F64-LABEL: vfnmsub_vf:
+; V2F64:       # %bb.0:
+; V2F64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; V2F64-NEXT:    vfnmsub.vf v8, fa0, v9
+; V2F64-NEXT:    ret
+;
+; V3F64-LABEL: vfnmsub_vf:
+; V3F64:       # %bb.0:
+; V3F64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; V3F64-NEXT:    vfnmsub.vf v8, fa0, v10
+; V3F64-NEXT:    ret
+  %a.neg = fneg <N x TY> %a
+  %b.head = insertelement <N x TY> poison, TY %b, i32 0
+  %b.splat = shufflevector <N x TY> %b.head, <N x TY> poison, <N x i32> zeroinitializer
+  %d = call <N x TY> @llvm.fma(<N x TY> %a.neg, <N x TY> %b.splat, <N x TY> %c)
+  ret <N x TY> %d
+}
+
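+; Rounding tests. trunc uses the static round-towards-zero conversion
+; (vfcvt.rtz.x.f.v), so no fsrmi/fsrm is needed.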
+define <N x TY> @trunc(<N x TY> %a) {
+; V8F16ZVFH-LABEL: trunc:
+; V8F16ZVFH:       # %bb.0:
+; V8F16ZVFH-NEXT:    lui a0, %hi(.LCPI37_0)
+; V8F16ZVFH-NEXT:    flh fa5, %lo(.LCPI37_0)(a0)
+; V8F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFH-NEXT:    vfabs.v v9, v8
+; V8F16ZVFH-NEXT:    vmflt.vf v0, v9, fa5
+; V8F16ZVFH-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
+; V8F16ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; V8F16ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
+; V8F16ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; V8F16ZVFH-NEXT:    ret
+;
+; V8F16ZVFHMIN-LABEL: trunc:
+; V8F16ZVFHMIN:       # %bb.0:
+; V8F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfabs.v v8, v10
+; V8F16ZVFHMIN-NEXT:    lui a0, 307200
+; V8F16ZVFHMIN-NEXT:    fmv.w.x fa5, a0
+; V8F16ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
+; V8F16ZVFHMIN-NEXT:    vfcvt.rtz.x.f.v v8, v10, v0.t
+; V8F16ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; V8F16ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V8F16ZVFHMIN-NEXT:    ret
+;
+; V6F16ZVFH-LABEL: trunc:
+; V6F16ZVFH:       # %bb.0:
+; V6F16ZVFH-NEXT:    lui a0, %hi(.LCPI37_0)
+; V6F16ZVFH-NEXT:    flh fa5, %lo(.LCPI37_0)(a0)
+; V6F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFH-NEXT:    vfabs.v v9, v8
+; V6F16ZVFH-NEXT:    vmflt.vf v0, v9, fa5
+; V6F16ZVFH-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
+; V6F16ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; V6F16ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
+; V6F16ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; V6F16ZVFH-NEXT:    ret
+;
+; V6F16ZVFHMIN-LABEL: trunc:
+; V6F16ZVFHMIN:       # %bb.0:
+; V6F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfabs.v v8, v10
+; V6F16ZVFHMIN-NEXT:    lui a0, 307200
+; V6F16ZVFHMIN-NEXT:    fmv.w.x fa5, a0
+; V6F16ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
+; V6F16ZVFHMIN-NEXT:    vfcvt.rtz.x.f.v v8, v10, v0.t
+; V6F16ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; V6F16ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V6F16ZVFHMIN-NEXT:    ret
+;
+; V4F32-LABEL: trunc:
+; V4F32:       # %bb.0:
+; V4F32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; V4F32-NEXT:    vfabs.v v9, v8
+; V4F32-NEXT:    lui a0, 307200
+; V4F32-NEXT:    fmv.w.x fa5, a0
+; V4F32-NEXT:    vmflt.vf v0, v9, fa5
+; V4F32-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
+; V4F32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; V4F32-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
+; V4F32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; V4F32-NEXT:    ret
+;
+; V6F32-LABEL: trunc:
+; V6F32:       # %bb.0:
+; V6F32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; V6F32-NEXT:    vfabs.v v10, v8
+; V6F32-NEXT:    lui a0, 307200
+; V6F32-NEXT:    fmv.w.x fa5, a0
+; V6F32-NEXT:    vmflt.vf v0, v10, fa5
+; V6F32-NEXT:    vfcvt.rtz.x.f.v v10, v8, v0.t
+; V6F32-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; V6F32-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; V6F32-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; V6F32-NEXT:    ret
+;
+; V2F64-LABEL: trunc:
+; V2F64:       # %bb.0:
+; V2F64-NEXT:    lui a0, %hi(.LCPI37_0)
+; V2F64-NEXT:    fld fa5, %lo(.LCPI37_0)(a0)
+; V2F64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; V2F64-NEXT:    vfabs.v v9, v8
+; V2F64-NEXT:    vmflt.vf v0, v9, fa5
+; V2F64-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
+; V2F64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; V2F64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; V2F64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; V2F64-NEXT:    ret
+;
+; V3F64-LABEL: trunc:
+; V3F64:       # %bb.0:
+; V3F64-NEXT:    lui a0, %hi(.LCPI37_0)
+; V3F64-NEXT:    fld fa5, %lo(.LCPI37_0)(a0)
+; V3F64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; V3F64-NEXT:    vfabs.v v10, v8
+; V3F64-NEXT:    vmflt.vf v0, v10, fa5
+; V3F64-NEXT:    vfcvt.rtz.x.f.v v10, v8, v0.t
+; V3F64-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; V3F64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; V3F64-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; V3F64-NEXT:    ret
+  %b = call <N x TY> @llvm.trunc(<N x TY> %a)
+  ret <N x TY> %b
+}
+
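+; ceil swaps frm to round-up (fsrmi 3) around the conversion.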
+define <N x TY> @ceil(<N x TY> %a) {
+; V8F16ZVFH-LABEL: ceil:
+; V8F16ZVFH:       # %bb.0:
+; V8F16ZVFH-NEXT:    lui a0, %hi(.LCPI38_0)
+; V8F16ZVFH-NEXT:    flh fa5, %lo(.LCPI38_0)(a0)
+; V8F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFH-NEXT:    vfabs.v v9, v8
+; V8F16ZVFH-NEXT:    vmflt.vf v0, v9, fa5
+; V8F16ZVFH-NEXT:    fsrmi a0, 3
+; V8F16ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; V8F16ZVFH-NEXT:    fsrm a0
+; V8F16ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; V8F16ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
+; V8F16ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; V8F16ZVFH-NEXT:    ret
+;
+; V8F16ZVFHMIN-LABEL: ceil:
+; V8F16ZVFHMIN:       # %bb.0:
+; V8F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfabs.v v8, v10
+; V8F16ZVFHMIN-NEXT:    lui a0, 307200
+; V8F16ZVFHMIN-NEXT:    fmv.w.x fa5, a0
+; V8F16ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
+; V8F16ZVFHMIN-NEXT:    fsrmi a0, 3
+; V8F16ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
+; V8F16ZVFHMIN-NEXT:    fsrm a0
+; V8F16ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; V8F16ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V8F16ZVFHMIN-NEXT:    ret
+;
+; V6F16ZVFH-LABEL: ceil:
+; V6F16ZVFH:       # %bb.0:
+; V6F16ZVFH-NEXT:    lui a0, %hi(.LCPI38_0)
+; V6F16ZVFH-NEXT:    flh fa5, %lo(.LCPI38_0)(a0)
+; V6F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFH-NEXT:    vfabs.v v9, v8
+; V6F16ZVFH-NEXT:    vmflt.vf v0, v9, fa5
+; V6F16ZVFH-NEXT:    fsrmi a0, 3
+; V6F16ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; V6F16ZVFH-NEXT:    fsrm a0
+; V6F16ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; V6F16ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
+; V6F16ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; V6F16ZVFH-NEXT:    ret
+;
+; V6F16ZVFHMIN-LABEL: ceil:
+; V6F16ZVFHMIN:       # %bb.0:
+; V6F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfabs.v v8, v10
+; V6F16ZVFHMIN-NEXT:    lui a0, 307200
+; V6F16ZVFHMIN-NEXT:    fmv.w.x fa5, a0
+; V6F16ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
+; V6F16ZVFHMIN-NEXT:    fsrmi a0, 3
+; V6F16ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
+; V6F16ZVFHMIN-NEXT:    fsrm a0
+; V6F16ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; V6F16ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V6F16ZVFHMIN-NEXT:    ret
+;
+; V4F32-LABEL: ceil:
+; V4F32:       # %bb.0:
+; V4F32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; V4F32-NEXT:    vfabs.v v9, v8
+; V4F32-NEXT:    lui a0, 307200
+; V4F32-NEXT:    fmv.w.x fa5, a0
+; V4F32-NEXT:    vmflt.vf v0, v9, fa5
+; V4F32-NEXT:    fsrmi a0, 3
+; V4F32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; V4F32-NEXT:    fsrm a0
+; V4F32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; V4F32-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
+; V4F32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; V4F32-NEXT:    ret
+;
+; V6F32-LABEL: ceil:
+; V6F32:       # %bb.0:
+; V6F32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; V6F32-NEXT:    vfabs.v v10, v8
+; V6F32-NEXT:    lui a0, 307200
+; V6F32-NEXT:    fmv.w.x fa5, a0
+; V6F32-NEXT:    vmflt.vf v0, v10, fa5
+; V6F32-NEXT:    fsrmi a0, 3
+; V6F32-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; V6F32-NEXT:    fsrm a0
+; V6F32-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; V6F32-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; V6F32-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; V6F32-NEXT:    ret
+;
+; V2F64-LABEL: ceil:
+; V2F64:       # %bb.0:
+; V2F64-NEXT:    lui a0, %hi(.LCPI38_0)
+; V2F64-NEXT:    fld fa5, %lo(.LCPI38_0)(a0)
+; V2F64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; V2F64-NEXT:    vfabs.v v9, v8
+; V2F64-NEXT:    vmflt.vf v0, v9, fa5
+; V2F64-NEXT:    fsrmi a0, 3
+; V2F64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; V2F64-NEXT:    fsrm a0
+; V2F64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; V2F64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; V2F64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; V2F64-NEXT:    ret
+;
+; V3F64-LABEL: ceil:
+; V3F64:       # %bb.0:
+; V3F64-NEXT:    lui a0, %hi(.LCPI38_0)
+; V3F64-NEXT:    fld fa5, %lo(.LCPI38_0)(a0)
+; V3F64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; V3F64-NEXT:    vfabs.v v10, v8
+; V3F64-NEXT:    vmflt.vf v0, v10, fa5
+; V3F64-NEXT:    fsrmi a0, 3
+; V3F64-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; V3F64-NEXT:    fsrm a0
+; V3F64-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; V3F64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; V3F64-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; V3F64-NEXT:    ret
+  %b = call <N x TY> @llvm.ceil(<N x TY> %a)
+  ret <N x TY> %b
+}
+
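+; floor swaps frm to round-down (fsrmi 2) around the conversion.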
+define <N x TY> @floor(<N x TY> %a) {
+; V8F16ZVFH-LABEL: floor:
+; V8F16ZVFH:       # %bb.0:
+; V8F16ZVFH-NEXT:    lui a0, %hi(.LCPI39_0)
+; V8F16ZVFH-NEXT:    flh fa5, %lo(.LCPI39_0)(a0)
+; V8F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFH-NEXT:    vfabs.v v9, v8
+; V8F16ZVFH-NEXT:    vmflt.vf v0, v9, fa5
+; V8F16ZVFH-NEXT:    fsrmi a0, 2
+; V8F16ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; V8F16ZVFH-NEXT:    fsrm a0
+; V8F16ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; V8F16ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
+; V8F16ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; V8F16ZVFH-NEXT:    ret
+;
+; V8F16ZVFHMIN-LABEL: floor:
+; V8F16ZVFHMIN:       # %bb.0:
+; V8F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfabs.v v8, v10
+; V8F16ZVFHMIN-NEXT:    lui a0, 307200
+; V8F16ZVFHMIN-NEXT:    fmv.w.x fa5, a0
+; V8F16ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
+; V8F16ZVFHMIN-NEXT:    fsrmi a0, 2
+; V8F16ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
+; V8F16ZVFHMIN-NEXT:    fsrm a0
+; V8F16ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; V8F16ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V8F16ZVFHMIN-NEXT:    ret
+;
+; V6F16ZVFH-LABEL: floor:
+; V6F16ZVFH:       # %bb.0:
+; V6F16ZVFH-NEXT:    lui a0, %hi(.LCPI39_0)
+; V6F16ZVFH-NEXT:    flh fa5, %lo(.LCPI39_0)(a0)
+; V6F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFH-NEXT:    vfabs.v v9, v8
+; V6F16ZVFH-NEXT:    vmflt.vf v0, v9, fa5
+; V6F16ZVFH-NEXT:    fsrmi a0, 2
+; V6F16ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; V6F16ZVFH-NEXT:    fsrm a0
+; V6F16ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; V6F16ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
+; V6F16ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; V6F16ZVFH-NEXT:    ret
+;
+; V6F16ZVFHMIN-LABEL: floor:
+; V6F16ZVFHMIN:       # %bb.0:
+; V6F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfabs.v v8, v10
+; V6F16ZVFHMIN-NEXT:    lui a0, 307200
+; V6F16ZVFHMIN-NEXT:    fmv.w.x fa5, a0
+; V6F16ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
+; V6F16ZVFHMIN-NEXT:    fsrmi a0, 2
+; V6F16ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
+; V6F16ZVFHMIN-NEXT:    fsrm a0
+; V6F16ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; V6F16ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V6F16ZVFHMIN-NEXT:    ret
+;
+; V4F32-LABEL: floor:
+; V4F32:       # %bb.0:
+; V4F32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; V4F32-NEXT:    vfabs.v v9, v8
+; V4F32-NEXT:    lui a0, 307200
+; V4F32-NEXT:    fmv.w.x fa5, a0
+; V4F32-NEXT:    vmflt.vf v0, v9, fa5
+; V4F32-NEXT:    fsrmi a0, 2
+; V4F32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; V4F32-NEXT:    fsrm a0
+; V4F32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; V4F32-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
+; V4F32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; V4F32-NEXT:    ret
+;
+; V6F32-LABEL: floor:
+; V6F32:       # %bb.0:
+; V6F32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; V6F32-NEXT:    vfabs.v v10, v8
+; V6F32-NEXT:    lui a0, 307200
+; V6F32-NEXT:    fmv.w.x fa5, a0
+; V6F32-NEXT:    vmflt.vf v0, v10, fa5
+; V6F32-NEXT:    fsrmi a0, 2
+; V6F32-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; V6F32-NEXT:    fsrm a0
+; V6F32-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; V6F32-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; V6F32-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; V6F32-NEXT:    ret
+;
+; V2F64-LABEL: floor:
+; V2F64:       # %bb.0:
+; V2F64-NEXT:    lui a0, %hi(.LCPI39_0)
+; V2F64-NEXT:    fld fa5, %lo(.LCPI39_0)(a0)
+; V2F64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; V2F64-NEXT:    vfabs.v v9, v8
+; V2F64-NEXT:    vmflt.vf v0, v9, fa5
+; V2F64-NEXT:    fsrmi a0, 2
+; V2F64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; V2F64-NEXT:    fsrm a0
+; V2F64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; V2F64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; V2F64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; V2F64-NEXT:    ret
+;
+; V3F64-LABEL: floor:
+; V3F64:       # %bb.0:
+; V3F64-NEXT:    lui a0, %hi(.LCPI39_0)
+; V3F64-NEXT:    fld fa5, %lo(.LCPI39_0)(a0)
+; V3F64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; V3F64-NEXT:    vfabs.v v10, v8
+; V3F64-NEXT:    vmflt.vf v0, v10, fa5
+; V3F64-NEXT:    fsrmi a0, 2
+; V3F64-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; V3F64-NEXT:    fsrm a0
+; V3F64-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; V3F64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; V3F64-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; V3F64-NEXT:    ret
+  %b = call <N x TY> @llvm.floor(<N x TY> %a)
+  ret <N x TY> %b
+}
+
+define <N x TY> @round(<N x TY> %a) {
+; V8F16ZVFH-LABEL: round:
+; V8F16ZVFH:       # %bb.0:
+; V8F16ZVFH-NEXT:    lui a0, %hi(.LCPI40_0)
+; V8F16ZVFH-NEXT:    flh fa5, %lo(.LCPI40_0)(a0)
+; V8F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFH-NEXT:    vfabs.v v9, v8
+; V8F16ZVFH-NEXT:    vmflt.vf v0, v9, fa5
+; V8F16ZVFH-NEXT:    fsrmi a0, 4
+; V8F16ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; V8F16ZVFH-NEXT:    fsrm a0
+; V8F16ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; V8F16ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
+; V8F16ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; V8F16ZVFH-NEXT:    ret
+;
+; V8F16ZVFHMIN-LABEL: round:
+; V8F16ZVFHMIN:       # %bb.0:
+; V8F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfabs.v v8, v10
+; V8F16ZVFHMIN-NEXT:    lui a0, 307200
+; V8F16ZVFHMIN-NEXT:    fmv.w.x fa5, a0
+; V8F16ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
+; V8F16ZVFHMIN-NEXT:    fsrmi a0, 4
+; V8F16ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
+; V8F16ZVFHMIN-NEXT:    fsrm a0
+; V8F16ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; V8F16ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V8F16ZVFHMIN-NEXT:    ret
+;
+; V6F16ZVFH-LABEL: round:
+; V6F16ZVFH:       # %bb.0:
+; V6F16ZVFH-NEXT:    lui a0, %hi(.LCPI40_0)
+; V6F16ZVFH-NEXT:    flh fa5, %lo(.LCPI40_0)(a0)
+; V6F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFH-NEXT:    vfabs.v v9, v8
+; V6F16ZVFH-NEXT:    vmflt.vf v0, v9, fa5
+; V6F16ZVFH-NEXT:    fsrmi a0, 4
+; V6F16ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; V6F16ZVFH-NEXT:    fsrm a0
+; V6F16ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; V6F16ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
+; V6F16ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; V6F16ZVFH-NEXT:    ret
+;
+; V6F16ZVFHMIN-LABEL: round:
+; V6F16ZVFHMIN:       # %bb.0:
+; V6F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfabs.v v8, v10
+; V6F16ZVFHMIN-NEXT:    lui a0, 307200
+; V6F16ZVFHMIN-NEXT:    fmv.w.x fa5, a0
+; V6F16ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
+; V6F16ZVFHMIN-NEXT:    fsrmi a0, 4
+; V6F16ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
+; V6F16ZVFHMIN-NEXT:    fsrm a0
+; V6F16ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; V6F16ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V6F16ZVFHMIN-NEXT:    ret
+;
+; V4F32-LABEL: round:
+; V4F32:       # %bb.0:
+; V4F32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; V4F32-NEXT:    vfabs.v v9, v8
+; V4F32-NEXT:    lui a0, 307200
+; V4F32-NEXT:    fmv.w.x fa5, a0
+; V4F32-NEXT:    vmflt.vf v0, v9, fa5
+; V4F32-NEXT:    fsrmi a0, 4
+; V4F32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; V4F32-NEXT:    fsrm a0
+; V4F32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; V4F32-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
+; V4F32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; V4F32-NEXT:    ret
+;
+; V6F32-LABEL: round:
+; V6F32:       # %bb.0:
+; V6F32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; V6F32-NEXT:    vfabs.v v10, v8
+; V6F32-NEXT:    lui a0, 307200
+; V6F32-NEXT:    fmv.w.x fa5, a0
+; V6F32-NEXT:    vmflt.vf v0, v10, fa5
+; V6F32-NEXT:    fsrmi a0, 4
+; V6F32-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; V6F32-NEXT:    fsrm a0
+; V6F32-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; V6F32-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; V6F32-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; V6F32-NEXT:    ret
+;
+; V2F64-LABEL: round:
+; V2F64:       # %bb.0:
+; V2F64-NEXT:    lui a0, %hi(.LCPI40_0)
+; V2F64-NEXT:    fld fa5, %lo(.LCPI40_0)(a0)
+; V2F64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; V2F64-NEXT:    vfabs.v v9, v8
+; V2F64-NEXT:    vmflt.vf v0, v9, fa5
+; V2F64-NEXT:    fsrmi a0, 4
+; V2F64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; V2F64-NEXT:    fsrm a0
+; V2F64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; V2F64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; V2F64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; V2F64-NEXT:    ret
+;
+; V3F64-LABEL: round:
+; V3F64:       # %bb.0:
+; V3F64-NEXT:    lui a0, %hi(.LCPI40_0)
+; V3F64-NEXT:    fld fa5, %lo(.LCPI40_0)(a0)
+; V3F64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; V3F64-NEXT:    vfabs.v v10, v8
+; V3F64-NEXT:    vmflt.vf v0, v10, fa5
+; V3F64-NEXT:    fsrmi a0, 4
+; V3F64-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; V3F64-NEXT:    fsrm a0
+; V3F64-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; V3F64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; V3F64-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; V3F64-NEXT:    ret
+  %b = call <N x TY> @llvm.round(<N x TY> %a)
+  ret <N x TY> %b
+}
+
+define <N x TY> @roundeven(<N x TY> %a) {
+  %b = call <N x TY> @llvm.roundeven(<N x TY> %a)
+  ret <N x TY> %b
+}
+
+define <N x TY> @rint(<N x TY> %a) {
+; V8F16ZVFH-LABEL: rint:
+; V8F16ZVFH:       # %bb.0:
+; V8F16ZVFH-NEXT:    lui a0, %hi(.LCPI41_0)
+; V8F16ZVFH-NEXT:    flh fa5, %lo(.LCPI41_0)(a0)
+; V8F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFH-NEXT:    vfabs.v v9, v8
+; V8F16ZVFH-NEXT:    vmflt.vf v0, v9, fa5
+; V8F16ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; V8F16ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; V8F16ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
+; V8F16ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; V8F16ZVFH-NEXT:    ret
+;
+; V8F16ZVFHMIN-LABEL: rint:
+; V8F16ZVFHMIN:       # %bb.0:
+; V8F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfabs.v v8, v10
+; V8F16ZVFHMIN-NEXT:    lui a0, 307200
+; V8F16ZVFHMIN-NEXT:    fmv.w.x fa5, a0
+; V8F16ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
+; V8F16ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
+; V8F16ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; V8F16ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V8F16ZVFHMIN-NEXT:    ret
+;
+; V6F16ZVFH-LABEL: rint:
+; V6F16ZVFH:       # %bb.0:
+; V6F16ZVFH-NEXT:    lui a0, %hi(.LCPI41_0)
+; V6F16ZVFH-NEXT:    flh fa5, %lo(.LCPI41_0)(a0)
+; V6F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFH-NEXT:    vfabs.v v9, v8
+; V6F16ZVFH-NEXT:    vmflt.vf v0, v9, fa5
+; V6F16ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; V6F16ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; V6F16ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
+; V6F16ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; V6F16ZVFH-NEXT:    ret
+;
+; V6F16ZVFHMIN-LABEL: rint:
+; V6F16ZVFHMIN:       # %bb.0:
+; V6F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfabs.v v8, v10
+; V6F16ZVFHMIN-NEXT:    lui a0, 307200
+; V6F16ZVFHMIN-NEXT:    fmv.w.x fa5, a0
+; V6F16ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
+; V6F16ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
+; V6F16ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; V6F16ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V6F16ZVFHMIN-NEXT:    ret
+;
+; V4F32-LABEL: rint:
+; V4F32:       # %bb.0:
+; V4F32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; V4F32-NEXT:    vfabs.v v9, v8
+; V4F32-NEXT:    lui a0, 307200
+; V4F32-NEXT:    fmv.w.x fa5, a0
+; V4F32-NEXT:    vmflt.vf v0, v9, fa5
+; V4F32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; V4F32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; V4F32-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
+; V4F32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; V4F32-NEXT:    ret
+;
+; V6F32-LABEL: rint:
+; V6F32:       # %bb.0:
+; V6F32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; V6F32-NEXT:    vfabs.v v10, v8
+; V6F32-NEXT:    lui a0, 307200
+; V6F32-NEXT:    fmv.w.x fa5, a0
+; V6F32-NEXT:    vmflt.vf v0, v10, fa5
+; V6F32-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; V6F32-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; V6F32-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; V6F32-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; V6F32-NEXT:    ret
+;
+; V2F64-LABEL: rint:
+; V2F64:       # %bb.0:
+; V2F64-NEXT:    lui a0, %hi(.LCPI41_0)
+; V2F64-NEXT:    fld fa5, %lo(.LCPI41_0)(a0)
+; V2F64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; V2F64-NEXT:    vfabs.v v9, v8
+; V2F64-NEXT:    vmflt.vf v0, v9, fa5
+; V2F64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; V2F64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; V2F64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; V2F64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; V2F64-NEXT:    ret
+;
+; V3F64-LABEL: rint:
+; V3F64:       # %bb.0:
+; V3F64-NEXT:    lui a0, %hi(.LCPI41_0)
+; V3F64-NEXT:    fld fa5, %lo(.LCPI41_0)(a0)
+; V3F64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; V3F64-NEXT:    vfabs.v v10, v8
+; V3F64-NEXT:    vmflt.vf v0, v10, fa5
+; V3F64-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; V3F64-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; V3F64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; V3F64-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; V3F64-NEXT:    ret
+  %b = call <N x TY> @llvm.rint(<N x TY> %a)
+  ret <N x TY> %b
+}
+
+define <N x TY> @nearbyint(<N x TY> %a) {
+; V8F16ZVFH-LABEL: nearbyint:
+; V8F16ZVFH:       # %bb.0:
+; V8F16ZVFH-NEXT:    lui a0, %hi(.LCPI42_0)
+; V8F16ZVFH-NEXT:    flh fa5, %lo(.LCPI42_0)(a0)
+; V8F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFH-NEXT:    vfabs.v v9, v8
+; V8F16ZVFH-NEXT:    vmflt.vf v0, v9, fa5
+; V8F16ZVFH-NEXT:    frflags a0
+; V8F16ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; V8F16ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; V8F16ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
+; V8F16ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; V8F16ZVFH-NEXT:    fsflags a0
+; V8F16ZVFH-NEXT:    ret
+;
+; V8F16ZVFHMIN-LABEL: nearbyint:
+; V8F16ZVFHMIN:       # %bb.0:
+; V8F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfabs.v v8, v10
+; V8F16ZVFHMIN-NEXT:    lui a0, 307200
+; V8F16ZVFHMIN-NEXT:    fmv.w.x fa5, a0
+; V8F16ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
+; V8F16ZVFHMIN-NEXT:    frflags a0
+; V8F16ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
+; V8F16ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; V8F16ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V8F16ZVFHMIN-NEXT:    fsflags a0
+; V8F16ZVFHMIN-NEXT:    ret
+;
+; V6F16ZVFH-LABEL: nearbyint:
+; V6F16ZVFH:       # %bb.0:
+; V6F16ZVFH-NEXT:    lui a0, %hi(.LCPI42_0)
+; V6F16ZVFH-NEXT:    flh fa5, %lo(.LCPI42_0)(a0)
+; V6F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFH-NEXT:    vfabs.v v9, v8
+; V6F16ZVFH-NEXT:    vmflt.vf v0, v9, fa5
+; V6F16ZVFH-NEXT:    frflags a0
+; V6F16ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; V6F16ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; V6F16ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
+; V6F16ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; V6F16ZVFH-NEXT:    fsflags a0
+; V6F16ZVFH-NEXT:    ret
+;
+; V6F16ZVFHMIN-LABEL: nearbyint:
+; V6F16ZVFHMIN:       # %bb.0:
+; V6F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfabs.v v8, v10
+; V6F16ZVFHMIN-NEXT:    lui a0, 307200
+; V6F16ZVFHMIN-NEXT:    fmv.w.x fa5, a0
+; V6F16ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
+; V6F16ZVFHMIN-NEXT:    frflags a0
+; V6F16ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v10, v0.t
+; V6F16ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; V6F16ZVFHMIN-NEXT:    vfsgnj.vv v10, v8, v10, v0.t
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V6F16ZVFHMIN-NEXT:    fsflags a0
+; V6F16ZVFHMIN-NEXT:    ret
+;
+; V4F32-LABEL: nearbyint:
+; V4F32:       # %bb.0:
+; V4F32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; V4F32-NEXT:    vfabs.v v9, v8
+; V4F32-NEXT:    lui a0, 307200
+; V4F32-NEXT:    fmv.w.x fa5, a0
+; V4F32-NEXT:    vmflt.vf v0, v9, fa5
+; V4F32-NEXT:    frflags a0
+; V4F32-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; V4F32-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; V4F32-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
+; V4F32-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; V4F32-NEXT:    fsflags a0
+; V4F32-NEXT:    ret
+;
+; V6F32-LABEL: nearbyint:
+; V6F32:       # %bb.0:
+; V6F32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; V6F32-NEXT:    vfabs.v v10, v8
+; V6F32-NEXT:    lui a0, 307200
+; V6F32-NEXT:    fmv.w.x fa5, a0
+; V6F32-NEXT:    vmflt.vf v0, v10, fa5
+; V6F32-NEXT:    frflags a0
+; V6F32-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; V6F32-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; V6F32-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; V6F32-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; V6F32-NEXT:    fsflags a0
+; V6F32-NEXT:    ret
+;
+; V2F64-LABEL: nearbyint:
+; V2F64:       # %bb.0:
+; V2F64-NEXT:    lui a0, %hi(.LCPI42_0)
+; V2F64-NEXT:    fld fa5, %lo(.LCPI42_0)(a0)
+; V2F64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; V2F64-NEXT:    vfabs.v v9, v8
+; V2F64-NEXT:    vmflt.vf v0, v9, fa5
+; V2F64-NEXT:    frflags a0
+; V2F64-NEXT:    vfcvt.x.f.v v9, v8, v0.t
+; V2F64-NEXT:    vfcvt.f.x.v v9, v9, v0.t
+; V2F64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; V2F64-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
+; V2F64-NEXT:    fsflags a0
+; V2F64-NEXT:    ret
+;
+; V3F64-LABEL: nearbyint:
+; V3F64:       # %bb.0:
+; V3F64-NEXT:    lui a0, %hi(.LCPI42_0)
+; V3F64-NEXT:    fld fa5, %lo(.LCPI42_0)(a0)
+; V3F64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; V3F64-NEXT:    vfabs.v v10, v8
+; V3F64-NEXT:    vmflt.vf v0, v10, fa5
+; V3F64-NEXT:    frflags a0
+; V3F64-NEXT:    vfcvt.x.f.v v10, v8, v0.t
+; V3F64-NEXT:    vfcvt.f.x.v v10, v10, v0.t
+; V3F64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; V3F64-NEXT:    vfsgnj.vv v8, v10, v8, v0.t
+; V3F64-NEXT:    fsflags a0
+; V3F64-NEXT:    ret
+  %b = call <N x TY> @llvm.nearbyint(<N x TY> %a)
+  ret <N x TY> %b
+}
+
+define <N x TY> @fmuladd_vfmacc_vv(<N x TY> %a, <N x TY> %b, <N x TY> %c) {
+; V8F16ZVFH-LABEL: fmuladd_vfmacc_vv:
+; V8F16ZVFH:       # %bb.0:
+; V8F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFH-NEXT:    vfmacc.vv v8, v10, v9
+; V8F16ZVFH-NEXT:    ret
+;
+; V8F16ZVFHMIN-LABEL: fmuladd_vfmacc_vv:
+; V8F16ZVFHMIN:       # %bb.0:
+; V8F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v10
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfmul.vv v10, v14, v12
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v10
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfadd.vv v10, v10, v12
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V8F16ZVFHMIN-NEXT:    ret
+;
+; V6F16ZVFH-LABEL: fmuladd_vfmacc_vv:
+; V6F16ZVFH:       # %bb.0:
+; V6F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFH-NEXT:    vfmacc.vv v8, v10, v9
+; V6F16ZVFH-NEXT:    ret
+;
+; V6F16ZVFHMIN-LABEL: fmuladd_vfmacc_vv:
+; V6F16ZVFHMIN:       # %bb.0:
+; V6F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v10
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfmul.vv v10, v14, v12
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v10
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfadd.vv v10, v10, v12
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V6F16ZVFHMIN-NEXT:    ret
+;
+; V4F32-LABEL: fmuladd_vfmacc_vv:
+; V4F32:       # %bb.0:
+; V4F32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; V4F32-NEXT:    vfmacc.vv v8, v10, v9
+; V4F32-NEXT:    ret
+;
+; V6F32-LABEL: fmuladd_vfmacc_vv:
+; V6F32:       # %bb.0:
+; V6F32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; V6F32-NEXT:    vfmacc.vv v8, v12, v10
+; V6F32-NEXT:    ret
+;
+; V2F64-LABEL: fmuladd_vfmacc_vv:
+; V2F64:       # %bb.0:
+; V2F64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; V2F64-NEXT:    vfmacc.vv v8, v10, v9
+; V2F64-NEXT:    ret
+;
+; V3F64-LABEL: fmuladd_vfmacc_vv:
+; V3F64:       # %bb.0:
+; V3F64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; V3F64-NEXT:    vfmacc.vv v8, v12, v10
+; V3F64-NEXT:    ret
+  %d = call <N x TY> @llvm.fmuladd(<N x TY> %c, <N x TY> %b, <N x TY> %a)
+  ret <N x TY> %d
+}
+
+define <N x TY> @fmuladd_vfmacc_vf(<N x TY> %a, TY %b, <N x TY> %c) {
+; V8F16ZVFH-LABEL: fmuladd_vfmacc_vf:
+; V8F16ZVFH:       # %bb.0:
+; V8F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFH-NEXT:    vfmacc.vf v8, fa0, v9
+; V8F16ZVFH-NEXT:    ret
+;
+; V8F16ZVFHMIN-LABEL: fmuladd_vfmacc_vf:
+; V8F16ZVFHMIN:       # %bb.0:
+; V8F16ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; V8F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v10
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfmul.vv v10, v12, v14
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v10
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfadd.vv v10, v10, v12
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V8F16ZVFHMIN-NEXT:    ret
+;
+; V6F16ZVFH-LABEL: fmuladd_vfmacc_vf:
+; V6F16ZVFH:       # %bb.0:
+; V6F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFH-NEXT:    vfmacc.vf v8, fa0, v9
+; V6F16ZVFH-NEXT:    ret
+;
+; V6F16ZVFHMIN-LABEL: fmuladd_vfmacc_vf:
+; V6F16ZVFHMIN:       # %bb.0:
+; V6F16ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; V6F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v10
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfmul.vv v10, v12, v14
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v10
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfadd.vv v10, v10, v12
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V6F16ZVFHMIN-NEXT:    ret
+;
+; V4F32-LABEL: fmuladd_vfmacc_vf:
+; V4F32:       # %bb.0:
+; V4F32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; V4F32-NEXT:    vfmacc.vf v8, fa0, v9
+; V4F32-NEXT:    ret
+;
+; V6F32-LABEL: fmuladd_vfmacc_vf:
+; V6F32:       # %bb.0:
+; V6F32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; V6F32-NEXT:    vfmacc.vf v8, fa0, v10
+; V6F32-NEXT:    ret
+;
+; V2F64-LABEL: fmuladd_vfmacc_vf:
+; V2F64:       # %bb.0:
+; V2F64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; V2F64-NEXT:    vfmacc.vf v8, fa0, v9
+; V2F64-NEXT:    ret
+;
+; V3F64-LABEL: fmuladd_vfmacc_vf:
+; V3F64:       # %bb.0:
+; V3F64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; V3F64-NEXT:    vfmacc.vf v8, fa0, v10
+; V3F64-NEXT:    ret
+  %b.head = insertelement <N x TY> poison, TY %b, i32 0
+  %b.splat = shufflevector <N x TY> %b.head, <N x TY> poison, <N x i32> zeroinitializer
+  %d = call <N x TY> @llvm.fmuladd(<N x TY> %c, <N x TY> %b.splat, <N x TY> %a)
+  ret <N x TY> %d
+}
+
+define <N x TY> @fmuladd_vfnmacc_vv(<N x TY> %a, <N x TY> %b, <N x TY> %c) {
+; V8F16ZVFH-LABEL: fmuladd_vfnmacc_vv:
+; V8F16ZVFH:       # %bb.0:
+; V8F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFH-NEXT:    vfnmacc.vv v8, v10, v9
+; V8F16ZVFH-NEXT:    ret
+;
+; V8F16ZVFHMIN-LABEL: fmuladd_vfnmacc_vv:
+; V8F16ZVFHMIN:       # %bb.0:
+; V8F16ZVFHMIN-NEXT:    lui a0, 8
+; V8F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vxor.vx v10, v10, a0
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfmul.vv v10, v12, v10
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v10
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfsub.vv v10, v10, v12
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V8F16ZVFHMIN-NEXT:    ret
+;
+; V6F16ZVFH-LABEL: fmuladd_vfnmacc_vv:
+; V6F16ZVFH:       # %bb.0:
+; V6F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFH-NEXT:    vfnmacc.vv v8, v10, v9
+; V6F16ZVFH-NEXT:    ret
+;
+; V6F16ZVFHMIN-LABEL: fmuladd_vfnmacc_vv:
+; V6F16ZVFHMIN:       # %bb.0:
+; V6F16ZVFHMIN-NEXT:    lui a0, 8
+; V6F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vxor.vx v10, v10, a0
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfmul.vv v10, v12, v10
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v10
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfsub.vv v10, v10, v12
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V6F16ZVFHMIN-NEXT:    ret
+;
+; V4F32-LABEL: fmuladd_vfnmacc_vv:
+; V4F32:       # %bb.0:
+; V4F32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; V4F32-NEXT:    vfnmacc.vv v8, v10, v9
+; V4F32-NEXT:    ret
+;
+; V6F32-LABEL: fmuladd_vfnmacc_vv:
+; V6F32:       # %bb.0:
+; V6F32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; V6F32-NEXT:    vfnmacc.vv v8, v12, v10
+; V6F32-NEXT:    ret
+;
+; V2F64-LABEL: fmuladd_vfnmacc_vv:
+; V2F64:       # %bb.0:
+; V2F64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; V2F64-NEXT:    vfnmacc.vv v8, v10, v9
+; V2F64-NEXT:    ret
+;
+; V3F64-LABEL: fmuladd_vfnmacc_vv:
+; V3F64:       # %bb.0:
+; V3F64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; V3F64-NEXT:    vfnmacc.vv v8, v12, v10
+; V3F64-NEXT:    ret
+  %a.neg = fneg <N x TY> %a
+  %c.neg = fneg <N x TY> %c
+  %d = call <N x TY> @llvm.fmuladd(<N x TY> %c.neg, <N x TY> %b, <N x TY> %a.neg)
+  ret <N x TY> %d
+}
+
+define <N x TY> @fmuladd_vfnmacc_vf(<N x TY> %a, TY %b, <N x TY> %c) {
+; V8F16ZVFH-LABEL: fmuladd_vfnmacc_vf:
+; V8F16ZVFH:       # %bb.0:
+; V8F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFH-NEXT:    vfnmacc.vf v8, fa0, v9
+; V8F16ZVFH-NEXT:    ret
+;
+; V8F16ZVFHMIN-LABEL: fmuladd_vfnmacc_vf:
+; V8F16ZVFHMIN:       # %bb.0:
+; V8F16ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; V8F16ZVFHMIN-NEXT:    lui a1, 8
+; V8F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vxor.vx v9, v9, a1
+; V8F16ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v10
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfmul.vv v10, v12, v14
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v10
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfsub.vv v10, v10, v12
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V8F16ZVFHMIN-NEXT:    ret
+;
+; V6F16ZVFH-LABEL: fmuladd_vfnmacc_vf:
+; V6F16ZVFH:       # %bb.0:
+; V6F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFH-NEXT:    vfnmacc.vf v8, fa0, v9
+; V6F16ZVFH-NEXT:    ret
+;
+; V6F16ZVFHMIN-LABEL: fmuladd_vfnmacc_vf:
+; V6F16ZVFHMIN:       # %bb.0:
+; V6F16ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; V6F16ZVFHMIN-NEXT:    lui a1, 8
+; V6F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vxor.vx v9, v9, a1
+; V6F16ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v10
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfmul.vv v10, v12, v14
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v10
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfsub.vv v10, v10, v12
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V6F16ZVFHMIN-NEXT:    ret
+;
+; V4F32-LABEL: fmuladd_vfnmacc_vf:
+; V4F32:       # %bb.0:
+; V4F32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; V4F32-NEXT:    vfnmacc.vf v8, fa0, v9
+; V4F32-NEXT:    ret
+;
+; V6F32-LABEL: fmuladd_vfnmacc_vf:
+; V6F32:       # %bb.0:
+; V6F32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; V6F32-NEXT:    vfnmacc.vf v8, fa0, v10
+; V6F32-NEXT:    ret
+;
+; V2F64-LABEL: fmuladd_vfnmacc_vf:
+; V2F64:       # %bb.0:
+; V2F64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; V2F64-NEXT:    vfnmacc.vf v8, fa0, v9
+; V2F64-NEXT:    ret
+;
+; V3F64-LABEL: fmuladd_vfnmacc_vf:
+; V3F64:       # %bb.0:
+; V3F64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; V3F64-NEXT:    vfnmacc.vf v8, fa0, v10
+; V3F64-NEXT:    ret
+  %a.neg = fneg <N x TY> %a
+  %c.neg = fneg <N x TY> %c
+  %b.head = insertelement <N x TY> poison, TY %b, i32 0
+  %b.splat = shufflevector <N x TY> %b.head, <N x TY> poison, <N x i32> zeroinitializer
+  %d = call <N x TY> @llvm.fmuladd(<N x TY> %c.neg, <N x TY> %b.splat, <N x TY> %a.neg)
+  ret <N x TY> %d
+}
+
+define <N x TY> @fmuladd_vfmsac_vv(<N x TY> %a, <N x TY> %b, <N x TY> %c) {
+; V8F16ZVFH-LABEL: fmuladd_vfmsac_vv:
+; V8F16ZVFH:       # %bb.0:
+; V8F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFH-NEXT:    vfmsac.vv v8, v10, v9
+; V8F16ZVFH-NEXT:    ret
+;
+; V8F16ZVFHMIN-LABEL: fmuladd_vfmsac_vv:
+; V8F16ZVFHMIN:       # %bb.0:
+; V8F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v10
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfmul.vv v10, v14, v12
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v10
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfsub.vv v10, v10, v12
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V8F16ZVFHMIN-NEXT:    ret
+;
+; V6F16ZVFH-LABEL: fmuladd_vfmsac_vv:
+; V6F16ZVFH:       # %bb.0:
+; V6F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFH-NEXT:    vfmsac.vv v8, v10, v9
+; V6F16ZVFH-NEXT:    ret
+;
+; V6F16ZVFHMIN-LABEL: fmuladd_vfmsac_vv:
+; V6F16ZVFHMIN:       # %bb.0:
+; V6F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v10
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfmul.vv v10, v14, v12
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v10
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfsub.vv v10, v10, v12
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V6F16ZVFHMIN-NEXT:    ret
+;
+; V4F32-LABEL: fmuladd_vfmsac_vv:
+; V4F32:       # %bb.0:
+; V4F32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; V4F32-NEXT:    vfmsac.vv v8, v10, v9
+; V4F32-NEXT:    ret
+;
+; V6F32-LABEL: fmuladd_vfmsac_vv:
+; V6F32:       # %bb.0:
+; V6F32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; V6F32-NEXT:    vfmsac.vv v8, v12, v10
+; V6F32-NEXT:    ret
+;
+; V2F64-LABEL: fmuladd_vfmsac_vv:
+; V2F64:       # %bb.0:
+; V2F64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; V2F64-NEXT:    vfmsac.vv v8, v10, v9
+; V2F64-NEXT:    ret
+;
+; V3F64-LABEL: fmuladd_vfmsac_vv:
+; V3F64:       # %bb.0:
+; V3F64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; V3F64-NEXT:    vfmsac.vv v8, v12, v10
+; V3F64-NEXT:    ret
+  %a.neg = fneg <N x TY> %a
+  %d = call <N x TY> @llvm.fmuladd(<N x TY> %c, <N x TY> %b, <N x TY> %a.neg)
+  ret <N x TY> %d
+}
+
+define <N x TY> @fmuladd_vfmsac_vf(<N x TY> %a, TY %b, <N x TY> %c) {
+; V8F16ZVFH-LABEL: fmuladd_vfmsac_vf:
+; V8F16ZVFH:       # %bb.0:
+; V8F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFH-NEXT:    vfmsac.vf v8, fa0, v9
+; V8F16ZVFH-NEXT:    ret
+;
+; V8F16ZVFHMIN-LABEL: fmuladd_vfmsac_vf:
+; V8F16ZVFHMIN:       # %bb.0:
+; V8F16ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; V8F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v10
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfmul.vv v10, v12, v14
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v10
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfsub.vv v10, v10, v12
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V8F16ZVFHMIN-NEXT:    ret
+;
+; V6F16ZVFH-LABEL: fmuladd_vfmsac_vf:
+; V6F16ZVFH:       # %bb.0:
+; V6F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFH-NEXT:    vfmsac.vf v8, fa0, v9
+; V6F16ZVFH-NEXT:    ret
+;
+; V6F16ZVFHMIN-LABEL: fmuladd_vfmsac_vf:
+; V6F16ZVFHMIN:       # %bb.0:
+; V6F16ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; V6F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v10
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfmul.vv v10, v12, v14
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v10
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfsub.vv v10, v10, v12
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V6F16ZVFHMIN-NEXT:    ret
+;
+; V4F32-LABEL: fmuladd_vfmsac_vf:
+; V4F32:       # %bb.0:
+; V4F32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; V4F32-NEXT:    vfmsac.vf v8, fa0, v9
+; V4F32-NEXT:    ret
+;
+; V6F32-LABEL: fmuladd_vfmsac_vf:
+; V6F32:       # %bb.0:
+; V6F32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; V6F32-NEXT:    vfmsac.vf v8, fa0, v10
+; V6F32-NEXT:    ret
+;
+; V2F64-LABEL: fmuladd_vfmsac_vf:
+; V2F64:       # %bb.0:
+; V2F64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; V2F64-NEXT:    vfmsac.vf v8, fa0, v9
+; V2F64-NEXT:    ret
+;
+; V3F64-LABEL: fmuladd_vfmsac_vf:
+; V3F64:       # %bb.0:
+; V3F64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; V3F64-NEXT:    vfmsac.vf v8, fa0, v10
+; V3F64-NEXT:    ret
+  %a.neg = fneg <N x TY> %a
+  %b.head = insertelement <N x TY> poison, TY %b, i32 0
+  %b.splat = shufflevector <N x TY> %b.head, <N x TY> poison, <N x i32> zeroinitializer
+  %d = call <N x TY> @llvm.fmuladd(<N x TY> %c, <N x TY> %b.splat, <N x TY> %a.neg)
+  ret <N x TY> %d
+}
+
+define <N x TY> @fmuladd_vfnmsac_vv(<N x TY> %a, <N x TY> %b, <N x TY> %c) {
+; V8F16ZVFH-LABEL: fmuladd_vfnmsac_vv:
+; V8F16ZVFH:       # %bb.0:
+; V8F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFH-NEXT:    vfnmsac.vv v8, v10, v9
+; V8F16ZVFH-NEXT:    ret
+;
+; V8F16ZVFHMIN-LABEL: fmuladd_vfnmsac_vv:
+; V8F16ZVFHMIN:       # %bb.0:
+; V8F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v10
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfmul.vv v10, v14, v12
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v10
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfsub.vv v10, v12, v10
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V8F16ZVFHMIN-NEXT:    ret
+;
+; V6F16ZVFH-LABEL: fmuladd_vfnmsac_vv:
+; V6F16ZVFH:       # %bb.0:
+; V6F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFH-NEXT:    vfnmsac.vv v8, v10, v9
+; V6F16ZVFH-NEXT:    ret
+;
+; V6F16ZVFHMIN-LABEL: fmuladd_vfnmsac_vv:
+; V6F16ZVFHMIN:       # %bb.0:
+; V6F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v10
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfmul.vv v10, v14, v12
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v10
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfsub.vv v10, v12, v10
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V6F16ZVFHMIN-NEXT:    ret
+;
+; V4F32-LABEL: fmuladd_vfnmsac_vv:
+; V4F32:       # %bb.0:
+; V4F32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; V4F32-NEXT:    vfnmsac.vv v8, v10, v9
+; V4F32-NEXT:    ret
+;
+; V6F32-LABEL: fmuladd_vfnmsac_vv:
+; V6F32:       # %bb.0:
+; V6F32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; V6F32-NEXT:    vfnmsac.vv v8, v12, v10
+; V6F32-NEXT:    ret
+;
+; V2F64-LABEL: fmuladd_vfnmsac_vv:
+; V2F64:       # %bb.0:
+; V2F64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; V2F64-NEXT:    vfnmsac.vv v8, v10, v9
+; V2F64-NEXT:    ret
+;
+; V3F64-LABEL: fmuladd_vfnmsac_vv:
+; V3F64:       # %bb.0:
+; V3F64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; V3F64-NEXT:    vfnmsac.vv v8, v12, v10
+; V3F64-NEXT:    ret
+  %c.neg = fneg <N x TY> %c
+  %d = call <N x TY> @llvm.fmuladd(<N x TY> %c.neg, <N x TY> %b, <N x TY> %a)
+  ret <N x TY> %d
+}
+
+define <N x TY> @fmuladd_vfnmsac_vf(<N x TY> %a, TY %b, <N x TY> %c) {
+; V8F16ZVFH-LABEL: fmuladd_vfnmsac_vf:
+; V8F16ZVFH:       # %bb.0:
+; V8F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFH-NEXT:    vfnmsac.vf v8, fa0, v9
+; V8F16ZVFH-NEXT:    ret
+;
+; V8F16ZVFHMIN-LABEL: fmuladd_vfnmsac_vf:
+; V8F16ZVFHMIN:       # %bb.0:
+; V8F16ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; V8F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v10
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfmul.vv v10, v12, v14
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v10
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfsub.vv v10, v12, v10
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V8F16ZVFHMIN-NEXT:    ret
+;
+; V6F16ZVFH-LABEL: fmuladd_vfnmsac_vf:
+; V6F16ZVFH:       # %bb.0:
+; V6F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFH-NEXT:    vfnmsac.vf v8, fa0, v9
+; V6F16ZVFH-NEXT:    ret
+;
+; V6F16ZVFHMIN-LABEL: fmuladd_vfnmsac_vf:
+; V6F16ZVFHMIN:       # %bb.0:
+; V6F16ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; V6F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v10
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfmul.vv v10, v12, v14
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v10
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfsub.vv v10, v12, v10
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V6F16ZVFHMIN-NEXT:    ret
+;
+; V4F32-LABEL: fmuladd_vfnmsac_vf:
+; V4F32:       # %bb.0:
+; V4F32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; V4F32-NEXT:    vfnmsac.vf v8, fa0, v9
+; V4F32-NEXT:    ret
+;
+; V6F32-LABEL: fmuladd_vfnmsac_vf:
+; V6F32:       # %bb.0:
+; V6F32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; V6F32-NEXT:    vfnmsac.vf v8, fa0, v10
+; V6F32-NEXT:    ret
+;
+; V2F64-LABEL: fmuladd_vfnmsac_vf:
+; V2F64:       # %bb.0:
+; V2F64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; V2F64-NEXT:    vfnmsac.vf v8, fa0, v9
+; V2F64-NEXT:    ret
+;
+; V3F64-LABEL: fmuladd_vfnmsac_vf:
+; V3F64:       # %bb.0:
+; V3F64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; V3F64-NEXT:    vfnmsac.vf v8, fa0, v10
+; V3F64-NEXT:    ret
+  %c.neg = fneg <N x TY> %c
+  %b.head = insertelement <N x TY> poison, TY %b, i32 0
+  %b.splat = shufflevector <N x TY> %b.head, <N x TY> poison, <N x i32> zeroinitializer
+  %d = call <N x TY> @llvm.fmuladd(<N x TY> %c.neg, <N x TY> %b.splat, <N x TY> %a)
+  ret <N x TY> %d
+}
+
+define <N x TY> @fmuladd_vfmadd_vv(<N x TY> %a, <N x TY> %b, <N x TY> %c) {
+; V8F16ZVFH-LABEL: fmuladd_vfmadd_vv:
+; V8F16ZVFH:       # %bb.0:
+; V8F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFH-NEXT:    vfmadd.vv v8, v9, v10
+; V8F16ZVFH-NEXT:    ret
+;
+; V8F16ZVFHMIN-LABEL: fmuladd_vfmadd_vv:
+; V8F16ZVFHMIN:       # %bb.0:
+; V8F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfmul.vv v8, v14, v12
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v11, v8
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfadd.vv v10, v8, v12
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V8F16ZVFHMIN-NEXT:    ret
+;
+; V6F16ZVFH-LABEL: fmuladd_vfmadd_vv:
+; V6F16ZVFH:       # %bb.0:
+; V6F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFH-NEXT:    vfmadd.vv v8, v9, v10
+; V6F16ZVFH-NEXT:    ret
+;
+; V6F16ZVFHMIN-LABEL: fmuladd_vfmadd_vv:
+; V6F16ZVFHMIN:       # %bb.0:
+; V6F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfmul.vv v8, v14, v12
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v11, v8
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfadd.vv v10, v8, v12
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V6F16ZVFHMIN-NEXT:    ret
+;
+; V4F32-LABEL: fmuladd_vfmadd_vv:
+; V4F32:       # %bb.0:
+; V4F32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; V4F32-NEXT:    vfmadd.vv v8, v9, v10
+; V4F32-NEXT:    ret
+;
+; V6F32-LABEL: fmuladd_vfmadd_vv:
+; V6F32:       # %bb.0:
+; V6F32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; V6F32-NEXT:    vfmadd.vv v8, v10, v12
+; V6F32-NEXT:    ret
+;
+; V2F64-LABEL: fmuladd_vfmadd_vv:
+; V2F64:       # %bb.0:
+; V2F64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; V2F64-NEXT:    vfmadd.vv v8, v9, v10
+; V2F64-NEXT:    ret
+;
+; V3F64-LABEL: fmuladd_vfmadd_vv:
+; V3F64:       # %bb.0:
+; V3F64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; V3F64-NEXT:    vfmadd.vv v8, v10, v12
+; V3F64-NEXT:    ret
+  %d = call <N x TY> @llvm.fmuladd(<N x TY> %a, <N x TY> %b, <N x TY> %c)
+  ret <N x TY> %d
+}
+
+define <N x TY> @fmuladd_vfmadd_vf(<N x TY> %a, TY %b, <N x TY> %c) {
+; V8F16ZVFH-LABEL: fmuladd_vfmadd_vf:
+; V8F16ZVFH:       # %bb.0:
+; V8F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFH-NEXT:    vfmadd.vf v8, fa0, v9
+; V8F16ZVFH-NEXT:    ret
+;
+; V8F16ZVFHMIN-LABEL: fmuladd_vfmadd_vf:
+; V8F16ZVFHMIN:       # %bb.0:
+; V8F16ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; V8F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v10
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfmul.vv v10, v12, v14
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfadd.vv v10, v10, v12
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V8F16ZVFHMIN-NEXT:    ret
+;
+; V6F16ZVFH-LABEL: fmuladd_vfmadd_vf:
+; V6F16ZVFH:       # %bb.0:
+; V6F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFH-NEXT:    vfmadd.vf v8, fa0, v9
+; V6F16ZVFH-NEXT:    ret
+;
+; V6F16ZVFHMIN-LABEL: fmuladd_vfmadd_vf:
+; V6F16ZVFHMIN:       # %bb.0:
+; V6F16ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; V6F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v10
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfmul.vv v10, v12, v14
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfadd.vv v10, v10, v12
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V6F16ZVFHMIN-NEXT:    ret
+;
+; V4F32-LABEL: fmuladd_vfmadd_vf:
+; V4F32:       # %bb.0:
+; V4F32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; V4F32-NEXT:    vfmadd.vf v8, fa0, v9
+; V4F32-NEXT:    ret
+;
+; V6F32-LABEL: fmuladd_vfmadd_vf:
+; V6F32:       # %bb.0:
+; V6F32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; V6F32-NEXT:    vfmadd.vf v8, fa0, v10
+; V6F32-NEXT:    ret
+;
+; V2F64-LABEL: fmuladd_vfmadd_vf:
+; V2F64:       # %bb.0:
+; V2F64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; V2F64-NEXT:    vfmadd.vf v8, fa0, v9
+; V2F64-NEXT:    ret
+;
+; V3F64-LABEL: fmuladd_vfmadd_vf:
+; V3F64:       # %bb.0:
+; V3F64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; V3F64-NEXT:    vfmadd.vf v8, fa0, v10
+; V3F64-NEXT:    ret
+  %b.head = insertelement <N x TY> poison, TY %b, i32 0
+  %b.splat = shufflevector <N x TY> %b.head, <N x TY> poison, <N x i32> zeroinitializer
+  %d = call <N x TY> @llvm.fmuladd(<N x TY> %a, <N x TY> %b.splat, <N x TY> %c)
+  ret <N x TY> %d
+}
+
+define <N x TY> @fmuladd_vfnmadd_vv(<N x TY> %a, <N x TY> %b, <N x TY> %c) {
+; V8F16ZVFH-LABEL: fmuladd_vfnmadd_vv:
+; V8F16ZVFH:       # %bb.0:
+; V8F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFH-NEXT:    vfnmadd.vv v8, v9, v10
+; V8F16ZVFH-NEXT:    ret
+;
+; V8F16ZVFHMIN-LABEL: fmuladd_vfnmadd_vv:
+; V8F16ZVFHMIN:       # %bb.0:
+; V8F16ZVFHMIN-NEXT:    lui a0, 8
+; V8F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vxor.vx v8, v8, a0
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfmul.vv v8, v12, v14
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v11, v8
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfsub.vv v10, v8, v12
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V8F16ZVFHMIN-NEXT:    ret
+;
+; V6F16ZVFH-LABEL: fmuladd_vfnmadd_vv:
+; V6F16ZVFH:       # %bb.0:
+; V6F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFH-NEXT:    vfnmadd.vv v8, v9, v10
+; V6F16ZVFH-NEXT:    ret
+;
+; V6F16ZVFHMIN-LABEL: fmuladd_vfnmadd_vv:
+; V6F16ZVFHMIN:       # %bb.0:
+; V6F16ZVFHMIN-NEXT:    lui a0, 8
+; V6F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vxor.vx v8, v8, a0
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v9
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfmul.vv v8, v12, v14
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v11, v8
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfsub.vv v10, v8, v12
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V6F16ZVFHMIN-NEXT:    ret
+;
+; V4F32-LABEL: fmuladd_vfnmadd_vv:
+; V4F32:       # %bb.0:
+; V4F32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; V4F32-NEXT:    vfnmadd.vv v8, v9, v10
+; V4F32-NEXT:    ret
+;
+; V6F32-LABEL: fmuladd_vfnmadd_vv:
+; V6F32:       # %bb.0:
+; V6F32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; V6F32-NEXT:    vfnmadd.vv v8, v10, v12
+; V6F32-NEXT:    ret
+;
+; V2F64-LABEL: fmuladd_vfnmadd_vv:
+; V2F64:       # %bb.0:
+; V2F64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; V2F64-NEXT:    vfnmadd.vv v8, v9, v10
+; V2F64-NEXT:    ret
+;
+; V3F64-LABEL: fmuladd_vfnmadd_vv:
+; V3F64:       # %bb.0:
+; V3F64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; V3F64-NEXT:    vfnmadd.vv v8, v10, v12
+; V3F64-NEXT:    ret
+  %a.neg = fneg <N x TY> %a
+  %c.neg = fneg <N x TY> %c
+  %d = call <N x TY> @llvm.fmuladd(<N x TY> %a.neg, <N x TY> %b, <N x TY> %c.neg)
+  ret <N x TY> %d
+}
+
+define <N x TY> @fmuladd_vfnmadd_vf(<N x TY> %a, TY %b, <N x TY> %c) {
+; V8F16ZVFH-LABEL: fmuladd_vfnmadd_vf:
+; V8F16ZVFH:       # %bb.0:
+; V8F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFH-NEXT:    vfnmadd.vf v8, fa0, v9
+; V8F16ZVFH-NEXT:    ret
+;
+; V8F16ZVFHMIN-LABEL: fmuladd_vfnmadd_vf:
+; V8F16ZVFHMIN:       # %bb.0:
+; V8F16ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; V8F16ZVFHMIN-NEXT:    lui a1, 8
+; V8F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vxor.vx v8, v8, a1
+; V8F16ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v10
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfmul.vv v10, v12, v14
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfsub.vv v10, v10, v12
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V8F16ZVFHMIN-NEXT:    ret
+;
+; V6F16ZVFH-LABEL: fmuladd_vfnmadd_vf:
+; V6F16ZVFH:       # %bb.0:
+; V6F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFH-NEXT:    vfnmadd.vf v8, fa0, v9
+; V6F16ZVFH-NEXT:    ret
+;
+; V6F16ZVFHMIN-LABEL: fmuladd_vfnmadd_vf:
+; V6F16ZVFHMIN:       # %bb.0:
+; V6F16ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; V6F16ZVFHMIN-NEXT:    lui a1, 8
+; V6F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vxor.vx v8, v8, a1
+; V6F16ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v10
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfmul.vv v10, v12, v14
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfsub.vv v10, v10, v12
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V6F16ZVFHMIN-NEXT:    ret
+;
+; V4F32-LABEL: fmuladd_vfnmadd_vf:
+; V4F32:       # %bb.0:
+; V4F32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; V4F32-NEXT:    vfnmadd.vf v8, fa0, v9
+; V4F32-NEXT:    ret
+;
+; V6F32-LABEL: fmuladd_vfnmadd_vf:
+; V6F32:       # %bb.0:
+; V6F32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; V6F32-NEXT:    vfnmadd.vf v8, fa0, v10
+; V6F32-NEXT:    ret
+;
+; V2F64-LABEL: fmuladd_vfnmadd_vf:
+; V2F64:       # %bb.0:
+; V2F64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; V2F64-NEXT:    vfnmadd.vf v8, fa0, v9
+; V2F64-NEXT:    ret
+;
+; V3F64-LABEL: fmuladd_vfnmadd_vf:
+; V3F64:       # %bb.0:
+; V3F64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; V3F64-NEXT:    vfnmadd.vf v8, fa0, v10
+; V3F64-NEXT:    ret
+  %a.neg = fneg <N x TY> %a
+  %c.neg = fneg <N x TY> %c
+  %b.head = insertelement <N x TY> poison, TY %b, i32 0
+  %b.splat = shufflevector <N x TY> %b.head, <N x TY> poison, <N x i32> zeroinitializer
+  %d = call <N x TY> @llvm.fmuladd(<N x TY> %a.neg, <N x TY> %b.splat, <N x TY> %c.neg)
+  ret <N x TY> %d
+}
+
+define <N x TY> @fmuladd_vfmsub_vv(<N x TY> %a, <N x TY> %b, <N x TY> %c) {
+; V8F16ZVFH-LABEL: fmuladd_vfmsub_vv:
+; V8F16ZVFH:       # %bb.0:
+; V8F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFH-NEXT:    vfmsub.vv v8, v9, v10
+; V8F16ZVFH-NEXT:    ret
+;
+; V8F16ZVFHMIN-LABEL: fmuladd_vfmsub_vv:
+; V8F16ZVFHMIN:       # %bb.0:
+; V8F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfmul.vv v8, v14, v12
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v11, v8
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfsub.vv v10, v8, v12
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V8F16ZVFHMIN-NEXT:    ret
+;
+; V6F16ZVFH-LABEL: fmuladd_vfmsub_vv:
+; V6F16ZVFH:       # %bb.0:
+; V6F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFH-NEXT:    vfmsub.vv v8, v9, v10
+; V6F16ZVFH-NEXT:    ret
+;
+; V6F16ZVFHMIN-LABEL: fmuladd_vfmsub_vv:
+; V6F16ZVFHMIN:       # %bb.0:
+; V6F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfmul.vv v8, v14, v12
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v11, v8
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfsub.vv v10, v8, v12
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V6F16ZVFHMIN-NEXT:    ret
+;
+; V4F32-LABEL: fmuladd_vfmsub_vv:
+; V4F32:       # %bb.0:
+; V4F32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; V4F32-NEXT:    vfmsub.vv v8, v9, v10
+; V4F32-NEXT:    ret
+;
+; V6F32-LABEL: fmuladd_vfmsub_vv:
+; V6F32:       # %bb.0:
+; V6F32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; V6F32-NEXT:    vfmsub.vv v8, v10, v12
+; V6F32-NEXT:    ret
+;
+; V2F64-LABEL: fmuladd_vfmsub_vv:
+; V2F64:       # %bb.0:
+; V2F64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; V2F64-NEXT:    vfmsub.vv v8, v9, v10
+; V2F64-NEXT:    ret
+;
+; V3F64-LABEL: fmuladd_vfmsub_vv:
+; V3F64:       # %bb.0:
+; V3F64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; V3F64-NEXT:    vfmsub.vv v8, v10, v12
+; V3F64-NEXT:    ret
+  %c.neg = fneg <N x TY> %c
+  %d = call <N x TY> @llvm.fmuladd(<N x TY> %a, <N x TY> %b, <N x TY> %c.neg)
+  ret <N x TY> %d
+}
+
+define <N x TY> @fmuladd_vfmsub_vf(<N x TY> %a, TY %b, <N x TY> %c) {
+; V8F16ZVFH-LABEL: fmuladd_vfmsub_vf:
+; V8F16ZVFH:       # %bb.0:
+; V8F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFH-NEXT:    vfmsub.vf v8, fa0, v9
+; V8F16ZVFH-NEXT:    ret
+;
+; V8F16ZVFHMIN-LABEL: fmuladd_vfmsub_vf:
+; V8F16ZVFHMIN:       # %bb.0:
+; V8F16ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; V8F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v10
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfmul.vv v10, v12, v14
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfsub.vv v10, v10, v12
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V8F16ZVFHMIN-NEXT:    ret
+;
+; V6F16ZVFH-LABEL: fmuladd_vfmsub_vf:
+; V6F16ZVFH:       # %bb.0:
+; V6F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFH-NEXT:    vfmsub.vf v8, fa0, v9
+; V6F16ZVFH-NEXT:    ret
+;
+; V6F16ZVFHMIN-LABEL: fmuladd_vfmsub_vf:
+; V6F16ZVFHMIN:       # %bb.0:
+; V6F16ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; V6F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v10
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfmul.vv v10, v12, v14
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfsub.vv v10, v10, v12
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V6F16ZVFHMIN-NEXT:    ret
+;
+; V4F32-LABEL: fmuladd_vfmsub_vf:
+; V4F32:       # %bb.0:
+; V4F32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; V4F32-NEXT:    vfmsub.vf v8, fa0, v9
+; V4F32-NEXT:    ret
+;
+; V6F32-LABEL: fmuladd_vfmsub_vf:
+; V6F32:       # %bb.0:
+; V6F32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; V6F32-NEXT:    vfmsub.vf v8, fa0, v10
+; V6F32-NEXT:    ret
+;
+; V2F64-LABEL: fmuladd_vfmsub_vf:
+; V2F64:       # %bb.0:
+; V2F64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; V2F64-NEXT:    vfmsub.vf v8, fa0, v9
+; V2F64-NEXT:    ret
+;
+; V3F64-LABEL: fmuladd_vfmsub_vf:
+; V3F64:       # %bb.0:
+; V3F64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; V3F64-NEXT:    vfmsub.vf v8, fa0, v10
+; V3F64-NEXT:    ret
+  %c.neg = fneg <N x TY> %c
+  %b.head = insertelement <N x TY> poison, TY %b, i32 0
+  %b.splat = shufflevector <N x TY> %b.head, <N x TY> poison, <N x i32> zeroinitializer
+  %d = call <N x TY> @llvm.fmuladd(<N x TY> %a, <N x TY> %b.splat, <N x TY> %c.neg)
+  ret <N x TY> %d
+}
+
+define <N x TY> @fmuladd_vfnmsub_vv(<N x TY> %a, <N x TY> %b, <N x TY> %c) {
+; V8F16ZVFH-LABEL: fmuladd_vfnmsub_vv:
+; V8F16ZVFH:       # %bb.0:
+; V8F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFH-NEXT:    vfnmsub.vv v8, v9, v10
+; V8F16ZVFH-NEXT:    ret
+;
+; V8F16ZVFHMIN-LABEL: fmuladd_vfnmsub_vv:
+; V8F16ZVFHMIN:       # %bb.0:
+; V8F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfmul.vv v8, v14, v12
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v11, v8
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfsub.vv v10, v12, v8
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V8F16ZVFHMIN-NEXT:    ret
+;
+; V6F16ZVFH-LABEL: fmuladd_vfnmsub_vv:
+; V6F16ZVFH:       # %bb.0:
+; V6F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFH-NEXT:    vfnmsub.vv v8, v9, v10
+; V6F16ZVFH-NEXT:    ret
+;
+; V6F16ZVFHMIN-LABEL: fmuladd_vfnmsub_vv:
+; V6F16ZVFHMIN:       # %bb.0:
+; V6F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfmul.vv v8, v14, v12
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v11, v8
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfsub.vv v10, v12, v8
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V6F16ZVFHMIN-NEXT:    ret
+;
+; V4F32-LABEL: fmuladd_vfnmsub_vv:
+; V4F32:       # %bb.0:
+; V4F32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; V4F32-NEXT:    vfnmsub.vv v8, v9, v10
+; V4F32-NEXT:    ret
+;
+; V6F32-LABEL: fmuladd_vfnmsub_vv:
+; V6F32:       # %bb.0:
+; V6F32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; V6F32-NEXT:    vfnmsub.vv v8, v10, v12
+; V6F32-NEXT:    ret
+;
+; V2F64-LABEL: fmuladd_vfnmsub_vv:
+; V2F64:       # %bb.0:
+; V2F64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; V2F64-NEXT:    vfnmsub.vv v8, v9, v10
+; V2F64-NEXT:    ret
+;
+; V3F64-LABEL: fmuladd_vfnmsub_vv:
+; V3F64:       # %bb.0:
+; V3F64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; V3F64-NEXT:    vfnmsub.vv v8, v10, v12
+; V3F64-NEXT:    ret
+  %a.neg = fneg <N x TY> %a
+  %d = call <N x TY> @llvm.fmuladd(<N x TY> %a.neg, <N x TY> %b, <N x TY> %c)
+  ret <N x TY> %d
+}
+
+define <N x TY> @fmuladd_vfnmsub_vf(<N x TY> %a, TY %b, <N x TY> %c) {
+; V8F16ZVFH-LABEL: fmuladd_vfnmsub_vf:
+; V8F16ZVFH:       # %bb.0:
+; V8F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFH-NEXT:    vfnmsub.vf v8, fa0, v9
+; V8F16ZVFH-NEXT:    ret
+;
+; V8F16ZVFHMIN-LABEL: fmuladd_vfnmsub_vf:
+; V8F16ZVFHMIN:       # %bb.0:
+; V8F16ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; V8F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v10
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfmul.vv v10, v12, v14
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; V8F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfsub.vv v10, v12, v10
+; V8F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V8F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V8F16ZVFHMIN-NEXT:    ret
+;
+; V6F16ZVFH-LABEL: fmuladd_vfnmsub_vf:
+; V6F16ZVFH:       # %bb.0:
+; V6F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFH-NEXT:    vfnmsub.vf v8, fa0, v9
+; V6F16ZVFH-NEXT:    ret
+;
+; V6F16ZVFHMIN-LABEL: fmuladd_vfnmsub_vf:
+; V6F16ZVFHMIN:       # %bb.0:
+; V6F16ZVFHMIN-NEXT:    fmv.x.w a0, fa0
+; V6F16ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vmv.v.x v10, a0
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v10
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfmul.vv v10, v12, v14
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; V6F16ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfsub.vv v10, v12, v10
+; V6F16ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; V6F16ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v10
+; V6F16ZVFHMIN-NEXT:    ret
+;
+; V4F32-LABEL: fmuladd_vfnmsub_vf:
+; V4F32:       # %bb.0:
+; V4F32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; V4F32-NEXT:    vfnmsub.vf v8, fa0, v9
+; V4F32-NEXT:    ret
+;
+; V6F32-LABEL: fmuladd_vfnmsub_vf:
+; V6F32:       # %bb.0:
+; V6F32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; V6F32-NEXT:    vfnmsub.vf v8, fa0, v10
+; V6F32-NEXT:    ret
+;
+; V2F64-LABEL: fmuladd_vfnmsub_vf:
+; V2F64:       # %bb.0:
+; V2F64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; V2F64-NEXT:    vfnmsub.vf v8, fa0, v9
+; V2F64-NEXT:    ret
+;
+; V3F64-LABEL: fmuladd_vfnmsub_vf:
+; V3F64:       # %bb.0:
+; V3F64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; V3F64-NEXT:    vfnmsub.vf v8, fa0, v10
+; V3F64-NEXT:    ret
+  %a.neg = fneg <N x TY> %a
+  %b.head = insertelement <N x TY> poison, TY %b, i32 0
+  %b.splat = shufflevector <N x TY> %b.head, <N x TY> poison, <N x i32> zeroinitializer
+  %d = call <N x TY> @llvm.fmuladd(<N x TY> %a.neg, <N x TY> %b.splat, <N x TY> %c)
+  ret <N x TY> %d
 }
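
To make the template above concrete: each configuration is produced by
a sed RUN line that rewrites the placeholders before the IR reaches
llc. A minimal sketch of what a pair of such lines could look like for
the V8F16ZVFH and V4F32 configurations (the exact sed expressions,
-mattr sets, and prefix groupings here are illustrative assumptions,
not copied from the patch):

; RUN: sed 's/N x/8 x/g; s/TY/half/g' %s | llc -mtriple=riscv64 -target-abi=lp64d \
; RUN:   -mattr=+v,+zvfh -verify-machineinstrs | FileCheck %s --check-prefixes=V8F16ZVFH
; RUN: sed 's/N x/4 x/g; s/TY/float/g' %s | llc -mtriple=riscv64 -target-abi=lp64d \
; RUN:   -mattr=+v -verify-machineinstrs | FileCheck %s --check-prefixes=V4F32

The first substitution turns every <N x TY> (and the <N x i32> splat
masks) into a concrete vector type; the second fixes up the remaining
scalar TY uses, so a single IR body serves every configuration checked
above.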

>From b74f4fb7093f983ae63fb7cdd6ddce81018577cb Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Tue, 8 Oct 2024 11:48:17 +0800
Subject: [PATCH 2/2] Fix labels and remove an accidentally added extra test

---
 .../CodeGen/RISCV/rvv/fixed-vectors-fp.ll     | 101 +++++++++---------
 1 file changed, 48 insertions(+), 53 deletions(-)

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
index 7151341a342afa..56769f1bb809ba 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
@@ -2321,8 +2321,8 @@ define <N x TY> @vfnmsub_vf(<N x TY> %a, TY %b, <N x TY> %c) {
 define <N x TY> @trunc(<N x TY> %a) {
 ; V8F16ZVFH-LABEL: trunc:
 ; V8F16ZVFH:       # %bb.0:
-; V8F16ZVFH-NEXT:    lui a0, %hi(.LCPI37_0)
-; V8F16ZVFH-NEXT:    flh fa5, %lo(.LCPI37_0)(a0)
+; V8F16ZVFH-NEXT:    lui a0, %hi(.LCPI34_0)
+; V8F16ZVFH-NEXT:    flh fa5, %lo(.LCPI34_0)(a0)
 ; V8F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; V8F16ZVFH-NEXT:    vfabs.v v9, v8
 ; V8F16ZVFH-NEXT:    vmflt.vf v0, v9, fa5
@@ -2351,8 +2351,8 @@ define <N x TY> @trunc(<N x TY> %a) {
 ;
 ; V6F16ZVFH-LABEL: trunc:
 ; V6F16ZVFH:       # %bb.0:
-; V6F16ZVFH-NEXT:    lui a0, %hi(.LCPI37_0)
-; V6F16ZVFH-NEXT:    flh fa5, %lo(.LCPI37_0)(a0)
+; V6F16ZVFH-NEXT:    lui a0, %hi(.LCPI34_0)
+; V6F16ZVFH-NEXT:    flh fa5, %lo(.LCPI34_0)(a0)
 ; V6F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; V6F16ZVFH-NEXT:    vfabs.v v9, v8
 ; V6F16ZVFH-NEXT:    vmflt.vf v0, v9, fa5
@@ -2407,8 +2407,8 @@ define <N x TY> @trunc(<N x TY> %a) {
 ;
 ; V2F64-LABEL: trunc:
 ; V2F64:       # %bb.0:
-; V2F64-NEXT:    lui a0, %hi(.LCPI37_0)
-; V2F64-NEXT:    fld fa5, %lo(.LCPI37_0)(a0)
+; V2F64-NEXT:    lui a0, %hi(.LCPI34_0)
+; V2F64-NEXT:    fld fa5, %lo(.LCPI34_0)(a0)
 ; V2F64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; V2F64-NEXT:    vfabs.v v9, v8
 ; V2F64-NEXT:    vmflt.vf v0, v9, fa5
@@ -2420,8 +2420,8 @@ define <N x TY> @trunc(<N x TY> %a) {
 ;
 ; V3F64-LABEL: trunc:
 ; V3F64:       # %bb.0:
-; V3F64-NEXT:    lui a0, %hi(.LCPI37_0)
-; V3F64-NEXT:    fld fa5, %lo(.LCPI37_0)(a0)
+; V3F64-NEXT:    lui a0, %hi(.LCPI34_0)
+; V3F64-NEXT:    fld fa5, %lo(.LCPI34_0)(a0)
 ; V3F64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; V3F64-NEXT:    vfabs.v v10, v8
 ; V3F64-NEXT:    vmflt.vf v0, v10, fa5
@@ -2437,8 +2437,8 @@ define <N x TY> @trunc(<N x TY> %a) {
 define <N x TY> @ceil(<N x TY> %a) {
 ; V8F16ZVFH-LABEL: ceil:
 ; V8F16ZVFH:       # %bb.0:
-; V8F16ZVFH-NEXT:    lui a0, %hi(.LCPI38_0)
-; V8F16ZVFH-NEXT:    flh fa5, %lo(.LCPI38_0)(a0)
+; V8F16ZVFH-NEXT:    lui a0, %hi(.LCPI35_0)
+; V8F16ZVFH-NEXT:    flh fa5, %lo(.LCPI35_0)(a0)
 ; V8F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; V8F16ZVFH-NEXT:    vfabs.v v9, v8
 ; V8F16ZVFH-NEXT:    vmflt.vf v0, v9, fa5
@@ -2471,8 +2471,8 @@ define <N x TY> @ceil(<N x TY> %a) {
 ;
 ; V6F16ZVFH-LABEL: ceil:
 ; V6F16ZVFH:       # %bb.0:
-; V6F16ZVFH-NEXT:    lui a0, %hi(.LCPI38_0)
-; V6F16ZVFH-NEXT:    flh fa5, %lo(.LCPI38_0)(a0)
+; V6F16ZVFH-NEXT:    lui a0, %hi(.LCPI35_0)
+; V6F16ZVFH-NEXT:    flh fa5, %lo(.LCPI35_0)(a0)
 ; V6F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; V6F16ZVFH-NEXT:    vfabs.v v9, v8
 ; V6F16ZVFH-NEXT:    vmflt.vf v0, v9, fa5
@@ -2535,8 +2535,8 @@ define <N x TY> @ceil(<N x TY> %a) {
 ;
 ; V2F64-LABEL: ceil:
 ; V2F64:       # %bb.0:
-; V2F64-NEXT:    lui a0, %hi(.LCPI38_0)
-; V2F64-NEXT:    fld fa5, %lo(.LCPI38_0)(a0)
+; V2F64-NEXT:    lui a0, %hi(.LCPI35_0)
+; V2F64-NEXT:    fld fa5, %lo(.LCPI35_0)(a0)
 ; V2F64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; V2F64-NEXT:    vfabs.v v9, v8
 ; V2F64-NEXT:    vmflt.vf v0, v9, fa5
@@ -2550,8 +2550,8 @@ define <N x TY> @ceil(<N x TY> %a) {
 ;
 ; V3F64-LABEL: ceil:
 ; V3F64:       # %bb.0:
-; V3F64-NEXT:    lui a0, %hi(.LCPI38_0)
-; V3F64-NEXT:    fld fa5, %lo(.LCPI38_0)(a0)
+; V3F64-NEXT:    lui a0, %hi(.LCPI35_0)
+; V3F64-NEXT:    fld fa5, %lo(.LCPI35_0)(a0)
 ; V3F64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; V3F64-NEXT:    vfabs.v v10, v8
 ; V3F64-NEXT:    vmflt.vf v0, v10, fa5
@@ -2569,8 +2569,8 @@ define <N x TY> @ceil(<N x TY> %a) {
 define <N x TY> @floor(<N x TY> %a) {
 ; V8F16ZVFH-LABEL: floor:
 ; V8F16ZVFH:       # %bb.0:
-; V8F16ZVFH-NEXT:    lui a0, %hi(.LCPI39_0)
-; V8F16ZVFH-NEXT:    flh fa5, %lo(.LCPI39_0)(a0)
+; V8F16ZVFH-NEXT:    lui a0, %hi(.LCPI36_0)
+; V8F16ZVFH-NEXT:    flh fa5, %lo(.LCPI36_0)(a0)
 ; V8F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; V8F16ZVFH-NEXT:    vfabs.v v9, v8
 ; V8F16ZVFH-NEXT:    vmflt.vf v0, v9, fa5
@@ -2603,8 +2603,8 @@ define <N x TY> @floor(<N x TY> %a) {
 ;
 ; V6F16ZVFH-LABEL: floor:
 ; V6F16ZVFH:       # %bb.0:
-; V6F16ZVFH-NEXT:    lui a0, %hi(.LCPI39_0)
-; V6F16ZVFH-NEXT:    flh fa5, %lo(.LCPI39_0)(a0)
+; V6F16ZVFH-NEXT:    lui a0, %hi(.LCPI36_0)
+; V6F16ZVFH-NEXT:    flh fa5, %lo(.LCPI36_0)(a0)
 ; V6F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; V6F16ZVFH-NEXT:    vfabs.v v9, v8
 ; V6F16ZVFH-NEXT:    vmflt.vf v0, v9, fa5
@@ -2667,8 +2667,8 @@ define <N x TY> @floor(<N x TY> %a) {
 ;
 ; V2F64-LABEL: floor:
 ; V2F64:       # %bb.0:
-; V2F64-NEXT:    lui a0, %hi(.LCPI39_0)
-; V2F64-NEXT:    fld fa5, %lo(.LCPI39_0)(a0)
+; V2F64-NEXT:    lui a0, %hi(.LCPI36_0)
+; V2F64-NEXT:    fld fa5, %lo(.LCPI36_0)(a0)
 ; V2F64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; V2F64-NEXT:    vfabs.v v9, v8
 ; V2F64-NEXT:    vmflt.vf v0, v9, fa5
@@ -2682,8 +2682,8 @@ define <N x TY> @floor(<N x TY> %a) {
 ;
 ; V3F64-LABEL: floor:
 ; V3F64:       # %bb.0:
-; V3F64-NEXT:    lui a0, %hi(.LCPI39_0)
-; V3F64-NEXT:    fld fa5, %lo(.LCPI39_0)(a0)
+; V3F64-NEXT:    lui a0, %hi(.LCPI36_0)
+; V3F64-NEXT:    fld fa5, %lo(.LCPI36_0)(a0)
 ; V3F64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; V3F64-NEXT:    vfabs.v v10, v8
 ; V3F64-NEXT:    vmflt.vf v0, v10, fa5
@@ -2701,8 +2701,8 @@ define <N x TY> @floor(<N x TY> %a) {
 define <N x TY> @round(<N x TY> %a) {
 ; V8F16ZVFH-LABEL: round:
 ; V8F16ZVFH:       # %bb.0:
-; V8F16ZVFH-NEXT:    lui a0, %hi(.LCPI40_0)
-; V8F16ZVFH-NEXT:    flh fa5, %lo(.LCPI40_0)(a0)
+; V8F16ZVFH-NEXT:    lui a0, %hi(.LCPI37_0)
+; V8F16ZVFH-NEXT:    flh fa5, %lo(.LCPI37_0)(a0)
 ; V8F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; V8F16ZVFH-NEXT:    vfabs.v v9, v8
 ; V8F16ZVFH-NEXT:    vmflt.vf v0, v9, fa5
@@ -2735,8 +2735,8 @@ define <N x TY> @round(<N x TY> %a) {
 ;
 ; V6F16ZVFH-LABEL: round:
 ; V6F16ZVFH:       # %bb.0:
-; V6F16ZVFH-NEXT:    lui a0, %hi(.LCPI40_0)
-; V6F16ZVFH-NEXT:    flh fa5, %lo(.LCPI40_0)(a0)
+; V6F16ZVFH-NEXT:    lui a0, %hi(.LCPI37_0)
+; V6F16ZVFH-NEXT:    flh fa5, %lo(.LCPI37_0)(a0)
 ; V6F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; V6F16ZVFH-NEXT:    vfabs.v v9, v8
 ; V6F16ZVFH-NEXT:    vmflt.vf v0, v9, fa5
@@ -2799,8 +2799,8 @@ define <N x TY> @round(<N x TY> %a) {
 ;
 ; V2F64-LABEL: round:
 ; V2F64:       # %bb.0:
-; V2F64-NEXT:    lui a0, %hi(.LCPI40_0)
-; V2F64-NEXT:    fld fa5, %lo(.LCPI40_0)(a0)
+; V2F64-NEXT:    lui a0, %hi(.LCPI37_0)
+; V2F64-NEXT:    fld fa5, %lo(.LCPI37_0)(a0)
 ; V2F64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; V2F64-NEXT:    vfabs.v v9, v8
 ; V2F64-NEXT:    vmflt.vf v0, v9, fa5
@@ -2814,8 +2814,8 @@ define <N x TY> @round(<N x TY> %a) {
 ;
 ; V3F64-LABEL: round:
 ; V3F64:       # %bb.0:
-; V3F64-NEXT:    lui a0, %hi(.LCPI40_0)
-; V3F64-NEXT:    fld fa5, %lo(.LCPI40_0)(a0)
+; V3F64-NEXT:    lui a0, %hi(.LCPI37_0)
+; V3F64-NEXT:    fld fa5, %lo(.LCPI37_0)(a0)
 ; V3F64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; V3F64-NEXT:    vfabs.v v10, v8
 ; V3F64-NEXT:    vmflt.vf v0, v10, fa5
@@ -2830,16 +2830,11 @@ define <N x TY> @round(<N x TY> %a) {
   ret <N x TY> %b
 }
 
-define <N x TY> @roundeven(<N x TY> %a) {
-  %b = call <N x TY> @llvm.roundeven(<N x TY> %a)
-  ret <N x TY> %b
-}
-
 define <N x TY> @rint(<N x TY> %a) {
 ; V8F16ZVFH-LABEL: rint:
 ; V8F16ZVFH:       # %bb.0:
-; V8F16ZVFH-NEXT:    lui a0, %hi(.LCPI41_0)
-; V8F16ZVFH-NEXT:    flh fa5, %lo(.LCPI41_0)(a0)
+; V8F16ZVFH-NEXT:    lui a0, %hi(.LCPI38_0)
+; V8F16ZVFH-NEXT:    flh fa5, %lo(.LCPI38_0)(a0)
 ; V8F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; V8F16ZVFH-NEXT:    vfabs.v v9, v8
 ; V8F16ZVFH-NEXT:    vmflt.vf v0, v9, fa5
@@ -2868,8 +2863,8 @@ define <N x TY> @rint(<N x TY> %a) {
 ;
 ; V6F16ZVFH-LABEL: rint:
 ; V6F16ZVFH:       # %bb.0:
-; V6F16ZVFH-NEXT:    lui a0, %hi(.LCPI41_0)
-; V6F16ZVFH-NEXT:    flh fa5, %lo(.LCPI41_0)(a0)
+; V6F16ZVFH-NEXT:    lui a0, %hi(.LCPI38_0)
+; V6F16ZVFH-NEXT:    flh fa5, %lo(.LCPI38_0)(a0)
 ; V6F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; V6F16ZVFH-NEXT:    vfabs.v v9, v8
 ; V6F16ZVFH-NEXT:    vmflt.vf v0, v9, fa5
@@ -2924,8 +2919,8 @@ define <N x TY> @rint(<N x TY> %a) {
 ;
 ; V2F64-LABEL: rint:
 ; V2F64:       # %bb.0:
-; V2F64-NEXT:    lui a0, %hi(.LCPI41_0)
-; V2F64-NEXT:    fld fa5, %lo(.LCPI41_0)(a0)
+; V2F64-NEXT:    lui a0, %hi(.LCPI38_0)
+; V2F64-NEXT:    fld fa5, %lo(.LCPI38_0)(a0)
 ; V2F64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; V2F64-NEXT:    vfabs.v v9, v8
 ; V2F64-NEXT:    vmflt.vf v0, v9, fa5
@@ -2937,8 +2932,8 @@ define <N x TY> @rint(<N x TY> %a) {
 ;
 ; V3F64-LABEL: rint:
 ; V3F64:       # %bb.0:
-; V3F64-NEXT:    lui a0, %hi(.LCPI41_0)
-; V3F64-NEXT:    fld fa5, %lo(.LCPI41_0)(a0)
+; V3F64-NEXT:    lui a0, %hi(.LCPI38_0)
+; V3F64-NEXT:    fld fa5, %lo(.LCPI38_0)(a0)
 ; V3F64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; V3F64-NEXT:    vfabs.v v10, v8
 ; V3F64-NEXT:    vmflt.vf v0, v10, fa5
@@ -2954,8 +2949,8 @@ define <N x TY> @rint(<N x TY> %a) {
 define <N x TY> @nearbyint(<N x TY> %a) {
 ; V8F16ZVFH-LABEL: nearbyint:
 ; V8F16ZVFH:       # %bb.0:
-; V8F16ZVFH-NEXT:    lui a0, %hi(.LCPI42_0)
-; V8F16ZVFH-NEXT:    flh fa5, %lo(.LCPI42_0)(a0)
+; V8F16ZVFH-NEXT:    lui a0, %hi(.LCPI39_0)
+; V8F16ZVFH-NEXT:    flh fa5, %lo(.LCPI39_0)(a0)
 ; V8F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; V8F16ZVFH-NEXT:    vfabs.v v9, v8
 ; V8F16ZVFH-NEXT:    vmflt.vf v0, v9, fa5
@@ -2988,8 +2983,8 @@ define <N x TY> @nearbyint(<N x TY> %a) {
 ;
 ; V6F16ZVFH-LABEL: nearbyint:
 ; V6F16ZVFH:       # %bb.0:
-; V6F16ZVFH-NEXT:    lui a0, %hi(.LCPI42_0)
-; V6F16ZVFH-NEXT:    flh fa5, %lo(.LCPI42_0)(a0)
+; V6F16ZVFH-NEXT:    lui a0, %hi(.LCPI39_0)
+; V6F16ZVFH-NEXT:    flh fa5, %lo(.LCPI39_0)(a0)
 ; V6F16ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; V6F16ZVFH-NEXT:    vfabs.v v9, v8
 ; V6F16ZVFH-NEXT:    vmflt.vf v0, v9, fa5
@@ -3052,8 +3047,8 @@ define <N x TY> @nearbyint(<N x TY> %a) {
 ;
 ; V2F64-LABEL: nearbyint:
 ; V2F64:       # %bb.0:
-; V2F64-NEXT:    lui a0, %hi(.LCPI42_0)
-; V2F64-NEXT:    fld fa5, %lo(.LCPI42_0)(a0)
+; V2F64-NEXT:    lui a0, %hi(.LCPI39_0)
+; V2F64-NEXT:    fld fa5, %lo(.LCPI39_0)(a0)
 ; V2F64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; V2F64-NEXT:    vfabs.v v9, v8
 ; V2F64-NEXT:    vmflt.vf v0, v9, fa5
@@ -3067,8 +3062,8 @@ define <N x TY> @nearbyint(<N x TY> %a) {
 ;
 ; V3F64-LABEL: nearbyint:
 ; V3F64:       # %bb.0:
-; V3F64-NEXT:    lui a0, %hi(.LCPI42_0)
-; V3F64-NEXT:    fld fa5, %lo(.LCPI42_0)(a0)
+; V3F64-NEXT:    lui a0, %hi(.LCPI39_0)
+; V3F64-NEXT:    fld fa5, %lo(.LCPI39_0)(a0)
 ; V3F64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; V3F64-NEXT:    vfabs.v v10, v8
 ; V3F64-NEXT:    vmflt.vf v0, v10, fa5
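
A note on the label churn in the second patch above: the .LCPI symbols
are constant-pool labels named .LCPI<function number>_<entry>, so their
indices track each function's position in the module. The uniform shift
of three in this patch (.LCPI37_0 -> .LCPI34_0, .LCPI38_0 -> .LCPI35_0,
and so on) follows from tests earlier in the file having been removed,
such as the stray roundeven stub deleted above; no instruction
sequences change.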


