[llvm] [RISCV] Support fixed vector VP_LOAD/STORE for bf16 and f16 without Zvfh. (PR #107297)
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 4 12:41:55 PDT 2024
https://github.com/topperc created https://github.com/llvm/llvm-project/pull/107297
This allows odd-sized vector loads and stores to be legalized to a VP_LOAD/STORE using EVL.
I changed the bf16 tests in fixed-vectors-load.ll and fixed-vectors-store.ll to use an illegal type so that they stay consistent with the intent of these files. A legal type is already tested in fixed-vectors-load-store.ll.
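For illustration, below is a minimal sketch (not part of the patch) of the kind of test this changes. The function body is copied from fadd_v6f16 in fixed-vectors-fp.ll; the RUN line and CHECK prefix are only an approximation of that file's Zvfhmin configuration, so treat the exact -mattr string as an assumption.

  ; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfhmin,+zfhmin -verify-machineinstrs < %s \
  ; RUN:   | FileCheck %s

  ; <6 x half> is an odd-sized (non-power-of-two) fixed vector, so before this
  ; patch the Zvfhmin lowering stored the result in pieces (vse64.v/vse32.v plus
  ; a vslidedown on RV64). With VP_LOAD/VP_STORE marked Custom, the whole vector
  ; is loaded and stored with EVL=6.
  ; CHECK-LABEL: fadd_v6f16:
  ; CHECK: vsetivli zero, 6, e16, mf2, ta, ma
  ; CHECK: vle16.v
  define void @fadd_v6f16(ptr %x, ptr %y) {
    %a = load <6 x half>, ptr %x
    %b = load <6 x half>, ptr %y
    %c = fadd <6 x half> %a, %b
    store <6 x half> %c, ptr %x
    ret void
  }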
From e19eaa210cc8b0420d898b7aef02a105314f7ed3 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Wed, 4 Sep 2024 12:30:48 -0700
Subject: [PATCH] [RISCV] Support fixed vector VP_LOAD/STORE for bf16 and f16
without Zvfh.
This allows odd-sized vector loads and stores to be legalized to a
VP_LOAD/STORE using EVL.
I changed the bf16 tests in fixed-vectors-load.ll and fixed-vectors-store.ll
to use an illegal type so that they stay consistent with the intent of these
files. A legal type is already tested in fixed-vectors-load-store.ll.
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 7 +-
.../CodeGen/RISCV/rvv/fixed-vectors-fp.ll | 2786 ++++++-----------
.../CodeGen/RISCV/rvv/fixed-vectors-load.ll | 10 +-
.../CodeGen/RISCV/rvv/fixed-vectors-store.ll | 8 +-
4 files changed, 975 insertions(+), 1836 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index bc661c72e5ecca..714129262b9def 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1099,8 +1099,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
}
// TODO: Could we merge some code with zvfhmin?
- if (Subtarget.hasVInstructionsBF16Minimal()) {
- for (MVT VT : BF16VecVTs) {
+ if (Subtarget.hasVInstructionsBF16Minimal()) { for (MVT VT : BF16VecVTs) {
if (!isTypeLegal(VT))
continue;
setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
@@ -1317,6 +1316,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// FIXME: mload, mstore, mgather, mscatter, vp_load/store,
// vp_stride_load/store, vp_gather/scatter can be hoisted to here.
setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
+ setOperationAction({ISD::VP_LOAD, ISD::VP_STORE}, VT, Custom);
setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT,
@@ -1378,8 +1378,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(
{ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, VT, Custom);
- setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
- ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
+ setOperationAction({ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
ISD::VP_SCATTER},
VT, Custom);
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
index 56cd718536daa4..d996a9c05aca4d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
@@ -49,42 +49,20 @@ define void @fadd_v6f16(ptr %x, ptr %y) {
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
-; ZVFHMIN-RV32-LABEL: fadd_v6f16:
-; ZVFHMIN-RV32: # %bb.0:
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vle16.v v8, (a1)
-; ZVFHMIN-RV32-NEXT: vle16.v v9, (a0)
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfadd.vv v8, v8, v10
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-RV32-NEXT: addi a1, a0, 8
-; ZVFHMIN-RV32-NEXT: vse32.v v8, (a1)
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0)
-; ZVFHMIN-RV32-NEXT: ret
-;
-; ZVFHMIN-RV64-LABEL: fadd_v6f16:
-; ZVFHMIN-RV64: # %bb.0:
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vle16.v v8, (a1)
-; ZVFHMIN-RV64-NEXT: vle16.v v9, (a0)
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfadd.vv v8, v8, v10
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vse64.v v9, (a0)
-; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-RV64-NEXT: addi a0, a0, 8
-; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: ret
+; ZVFHMIN-LABEL: fadd_v6f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfadd.vv v8, v8, v10
+; ZVFHMIN-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
+; ZVFHMIN-NEXT: vse16.v v9, (a0)
+; ZVFHMIN-NEXT: ret
%a = load <6 x half>, ptr %x
%b = load <6 x half>, ptr %y
%c = fadd <6 x half> %a, %b
@@ -173,42 +151,20 @@ define void @fsub_v6f16(ptr %x, ptr %y) {
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
-; ZVFHMIN-RV32-LABEL: fsub_v6f16:
-; ZVFHMIN-RV32: # %bb.0:
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vle16.v v8, (a1)
-; ZVFHMIN-RV32-NEXT: vle16.v v9, (a0)
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfsub.vv v8, v8, v10
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-RV32-NEXT: addi a1, a0, 8
-; ZVFHMIN-RV32-NEXT: vse32.v v8, (a1)
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0)
-; ZVFHMIN-RV32-NEXT: ret
-;
-; ZVFHMIN-RV64-LABEL: fsub_v6f16:
-; ZVFHMIN-RV64: # %bb.0:
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vle16.v v8, (a1)
-; ZVFHMIN-RV64-NEXT: vle16.v v9, (a0)
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfsub.vv v8, v8, v10
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vse64.v v9, (a0)
-; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-RV64-NEXT: addi a0, a0, 8
-; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: ret
+; ZVFHMIN-LABEL: fsub_v6f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfsub.vv v8, v8, v10
+; ZVFHMIN-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
+; ZVFHMIN-NEXT: vse16.v v9, (a0)
+; ZVFHMIN-NEXT: ret
%a = load <6 x half>, ptr %x
%b = load <6 x half>, ptr %y
%c = fsub <6 x half> %a, %b
@@ -297,42 +253,20 @@ define void @fmul_v6f16(ptr %x, ptr %y) {
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
-; ZVFHMIN-RV32-LABEL: fmul_v6f16:
-; ZVFHMIN-RV32: # %bb.0:
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vle16.v v8, (a1)
-; ZVFHMIN-RV32-NEXT: vle16.v v9, (a0)
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfmul.vv v8, v8, v10
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-RV32-NEXT: addi a1, a0, 8
-; ZVFHMIN-RV32-NEXT: vse32.v v8, (a1)
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0)
-; ZVFHMIN-RV32-NEXT: ret
-;
-; ZVFHMIN-RV64-LABEL: fmul_v6f16:
-; ZVFHMIN-RV64: # %bb.0:
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vle16.v v8, (a1)
-; ZVFHMIN-RV64-NEXT: vle16.v v9, (a0)
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfmul.vv v8, v8, v10
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vse64.v v9, (a0)
-; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-RV64-NEXT: addi a0, a0, 8
-; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: ret
+; ZVFHMIN-LABEL: fmul_v6f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfmul.vv v8, v8, v10
+; ZVFHMIN-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
+; ZVFHMIN-NEXT: vse16.v v9, (a0)
+; ZVFHMIN-NEXT: ret
%a = load <6 x half>, ptr %x
%b = load <6 x half>, ptr %y
%c = fmul <6 x half> %a, %b
@@ -421,42 +355,20 @@ define void @fdiv_v6f16(ptr %x, ptr %y) {
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
-; ZVFHMIN-RV32-LABEL: fdiv_v6f16:
-; ZVFHMIN-RV32: # %bb.0:
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vle16.v v8, (a1)
-; ZVFHMIN-RV32-NEXT: vle16.v v9, (a0)
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfdiv.vv v8, v8, v10
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-RV32-NEXT: addi a1, a0, 8
-; ZVFHMIN-RV32-NEXT: vse32.v v8, (a1)
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0)
-; ZVFHMIN-RV32-NEXT: ret
-;
-; ZVFHMIN-RV64-LABEL: fdiv_v6f16:
-; ZVFHMIN-RV64: # %bb.0:
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vle16.v v8, (a1)
-; ZVFHMIN-RV64-NEXT: vle16.v v9, (a0)
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v10, v8
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfdiv.vv v8, v8, v10
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vse64.v v9, (a0)
-; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-RV64-NEXT: addi a0, a0, 8
-; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: ret
+; ZVFHMIN-LABEL: fdiv_v6f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfdiv.vv v8, v8, v10
+; ZVFHMIN-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
+; ZVFHMIN-NEXT: vse16.v v9, (a0)
+; ZVFHMIN-NEXT: ret
%a = load <6 x half>, ptr %x
%b = load <6 x half>, ptr %y
%c = fdiv <6 x half> %a, %b
@@ -576,115 +488,55 @@ define void @fneg_v6f16(ptr %x) {
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
-; ZVFHMIN-RV32-LABEL: fneg_v6f16:
-; ZVFHMIN-RV32: # %bb.0:
-; ZVFHMIN-RV32-NEXT: addi sp, sp, -16
-; ZVFHMIN-RV32-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
-; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV32-NEXT: mv a1, sp
-; ZVFHMIN-RV32-NEXT: vse16.v v8, (a1)
-; ZVFHMIN-RV32-NEXT: flh fa5, 2(sp)
-; ZVFHMIN-RV32-NEXT: flh fa4, 0(sp)
-; ZVFHMIN-RV32-NEXT: flh fa3, 4(sp)
-; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5
-; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa4
-; ZVFHMIN-RV32-NEXT: lui a3, 1048568
-; ZVFHMIN-RV32-NEXT: fmv.x.h a4, fa3
-; ZVFHMIN-RV32-NEXT: flh fa5, 6(sp)
-; ZVFHMIN-RV32-NEXT: xor a1, a1, a3
-; ZVFHMIN-RV32-NEXT: xor a2, a2, a3
-; ZVFHMIN-RV32-NEXT: vmv.v.x v8, a2
-; ZVFHMIN-RV32-NEXT: fmv.x.h a5, fa5
-; ZVFHMIN-RV32-NEXT: flh fa5, 10(sp)
-; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a1
-; ZVFHMIN-RV32-NEXT: xor a4, a4, a3
-; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a4
-; ZVFHMIN-RV32-NEXT: fmv.x.h a6, fa5
-; ZVFHMIN-RV32-NEXT: flh fa5, 8(sp)
-; ZVFHMIN-RV32-NEXT: xor a5, a5, a3
-; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a5
-; ZVFHMIN-RV32-NEXT: xor a6, a6, a3
-; ZVFHMIN-RV32-NEXT: fmv.x.h a7, fa5
-; ZVFHMIN-RV32-NEXT: flh fa5, 12(sp)
-; ZVFHMIN-RV32-NEXT: lui t0, 8
-; ZVFHMIN-RV32-NEXT: xor a7, a7, t0
-; ZVFHMIN-RV32-NEXT: vmv.v.x v9, a7
-; ZVFHMIN-RV32-NEXT: fmv.x.h a7, fa5
-; ZVFHMIN-RV32-NEXT: flh fa5, 14(sp)
-; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a6
-; ZVFHMIN-RV32-NEXT: xor a6, a7, a3
-; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a6
-; ZVFHMIN-RV32-NEXT: fmv.x.h a6, fa5
-; ZVFHMIN-RV32-NEXT: xor a3, a6, a3
-; ZVFHMIN-RV32-NEXT: vmv.v.i v0, 15
-; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a3
-; ZVFHMIN-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-RV32-NEXT: vmv.v.x v8, a2
-; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a1
-; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a4
-; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a5
-; ZVFHMIN-RV32-NEXT: vse16.v v8, (a0)
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-RV32-NEXT: addi a0, a0, 8
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vse32.v v8, (a0)
-; ZVFHMIN-RV32-NEXT: addi sp, sp, 16
-; ZVFHMIN-RV32-NEXT: ret
-;
-; ZVFHMIN-RV64-LABEL: fneg_v6f16:
-; ZVFHMIN-RV64: # %bb.0:
-; ZVFHMIN-RV64-NEXT: addi sp, sp, -16
-; ZVFHMIN-RV64-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
-; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: mv a1, sp
-; ZVFHMIN-RV64-NEXT: vse16.v v8, (a1)
-; ZVFHMIN-RV64-NEXT: flh fa5, 2(sp)
-; ZVFHMIN-RV64-NEXT: flh fa4, 0(sp)
-; ZVFHMIN-RV64-NEXT: flh fa3, 4(sp)
-; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5
-; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa4
-; ZVFHMIN-RV64-NEXT: lui a3, 1048568
-; ZVFHMIN-RV64-NEXT: fmv.x.h a4, fa3
-; ZVFHMIN-RV64-NEXT: flh fa5, 6(sp)
-; ZVFHMIN-RV64-NEXT: lui a5, 8
-; ZVFHMIN-RV64-NEXT: xor a2, a2, a5
-; ZVFHMIN-RV64-NEXT: vmv.v.x v8, a2
-; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa5
-; ZVFHMIN-RV64-NEXT: flh fa5, 10(sp)
-; ZVFHMIN-RV64-NEXT: xor a1, a1, a3
-; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a1
-; ZVFHMIN-RV64-NEXT: xor a4, a4, a3
-; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5
-; ZVFHMIN-RV64-NEXT: flh fa5, 8(sp)
-; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a4
-; ZVFHMIN-RV64-NEXT: xor a2, a2, a3
-; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a2
-; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa5
-; ZVFHMIN-RV64-NEXT: flh fa5, 12(sp)
-; ZVFHMIN-RV64-NEXT: xor a1, a1, a3
-; ZVFHMIN-RV64-NEXT: xor a2, a2, a5
-; ZVFHMIN-RV64-NEXT: vmv.v.x v9, a2
-; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa5
-; ZVFHMIN-RV64-NEXT: flh fa5, 14(sp)
-; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a1
-; ZVFHMIN-RV64-NEXT: xor a2, a2, a3
-; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a2
-; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5
-; ZVFHMIN-RV64-NEXT: xor a1, a1, a3
-; ZVFHMIN-RV64-NEXT: vmv.v.i v0, 15
-; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a1
-; ZVFHMIN-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vse64.v v9, (a0)
-; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-RV64-NEXT: addi a0, a0, 8
-; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: addi sp, sp, 16
-; ZVFHMIN-RV64-NEXT: ret
+; ZVFHMIN-LABEL: fneg_v6f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: addi sp, sp, -16
+; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: mv a1, sp
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-NEXT: flh fa4, 0(sp)
+; ZVFHMIN-NEXT: flh fa3, 4(sp)
+; ZVFHMIN-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-NEXT: fmv.x.h a2, fa4
+; ZVFHMIN-NEXT: lui a3, 1048568
+; ZVFHMIN-NEXT: fmv.x.h a4, fa3
+; ZVFHMIN-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-NEXT: lui a5, 8
+; ZVFHMIN-NEXT: xor a2, a2, a5
+; ZVFHMIN-NEXT: vmv.v.x v8, a2
+; ZVFHMIN-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-NEXT: xor a1, a1, a3
+; ZVFHMIN-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-NEXT: xor a4, a4, a3
+; ZVFHMIN-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-NEXT: vslide1down.vx v8, v8, a4
+; ZVFHMIN-NEXT: xor a2, a2, a3
+; ZVFHMIN-NEXT: vslide1down.vx v8, v8, a2
+; ZVFHMIN-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-NEXT: xor a1, a1, a3
+; ZVFHMIN-NEXT: xor a2, a2, a5
+; ZVFHMIN-NEXT: vmv.v.x v9, a2
+; ZVFHMIN-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-NEXT: xor a2, a2, a3
+; ZVFHMIN-NEXT: vslide1down.vx v9, v9, a2
+; ZVFHMIN-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-NEXT: xor a1, a1, a3
+; ZVFHMIN-NEXT: vmv.v.i v0, 15
+; ZVFHMIN-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-NEXT: vsetivli zero, 6, e16, mf2, ta, mu
+; ZVFHMIN-NEXT: vslidedown.vi v9, v8, 4, v0.t
+; ZVFHMIN-NEXT: vse16.v v9, (a0)
+; ZVFHMIN-NEXT: addi sp, sp, 16
+; ZVFHMIN-NEXT: ret
%a = load <6 x half>, ptr %x
%b = fneg <6 x half> %a
store <6 x half> %b, ptr %x
@@ -851,9 +703,10 @@ define void @fabs_v6f16(ptr %x) {
; ZVFHMIN-RV32: # %bb.0:
; ZVFHMIN-RV32-NEXT: addi sp, sp, -16
; ZVFHMIN-RV32-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
+; ZVFHMIN-RV32-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0)
; ZVFHMIN-RV32-NEXT: mv a1, sp
+; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT: vse16.v v8, (a1)
; ZVFHMIN-RV32-NEXT: flh fa5, 2(sp)
; ZVFHMIN-RV32-NEXT: flh fa4, 0(sp)
@@ -864,44 +717,35 @@ define void @fabs_v6f16(ptr %x) {
; ZVFHMIN-RV32-NEXT: fmv.x.h a4, fa3
; ZVFHMIN-RV32-NEXT: flh fa5, 6(sp)
; ZVFHMIN-RV32-NEXT: addi a3, a3, -1
-; ZVFHMIN-RV32-NEXT: and a1, a1, a3
; ZVFHMIN-RV32-NEXT: and a2, a2, a3
-; ZVFHMIN-RV32-NEXT: fmv.x.h a5, fa5
-; ZVFHMIN-RV32-NEXT: flh fa5, 10(sp)
; ZVFHMIN-RV32-NEXT: vmv.v.x v8, a2
+; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-RV32-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-RV32-NEXT: and a1, a1, a3
; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a1
; ZVFHMIN-RV32-NEXT: and a4, a4, a3
-; ZVFHMIN-RV32-NEXT: fmv.x.h a6, fa5
+; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5
; ZVFHMIN-RV32-NEXT: flh fa5, 8(sp)
; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a4
-; ZVFHMIN-RV32-NEXT: and a5, a5, a3
-; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a5
-; ZVFHMIN-RV32-NEXT: fmv.x.h a7, fa5
+; ZVFHMIN-RV32-NEXT: and a2, a2, a3
+; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a2
+; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa5
; ZVFHMIN-RV32-NEXT: flh fa5, 12(sp)
-; ZVFHMIN-RV32-NEXT: and a6, a6, a3
-; ZVFHMIN-RV32-NEXT: and a7, a7, a3
-; ZVFHMIN-RV32-NEXT: vmv.v.x v9, a7
-; ZVFHMIN-RV32-NEXT: fmv.x.h a7, fa5
+; ZVFHMIN-RV32-NEXT: and a1, a1, a3
+; ZVFHMIN-RV32-NEXT: and a2, a2, a3
+; ZVFHMIN-RV32-NEXT: vmv.v.x v9, a2
+; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa5
; ZVFHMIN-RV32-NEXT: flh fa5, 14(sp)
-; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a6
-; ZVFHMIN-RV32-NEXT: and a6, a7, a3
-; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a6
-; ZVFHMIN-RV32-NEXT: fmv.x.h a6, fa5
-; ZVFHMIN-RV32-NEXT: and a3, a6, a3
+; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-RV32-NEXT: and a2, a2, a3
+; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a2
+; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-RV32-NEXT: and a1, a1, a3
; ZVFHMIN-RV32-NEXT: vmv.v.i v0, 15
-; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a3
+; ZVFHMIN-RV32-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-RV32-NEXT: vsetivli zero, 6, e16, mf2, ta, mu
; ZVFHMIN-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-RV32-NEXT: vmv.v.x v8, a2
-; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a1
-; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a4
-; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a5
-; ZVFHMIN-RV32-NEXT: vse16.v v8, (a0)
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-RV32-NEXT: addi a0, a0, 8
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0)
; ZVFHMIN-RV32-NEXT: addi sp, sp, 16
; ZVFHMIN-RV32-NEXT: ret
;
@@ -909,9 +753,10 @@ define void @fabs_v6f16(ptr %x) {
; ZVFHMIN-RV64: # %bb.0:
; ZVFHMIN-RV64-NEXT: addi sp, sp, -16
; ZVFHMIN-RV64-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
+; ZVFHMIN-RV64-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0)
; ZVFHMIN-RV64-NEXT: mv a1, sp
+; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT: vse16.v v8, (a1)
; ZVFHMIN-RV64-NEXT: flh fa5, 2(sp)
; ZVFHMIN-RV64-NEXT: flh fa4, 0(sp)
@@ -948,12 +793,9 @@ define void @fabs_v6f16(ptr %x) {
; ZVFHMIN-RV64-NEXT: and a1, a1, a3
; ZVFHMIN-RV64-NEXT: vmv.v.i v0, 15
; ZVFHMIN-RV64-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-RV64-NEXT: vsetivli zero, 6, e16, mf2, ta, mu
; ZVFHMIN-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vse64.v v9, (a0)
-; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-RV64-NEXT: addi a0, a0, 8
-; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-RV64-NEXT: vse16.v v9, (a0)
; ZVFHMIN-RV64-NEXT: addi sp, sp, 16
; ZVFHMIN-RV64-NEXT: ret
%a = load <6 x half>, ptr %x
@@ -1314,10 +1156,11 @@ define void @copysign_v6f16(ptr %x, ptr %y) {
; ZVFHMIN-ZFH-RV32: # %bb.0:
; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, -32
; ZVFHMIN-ZFH-RV32-NEXT: .cfi_def_cfa_offset 32
-; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v8, (a1)
; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v9, (a0)
; ZVFHMIN-ZFH-RV32-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a1)
; ZVFHMIN-ZFH-RV32-NEXT: mv a1, sp
; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v9, (a1)
@@ -1345,35 +1188,26 @@ define void @copysign_v6f16(ptr %x, ptr %y) {
; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a5, fa4
; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.x v8, a2
; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa2, fa5
-; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a6, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a2, fa5
; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 28(sp)
; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 12(sp)
; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a1
; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a3
-; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.x v9, a6
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a4
; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa4, fa5
-; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a6, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa5
; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 30(sp)
; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 14(sp)
-; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a4
+; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.x v9, a2
; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a5
-; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a6
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a1
; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa4, fa5
-; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa5
; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.i v0, 15
-; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a5
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 6, e16, mf2, ta, mu
; ZVFHMIN-ZFH-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t
-; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.x v8, a2
-; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a3
-; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a4
-; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a0)
-; ZVFHMIN-ZFH-RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-ZFH-RV32-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-ZFH-RV32-NEXT: addi a0, a0, 8
-; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-ZFH-RV32-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v9, (a0)
; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, 32
; ZVFHMIN-ZFH-RV32-NEXT: ret
;
@@ -1381,10 +1215,11 @@ define void @copysign_v6f16(ptr %x, ptr %y) {
; ZVFHMIN-ZFH-RV64: # %bb.0:
; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, -32
; ZVFHMIN-ZFH-RV64-NEXT: .cfi_def_cfa_offset 32
-; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
+; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v8, (a1)
; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v9, (a0)
; ZVFHMIN-ZFH-RV64-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a1)
; ZVFHMIN-ZFH-RV64-NEXT: mv a1, sp
; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v9, (a1)
@@ -1429,12 +1264,9 @@ define void @copysign_v6f16(ptr %x, ptr %y) {
; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa5
; ZVFHMIN-ZFH-RV64-NEXT: vmv.v.i v0, 15
; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 6, e16, mf2, ta, mu
; ZVFHMIN-ZFH-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t
-; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-ZFH-RV64-NEXT: vse64.v v9, (a0)
-; ZVFHMIN-ZFH-RV64-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-ZFH-RV64-NEXT: addi a0, a0, 8
-; ZVFHMIN-ZFH-RV64-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v9, (a0)
; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, 32
; ZVFHMIN-ZFH-RV64-NEXT: ret
;
@@ -1442,93 +1274,85 @@ define void @copysign_v6f16(ptr %x, ptr %y) {
; ZVFHMIN-ZFHIN-RV32: # %bb.0:
; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, -32
; ZVFHMIN-ZFHIN-RV32-NEXT: .cfi_def_cfa_offset 32
-; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a1)
; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v9, (a0)
; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1)
; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp
; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a1)
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 18(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 2(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 16(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 0(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa4
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 20(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a6, fa3
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa3
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 4(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a7, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: lui a2, 1048568
-; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a6, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT: lui a1, 1048568
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a7, a2, a1
; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t0, fa5
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 22(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: lui t1, 8
-; ZVFHMIN-ZFHIN-RV32-NEXT: addi a3, t1, -1
-; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi a2, t1, -1
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a2
; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t2, fa5
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 6(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a4, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a3, a7
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, t1
; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a6, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a6, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a7, fa5
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 26(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a5
-; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a7, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT: and a7, t0, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t0, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a5, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 10(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: or a5, a7, a5
-; ZVFHMIN-ZFHIN-RV32-NEXT: and a7, t2, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, a6, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t2, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a6, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, t0, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a5, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 24(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: or a6, a6, a7
-; ZVFHMIN-ZFHIN-RV32-NEXT: and a7, t0, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT: and t0, t2, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t2, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, t2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, a7, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a7, fa5
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 8(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: or a7, t0, a7
-; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a4
-; ZVFHMIN-ZFHIN-RV32-NEXT: and t0, t2, t1
-; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t1, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a6, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 28(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT: and t1, t1, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT: or t0, t1, t0
-; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t1, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a5, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a7, t1
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a6, fa5
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 12(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a5
-; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a6
-; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v9, t0
-; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t0, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v9, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 30(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a7
-; ZVFHMIN-ZFHIN-RV32-NEXT: and a7, t1, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT: and t0, t0, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t1, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a6, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 14(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: or a7, t0, a7
-; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a7
-; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, t1, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a7, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a7, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT: or a2, a3, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a4, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a5, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a3, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1
; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.i v0, 15
-; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 6, e16, mf2, ta, mu
; ZVFHMIN-ZFHIN-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t
-; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a4
-; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a5
-; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a6
-; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a0)
-; ZVFHMIN-ZFHIN-RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-ZFHIN-RV32-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-ZFHIN-RV32-NEXT: addi a0, a0, 8
-; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-ZFHIN-RV32-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a0)
; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, 32
; ZVFHMIN-ZFHIN-RV32-NEXT: ret
;
@@ -1536,10 +1360,11 @@ define void @copysign_v6f16(ptr %x, ptr %y) {
; ZVFHMIN-ZFHIN-RV64: # %bb.0:
; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, -32
; ZVFHMIN-ZFHIN-RV64-NEXT: .cfi_def_cfa_offset 32
-; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a1)
; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v9, (a0)
; ZVFHMIN-ZFHIN-RV64-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1)
; ZVFHMIN-ZFHIN-RV64-NEXT: mv a1, sp
; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a1)
@@ -1611,12 +1436,9 @@ define void @copysign_v6f16(ptr %x, ptr %y) {
; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1
; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.i v0, 15
; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 6, e16, mf2, ta, mu
; ZVFHMIN-ZFHIN-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t
-; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-ZFHIN-RV64-NEXT: vse64.v v9, (a0)
-; ZVFHMIN-ZFHIN-RV64-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-ZFHIN-RV64-NEXT: addi a0, a0, 8
-; ZVFHMIN-ZFHIN-RV64-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a0)
; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, 32
; ZVFHMIN-ZFHIN-RV64-NEXT: ret
%a = load <6 x half>, ptr %x
@@ -1909,9 +1731,10 @@ define void @copysign_vf_v6f16(ptr %x, half %y) {
; ZVFHMIN-ZFH-RV32: # %bb.0:
; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, -16
; ZVFHMIN-ZFH-RV32-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v8, (a0)
; ZVFHMIN-ZFH-RV32-NEXT: mv a1, sp
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a1)
; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 2(sp)
; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 0(sp)
@@ -1928,36 +1751,27 @@ define void @copysign_vf_v6f16(ptr %x, half %y) {
; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a4, fa4
; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.x v8, a2
; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa5, fa0
-; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a2, fa5
; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 8(sp)
; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a1
; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a3
; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a4
; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa5, fa0
-; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a6, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa5
; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 12(sp)
-; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.x v9, a6
-; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a5
+; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.x v9, a1
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a2
; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 14(sp)
; ZVFHMIN-ZFH-RV32-NEXT: fmv.h fa5, fa5
-; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a5, fa5
-; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a5
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a1
; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa5, fa4, fa5
-; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa5
; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.i v0, 15
-; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a5
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 6, e16, mf2, ta, mu
; ZVFHMIN-ZFH-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t
-; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.x v8, a2
-; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a3
-; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a4
-; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a0)
-; ZVFHMIN-ZFH-RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-ZFH-RV32-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-ZFH-RV32-NEXT: addi a0, a0, 8
-; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-ZFH-RV32-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v9, (a0)
; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, 16
; ZVFHMIN-ZFH-RV32-NEXT: ret
;
@@ -1965,9 +1779,10 @@ define void @copysign_vf_v6f16(ptr %x, half %y) {
; ZVFHMIN-ZFH-RV64: # %bb.0:
; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, -16
; ZVFHMIN-ZFH-RV64-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
+; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v8, (a0)
; ZVFHMIN-ZFH-RV64-NEXT: mv a1, sp
+; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a1)
; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 2(sp)
; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 0(sp)
@@ -2002,12 +1817,9 @@ define void @copysign_vf_v6f16(ptr %x, half %y) {
; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa5
; ZVFHMIN-ZFH-RV64-NEXT: vmv.v.i v0, 15
; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 6, e16, mf2, ta, mu
; ZVFHMIN-ZFH-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t
-; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-ZFH-RV64-NEXT: vse64.v v9, (a0)
-; ZVFHMIN-ZFH-RV64-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-ZFH-RV64-NEXT: addi a0, a0, 8
-; ZVFHMIN-ZFH-RV64-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v9, (a0)
; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, 16
; ZVFHMIN-ZFH-RV64-NEXT: ret
;
@@ -2015,70 +1827,62 @@ define void @copysign_vf_v6f16(ptr %x, half %y) {
; ZVFHMIN-ZFHIN-RV32: # %bb.0:
; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, -16
; ZVFHMIN-ZFHIN-RV32-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a0)
; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1)
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 2(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa0
-; ZVFHMIN-ZFHIN-RV32-NEXT: lui a3, 1048568
-; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a1, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT: lui a2, 8
+; ZVFHMIN-ZFHIN-RV32-NEXT: lui a2, 1048568
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: lui a4, 8
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 0(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: addi a5, a2, -1
-; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a5
-; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a1, a4
-; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi a4, a4, -1
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a3, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 4(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a5
-; ZVFHMIN-ZFHIN-RV32-NEXT: or a2, a2, a4
-; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a6, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a5, a5, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 6(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, a6, a5
-; ZVFHMIN-ZFHIN-RV32-NEXT: or a6, a6, a4
-; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a7, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a5, a5, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 10(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a6
-; ZVFHMIN-ZFHIN-RV32-NEXT: and a7, a7, a5
-; ZVFHMIN-ZFHIN-RV32-NEXT: or a7, a7, a4
-; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t0, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a3, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 8(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a7
-; ZVFHMIN-ZFHIN-RV32-NEXT: and t0, t0, a5
-; ZVFHMIN-ZFHIN-RV32-NEXT: or t0, t0, a4
-; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t1, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT: and t1, t1, a5
-; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, t1, a4
-; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v9, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a5, a5, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a3, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v9, a1
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 12(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, t0
-; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a4, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 14(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a5
-; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a4, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a4
-; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a5
-; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, a4, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a2, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1
; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.i v0, 15
-; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 6, e16, mf2, ta, mu
; ZVFHMIN-ZFHIN-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t
-; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a6
-; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a7
-; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a0)
-; ZVFHMIN-ZFHIN-RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-ZFHIN-RV32-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-ZFHIN-RV32-NEXT: addi a0, a0, 8
-; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-ZFHIN-RV32-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a0)
; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, 16
; ZVFHMIN-ZFHIN-RV32-NEXT: ret
;
@@ -2086,9 +1890,10 @@ define void @copysign_vf_v6f16(ptr %x, half %y) {
; ZVFHMIN-ZFHIN-RV64: # %bb.0:
; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, -16
; ZVFHMIN-ZFHIN-RV64-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a0)
; ZVFHMIN-ZFHIN-RV64-NEXT: mv a1, sp
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1)
; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 2(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa0
@@ -2138,12 +1943,9 @@ define void @copysign_vf_v6f16(ptr %x, half %y) {
; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1
; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.i v0, 15
; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 6, e16, mf2, ta, mu
; ZVFHMIN-ZFHIN-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t
-; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-ZFHIN-RV64-NEXT: vse64.v v9, (a0)
-; ZVFHMIN-ZFHIN-RV64-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-ZFHIN-RV64-NEXT: addi a0, a0, 8
-; ZVFHMIN-ZFHIN-RV64-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a0)
; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, 16
; ZVFHMIN-ZFHIN-RV64-NEXT: ret
%a = load <6 x half>, ptr %x
@@ -2523,10 +2325,11 @@ define void @copysign_neg_v6f16(ptr %x, ptr %y) {
; ZVFHMIN-ZFH-RV32: # %bb.0:
; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, -32
; ZVFHMIN-ZFH-RV32-NEXT: .cfi_def_cfa_offset 32
-; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v8, (a1)
; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v9, (a0)
; ZVFHMIN-ZFH-RV32-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a1)
; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 30(sp)
; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 28(sp)
@@ -2549,40 +2352,31 @@ define void @copysign_neg_v6f16(ptr %x, ptr %y) {
; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a1
; ZVFHMIN-ZFH-RV32-NEXT: flh ft1, 6(sp)
; ZVFHMIN-ZFH-RV32-NEXT: fsgnjn.h fa0, ft0, fa0
-; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a3, fa0
-; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a3
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa0
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a1
; ZVFHMIN-ZFH-RV32-NEXT: fsgnjn.h fa1, ft1, fa1
; ZVFHMIN-ZFH-RV32-NEXT: flh fa0, 10(sp)
-; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a4, fa1
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa1
; ZVFHMIN-ZFH-RV32-NEXT: flh fa1, 8(sp)
-; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a4
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a1
; ZVFHMIN-ZFH-RV32-NEXT: fsgnjn.h fa2, fa0, fa2
-; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a5, fa2
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa2
; ZVFHMIN-ZFH-RV32-NEXT: fsgnjn.h fa3, fa1, fa3
-; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a6, fa3
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a2, fa3
; ZVFHMIN-ZFH-RV32-NEXT: flh fa3, 12(sp)
-; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.x v9, a6
-; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a5
+; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.x v9, a2
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a1
; ZVFHMIN-ZFH-RV32-NEXT: flh fa2, 14(sp)
; ZVFHMIN-ZFH-RV32-NEXT: fsgnjn.h fa4, fa3, fa4
-; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a5, fa4
-; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a5
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa4
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a1
; ZVFHMIN-ZFH-RV32-NEXT: fsgnjn.h fa5, fa2, fa5
-; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa5
; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.i v0, 15
-; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a5
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 6, e16, mf2, ta, mu
; ZVFHMIN-ZFH-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t
-; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.x v8, a2
-; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a3
-; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a4
-; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a0)
-; ZVFHMIN-ZFH-RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-ZFH-RV32-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-ZFH-RV32-NEXT: addi a0, a0, 8
-; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-ZFH-RV32-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v9, (a0)
; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, 32
; ZVFHMIN-ZFH-RV32-NEXT: ret
;
@@ -2590,10 +2384,11 @@ define void @copysign_neg_v6f16(ptr %x, ptr %y) {
; ZVFHMIN-ZFH-RV64: # %bb.0:
; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, -32
; ZVFHMIN-ZFH-RV64-NEXT: .cfi_def_cfa_offset 32
-; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
+; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v8, (a1)
; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v9, (a0)
; ZVFHMIN-ZFH-RV64-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a1)
; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 30(sp)
; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 28(sp)
@@ -2638,12 +2433,9 @@ define void @copysign_neg_v6f16(ptr %x, ptr %y) {
; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa5
; ZVFHMIN-ZFH-RV64-NEXT: vmv.v.i v0, 15
; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 6, e16, mf2, ta, mu
; ZVFHMIN-ZFH-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t
-; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-ZFH-RV64-NEXT: vse64.v v9, (a0)
-; ZVFHMIN-ZFH-RV64-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-ZFH-RV64-NEXT: addi a0, a0, 8
-; ZVFHMIN-ZFH-RV64-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v9, (a0)
; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, 32
; ZVFHMIN-ZFH-RV64-NEXT: ret
;
@@ -2651,10 +2443,11 @@ define void @copysign_neg_v6f16(ptr %x, ptr %y) {
; ZVFHMIN-ZFHIN-RV32: # %bb.0:
; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, -32
; ZVFHMIN-ZFHIN-RV32-NEXT: .cfi_def_cfa_offset 32
-; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a1)
; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v9, (a0)
; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1)
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 30(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 28(sp)
@@ -2665,87 +2458,78 @@ define void @copysign_neg_v6f16(ptr %x, ptr %y) {
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 22(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 20(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa3
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a6, fa3
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 16(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a7, fa5
; ZVFHMIN-ZFHIN-RV32-NEXT: not a1, a1
; ZVFHMIN-ZFHIN-RV32-NEXT: not a2, a2
; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t0, fa4
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 18(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: not a6, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT: not t1, a4
-; ZVFHMIN-ZFHIN-RV32-NEXT: not t2, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT: not a4, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: not t1, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT: not a6, a6
; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT: mv a4, sp
-; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a4)
+; ZVFHMIN-ZFHIN-RV32-NEXT: mv a5, sp
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a5)
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 2(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: not t3, a7
-; ZVFHMIN-ZFHIN-RV32-NEXT: not a5, t0
-; ZVFHMIN-ZFHIN-RV32-NEXT: not a3, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a7, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT: lui t0, 8
-; ZVFHMIN-ZFHIN-RV32-NEXT: addi a4, t0, -1
-; ZVFHMIN-ZFHIN-RV32-NEXT: and t4, a7, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: not a7, a7
+; ZVFHMIN-ZFHIN-RV32-NEXT: not t0, t0
+; ZVFHMIN-ZFHIN-RV32-NEXT: not t2, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: lui t3, 8
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi a3, t3, -1
+; ZVFHMIN-ZFHIN-RV32-NEXT: and t4, a5, a3
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 0(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: lui a7, 1048568
-; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a7
-; ZVFHMIN-ZFHIN-RV32-NEXT: or a3, t4, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: lui a5, 1048568
+; ZVFHMIN-ZFHIN-RV32-NEXT: and t2, t2, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT: or t2, t4, t2
; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t4, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT: and t4, t4, a4
-; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a7
+; ZVFHMIN-ZFHIN-RV32-NEXT: and t4, t4, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: and t0, t0, t3
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 4(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: or a5, t4, a5
-; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a5
-; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t4, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT: and t4, t4, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: or t0, t4, t0
+; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, t0
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, t2
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t0, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: and t0, t0, a3
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 6(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: and t3, t3, a7
-; ZVFHMIN-ZFHIN-RV32-NEXT: or t3, t4, t3
-; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, t3
-; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t4, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT: and t4, t4, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a7, a7, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a7, t0, a7
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a7
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a7, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a7, a7, a3
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 10(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: and t2, t2, a7
-; ZVFHMIN-ZFHIN-RV32-NEXT: or t2, t4, t2
-; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, t2
-; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t4, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, a6, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a6, a7, a6
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a6
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a6, fa5
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 8(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: and t4, t4, a4
-; ZVFHMIN-ZFHIN-RV32-NEXT: and t1, t1, a7
-; ZVFHMIN-ZFHIN-RV32-NEXT: or t1, t4, t1
-; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h t4, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT: and t4, t4, a4
-; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, a6, t0
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, a6, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a7, t1, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a6, a6, a7
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a7, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a7, a7, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, t3
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 12(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: or a6, t4, a6
-; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v9, a6
-; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, t1
-; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a6, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT: and a6, a6, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a4, a7, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v9, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a6
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a4, a4, a3
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 14(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a7
-; ZVFHMIN-ZFHIN-RV32-NEXT: or a2, a6, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT: or a2, a4, a2
; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a2
; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a4
-; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a7
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a5
; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1
; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.i v0, 15
; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 6, e16, mf2, ta, mu
; ZVFHMIN-ZFHIN-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t
-; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a5
-; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, t3
-; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, t2
-; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a0)
-; ZVFHMIN-ZFHIN-RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-ZFHIN-RV32-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-ZFHIN-RV32-NEXT: addi a0, a0, 8
-; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-ZFHIN-RV32-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a0)
; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, 32
; ZVFHMIN-ZFHIN-RV32-NEXT: ret
;
@@ -2753,10 +2537,11 @@ define void @copysign_neg_v6f16(ptr %x, ptr %y) {
; ZVFHMIN-ZFHIN-RV64: # %bb.0:
; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, -32
; ZVFHMIN-ZFHIN-RV64-NEXT: .cfi_def_cfa_offset 32
-; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a1)
; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v9, (a0)
; ZVFHMIN-ZFHIN-RV64-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1)
; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 30(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 28(sp)
@@ -2836,12 +2621,9 @@ define void @copysign_neg_v6f16(ptr %x, ptr %y) {
; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1
; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.i v0, 15
; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 6, e16, mf2, ta, mu
; ZVFHMIN-ZFHIN-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t
-; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-ZFHIN-RV64-NEXT: vse64.v v9, (a0)
-; ZVFHMIN-ZFHIN-RV64-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-ZFHIN-RV64-NEXT: addi a0, a0, 8
-; ZVFHMIN-ZFHIN-RV64-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a0)
; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, 32
; ZVFHMIN-ZFHIN-RV64-NEXT: ret
%a = load <6 x half>, ptr %x
@@ -3112,135 +2894,124 @@ define void @copysign_neg_trunc_v3f16_v3f32(ptr %x, ptr %y) {
;
; ZVFHMIN-ZFH-RV32-LABEL: copysign_neg_trunc_v3f16_v3f32:
; ZVFHMIN-ZFH-RV32: # %bb.0:
-; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, -32
-; ZVFHMIN-ZFH-RV32-NEXT: .cfi_def_cfa_offset 32
-; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, -16
+; ZVFHMIN-ZFH-RV32-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 3, e16, mf4, ta, ma
; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 3, e32, mf2, ta, ma
; ZVFHMIN-ZFH-RV32-NEXT: vle32.v v9, (a1)
-; ZVFHMIN-ZFH-RV32-NEXT: addi a1, sp, 8
+; ZVFHMIN-ZFH-RV32-NEXT: mv a1, sp
; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a1)
; ZVFHMIN-ZFH-RV32-NEXT: vfncvt.f.f.w v8, v9
-; ZVFHMIN-ZFH-RV32-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFH-RV32-NEXT: addi a1, sp, 8
; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a1)
-; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 10(sp)
-; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 18(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 10(sp)
; ZVFHMIN-ZFH-RV32-NEXT: fsgnjn.h fa5, fa5, fa4
-; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 8(sp)
-; ZVFHMIN-ZFH-RV32-NEXT: flh fa3, 16(sp)
-; ZVFHMIN-ZFH-RV32-NEXT: flh fa2, 12(sp)
-; ZVFHMIN-ZFH-RV32-NEXT: flh fa1, 20(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 0(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa3, 8(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa2, 4(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa1, 12(sp)
; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa5
; ZVFHMIN-ZFH-RV32-NEXT: fsgnjn.h fa5, fa4, fa3
; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a2, fa5
; ZVFHMIN-ZFH-RV32-NEXT: fsgnjn.h fa5, fa2, fa1
; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a3, fa5
-; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 14(sp)
-; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 22(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 14(sp)
; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.x v8, a2
; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a1
; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a3
; ZVFHMIN-ZFH-RV32-NEXT: fsgnjn.h fa5, fa5, fa4
; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa5
; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFH-RV32-NEXT: addi a1, sp, 24
-; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a1)
-; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 28(sp)
-; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 4(a0)
-; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-ZFH-RV32-NEXT: vse32.v v8, (a0)
-; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, 32
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 3, e16, mf4, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, 16
; ZVFHMIN-ZFH-RV32-NEXT: ret
;
; ZVFHMIN-ZFH-RV64-LABEL: copysign_neg_trunc_v3f16_v3f32:
; ZVFHMIN-ZFH-RV64: # %bb.0:
-; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, -32
-; ZVFHMIN-ZFH-RV64-NEXT: .cfi_def_cfa_offset 32
-; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 3, e32, mf2, ta, ma
-; ZVFHMIN-ZFH-RV64-NEXT: vle32.v v8, (a1)
-; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; ZVFHMIN-ZFH-RV64-NEXT: vle64.v v9, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, -16
+; ZVFHMIN-ZFH-RV64-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 3, e16, mf4, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: vle32.v v9, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: mv a1, sp
; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-ZFH-RV64-NEXT: vfncvt.f.f.w v10, v8
-; ZVFHMIN-ZFH-RV64-NEXT: addi a1, sp, 16
-; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v10, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-ZFH-RV64-NEXT: addi a1, sp, 8
-; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v9, (a1)
-; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 18(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 2(sp)
; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 10(sp)
-; ZVFHMIN-ZFH-RV64-NEXT: fsgnjn.h fa5, fa4, fa5
-; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 16(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnjn.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 0(sp)
; ZVFHMIN-ZFH-RV64-NEXT: flh fa3, 8(sp)
-; ZVFHMIN-ZFH-RV64-NEXT: flh fa2, 20(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa2, 4(sp)
; ZVFHMIN-ZFH-RV64-NEXT: flh fa1, 12(sp)
; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa5
-; ZVFHMIN-ZFH-RV64-NEXT: fsgnjn.h fa5, fa3, fa4
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnjn.h fa5, fa4, fa3
; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a2, fa5
-; ZVFHMIN-ZFH-RV64-NEXT: fsgnjn.h fa5, fa1, fa2
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnjn.h fa5, fa2, fa1
; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a3, fa5
-; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 22(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 6(sp)
; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 14(sp)
; ZVFHMIN-ZFH-RV64-NEXT: vmv.v.x v8, a2
; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a1
; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a3
-; ZVFHMIN-ZFH-RV64-NEXT: fsgnjn.h fa5, fa4, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsgnjn.h fa5, fa5, fa4
; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa5
; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFH-RV64-NEXT: addi a1, sp, 24
-; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a1)
-; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 28(sp)
-; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 4(a0)
-; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-ZFH-RV64-NEXT: vse32.v v8, (a0)
-; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, 32
+; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 3, e16, mf4, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, 16
; ZVFHMIN-ZFH-RV64-NEXT: ret
;
; ZVFHMIN-ZFHIN-RV32-LABEL: copysign_neg_trunc_v3f16_v3f32:
; ZVFHMIN-ZFHIN-RV32: # %bb.0:
-; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, -32
-; ZVFHMIN-ZFHIN-RV32-NEXT: .cfi_def_cfa_offset 32
-; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, -16
+; ZVFHMIN-ZFHIN-RV32-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 3, e16, mf4, ta, ma
; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 3, e32, mf2, ta, ma
; ZVFHMIN-ZFHIN-RV32-NEXT: vle32.v v9, (a1)
-; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 8
+; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp
; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1)
; ZVFHMIN-ZFHIN-RV32-NEXT: vfncvt.f.f.w v8, v9
-; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 8
; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1)
-; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 10(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 18(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 10(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa5
; ZVFHMIN-ZFHIN-RV32-NEXT: lui a2, 8
; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 0(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: addi a4, a2, -1
; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a4
; ZVFHMIN-ZFHIN-RV32-NEXT: not a3, a3
; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 8(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: lui a6, 1048568
; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a6
; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a1, a3
; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 4(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: and a5, a5, a4
; ZVFHMIN-ZFHIN-RV32-NEXT: not a3, a3
; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a3, a2
; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 12(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: or a2, a5, a2
; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a2
; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a1
; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 6(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: and a3, a3, a4
; ZVFHMIN-ZFHIN-RV32-NEXT: not a1, a1
; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a6
; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 22(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 14(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a3, a1
; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a1
; ZVFHMIN-ZFHIN-RV32-NEXT: and a2, a2, a4
@@ -3249,75 +3020,66 @@ define void @copysign_neg_trunc_v3f16_v3f32(ptr %x, ptr %y) {
; ZVFHMIN-ZFHIN-RV32-NEXT: and a1, a1, a6
; ZVFHMIN-ZFHIN-RV32-NEXT: or a1, a2, a1
; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 24
-; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1)
-; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 28(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 4(a0)
-; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-ZFHIN-RV32-NEXT: vse32.v v8, (a0)
-; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, 32
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 3, e16, mf4, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, 16
; ZVFHMIN-ZFHIN-RV32-NEXT: ret
;
; ZVFHMIN-ZFHIN-RV64-LABEL: copysign_neg_trunc_v3f16_v3f32:
; ZVFHMIN-ZFHIN-RV64: # %bb.0:
-; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, -32
-; ZVFHMIN-ZFHIN-RV64-NEXT: .cfi_def_cfa_offset 32
-; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 3, e32, mf2, ta, ma
-; ZVFHMIN-ZFHIN-RV64-NEXT: vle32.v v8, (a1)
-; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; ZVFHMIN-ZFHIN-RV64-NEXT: vle64.v v9, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, -16
+; ZVFHMIN-ZFHIN-RV64-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 3, e16, mf4, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vle32.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: mv a1, sp
; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-ZFHIN-RV64-NEXT: vfncvt.f.f.w v10, v8
-; ZVFHMIN-ZFHIN-RV64-NEXT: addi a1, sp, 16
-; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v10, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-ZFHIN-RV64-NEXT: addi a1, sp, 8
-; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a1)
-; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 18(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 2(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa4, 10(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT: not a1, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT: lui a2, 1048568
+; ZVFHMIN-ZFHIN-RV64-NEXT: lui a2, 8
; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 16(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT: lui a4, 8
-; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a5, a4, -1
-; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a6, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addiw a4, a2, -1
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT: not a3, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a5, fa5
; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 8(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a5
-; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a3, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT: not a3, a6
-; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a6, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 20(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a4
-; ZVFHMIN-ZFHIN-RV64-NEXT: and a4, a6, a5
-; ZVFHMIN-ZFHIN-RV64-NEXT: or a3, a4, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a4, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 12(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v8, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT: not a1, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT: lui a6, 1048568
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a6
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a1, a3
; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 22(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a5
-; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a3, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a5, a5, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT: not a3, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a3, a2
; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a3, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a2, a5, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.x v8, a2
; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT: not a1, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a3, a3, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT: not a1, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a6
; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a2, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a3, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a2, a2, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT: not a1, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT: and a1, a1, a6
; ZVFHMIN-ZFHIN-RV64-NEXT: or a1, a2, a1
; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT: addi a1, sp, 24
-; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1)
-; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 28(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT: fsh fa5, 4(a0)
-; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-ZFHIN-RV64-NEXT: vse32.v v8, (a0)
-; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, 32
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 3, e16, mf4, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, 16
; ZVFHMIN-ZFHIN-RV64-NEXT: ret
%a = load <3 x half>, ptr %x
%b = load <3 x float>, ptr %y
@@ -3385,38 +3147,18 @@ define void @sqrt_v6f16(ptr %x) {
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
-; ZVFHMIN-RV32-LABEL: sqrt_v6f16:
-; ZVFHMIN-RV32: # %bb.0:
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfsqrt.v v8, v9
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV32-NEXT: addi a1, a0, 8
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-RV32-NEXT: vse32.v v8, (a1)
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0)
-; ZVFHMIN-RV32-NEXT: ret
-;
-; ZVFHMIN-RV64-LABEL: sqrt_v6f16:
-; ZVFHMIN-RV64: # %bb.0:
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfsqrt.v v8, v9
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vse64.v v9, (a0)
-; ZVFHMIN-RV64-NEXT: addi a0, a0, 8
-; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: ret
+; ZVFHMIN-LABEL: sqrt_v6f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfsqrt.v v8, v9
+; ZVFHMIN-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
+; ZVFHMIN-NEXT: vse16.v v9, (a0)
+; ZVFHMIN-NEXT: ret
%a = load <6 x half>, ptr %x
%b = call <6 x half> @llvm.sqrt.v6f16(<6 x half> %a)
store <6 x half> %b, ptr %x
@@ -3508,46 +3250,22 @@ define void @fma_v6f16(ptr %x, ptr %y, ptr %z) {
; ZVFH-NEXT: vse16.v v10, (a0)
; ZVFH-NEXT: ret
;
-; ZVFHMIN-RV32-LABEL: fma_v6f16:
-; ZVFHMIN-RV32: # %bb.0:
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vle16.v v8, (a2)
-; ZVFHMIN-RV32-NEXT: vle16.v v9, (a0)
-; ZVFHMIN-RV32-NEXT: vle16.v v10, (a1)
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v11, v8
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v10
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfmadd.vv v9, v8, v11
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v8, v9
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vslidedown.vi v9, v8, 2
-; ZVFHMIN-RV32-NEXT: addi a1, a0, 8
-; ZVFHMIN-RV32-NEXT: vse32.v v9, (a1)
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-RV32-NEXT: vse16.v v8, (a0)
-; ZVFHMIN-RV32-NEXT: ret
-;
-; ZVFHMIN-RV64-LABEL: fma_v6f16:
-; ZVFHMIN-RV64: # %bb.0:
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vle16.v v8, (a2)
-; ZVFHMIN-RV64-NEXT: vle16.v v9, (a0)
-; ZVFHMIN-RV64-NEXT: vle16.v v10, (a1)
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v11, v8
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v10
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfmadd.vv v9, v8, v11
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v8, v9
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vse64.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v8, 2
-; ZVFHMIN-RV64-NEXT: addi a0, a0, 8
-; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: ret
+; ZVFHMIN-LABEL: fma_v6f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vle16.v v8, (a2)
+; ZVFHMIN-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-NEXT: vle16.v v10, (a1)
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfmadd.vv v9, v8, v11
+; ZVFHMIN-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-NEXT: ret
%a = load <6 x half>, ptr %x
%b = load <6 x half>, ptr %y
%c = load <6 x half>, ptr %z
@@ -3692,128 +3410,63 @@ define void @fmsub_v6f16(ptr %x, ptr %y, ptr %z) {
; ZVFH-NEXT: vse16.v v10, (a0)
; ZVFH-NEXT: ret
;
-; ZVFHMIN-RV32-LABEL: fmsub_v6f16:
-; ZVFHMIN-RV32: # %bb.0:
-; ZVFHMIN-RV32-NEXT: addi sp, sp, -16
-; ZVFHMIN-RV32-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
-; ZVFHMIN-RV32-NEXT: vle16.v v8, (a2)
-; ZVFHMIN-RV32-NEXT: vle16.v v9, (a0)
-; ZVFHMIN-RV32-NEXT: vle16.v v10, (a1)
-; ZVFHMIN-RV32-NEXT: mv a1, sp
-; ZVFHMIN-RV32-NEXT: vse16.v v8, (a1)
-; ZVFHMIN-RV32-NEXT: flh fa5, 2(sp)
-; ZVFHMIN-RV32-NEXT: flh fa4, 0(sp)
-; ZVFHMIN-RV32-NEXT: flh fa3, 4(sp)
-; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5
-; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa4
-; ZVFHMIN-RV32-NEXT: lui a3, 1048568
-; ZVFHMIN-RV32-NEXT: fmv.x.h a4, fa3
-; ZVFHMIN-RV32-NEXT: flh fa5, 6(sp)
-; ZVFHMIN-RV32-NEXT: lui a5, 8
-; ZVFHMIN-RV32-NEXT: xor a2, a2, a5
-; ZVFHMIN-RV32-NEXT: vmv.v.x v8, a2
-; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa5
-; ZVFHMIN-RV32-NEXT: flh fa5, 10(sp)
-; ZVFHMIN-RV32-NEXT: xor a1, a1, a3
-; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a1
-; ZVFHMIN-RV32-NEXT: xor a4, a4, a3
-; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5
-; ZVFHMIN-RV32-NEXT: flh fa5, 8(sp)
-; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a4
-; ZVFHMIN-RV32-NEXT: xor a2, a2, a3
-; ZVFHMIN-RV32-NEXT: vslide1down.vx v8, v8, a2
-; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa5
-; ZVFHMIN-RV32-NEXT: flh fa5, 12(sp)
-; ZVFHMIN-RV32-NEXT: xor a1, a1, a3
-; ZVFHMIN-RV32-NEXT: xor a2, a2, a5
-; ZVFHMIN-RV32-NEXT: vmv.v.x v11, a2
-; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa5
-; ZVFHMIN-RV32-NEXT: flh fa5, 14(sp)
-; ZVFHMIN-RV32-NEXT: vslide1down.vx v11, v11, a1
-; ZVFHMIN-RV32-NEXT: xor a2, a2, a3
-; ZVFHMIN-RV32-NEXT: vslide1down.vx v11, v11, a2
-; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5
-; ZVFHMIN-RV32-NEXT: xor a1, a1, a3
-; ZVFHMIN-RV32-NEXT: vmv.v.i v0, 15
-; ZVFHMIN-RV32-NEXT: vslide1down.vx v11, v11, a1
-; ZVFHMIN-RV32-NEXT: vslidedown.vi v11, v8, 4, v0.t
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v11
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v11, v9
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v10
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfmadd.vv v9, v11, v8
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v8, v9
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vslidedown.vi v9, v8, 2
-; ZVFHMIN-RV32-NEXT: addi a1, a0, 8
-; ZVFHMIN-RV32-NEXT: vse32.v v9, (a1)
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-RV32-NEXT: vse16.v v8, (a0)
-; ZVFHMIN-RV32-NEXT: addi sp, sp, 16
-; ZVFHMIN-RV32-NEXT: ret
-;
-; ZVFHMIN-RV64-LABEL: fmsub_v6f16:
-; ZVFHMIN-RV64: # %bb.0:
-; ZVFHMIN-RV64-NEXT: addi sp, sp, -16
-; ZVFHMIN-RV64-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
-; ZVFHMIN-RV64-NEXT: vle16.v v8, (a2)
-; ZVFHMIN-RV64-NEXT: vle16.v v9, (a0)
-; ZVFHMIN-RV64-NEXT: vle16.v v10, (a1)
-; ZVFHMIN-RV64-NEXT: mv a1, sp
-; ZVFHMIN-RV64-NEXT: vse16.v v8, (a1)
-; ZVFHMIN-RV64-NEXT: flh fa5, 2(sp)
-; ZVFHMIN-RV64-NEXT: flh fa4, 0(sp)
-; ZVFHMIN-RV64-NEXT: flh fa3, 4(sp)
-; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5
-; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa4
-; ZVFHMIN-RV64-NEXT: lui a3, 1048568
-; ZVFHMIN-RV64-NEXT: fmv.x.h a4, fa3
-; ZVFHMIN-RV64-NEXT: flh fa5, 6(sp)
-; ZVFHMIN-RV64-NEXT: lui a5, 8
-; ZVFHMIN-RV64-NEXT: xor a2, a2, a5
-; ZVFHMIN-RV64-NEXT: vmv.v.x v8, a2
-; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa5
-; ZVFHMIN-RV64-NEXT: flh fa5, 10(sp)
-; ZVFHMIN-RV64-NEXT: xor a1, a1, a3
-; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a1
-; ZVFHMIN-RV64-NEXT: xor a4, a4, a3
-; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5
-; ZVFHMIN-RV64-NEXT: flh fa5, 8(sp)
-; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a4
-; ZVFHMIN-RV64-NEXT: xor a2, a2, a3
-; ZVFHMIN-RV64-NEXT: vslide1down.vx v8, v8, a2
-; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa5
-; ZVFHMIN-RV64-NEXT: flh fa5, 12(sp)
-; ZVFHMIN-RV64-NEXT: xor a1, a1, a3
-; ZVFHMIN-RV64-NEXT: xor a2, a2, a5
-; ZVFHMIN-RV64-NEXT: vmv.v.x v11, a2
-; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa5
-; ZVFHMIN-RV64-NEXT: flh fa5, 14(sp)
-; ZVFHMIN-RV64-NEXT: vslide1down.vx v11, v11, a1
-; ZVFHMIN-RV64-NEXT: xor a2, a2, a3
-; ZVFHMIN-RV64-NEXT: vslide1down.vx v11, v11, a2
-; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5
-; ZVFHMIN-RV64-NEXT: xor a1, a1, a3
-; ZVFHMIN-RV64-NEXT: vmv.v.i v0, 15
-; ZVFHMIN-RV64-NEXT: vslide1down.vx v11, v11, a1
-; ZVFHMIN-RV64-NEXT: vslidedown.vi v11, v8, 4, v0.t
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v11
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v11, v9
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v10
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfmadd.vv v9, v11, v8
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v8, v9
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vse64.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v8, 2
-; ZVFHMIN-RV64-NEXT: addi a0, a0, 8
-; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: addi sp, sp, 16
-; ZVFHMIN-RV64-NEXT: ret
+; ZVFHMIN-LABEL: fmsub_v6f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: addi sp, sp, -16
+; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vle16.v v8, (a2)
+; ZVFHMIN-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-NEXT: vle16.v v10, (a1)
+; ZVFHMIN-NEXT: mv a1, sp
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
+; ZVFHMIN-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-NEXT: flh fa4, 0(sp)
+; ZVFHMIN-NEXT: flh fa3, 4(sp)
+; ZVFHMIN-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-NEXT: fmv.x.h a2, fa4
+; ZVFHMIN-NEXT: lui a3, 1048568
+; ZVFHMIN-NEXT: fmv.x.h a4, fa3
+; ZVFHMIN-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-NEXT: lui a5, 8
+; ZVFHMIN-NEXT: xor a2, a2, a5
+; ZVFHMIN-NEXT: vmv.v.x v8, a2
+; ZVFHMIN-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-NEXT: xor a1, a1, a3
+; ZVFHMIN-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-NEXT: xor a4, a4, a3
+; ZVFHMIN-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-NEXT: vslide1down.vx v8, v8, a4
+; ZVFHMIN-NEXT: xor a2, a2, a3
+; ZVFHMIN-NEXT: vslide1down.vx v8, v8, a2
+; ZVFHMIN-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-NEXT: xor a1, a1, a3
+; ZVFHMIN-NEXT: xor a2, a2, a5
+; ZVFHMIN-NEXT: vmv.v.x v11, a2
+; ZVFHMIN-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-NEXT: vslide1down.vx v11, v11, a1
+; ZVFHMIN-NEXT: xor a2, a2, a3
+; ZVFHMIN-NEXT: vslide1down.vx v11, v11, a2
+; ZVFHMIN-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-NEXT: xor a1, a1, a3
+; ZVFHMIN-NEXT: vmv.v.i v0, 15
+; ZVFHMIN-NEXT: vslide1down.vx v11, v11, a1
+; ZVFHMIN-NEXT: vslidedown.vi v11, v8, 4, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v11
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfmadd.vv v9, v11, v8
+; ZVFHMIN-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-NEXT: addi sp, sp, 16
+; ZVFHMIN-NEXT: ret
%a = load <6 x half>, ptr %x
%b = load <6 x half>, ptr %y
%c = load <6 x half>, ptr %z
@@ -4565,52 +4218,25 @@ define void @fadd_vf_v6f16(ptr %x, half %y) {
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
-; ZVFHMIN-RV32-LABEL: fadd_vf_v6f16:
-; ZVFHMIN-RV32: # %bb.0:
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5
-; ZVFHMIN-RV32-NEXT: li a2, 192
-; ZVFHMIN-RV32-NEXT: vmv.s.x v0, a2
-; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa0
-; ZVFHMIN-RV32-NEXT: vmv.v.x v9, a2
-; ZVFHMIN-RV32-NEXT: vmerge.vxm v9, v9, a1, v0
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfadd.vv v8, v9, v10
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-RV32-NEXT: addi a1, a0, 8
-; ZVFHMIN-RV32-NEXT: vse32.v v8, (a1)
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0)
-; ZVFHMIN-RV32-NEXT: ret
-;
-; ZVFHMIN-RV64-LABEL: fadd_vf_v6f16:
-; ZVFHMIN-RV64: # %bb.0:
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5
-; ZVFHMIN-RV64-NEXT: li a2, 192
-; ZVFHMIN-RV64-NEXT: vmv.s.x v0, a2
-; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa0
-; ZVFHMIN-RV64-NEXT: vmv.v.x v9, a2
-; ZVFHMIN-RV64-NEXT: vmerge.vxm v9, v9, a1, v0
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfadd.vv v8, v9, v10
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vse64.v v9, (a0)
-; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-RV64-NEXT: addi a0, a0, 8
-; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: ret
+; ZVFHMIN-LABEL: fadd_vf_v6f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-NEXT: li a2, 192
+; ZVFHMIN-NEXT: vmv.s.x v0, a2
+; ZVFHMIN-NEXT: fmv.x.h a2, fa0
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v9, a2
+; ZVFHMIN-NEXT: vmerge.vxm v9, v9, a1, v0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfadd.vv v8, v9, v10
+; ZVFHMIN-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
+; ZVFHMIN-NEXT: vse16.v v9, (a0)
+; ZVFHMIN-NEXT: ret
%a = load <6 x half>, ptr %x
%b = insertelement <6 x half> poison, half %y, i32 0
%c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
@@ -4699,52 +4325,25 @@ define void @fadd_fv_v6f16(ptr %x, half %y) {
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
-; ZVFHMIN-RV32-LABEL: fadd_fv_v6f16:
-; ZVFHMIN-RV32: # %bb.0:
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5
-; ZVFHMIN-RV32-NEXT: li a2, 192
-; ZVFHMIN-RV32-NEXT: vmv.s.x v0, a2
-; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa0
-; ZVFHMIN-RV32-NEXT: vmv.v.x v9, a2
-; ZVFHMIN-RV32-NEXT: vmerge.vxm v9, v9, a1, v0
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfadd.vv v8, v10, v9
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-RV32-NEXT: addi a1, a0, 8
-; ZVFHMIN-RV32-NEXT: vse32.v v8, (a1)
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0)
-; ZVFHMIN-RV32-NEXT: ret
-;
-; ZVFHMIN-RV64-LABEL: fadd_fv_v6f16:
-; ZVFHMIN-RV64: # %bb.0:
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5
-; ZVFHMIN-RV64-NEXT: li a2, 192
-; ZVFHMIN-RV64-NEXT: vmv.s.x v0, a2
-; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa0
-; ZVFHMIN-RV64-NEXT: vmv.v.x v9, a2
-; ZVFHMIN-RV64-NEXT: vmerge.vxm v9, v9, a1, v0
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfadd.vv v8, v10, v9
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vse64.v v9, (a0)
-; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-RV64-NEXT: addi a0, a0, 8
-; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: ret
+; ZVFHMIN-LABEL: fadd_fv_v6f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-NEXT: li a2, 192
+; ZVFHMIN-NEXT: vmv.s.x v0, a2
+; ZVFHMIN-NEXT: fmv.x.h a2, fa0
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v9, a2
+; ZVFHMIN-NEXT: vmerge.vxm v9, v9, a1, v0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfadd.vv v8, v10, v9
+; ZVFHMIN-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
+; ZVFHMIN-NEXT: vse16.v v9, (a0)
+; ZVFHMIN-NEXT: ret
%a = load <6 x half>, ptr %x
%b = insertelement <6 x half> poison, half %y, i32 0
%c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
@@ -4833,52 +4432,25 @@ define void @fsub_vf_v6f16(ptr %x, half %y) {
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
-; ZVFHMIN-RV32-LABEL: fsub_vf_v6f16:
-; ZVFHMIN-RV32: # %bb.0:
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5
-; ZVFHMIN-RV32-NEXT: li a2, 192
-; ZVFHMIN-RV32-NEXT: vmv.s.x v0, a2
-; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa0
-; ZVFHMIN-RV32-NEXT: vmv.v.x v9, a2
-; ZVFHMIN-RV32-NEXT: vmerge.vxm v9, v9, a1, v0
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfsub.vv v8, v9, v10
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-RV32-NEXT: addi a1, a0, 8
-; ZVFHMIN-RV32-NEXT: vse32.v v8, (a1)
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0)
-; ZVFHMIN-RV32-NEXT: ret
-;
-; ZVFHMIN-RV64-LABEL: fsub_vf_v6f16:
-; ZVFHMIN-RV64: # %bb.0:
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5
-; ZVFHMIN-RV64-NEXT: li a2, 192
-; ZVFHMIN-RV64-NEXT: vmv.s.x v0, a2
-; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa0
-; ZVFHMIN-RV64-NEXT: vmv.v.x v9, a2
-; ZVFHMIN-RV64-NEXT: vmerge.vxm v9, v9, a1, v0
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfsub.vv v8, v9, v10
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vse64.v v9, (a0)
-; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-RV64-NEXT: addi a0, a0, 8
-; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: ret
+; ZVFHMIN-LABEL: fsub_vf_v6f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-NEXT: li a2, 192
+; ZVFHMIN-NEXT: vmv.s.x v0, a2
+; ZVFHMIN-NEXT: fmv.x.h a2, fa0
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v9, a2
+; ZVFHMIN-NEXT: vmerge.vxm v9, v9, a1, v0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfsub.vv v8, v9, v10
+; ZVFHMIN-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
+; ZVFHMIN-NEXT: vse16.v v9, (a0)
+; ZVFHMIN-NEXT: ret
%a = load <6 x half>, ptr %x
%b = insertelement <6 x half> poison, half %y, i32 0
%c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
@@ -4967,52 +4539,25 @@ define void @fsub_fv_v6f16(ptr %x, half %y) {
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
-; ZVFHMIN-RV32-LABEL: fsub_fv_v6f16:
-; ZVFHMIN-RV32: # %bb.0:
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5
-; ZVFHMIN-RV32-NEXT: li a2, 192
-; ZVFHMIN-RV32-NEXT: vmv.s.x v0, a2
-; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa0
-; ZVFHMIN-RV32-NEXT: vmv.v.x v9, a2
-; ZVFHMIN-RV32-NEXT: vmerge.vxm v9, v9, a1, v0
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfsub.vv v8, v10, v9
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-RV32-NEXT: addi a1, a0, 8
-; ZVFHMIN-RV32-NEXT: vse32.v v8, (a1)
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0)
-; ZVFHMIN-RV32-NEXT: ret
-;
-; ZVFHMIN-RV64-LABEL: fsub_fv_v6f16:
-; ZVFHMIN-RV64: # %bb.0:
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5
-; ZVFHMIN-RV64-NEXT: li a2, 192
-; ZVFHMIN-RV64-NEXT: vmv.s.x v0, a2
-; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa0
-; ZVFHMIN-RV64-NEXT: vmv.v.x v9, a2
-; ZVFHMIN-RV64-NEXT: vmerge.vxm v9, v9, a1, v0
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfsub.vv v8, v10, v9
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vse64.v v9, (a0)
-; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-RV64-NEXT: addi a0, a0, 8
-; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: ret
+; ZVFHMIN-LABEL: fsub_fv_v6f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-NEXT: li a2, 192
+; ZVFHMIN-NEXT: vmv.s.x v0, a2
+; ZVFHMIN-NEXT: fmv.x.h a2, fa0
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v9, a2
+; ZVFHMIN-NEXT: vmerge.vxm v9, v9, a1, v0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfsub.vv v8, v10, v9
+; ZVFHMIN-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
+; ZVFHMIN-NEXT: vse16.v v9, (a0)
+; ZVFHMIN-NEXT: ret
%a = load <6 x half>, ptr %x
%b = insertelement <6 x half> poison, half %y, i32 0
%c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
@@ -5101,52 +4646,25 @@ define void @fmul_vf_v6f16(ptr %x, half %y) {
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
-; ZVFHMIN-RV32-LABEL: fmul_vf_v6f16:
-; ZVFHMIN-RV32: # %bb.0:
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5
-; ZVFHMIN-RV32-NEXT: li a2, 192
-; ZVFHMIN-RV32-NEXT: vmv.s.x v0, a2
-; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa0
-; ZVFHMIN-RV32-NEXT: vmv.v.x v9, a2
-; ZVFHMIN-RV32-NEXT: vmerge.vxm v9, v9, a1, v0
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfmul.vv v8, v9, v10
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-RV32-NEXT: addi a1, a0, 8
-; ZVFHMIN-RV32-NEXT: vse32.v v8, (a1)
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0)
-; ZVFHMIN-RV32-NEXT: ret
-;
-; ZVFHMIN-RV64-LABEL: fmul_vf_v6f16:
-; ZVFHMIN-RV64: # %bb.0:
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5
-; ZVFHMIN-RV64-NEXT: li a2, 192
-; ZVFHMIN-RV64-NEXT: vmv.s.x v0, a2
-; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa0
-; ZVFHMIN-RV64-NEXT: vmv.v.x v9, a2
-; ZVFHMIN-RV64-NEXT: vmerge.vxm v9, v9, a1, v0
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfmul.vv v8, v9, v10
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vse64.v v9, (a0)
-; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-RV64-NEXT: addi a0, a0, 8
-; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: ret
+; ZVFHMIN-LABEL: fmul_vf_v6f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-NEXT: li a2, 192
+; ZVFHMIN-NEXT: vmv.s.x v0, a2
+; ZVFHMIN-NEXT: fmv.x.h a2, fa0
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v9, a2
+; ZVFHMIN-NEXT: vmerge.vxm v9, v9, a1, v0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfmul.vv v8, v9, v10
+; ZVFHMIN-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
+; ZVFHMIN-NEXT: vse16.v v9, (a0)
+; ZVFHMIN-NEXT: ret
%a = load <6 x half>, ptr %x
%b = insertelement <6 x half> poison, half %y, i32 0
%c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
@@ -5235,52 +4753,25 @@ define void @fmul_fv_v6f16(ptr %x, half %y) {
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
-; ZVFHMIN-RV32-LABEL: fmul_fv_v6f16:
-; ZVFHMIN-RV32: # %bb.0:
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5
-; ZVFHMIN-RV32-NEXT: li a2, 192
-; ZVFHMIN-RV32-NEXT: vmv.s.x v0, a2
-; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa0
-; ZVFHMIN-RV32-NEXT: vmv.v.x v9, a2
-; ZVFHMIN-RV32-NEXT: vmerge.vxm v9, v9, a1, v0
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfmul.vv v8, v10, v9
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-RV32-NEXT: addi a1, a0, 8
-; ZVFHMIN-RV32-NEXT: vse32.v v8, (a1)
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0)
-; ZVFHMIN-RV32-NEXT: ret
-;
-; ZVFHMIN-RV64-LABEL: fmul_fv_v6f16:
-; ZVFHMIN-RV64: # %bb.0:
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5
-; ZVFHMIN-RV64-NEXT: li a2, 192
-; ZVFHMIN-RV64-NEXT: vmv.s.x v0, a2
-; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa0
-; ZVFHMIN-RV64-NEXT: vmv.v.x v9, a2
-; ZVFHMIN-RV64-NEXT: vmerge.vxm v9, v9, a1, v0
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfmul.vv v8, v10, v9
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vse64.v v9, (a0)
-; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-RV64-NEXT: addi a0, a0, 8
-; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: ret
+; ZVFHMIN-LABEL: fmul_fv_v6f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-NEXT: li a2, 192
+; ZVFHMIN-NEXT: vmv.s.x v0, a2
+; ZVFHMIN-NEXT: fmv.x.h a2, fa0
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v9, a2
+; ZVFHMIN-NEXT: vmerge.vxm v9, v9, a1, v0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfmul.vv v8, v10, v9
+; ZVFHMIN-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
+; ZVFHMIN-NEXT: vse16.v v9, (a0)
+; ZVFHMIN-NEXT: ret
%a = load <6 x half>, ptr %x
%b = insertelement <6 x half> poison, half %y, i32 0
%c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
@@ -5369,52 +4860,25 @@ define void @fdiv_vf_v6f16(ptr %x, half %y) {
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
-; ZVFHMIN-RV32-LABEL: fdiv_vf_v6f16:
-; ZVFHMIN-RV32: # %bb.0:
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5
-; ZVFHMIN-RV32-NEXT: li a2, 192
-; ZVFHMIN-RV32-NEXT: vmv.s.x v0, a2
-; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa0
-; ZVFHMIN-RV32-NEXT: vmv.v.x v9, a2
-; ZVFHMIN-RV32-NEXT: vmerge.vxm v9, v9, a1, v0
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfdiv.vv v8, v9, v10
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-RV32-NEXT: addi a1, a0, 8
-; ZVFHMIN-RV32-NEXT: vse32.v v8, (a1)
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0)
-; ZVFHMIN-RV32-NEXT: ret
-;
-; ZVFHMIN-RV64-LABEL: fdiv_vf_v6f16:
-; ZVFHMIN-RV64: # %bb.0:
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5
-; ZVFHMIN-RV64-NEXT: li a2, 192
-; ZVFHMIN-RV64-NEXT: vmv.s.x v0, a2
-; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa0
-; ZVFHMIN-RV64-NEXT: vmv.v.x v9, a2
-; ZVFHMIN-RV64-NEXT: vmerge.vxm v9, v9, a1, v0
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfdiv.vv v8, v9, v10
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vse64.v v9, (a0)
-; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-RV64-NEXT: addi a0, a0, 8
-; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: ret
+; ZVFHMIN-LABEL: fdiv_vf_v6f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-NEXT: li a2, 192
+; ZVFHMIN-NEXT: vmv.s.x v0, a2
+; ZVFHMIN-NEXT: fmv.x.h a2, fa0
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v9, a2
+; ZVFHMIN-NEXT: vmerge.vxm v9, v9, a1, v0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfdiv.vv v8, v9, v10
+; ZVFHMIN-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
+; ZVFHMIN-NEXT: vse16.v v9, (a0)
+; ZVFHMIN-NEXT: ret
%a = load <6 x half>, ptr %x
%b = insertelement <6 x half> poison, half %y, i32 0
%c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
@@ -5503,52 +4967,25 @@ define void @fdiv_fv_v6f16(ptr %x, half %y) {
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
-; ZVFHMIN-RV32-LABEL: fdiv_fv_v6f16:
-; ZVFHMIN-RV32: # %bb.0:
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5
-; ZVFHMIN-RV32-NEXT: li a2, 192
-; ZVFHMIN-RV32-NEXT: vmv.s.x v0, a2
-; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa0
-; ZVFHMIN-RV32-NEXT: vmv.v.x v9, a2
-; ZVFHMIN-RV32-NEXT: vmerge.vxm v9, v9, a1, v0
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfdiv.vv v8, v10, v9
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-RV32-NEXT: addi a1, a0, 8
-; ZVFHMIN-RV32-NEXT: vse32.v v8, (a1)
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0)
-; ZVFHMIN-RV32-NEXT: ret
-;
-; ZVFHMIN-RV64-LABEL: fdiv_fv_v6f16:
-; ZVFHMIN-RV64: # %bb.0:
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5
-; ZVFHMIN-RV64-NEXT: li a2, 192
-; ZVFHMIN-RV64-NEXT: vmv.s.x v0, a2
-; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa0
-; ZVFHMIN-RV64-NEXT: vmv.v.x v9, a2
-; ZVFHMIN-RV64-NEXT: vmerge.vxm v9, v9, a1, v0
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfdiv.vv v8, v10, v9
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vse64.v v9, (a0)
-; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-RV64-NEXT: addi a0, a0, 8
-; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: ret
+; ZVFHMIN-LABEL: fdiv_fv_v6f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-NEXT: li a2, 192
+; ZVFHMIN-NEXT: vmv.s.x v0, a2
+; ZVFHMIN-NEXT: fmv.x.h a2, fa0
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v9, a2
+; ZVFHMIN-NEXT: vmerge.vxm v9, v9, a1, v0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfdiv.vv v8, v10, v9
+; ZVFHMIN-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
+; ZVFHMIN-NEXT: vse16.v v9, (a0)
+; ZVFHMIN-NEXT: ret
%a = load <6 x half>, ptr %x
%b = insertelement <6 x half> poison, half %y, i32 0
%c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
@@ -5642,56 +5079,27 @@ define void @fma_vf_v6f16(ptr %x, ptr %y, half %z) {
; ZVFH-NEXT: vse16.v v9, (a0)
; ZVFH-NEXT: ret
;
-; ZVFHMIN-RV32-LABEL: fma_vf_v6f16:
-; ZVFHMIN-RV32: # %bb.0:
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV32-NEXT: vle16.v v9, (a1)
-; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5
-; ZVFHMIN-RV32-NEXT: li a2, 192
-; ZVFHMIN-RV32-NEXT: vmv.s.x v0, a2
-; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa0
-; ZVFHMIN-RV32-NEXT: vmv.v.x v10, a2
-; ZVFHMIN-RV32-NEXT: vmerge.vxm v10, v10, a1, v0
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v11, v10
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfmadd.vv v9, v11, v10
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v8, v9
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vslidedown.vi v9, v8, 2
-; ZVFHMIN-RV32-NEXT: addi a1, a0, 8
-; ZVFHMIN-RV32-NEXT: vse32.v v9, (a1)
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-RV32-NEXT: vse16.v v8, (a0)
-; ZVFHMIN-RV32-NEXT: ret
-;
-; ZVFHMIN-RV64-LABEL: fma_vf_v6f16:
-; ZVFHMIN-RV64: # %bb.0:
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: vle16.v v9, (a1)
-; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5
-; ZVFHMIN-RV64-NEXT: li a2, 192
-; ZVFHMIN-RV64-NEXT: vmv.s.x v0, a2
-; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa0
-; ZVFHMIN-RV64-NEXT: vmv.v.x v10, a2
-; ZVFHMIN-RV64-NEXT: vmerge.vxm v10, v10, a1, v0
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v11, v10
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfmadd.vv v9, v11, v10
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v8, v9
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vse64.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v8, 2
-; ZVFHMIN-RV64-NEXT: addi a0, a0, 8
-; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: ret
+; ZVFHMIN-LABEL: fma_vf_v6f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vle16.v v9, (a1)
+; ZVFHMIN-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-NEXT: li a2, 192
+; ZVFHMIN-NEXT: vmv.s.x v0, a2
+; ZVFHMIN-NEXT: fmv.x.h a2, fa0
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v10, a2
+; ZVFHMIN-NEXT: vmerge.vxm v10, v10, a1, v0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfmadd.vv v9, v11, v10
+; ZVFHMIN-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-NEXT: ret
%a = load <6 x half>, ptr %x
%b = load <6 x half>, ptr %y
%c = insertelement <6 x half> poison, half %z, i32 0
@@ -5791,56 +5199,27 @@ define void @fma_fv_v6f16(ptr %x, ptr %y, half %z) {
; ZVFH-NEXT: vse16.v v9, (a0)
; ZVFH-NEXT: ret
;
-; ZVFHMIN-RV32-LABEL: fma_fv_v6f16:
-; ZVFHMIN-RV32: # %bb.0:
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV32-NEXT: vle16.v v9, (a1)
-; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa5
-; ZVFHMIN-RV32-NEXT: li a2, 192
-; ZVFHMIN-RV32-NEXT: vmv.s.x v0, a2
-; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa0
-; ZVFHMIN-RV32-NEXT: vmv.v.x v10, a2
-; ZVFHMIN-RV32-NEXT: vmerge.vxm v10, v10, a1, v0
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v11, v10
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfmadd.vv v9, v11, v10
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v8, v9
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vslidedown.vi v9, v8, 2
-; ZVFHMIN-RV32-NEXT: addi a1, a0, 8
-; ZVFHMIN-RV32-NEXT: vse32.v v9, (a1)
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-RV32-NEXT: vse16.v v8, (a0)
-; ZVFHMIN-RV32-NEXT: ret
-;
-; ZVFHMIN-RV64-LABEL: fma_fv_v6f16:
-; ZVFHMIN-RV64: # %bb.0:
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: vle16.v v9, (a1)
-; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa5
-; ZVFHMIN-RV64-NEXT: li a2, 192
-; ZVFHMIN-RV64-NEXT: vmv.s.x v0, a2
-; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa0
-; ZVFHMIN-RV64-NEXT: vmv.v.x v10, a2
-; ZVFHMIN-RV64-NEXT: vmerge.vxm v10, v10, a1, v0
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v11, v10
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v10, v9
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfmadd.vv v9, v11, v10
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v8, v9
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vse64.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v8, 2
-; ZVFHMIN-RV64-NEXT: addi a0, a0, 8
-; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: ret
+; ZVFHMIN-LABEL: fma_fv_v6f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vle16.v v9, (a1)
+; ZVFHMIN-NEXT: fmv.x.h a1, fa5
+; ZVFHMIN-NEXT: li a2, 192
+; ZVFHMIN-NEXT: vmv.s.x v0, a2
+; ZVFHMIN-NEXT: fmv.x.h a2, fa0
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vmv.v.x v10, a2
+; ZVFHMIN-NEXT: vmerge.vxm v10, v10, a1, v0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfmadd.vv v9, v11, v10
+; ZVFHMIN-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-NEXT: ret
%a = load <6 x half>, ptr %x
%b = load <6 x half>, ptr %y
%c = insertelement <6 x half> poison, half %z, i32 0
@@ -5982,138 +5361,68 @@ define void @fmsub_vf_v6f16(ptr %x, ptr %y, half %z) {
; ZVFH-NEXT: vse16.v v9, (a0)
; ZVFH-NEXT: ret
;
-; ZVFHMIN-RV32-LABEL: fmsub_vf_v6f16:
-; ZVFHMIN-RV32: # %bb.0:
-; ZVFHMIN-RV32-NEXT: addi sp, sp, -16
-; ZVFHMIN-RV32-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
-; ZVFHMIN-RV32-NEXT: vle16.v v9, (a1)
-; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV32-NEXT: mv a1, sp
-; ZVFHMIN-RV32-NEXT: vse16.v v9, (a1)
-; ZVFHMIN-RV32-NEXT: flh fa5, 2(sp)
-; ZVFHMIN-RV32-NEXT: fmv.x.h a1, fa0
-; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa5
-; ZVFHMIN-RV32-NEXT: flh fa5, 0(sp)
-; ZVFHMIN-RV32-NEXT: fmv.x.h a3, fa5
-; ZVFHMIN-RV32-NEXT: li a4, 192
-; ZVFHMIN-RV32-NEXT: vmv.s.x v0, a4
-; ZVFHMIN-RV32-NEXT: fmv.x.h a4, fa5
-; ZVFHMIN-RV32-NEXT: flh fa5, 4(sp)
-; ZVFHMIN-RV32-NEXT: vmv.v.x v9, a1
-; ZVFHMIN-RV32-NEXT: vmerge.vxm v9, v9, a3, v0
-; ZVFHMIN-RV32-NEXT: lui a1, 1048568
-; ZVFHMIN-RV32-NEXT: fmv.x.h a3, fa5
-; ZVFHMIN-RV32-NEXT: flh fa5, 6(sp)
-; ZVFHMIN-RV32-NEXT: lui a5, 8
-; ZVFHMIN-RV32-NEXT: xor a4, a4, a5
-; ZVFHMIN-RV32-NEXT: vmv.v.x v10, a4
-; ZVFHMIN-RV32-NEXT: fmv.x.h a4, fa5
-; ZVFHMIN-RV32-NEXT: flh fa5, 10(sp)
-; ZVFHMIN-RV32-NEXT: xor a2, a2, a1
-; ZVFHMIN-RV32-NEXT: vslide1down.vx v10, v10, a2
-; ZVFHMIN-RV32-NEXT: xor a3, a3, a1
-; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa5
-; ZVFHMIN-RV32-NEXT: flh fa5, 8(sp)
-; ZVFHMIN-RV32-NEXT: vslide1down.vx v10, v10, a3
-; ZVFHMIN-RV32-NEXT: xor a4, a4, a1
-; ZVFHMIN-RV32-NEXT: vslide1down.vx v10, v10, a4
-; ZVFHMIN-RV32-NEXT: fmv.x.h a3, fa5
-; ZVFHMIN-RV32-NEXT: flh fa5, 12(sp)
-; ZVFHMIN-RV32-NEXT: xor a2, a2, a1
-; ZVFHMIN-RV32-NEXT: xor a3, a3, a5
-; ZVFHMIN-RV32-NEXT: vmv.v.x v11, a3
-; ZVFHMIN-RV32-NEXT: fmv.x.h a3, fa5
-; ZVFHMIN-RV32-NEXT: flh fa5, 14(sp)
-; ZVFHMIN-RV32-NEXT: vslide1down.vx v11, v11, a2
-; ZVFHMIN-RV32-NEXT: xor a3, a3, a1
-; ZVFHMIN-RV32-NEXT: vslide1down.vx v11, v11, a3
-; ZVFHMIN-RV32-NEXT: fmv.x.h a2, fa5
-; ZVFHMIN-RV32-NEXT: xor a1, a2, a1
-; ZVFHMIN-RV32-NEXT: vmv.v.i v0, 15
-; ZVFHMIN-RV32-NEXT: vslide1down.vx v11, v11, a1
-; ZVFHMIN-RV32-NEXT: vslidedown.vi v11, v10, 4, v0.t
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v10, v11
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v11, v9
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfmadd.vv v9, v11, v10
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v8, v9
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vslidedown.vi v9, v8, 2
-; ZVFHMIN-RV32-NEXT: addi a1, a0, 8
-; ZVFHMIN-RV32-NEXT: vse32.v v9, (a1)
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-RV32-NEXT: vse16.v v8, (a0)
-; ZVFHMIN-RV32-NEXT: addi sp, sp, 16
-; ZVFHMIN-RV32-NEXT: ret
-;
-; ZVFHMIN-RV64-LABEL: fmsub_vf_v6f16:
-; ZVFHMIN-RV64: # %bb.0:
-; ZVFHMIN-RV64-NEXT: addi sp, sp, -16
-; ZVFHMIN-RV64-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
-; ZVFHMIN-RV64-NEXT: vle16.v v9, (a1)
-; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: mv a1, sp
-; ZVFHMIN-RV64-NEXT: vse16.v v9, (a1)
-; ZVFHMIN-RV64-NEXT: flh fa5, 2(sp)
-; ZVFHMIN-RV64-NEXT: fmv.x.h a1, fa0
-; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa5
-; ZVFHMIN-RV64-NEXT: flh fa5, 0(sp)
-; ZVFHMIN-RV64-NEXT: fmv.x.h a3, fa5
-; ZVFHMIN-RV64-NEXT: li a4, 192
-; ZVFHMIN-RV64-NEXT: vmv.s.x v0, a4
-; ZVFHMIN-RV64-NEXT: fmv.x.h a4, fa5
-; ZVFHMIN-RV64-NEXT: flh fa5, 4(sp)
-; ZVFHMIN-RV64-NEXT: vmv.v.x v9, a1
-; ZVFHMIN-RV64-NEXT: vmerge.vxm v9, v9, a3, v0
-; ZVFHMIN-RV64-NEXT: lui a1, 1048568
-; ZVFHMIN-RV64-NEXT: fmv.x.h a3, fa5
-; ZVFHMIN-RV64-NEXT: flh fa5, 6(sp)
-; ZVFHMIN-RV64-NEXT: lui a5, 8
-; ZVFHMIN-RV64-NEXT: xor a4, a4, a5
-; ZVFHMIN-RV64-NEXT: vmv.v.x v10, a4
-; ZVFHMIN-RV64-NEXT: fmv.x.h a4, fa5
-; ZVFHMIN-RV64-NEXT: flh fa5, 10(sp)
-; ZVFHMIN-RV64-NEXT: xor a2, a2, a1
-; ZVFHMIN-RV64-NEXT: vslide1down.vx v10, v10, a2
-; ZVFHMIN-RV64-NEXT: xor a3, a3, a1
-; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa5
-; ZVFHMIN-RV64-NEXT: flh fa5, 8(sp)
-; ZVFHMIN-RV64-NEXT: vslide1down.vx v10, v10, a3
-; ZVFHMIN-RV64-NEXT: xor a4, a4, a1
-; ZVFHMIN-RV64-NEXT: vslide1down.vx v10, v10, a4
-; ZVFHMIN-RV64-NEXT: fmv.x.h a3, fa5
-; ZVFHMIN-RV64-NEXT: flh fa5, 12(sp)
-; ZVFHMIN-RV64-NEXT: xor a2, a2, a1
-; ZVFHMIN-RV64-NEXT: xor a3, a3, a5
-; ZVFHMIN-RV64-NEXT: vmv.v.x v11, a3
-; ZVFHMIN-RV64-NEXT: fmv.x.h a3, fa5
-; ZVFHMIN-RV64-NEXT: flh fa5, 14(sp)
-; ZVFHMIN-RV64-NEXT: vslide1down.vx v11, v11, a2
-; ZVFHMIN-RV64-NEXT: xor a3, a3, a1
-; ZVFHMIN-RV64-NEXT: vslide1down.vx v11, v11, a3
-; ZVFHMIN-RV64-NEXT: fmv.x.h a2, fa5
-; ZVFHMIN-RV64-NEXT: xor a1, a2, a1
-; ZVFHMIN-RV64-NEXT: vmv.v.i v0, 15
-; ZVFHMIN-RV64-NEXT: vslide1down.vx v11, v11, a1
-; ZVFHMIN-RV64-NEXT: vslidedown.vi v11, v10, 4, v0.t
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v10, v11
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v11, v9
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfmadd.vv v9, v11, v10
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v8, v9
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vse64.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v8, 2
-; ZVFHMIN-RV64-NEXT: addi a0, a0, 8
-; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: addi sp, sp, 16
-; ZVFHMIN-RV64-NEXT: ret
+; ZVFHMIN-LABEL: fmsub_vf_v6f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: addi sp, sp, -16
+; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
+; ZVFHMIN-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vle16.v v9, (a1)
+; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: mv a1, sp
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, mu
+; ZVFHMIN-NEXT: vse16.v v9, (a1)
+; ZVFHMIN-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-NEXT: fmv.x.h a1, fa0
+; ZVFHMIN-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-NEXT: flh fa5, 0(sp)
+; ZVFHMIN-NEXT: fmv.x.h a3, fa5
+; ZVFHMIN-NEXT: li a4, 192
+; ZVFHMIN-NEXT: vmv.s.x v0, a4
+; ZVFHMIN-NEXT: fmv.x.h a4, fa5
+; ZVFHMIN-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-NEXT: vmv.v.x v9, a1
+; ZVFHMIN-NEXT: vmerge.vxm v9, v9, a3, v0
+; ZVFHMIN-NEXT: lui a1, 1048568
+; ZVFHMIN-NEXT: fmv.x.h a3, fa5
+; ZVFHMIN-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-NEXT: lui a5, 8
+; ZVFHMIN-NEXT: xor a4, a4, a5
+; ZVFHMIN-NEXT: vmv.v.x v10, a4
+; ZVFHMIN-NEXT: fmv.x.h a4, fa5
+; ZVFHMIN-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-NEXT: xor a2, a2, a1
+; ZVFHMIN-NEXT: vslide1down.vx v10, v10, a2
+; ZVFHMIN-NEXT: xor a3, a3, a1
+; ZVFHMIN-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-NEXT: vslide1down.vx v10, v10, a3
+; ZVFHMIN-NEXT: xor a4, a4, a1
+; ZVFHMIN-NEXT: vslide1down.vx v10, v10, a4
+; ZVFHMIN-NEXT: fmv.x.h a3, fa5
+; ZVFHMIN-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-NEXT: xor a2, a2, a1
+; ZVFHMIN-NEXT: xor a3, a3, a5
+; ZVFHMIN-NEXT: vmv.v.x v11, a3
+; ZVFHMIN-NEXT: fmv.x.h a3, fa5
+; ZVFHMIN-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-NEXT: vslide1down.vx v11, v11, a2
+; ZVFHMIN-NEXT: xor a3, a3, a1
+; ZVFHMIN-NEXT: vslide1down.vx v11, v11, a3
+; ZVFHMIN-NEXT: fmv.x.h a2, fa5
+; ZVFHMIN-NEXT: xor a1, a2, a1
+; ZVFHMIN-NEXT: vmv.v.i v0, 15
+; ZVFHMIN-NEXT: vslide1down.vx v11, v11, a1
+; ZVFHMIN-NEXT: vslidedown.vi v11, v10, 4, v0.t
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v11
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfmadd.vv v9, v11, v10
+; ZVFHMIN-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-NEXT: addi sp, sp, 16
+; ZVFHMIN-NEXT: ret
%a = load <6 x half>, ptr %x
%b = load <6 x half>, ptr %y
%c = insertelement <6 x half> poison, half %z, i32 0
@@ -6715,9 +6024,10 @@ define void @trunc_v6f16(ptr %x) {
; ZVFHMIN-ZFH-RV32: # %bb.0:
; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, -16
; ZVFHMIN-ZFH-RV32-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v8, (a0)
; ZVFHMIN-ZFH-RV32-NEXT: mv a1, sp
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a1)
; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 2(sp)
; ZVFHMIN-ZFH-RV32-NEXT: lui a1, %hi(.LCPI116_0)
@@ -6750,79 +6060,69 @@ define void @trunc_v6f16(ptr %x) {
; ZVFHMIN-ZFH-RV32-NEXT: .LBB116_6:
; ZVFHMIN-ZFH-RV32-NEXT: flh fa3, 6(sp)
; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa0, fa3
-; ZVFHMIN-ZFH-RV32-NEXT: flt.h a2, fa0, fa5
-; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa1
-; ZVFHMIN-ZFH-RV32-NEXT: beqz a2, .LBB116_8
+; ZVFHMIN-ZFH-RV32-NEXT: flt.h a1, fa0, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a2, fa1
+; ZVFHMIN-ZFH-RV32-NEXT: beqz a1, .LBB116_8
; ZVFHMIN-ZFH-RV32-NEXT: # %bb.7:
-; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a2, fa3, rtz
-; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa1, a2, rtz
+; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a1, fa3, rtz
+; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa1, a1, rtz
; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa3, fa1, fa3
; ZVFHMIN-ZFH-RV32-NEXT: .LBB116_8:
; ZVFHMIN-ZFH-RV32-NEXT: flh fa1, 10(sp)
-; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a2, fa4
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa4
; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa4, fa1
; ZVFHMIN-ZFH-RV32-NEXT: flt.h a3, fa4, fa5
-; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.x v8, a1
+; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.x v8, a2
; ZVFHMIN-ZFH-RV32-NEXT: beqz a3, .LBB116_10
; ZVFHMIN-ZFH-RV32-NEXT: # %bb.9:
-; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a3, fa1, rtz
-; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa4, a3, rtz
+; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a2, fa1, rtz
+; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa4, a2, rtz
; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa1, fa4, fa1
; ZVFHMIN-ZFH-RV32-NEXT: .LBB116_10:
; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 8(sp)
-; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a2
-; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a3, fa2
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a2, fa2
; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa2, fa4
-; ZVFHMIN-ZFH-RV32-NEXT: flt.h a4, fa2, fa5
-; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a5, fa1
-; ZVFHMIN-ZFH-RV32-NEXT: beqz a4, .LBB116_12
+; ZVFHMIN-ZFH-RV32-NEXT: flt.h a3, fa2, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa1
+; ZVFHMIN-ZFH-RV32-NEXT: beqz a3, .LBB116_12
; ZVFHMIN-ZFH-RV32-NEXT: # %bb.11:
-; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a4, fa4, rtz
-; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa2, a4, rtz
+; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a3, fa4, rtz
+; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa2, a3, rtz
; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa4, fa2, fa4
; ZVFHMIN-ZFH-RV32-NEXT: .LBB116_12:
-; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a3
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a2
; ZVFHMIN-ZFH-RV32-NEXT: flh fa2, 12(sp)
-; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a4, fa3
-; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a6, fa4
-; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.x v9, a6
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a2, fa3
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a3, fa4
+; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.x v9, a3
; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa4, fa2
-; ZVFHMIN-ZFH-RV32-NEXT: flt.h a6, fa4, fa5
-; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a5
-; ZVFHMIN-ZFH-RV32-NEXT: beqz a6, .LBB116_14
+; ZVFHMIN-ZFH-RV32-NEXT: flt.h a3, fa4, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFH-RV32-NEXT: beqz a3, .LBB116_14
; ZVFHMIN-ZFH-RV32-NEXT: # %bb.13:
-; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a5, fa2, rtz
-; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa4, a5, rtz
+; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a1, fa2, rtz
+; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa4, a1, rtz
; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa2, fa4, fa2
; ZVFHMIN-ZFH-RV32-NEXT: .LBB116_14:
; ZVFHMIN-ZFH-RV32-NEXT: flh fa4, 14(sp)
-; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a4
-; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a5, fa2
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a2
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa2
; ZVFHMIN-ZFH-RV32-NEXT: fabs.h fa3, fa4
-; ZVFHMIN-ZFH-RV32-NEXT: flt.h a6, fa3, fa5
-; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a5
-; ZVFHMIN-ZFH-RV32-NEXT: beqz a6, .LBB116_16
+; ZVFHMIN-ZFH-RV32-NEXT: flt.h a2, fa3, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFH-RV32-NEXT: beqz a2, .LBB116_16
; ZVFHMIN-ZFH-RV32-NEXT: # %bb.15:
-; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a5, fa4, rtz
-; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa5, a5, rtz
+; ZVFHMIN-ZFH-RV32-NEXT: fcvt.w.h a1, fa4, rtz
+; ZVFHMIN-ZFH-RV32-NEXT: fcvt.h.w fa5, a1, rtz
; ZVFHMIN-ZFH-RV32-NEXT: fsgnj.h fa4, fa5, fa4
; ZVFHMIN-ZFH-RV32-NEXT: .LBB116_16:
-; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a5, fa4
+; ZVFHMIN-ZFH-RV32-NEXT: fmv.x.h a1, fa4
; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.i v0, 15
-; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a5
-; ZVFHMIN-ZFH-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
+; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 6, e16, mf2, ta, mu
; ZVFHMIN-ZFH-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t
-; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-ZFH-RV32-NEXT: vmv.v.x v8, a1
-; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a2
-; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a3
-; ZVFHMIN-ZFH-RV32-NEXT: vslide1down.vx v8, v8, a4
-; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a0)
-; ZVFHMIN-ZFH-RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-ZFH-RV32-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-ZFH-RV32-NEXT: addi a0, a0, 8
-; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-ZFH-RV32-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v9, (a0)
; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, 16
; ZVFHMIN-ZFH-RV32-NEXT: ret
;
@@ -6830,9 +6130,10 @@ define void @trunc_v6f16(ptr %x) {
; ZVFHMIN-ZFH-RV64: # %bb.0:
; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, -16
; ZVFHMIN-ZFH-RV64-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v8, (a0)
; ZVFHMIN-ZFH-RV64-NEXT: mv a1, sp
+; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a1)
; ZVFHMIN-ZFH-RV64-NEXT: flh fa4, 2(sp)
; ZVFHMIN-ZFH-RV64-NEXT: lui a1, %hi(.LCPI116_0)
@@ -6925,13 +6226,9 @@ define void @trunc_v6f16(ptr %x) {
; ZVFHMIN-ZFH-RV64-NEXT: fmv.x.h a1, fa4
; ZVFHMIN-ZFH-RV64-NEXT: vmv.v.i v0, 15
; ZVFHMIN-ZFH-RV64-NEXT: vslide1down.vx v9, v9, a1
-; ZVFHMIN-ZFH-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
+; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 6, e16, mf2, ta, mu
; ZVFHMIN-ZFH-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t
-; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-ZFH-RV64-NEXT: vse64.v v9, (a0)
-; ZVFHMIN-ZFH-RV64-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-ZFH-RV64-NEXT: addi a0, a0, 8
-; ZVFHMIN-ZFH-RV64-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v9, (a0)
; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, 16
; ZVFHMIN-ZFH-RV64-NEXT: ret
;
@@ -6939,9 +6236,10 @@ define void @trunc_v6f16(ptr %x) {
; ZVFHMIN-ZFHIN-RV32: # %bb.0:
; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, -16
; ZVFHMIN-ZFHIN-RV32-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a0)
; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1)
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 2(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa4, fa5
@@ -6980,89 +6278,79 @@ define void @trunc_v6f16(ptr %x) {
; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa2, fa4
; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa4, fa0
; ZVFHMIN-ZFHIN-RV32-NEXT: fabs.s fa0, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a2, fa0, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa1
-; ZVFHMIN-ZFHIN-RV32-NEXT: beqz a2, .LBB116_8
+; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a1, fa0, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa1
+; ZVFHMIN-ZFHIN-RV32-NEXT: beqz a1, .LBB116_8
; ZVFHMIN-ZFHIN-RV32-NEXT: # %bb.7:
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a2, fa4, rtz
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa1, a2, rtz
+; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a1, fa4, rtz
+; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa1, a1, rtz
; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa4, fa1, fa4
; ZVFHMIN-ZFHIN-RV32-NEXT: .LBB116_8:
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa1, 10(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa2
-; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa2
+; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a2
; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa2, fa1
; ZVFHMIN-ZFHIN-RV32-NEXT: fabs.s fa1, fa2
-; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a3, fa1, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a2, fa1, fa5
; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa3, fa3
-; ZVFHMIN-ZFHIN-RV32-NEXT: beqz a3, .LBB116_10
+; ZVFHMIN-ZFHIN-RV32-NEXT: beqz a2, .LBB116_10
; ZVFHMIN-ZFHIN-RV32-NEXT: # %bb.9:
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a3, fa2, rtz
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa1, a3, rtz
+; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a2, fa2, rtz
+; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa1, a2, rtz
; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa2, fa1, fa2
; ZVFHMIN-ZFHIN-RV32-NEXT: .LBB116_10:
-; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a1
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa1, 8(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa3
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa3
; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa3, fa4
; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa2, fa2
; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa4, fa1
; ZVFHMIN-ZFHIN-RV32-NEXT: fabs.s fa1, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a4, fa1, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa2
-; ZVFHMIN-ZFHIN-RV32-NEXT: beqz a4, .LBB116_12
+; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a3, fa1, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a2, fa2
+; ZVFHMIN-ZFHIN-RV32-NEXT: beqz a3, .LBB116_12
; ZVFHMIN-ZFHIN-RV32-NEXT: # %bb.11:
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a4, fa4, rtz
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa2, a4, rtz
+; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a3, fa4, rtz
+; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa2, a3, rtz
; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa4, fa2, fa4
; ZVFHMIN-ZFHIN-RV32-NEXT: .LBB116_12:
-; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a4, fa3
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa3
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 12(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa4, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a6, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v9, a6
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a3, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v9, a3
; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa4, fa3
; ZVFHMIN-ZFHIN-RV32-NEXT: fabs.s fa3, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a6, fa3, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a5
-; ZVFHMIN-ZFHIN-RV32-NEXT: beqz a6, .LBB116_14
+; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a3, fa3, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT: beqz a3, .LBB116_14
; ZVFHMIN-ZFHIN-RV32-NEXT: # %bb.13:
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a5, fa4, rtz
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa3, a5, rtz
+; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a2, fa4, rtz
+; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa3, a2, rtz
; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa4, fa3, fa4
; ZVFHMIN-ZFHIN-RV32-NEXT: .LBB116_14:
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 14(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a1
; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa4, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa4
; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.h fa4, fa3
; ZVFHMIN-ZFHIN-RV32-NEXT: fabs.s fa3, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a6, fa3, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a5
-; ZVFHMIN-ZFHIN-RV32-NEXT: beqz a6, .LBB116_16
+; ZVFHMIN-ZFHIN-RV32-NEXT: flt.s a2, fa3, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: beqz a2, .LBB116_16
; ZVFHMIN-ZFHIN-RV32-NEXT: # %bb.15:
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a5, fa4, rtz
-; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa5, a5, rtz
+; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.w.s a1, fa4, rtz
+; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.s.w fa5, a1, rtz
; ZVFHMIN-ZFHIN-RV32-NEXT: fsgnj.s fa4, fa5, fa4
; ZVFHMIN-ZFHIN-RV32-NEXT: .LBB116_16:
; ZVFHMIN-ZFHIN-RV32-NEXT: fcvt.h.s fa5, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a5, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT: fmv.x.h a1, fa5
; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.i v0, 15
-; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a5
-; ZVFHMIN-ZFHIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
+; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 6, e16, mf2, ta, mu
; ZVFHMIN-ZFHIN-RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t
-; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-ZFHIN-RV32-NEXT: vmv.v.x v8, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT: vslide1down.vx v8, v8, a4
-; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a0)
-; ZVFHMIN-ZFHIN-RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; ZVFHMIN-ZFHIN-RV32-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-ZFHIN-RV32-NEXT: addi a0, a0, 8
-; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-ZFHIN-RV32-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a0)
; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, 16
; ZVFHMIN-ZFHIN-RV32-NEXT: ret
;
@@ -7070,9 +6358,10 @@ define void @trunc_v6f16(ptr %x) {
; ZVFHMIN-ZFHIN-RV64: # %bb.0:
; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, -16
; ZVFHMIN-ZFHIN-RV64-NEXT: .cfi_def_cfa_offset 16
-; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
; ZVFHMIN-ZFHIN-RV64-NEXT: vle16.v v8, (a0)
; ZVFHMIN-ZFHIN-RV64-NEXT: mv a1, sp
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v8, (a1)
; ZVFHMIN-ZFHIN-RV64-NEXT: flh fa5, 2(sp)
; ZVFHMIN-ZFHIN-RV64-NEXT: fcvt.s.h fa4, fa5
@@ -7181,13 +6470,9 @@ define void @trunc_v6f16(ptr %x) {
; ZVFHMIN-ZFHIN-RV64-NEXT: fmv.x.h a1, fa5
; ZVFHMIN-ZFHIN-RV64-NEXT: vmv.v.i v0, 15
; ZVFHMIN-ZFHIN-RV64-NEXT: vslide1down.vx v9, v9, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
+; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 6, e16, mf2, ta, mu
; ZVFHMIN-ZFHIN-RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t
-; ZVFHMIN-ZFHIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-ZFHIN-RV64-NEXT: vse64.v v9, (a0)
-; ZVFHMIN-ZFHIN-RV64-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-ZFHIN-RV64-NEXT: addi a0, a0, 8
-; ZVFHMIN-ZFHIN-RV64-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV64-NEXT: vse16.v v9, (a0)
; ZVFHMIN-ZFHIN-RV64-NEXT: addi sp, sp, 16
; ZVFHMIN-ZFHIN-RV64-NEXT: ret
%a = load <6 x half>, ptr %x
@@ -7320,56 +6605,27 @@ define void @ceil_v6f16(ptr %x) {
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
-; ZVFHMIN-RV32-LABEL: ceil_v6f16:
-; ZVFHMIN-RV32: # %bb.0:
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfabs.v v8, v9
-; ZVFHMIN-RV32-NEXT: lui a1, 307200
-; ZVFHMIN-RV32-NEXT: fmv.w.x fa5, a1
-; ZVFHMIN-RV32-NEXT: vmflt.vf v0, v8, fa5
-; ZVFHMIN-RV32-NEXT: fsrmi a1, 3
-; ZVFHMIN-RV32-NEXT: vfcvt.x.f.v v8, v9, v0.t
-; ZVFHMIN-RV32-NEXT: fsrm a1
-; ZVFHMIN-RV32-NEXT: vfcvt.f.x.v v8, v8, v0.t
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, mu
-; ZVFHMIN-RV32-NEXT: vfsgnj.vv v9, v8, v9, v0.t
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v8, v9
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vslidedown.vi v9, v8, 2
-; ZVFHMIN-RV32-NEXT: addi a1, a0, 8
-; ZVFHMIN-RV32-NEXT: vse32.v v9, (a1)
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-RV32-NEXT: vse16.v v8, (a0)
-; ZVFHMIN-RV32-NEXT: ret
-;
-; ZVFHMIN-RV64-LABEL: ceil_v6f16:
-; ZVFHMIN-RV64: # %bb.0:
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfabs.v v8, v9
-; ZVFHMIN-RV64-NEXT: lui a1, 307200
-; ZVFHMIN-RV64-NEXT: fmv.w.x fa5, a1
-; ZVFHMIN-RV64-NEXT: vmflt.vf v0, v8, fa5
-; ZVFHMIN-RV64-NEXT: fsrmi a1, 3
-; ZVFHMIN-RV64-NEXT: vfcvt.x.f.v v8, v9, v0.t
-; ZVFHMIN-RV64-NEXT: fsrm a1
-; ZVFHMIN-RV64-NEXT: vfcvt.f.x.v v8, v8, v0.t
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, mu
-; ZVFHMIN-RV64-NEXT: vfsgnj.vv v9, v8, v9, v0.t
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v8, v9
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vse64.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v8, 2
-; ZVFHMIN-RV64-NEXT: addi a0, a0, 8
-; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: ret
+; ZVFHMIN-LABEL: ceil_v6f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v8, v9
+; ZVFHMIN-NEXT: lui a1, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a1
+; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT: fsrmi a1, 3
+; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; ZVFHMIN-NEXT: fsrm a1
+; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; ZVFHMIN-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-NEXT: ret
%a = load <6 x half>, ptr %x
%b = call <6 x half> @llvm.ceil.v6f16(<6 x half> %a)
store <6 x half> %b, ptr %x
@@ -7506,56 +6762,27 @@ define void @floor_v6f16(ptr %x) {
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
-; ZVFHMIN-RV32-LABEL: floor_v6f16:
-; ZVFHMIN-RV32: # %bb.0:
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfabs.v v8, v9
-; ZVFHMIN-RV32-NEXT: lui a1, 307200
-; ZVFHMIN-RV32-NEXT: fmv.w.x fa5, a1
-; ZVFHMIN-RV32-NEXT: vmflt.vf v0, v8, fa5
-; ZVFHMIN-RV32-NEXT: fsrmi a1, 2
-; ZVFHMIN-RV32-NEXT: vfcvt.x.f.v v8, v9, v0.t
-; ZVFHMIN-RV32-NEXT: fsrm a1
-; ZVFHMIN-RV32-NEXT: vfcvt.f.x.v v8, v8, v0.t
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, mu
-; ZVFHMIN-RV32-NEXT: vfsgnj.vv v9, v8, v9, v0.t
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v8, v9
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vslidedown.vi v9, v8, 2
-; ZVFHMIN-RV32-NEXT: addi a1, a0, 8
-; ZVFHMIN-RV32-NEXT: vse32.v v9, (a1)
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-RV32-NEXT: vse16.v v8, (a0)
-; ZVFHMIN-RV32-NEXT: ret
-;
-; ZVFHMIN-RV64-LABEL: floor_v6f16:
-; ZVFHMIN-RV64: # %bb.0:
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfabs.v v8, v9
-; ZVFHMIN-RV64-NEXT: lui a1, 307200
-; ZVFHMIN-RV64-NEXT: fmv.w.x fa5, a1
-; ZVFHMIN-RV64-NEXT: vmflt.vf v0, v8, fa5
-; ZVFHMIN-RV64-NEXT: fsrmi a1, 2
-; ZVFHMIN-RV64-NEXT: vfcvt.x.f.v v8, v9, v0.t
-; ZVFHMIN-RV64-NEXT: fsrm a1
-; ZVFHMIN-RV64-NEXT: vfcvt.f.x.v v8, v8, v0.t
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, mu
-; ZVFHMIN-RV64-NEXT: vfsgnj.vv v9, v8, v9, v0.t
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v8, v9
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vse64.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v8, 2
-; ZVFHMIN-RV64-NEXT: addi a0, a0, 8
-; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: ret
+; ZVFHMIN-LABEL: floor_v6f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v8, v9
+; ZVFHMIN-NEXT: lui a1, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a1
+; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT: fsrmi a1, 2
+; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; ZVFHMIN-NEXT: fsrm a1
+; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; ZVFHMIN-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-NEXT: ret
%a = load <6 x half>, ptr %x
%b = call <6 x half> @llvm.floor.v6f16(<6 x half> %a)
store <6 x half> %b, ptr %x
@@ -7692,56 +6919,27 @@ define void @round_v6f16(ptr %x) {
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
-; ZVFHMIN-RV32-LABEL: round_v6f16:
-; ZVFHMIN-RV32: # %bb.0:
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfabs.v v8, v9
-; ZVFHMIN-RV32-NEXT: lui a1, 307200
-; ZVFHMIN-RV32-NEXT: fmv.w.x fa5, a1
-; ZVFHMIN-RV32-NEXT: vmflt.vf v0, v8, fa5
-; ZVFHMIN-RV32-NEXT: fsrmi a1, 4
-; ZVFHMIN-RV32-NEXT: vfcvt.x.f.v v8, v9, v0.t
-; ZVFHMIN-RV32-NEXT: fsrm a1
-; ZVFHMIN-RV32-NEXT: vfcvt.f.x.v v8, v8, v0.t
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, mu
-; ZVFHMIN-RV32-NEXT: vfsgnj.vv v9, v8, v9, v0.t
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v8, v9
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vslidedown.vi v9, v8, 2
-; ZVFHMIN-RV32-NEXT: addi a1, a0, 8
-; ZVFHMIN-RV32-NEXT: vse32.v v9, (a1)
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-RV32-NEXT: vse16.v v8, (a0)
-; ZVFHMIN-RV32-NEXT: ret
-;
-; ZVFHMIN-RV64-LABEL: round_v6f16:
-; ZVFHMIN-RV64: # %bb.0:
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfabs.v v8, v9
-; ZVFHMIN-RV64-NEXT: lui a1, 307200
-; ZVFHMIN-RV64-NEXT: fmv.w.x fa5, a1
-; ZVFHMIN-RV64-NEXT: vmflt.vf v0, v8, fa5
-; ZVFHMIN-RV64-NEXT: fsrmi a1, 4
-; ZVFHMIN-RV64-NEXT: vfcvt.x.f.v v8, v9, v0.t
-; ZVFHMIN-RV64-NEXT: fsrm a1
-; ZVFHMIN-RV64-NEXT: vfcvt.f.x.v v8, v8, v0.t
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, mu
-; ZVFHMIN-RV64-NEXT: vfsgnj.vv v9, v8, v9, v0.t
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v8, v9
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vse64.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v8, 2
-; ZVFHMIN-RV64-NEXT: addi a0, a0, 8
-; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: ret
+; ZVFHMIN-LABEL: round_v6f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vle16.v v8, (a0)
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfabs.v v8, v9
+; ZVFHMIN-NEXT: lui a1, 307200
+; ZVFHMIN-NEXT: fmv.w.x fa5, a1
+; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
+; ZVFHMIN-NEXT: fsrmi a1, 4
+; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t
+; ZVFHMIN-NEXT: fsrm a1
+; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t
+; ZVFHMIN-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-NEXT: ret
%a = load <6 x half>, ptr %x
%b = call <6 x half> @llvm.round.v6f16(<6 x half> %a)
store <6 x half> %b, ptr %x
@@ -8075,56 +7273,27 @@ define void @fmuladd_v6f16(ptr %x, ptr %y, ptr %z) {
; ZVFH-NEXT: vse16.v v10, (a0)
; ZVFH-NEXT: ret
;
-; ZVFHMIN-RV32-LABEL: fmuladd_v6f16:
-; ZVFHMIN-RV32: # %bb.0:
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vle16.v v8, (a1)
-; ZVFHMIN-RV32-NEXT: vle16.v v9, (a0)
-; ZVFHMIN-RV32-NEXT: vle16.v v10, (a2)
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v11, v8
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfmul.vv v8, v8, v11
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v10
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfadd.vv v8, v8, v9
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-RV32-NEXT: addi a1, a0, 8
-; ZVFHMIN-RV32-NEXT: vse32.v v8, (a1)
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0)
-; ZVFHMIN-RV32-NEXT: ret
-;
-; ZVFHMIN-RV64-LABEL: fmuladd_v6f16:
-; ZVFHMIN-RV64: # %bb.0:
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vle16.v v8, (a1)
-; ZVFHMIN-RV64-NEXT: vle16.v v9, (a0)
-; ZVFHMIN-RV64-NEXT: vle16.v v10, (a2)
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v11, v8
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfmul.vv v8, v8, v11
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v10
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfadd.vv v8, v8, v9
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vse64.v v9, (a0)
-; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-RV64-NEXT: addi a0, a0, 8
-; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: ret
+; ZVFHMIN-LABEL: fmuladd_v6f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-NEXT: vle16.v v10, (a2)
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfmul.vv v8, v8, v11
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfadd.vv v8, v8, v9
+; ZVFHMIN-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
+; ZVFHMIN-NEXT: vse16.v v9, (a0)
+; ZVFHMIN-NEXT: ret
%a = load <6 x half>, ptr %x
%b = load <6 x half>, ptr %y
%c = load <6 x half>, ptr %z
@@ -8233,56 +7402,27 @@ define void @fmsub_fmuladd_v6f16(ptr %x, ptr %y, ptr %z) {
; ZVFH-NEXT: vse16.v v10, (a0)
; ZVFH-NEXT: ret
;
-; ZVFHMIN-RV32-LABEL: fmsub_fmuladd_v6f16:
-; ZVFHMIN-RV32: # %bb.0:
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vle16.v v8, (a1)
-; ZVFHMIN-RV32-NEXT: vle16.v v9, (a0)
-; ZVFHMIN-RV32-NEXT: vle16.v v10, (a2)
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v11, v8
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfmul.vv v8, v8, v11
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v10
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfsub.vv v8, v8, v9
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-RV32-NEXT: addi a1, a0, 8
-; ZVFHMIN-RV32-NEXT: vse32.v v8, (a1)
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0)
-; ZVFHMIN-RV32-NEXT: ret
-;
-; ZVFHMIN-RV64-LABEL: fmsub_fmuladd_v6f16:
-; ZVFHMIN-RV64: # %bb.0:
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vle16.v v8, (a1)
-; ZVFHMIN-RV64-NEXT: vle16.v v9, (a0)
-; ZVFHMIN-RV64-NEXT: vle16.v v10, (a2)
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v11, v8
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfmul.vv v8, v8, v11
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v9
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v10
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfsub.vv v8, v8, v9
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vse64.v v9, (a0)
-; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-RV64-NEXT: addi a0, a0, 8
-; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: ret
+; ZVFHMIN-LABEL: fmsub_fmuladd_v6f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-NEXT: vle16.v v9, (a0)
+; ZVFHMIN-NEXT: vle16.v v10, (a2)
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfmul.vv v8, v8, v11
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfsub.vv v8, v8, v9
+; ZVFHMIN-NEXT: vsetivli zero, 6, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
+; ZVFHMIN-NEXT: vse16.v v9, (a0)
+; ZVFHMIN-NEXT: ret
%a = load <6 x half>, ptr %x
%b = load <6 x half>, ptr %y
%c = load <6 x half>, ptr %z
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll
index 19587438ea9475..22cde3c36ef619 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll
@@ -182,12 +182,12 @@ define <16 x i64> @exact_vlen_i64_m8(ptr %p) vscale_range(2,2) {
ret <16 x i64> %v
}
-define <8 x bfloat> @load_v8bf16(ptr %p) {
-; CHECK-LABEL: load_v8bf16:
+define <6 x bfloat> @load_v6bf16(ptr %p) {
+; CHECK-LABEL: load_v6bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: ret
- %x = load <8 x bfloat>, ptr %p
- ret <8 x bfloat> %x
+ %x = load <6 x bfloat>, ptr %p
+ ret <6 x bfloat> %x
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll
index 7f18ee44631a1e..169d99abb13c22 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll
@@ -294,13 +294,13 @@ define void @exact_vlen_i64_m8(ptr %p) vscale_range(2,2) {
ret void
}
-define void @store_v8bf16(ptr %p, <8 x bfloat> %v) {
-; CHECK-LABEL: store_v8bf16:
+define void @store_v6bf16(ptr %p, <6 x bfloat> %v) {
+; CHECK-LABEL: store_v6bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
- store <8 x bfloat> %v, ptr %p
+ store <6 x bfloat> %v, ptr %p
ret void
}