[llvm] d21a435 - [LegalizeVectorOps][RISCV] Don't scalarize FNEG in ExpandFNEG if FSUB is marked Promote.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 18 18:25:16 PDT 2024


Author: Craig Topper
Date: 2024-09-18T18:19:21-07:00
New Revision: d21a43579e36af4aa90bf541aa8bab33e7500297

URL: https://github.com/llvm/llvm-project/commit/d21a43579e36af4aa90bf541aa8bab33e7500297
DIFF: https://github.com/llvm/llvm-project/commit/d21a43579e36af4aa90bf541aa8bab33e7500297.diff

LOG: [LegalizeVectorOps][RISCV] Don't scalarize FNEG in ExpandFNEG if FSUB is marked Promote.

ExpandFNEG has a special check that uses FSUB legality to decide
whether vector FP operations are supported for the type, and
scalarizes when they are not. If the FP arithmetic would instead be
promoted, don't unroll.

This improves Zvfhmin codegen on RISC-V.
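
For context, here is a minimal standalone C++ sketch (not LLVM code; the
types and values are illustrative) of the identity the XOR expansion relies
on: negating an IEEE-754 value is just flipping its sign bit, so a vector
FNEG can be lowered as a single vector XOR with a splatted sign-bit mask
rather than being scalarized. For f16 that mask is 0x8000, which is the
"lui a1, 8" / "vxor.vx" sequence in the updated RISC-V output below.

    // Sketch of the sign-bit XOR identity, shown with float (bit 31).
    // For half the same trick flips bit 15 (mask 0x8000).
    #include <bit>
    #include <cassert>
    #include <cstdint>

    int main() {
      float x = 1.5f;
      // Reinterpret the float as an integer, flip the sign bit, and
      // reinterpret back; the result is exactly -x.
      std::uint32_t bits = std::bit_cast<std::uint32_t>(x);
      float negated = std::bit_cast<float>(bits ^ 0x80000000u);
      assert(negated == -1.5f);
      return 0;
    }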

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 3dc5affacc5a76..5d433204d5da08 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -1806,7 +1806,7 @@ SDValue VectorLegalizer::ExpandFNEG(SDNode *Node) {
 
   // FIXME: The FSUB check is here to force unrolling v1f64 vectors on AArch64.
   if (!TLI.isOperationLegalOrCustom(ISD::XOR, IntVT) ||
-      !(TLI.isOperationLegalOrCustom(ISD::FSUB, VT) || VT.isScalableVector()))
+      !(TLI.isOperationLegalOrCustomOrPromote(ISD::FSUB, VT) || VT.isScalableVector()))
     return SDValue();
 
   SDLoc DL(Node);

diff  --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
index b5c40fbfaac6c9..5ab8eab091c2e4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
@@ -428,50 +428,11 @@ define void @fneg_v8f16(ptr %x) {
 ;
 ; ZVFHMIN-LABEL: fneg_v8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    addi sp, sp, -16
-; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, mu
+; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-NEXT:    mv a1, sp
-; ZVFHMIN-NEXT:    vse16.v v8, (a1)
-; ZVFHMIN-NEXT:    flh fa5, 2(sp)
-; ZVFHMIN-NEXT:    flh fa4, 0(sp)
-; ZVFHMIN-NEXT:    flh fa3, 4(sp)
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-NEXT:    fmv.x.h a2, fa4
-; ZVFHMIN-NEXT:    lui a3, 1048568
-; ZVFHMIN-NEXT:    fmv.x.h a4, fa3
-; ZVFHMIN-NEXT:    flh fa5, 6(sp)
-; ZVFHMIN-NEXT:    lui a5, 8
-; ZVFHMIN-NEXT:    xor a2, a2, a5
-; ZVFHMIN-NEXT:    vmv.v.x v8, a2
-; ZVFHMIN-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-NEXT:    flh fa5, 10(sp)
-; ZVFHMIN-NEXT:    xor a1, a1, a3
-; ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a1
-; ZVFHMIN-NEXT:    xor a4, a4, a3
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-NEXT:    flh fa5, 8(sp)
-; ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a4
-; ZVFHMIN-NEXT:    xor a2, a2, a3
-; ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a2
-; ZVFHMIN-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-NEXT:    flh fa5, 12(sp)
-; ZVFHMIN-NEXT:    xor a1, a1, a3
-; ZVFHMIN-NEXT:    xor a2, a2, a5
-; ZVFHMIN-NEXT:    vmv.v.x v9, a2
-; ZVFHMIN-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-NEXT:    flh fa5, 14(sp)
-; ZVFHMIN-NEXT:    vslide1down.vx v9, v9, a1
-; ZVFHMIN-NEXT:    xor a2, a2, a3
-; ZVFHMIN-NEXT:    vslide1down.vx v9, v9, a2
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-NEXT:    xor a1, a1, a3
-; ZVFHMIN-NEXT:    vmv.v.i v0, 15
-; ZVFHMIN-NEXT:    vslide1down.vx v9, v9, a1
-; ZVFHMIN-NEXT:    vslidedown.vi v9, v8, 4, v0.t
-; ZVFHMIN-NEXT:    vse16.v v9, (a0)
-; ZVFHMIN-NEXT:    addi sp, sp, 16
+; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    vxor.vx v8, v8, a1
+; ZVFHMIN-NEXT:    vse16.v v8, (a0)
 ; ZVFHMIN-NEXT:    ret
   %a = load <8 x half>, ptr %x
   %b = fneg <8 x half> %a
@@ -490,52 +451,11 @@ define void @fneg_v6f16(ptr %x) {
 ;
 ; ZVFHMIN-LABEL: fneg_v6f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    addi sp, sp, -16
-; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
 ; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-NEXT:    mv a1, sp
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vse16.v v8, (a1)
-; ZVFHMIN-NEXT:    flh fa5, 2(sp)
-; ZVFHMIN-NEXT:    flh fa4, 0(sp)
-; ZVFHMIN-NEXT:    flh fa3, 4(sp)
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-NEXT:    fmv.x.h a2, fa4
-; ZVFHMIN-NEXT:    lui a3, 1048568
-; ZVFHMIN-NEXT:    fmv.x.h a4, fa3
-; ZVFHMIN-NEXT:    flh fa5, 6(sp)
-; ZVFHMIN-NEXT:    lui a5, 8
-; ZVFHMIN-NEXT:    xor a2, a2, a5
-; ZVFHMIN-NEXT:    vmv.v.x v8, a2
-; ZVFHMIN-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-NEXT:    flh fa5, 10(sp)
-; ZVFHMIN-NEXT:    xor a1, a1, a3
-; ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a1
-; ZVFHMIN-NEXT:    xor a4, a4, a3
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-NEXT:    flh fa5, 8(sp)
-; ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a4
-; ZVFHMIN-NEXT:    xor a2, a2, a3
-; ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a2
-; ZVFHMIN-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-NEXT:    flh fa5, 12(sp)
-; ZVFHMIN-NEXT:    xor a1, a1, a3
-; ZVFHMIN-NEXT:    xor a2, a2, a5
-; ZVFHMIN-NEXT:    vmv.v.x v9, a2
-; ZVFHMIN-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-NEXT:    flh fa5, 14(sp)
-; ZVFHMIN-NEXT:    vslide1down.vx v9, v9, a1
-; ZVFHMIN-NEXT:    xor a2, a2, a3
-; ZVFHMIN-NEXT:    vslide1down.vx v9, v9, a2
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-NEXT:    xor a1, a1, a3
-; ZVFHMIN-NEXT:    vmv.v.i v0, 15
-; ZVFHMIN-NEXT:    vslide1down.vx v9, v9, a1
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, mf2, ta, mu
-; ZVFHMIN-NEXT:    vslidedown.vi v9, v8, 4, v0.t
-; ZVFHMIN-NEXT:    vse16.v v9, (a0)
-; ZVFHMIN-NEXT:    addi sp, sp, 16
+; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    vxor.vx v8, v8, a1
+; ZVFHMIN-NEXT:    vse16.v v8, (a0)
 ; ZVFHMIN-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = fneg <6 x half> %a
@@ -2013,48 +1933,50 @@ define void @copysign_neg_v8f16(ptr %x, ptr %y) {
 ; ZVFHMIN-ZFH-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, mu
 ; ZVFHMIN-ZFH-RV32-NEXT:    vle16.v v8, (a1)
 ; ZVFHMIN-ZFH-RV32-NEXT:    vle16.v v9, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT:    lui a1, 8
+; ZVFHMIN-ZFH-RV32-NEXT:    vxor.vx v8, v8, a1
+; ZVFHMIN-ZFH-RV32-NEXT:    mv a1, sp
+; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v9, (a1)
 ; ZVFHMIN-ZFH-RV32-NEXT:    addi a1, sp, 16
 ; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v8, (a1)
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 30(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 28(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa3, 24(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa2, 26(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 18(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa3, 0(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa2, 16(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a1, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa3, fa2
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 4(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa3, 20(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa2, 6(sp)
 ; ZVFHMIN-ZFH-RV32-NEXT:    flh fa1, 22(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa0, 20(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    flh ft0, 16(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    flh ft1, 18(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    mv a1, sp
-; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v9, (a1)
-; ZVFHMIN-ZFH-RV32-NEXT:    flh ft2, 2(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    flh ft3, 0(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h ft1, ft2, ft1
-; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a1, ft1
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h ft0, ft3, ft0
-; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a2, ft0
-; ZVFHMIN-ZFH-RV32-NEXT:    flh ft0, 4(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a2, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa4, fa3
+; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a3, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa2, fa1
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 10(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa3, 26(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a4, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa2, 24(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa4, fa4, fa3
+; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a5, fa4
 ; ZVFHMIN-ZFH-RV32-NEXT:    vmv.v.x v8, a2
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa5, fa2
+; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a2, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 28(sp)
 ; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFH-RV32-NEXT:    flh ft1, 6(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa0, ft0, fa0
-; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa1, ft1, fa1
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa0, 10(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a1, fa1
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa1, 8(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa2, fa0, fa2
-; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a1, fa2
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa3, fa1, fa3
-; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a2, fa3
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa3, 12(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v8, v8, a3
+; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v8, v8, a4
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a1, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 30(sp)
 ; ZVFHMIN-ZFH-RV32-NEXT:    vmv.v.x v9, a2
+; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v9, v9, a5
 ; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v9, v9, a1
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa2, 14(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa4, fa3, fa4
-; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a1, fa4
-; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v9, v9, a1
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa5, fa2, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa5, fa4
 ; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a1, fa5
 ; ZVFHMIN-ZFH-RV32-NEXT:    vmv.v.i v0, 15
 ; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v9, v9, a1
@@ -2070,48 +1992,50 @@ define void @copysign_neg_v8f16(ptr %x, ptr %y) {
 ; ZVFHMIN-ZFH-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, mu
 ; ZVFHMIN-ZFH-RV64-NEXT:    vle16.v v8, (a1)
 ; ZVFHMIN-ZFH-RV64-NEXT:    vle16.v v9, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT:    lui a1, 8
+; ZVFHMIN-ZFH-RV64-NEXT:    vxor.vx v8, v8, a1
+; ZVFHMIN-ZFH-RV64-NEXT:    mv a1, sp
+; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v9, (a1)
 ; ZVFHMIN-ZFH-RV64-NEXT:    addi a1, sp, 16
 ; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v8, (a1)
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 30(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 28(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa3, 24(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa2, 26(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 18(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa3, 0(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa2, 16(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a1, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa3, fa2
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 4(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa3, 20(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa2, 6(sp)
 ; ZVFHMIN-ZFH-RV64-NEXT:    flh fa1, 22(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa0, 20(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    flh ft0, 16(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    flh ft1, 18(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    mv a1, sp
-; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v9, (a1)
-; ZVFHMIN-ZFH-RV64-NEXT:    flh ft2, 2(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    flh ft3, 0(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h ft1, ft2, ft1
-; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a1, ft1
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h ft0, ft3, ft0
-; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a2, ft0
-; ZVFHMIN-ZFH-RV64-NEXT:    flh ft0, 4(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a2, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa4, fa3
+; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a3, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa2, fa1
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 10(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa3, 26(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a4, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa2, 24(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa4, fa4, fa3
+; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a5, fa4
 ; ZVFHMIN-ZFH-RV64-NEXT:    vmv.v.x v8, a2
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa5, fa2
+; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a2, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 28(sp)
 ; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFH-RV64-NEXT:    flh ft1, 6(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa0, ft0, fa0
-; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa1, ft1, fa1
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa0, 10(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a1, fa1
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa1, 8(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa2, fa0, fa2
-; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a1, fa2
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa3, fa1, fa3
-; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a2, fa3
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa3, 12(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v8, v8, a3
+; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v8, v8, a4
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a1, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 30(sp)
 ; ZVFHMIN-ZFH-RV64-NEXT:    vmv.v.x v9, a2
+; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v9, v9, a5
 ; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v9, v9, a1
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa2, 14(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa4, fa3, fa4
-; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a1, fa4
-; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v9, v9, a1
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa5, fa2, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa5, fa4
 ; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a1, fa5
 ; ZVFHMIN-ZFH-RV64-NEXT:    vmv.v.i v0, 15
 ; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v9, v9, a1
@@ -2127,83 +2051,76 @@ define void @copysign_neg_v8f16(ptr %x, ptr %y) {
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, mu
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v8, (a1)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v9, (a0)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a1, sp, 16
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a1)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 30(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa4, 28(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 24(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a2, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa4, 26(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa3, 22(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lui a1, 8
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vxor.vx v8, v8, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    mv a2, sp
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v9, (a2)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a2, sp, 16
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a2)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa4, 18(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 20(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a5, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a6, fa3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a4, fa4
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa4, 16(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a7, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    not a1, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    not a2, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h t0, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 18(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    not a4, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    not t1, a5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    not a6, a6
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    mv a5, sp
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v9, (a5)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 2(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    not a7, a7
-; ZVFHMIN-ZFHIN-RV32-NEXT:    not t0, t0
-; ZVFHMIN-ZFHIN-RV32-NEXT:    not t2, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa3, 4(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a5, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    lui t3, 8
-; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a3, t3, -1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and t4, a5, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 0(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    lui a5, 1048568
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and t2, t2, a5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or t2, t4, t2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h t4, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and t4, t4, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and t0, t0, t3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 4(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or t0, t4, t0
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.x v8, t0
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, t2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a6, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a7, fa3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a2, a1, -1
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h t0, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and t0, t0, a3
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 6(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a7, a7, a5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a7, t0, a7
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a7
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a7, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a7, a7, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and t1, a3, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lui a3, 1048568
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a4, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h t2, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 22(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a4, t1, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, a5, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a6, a6, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h t1, fa5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 10(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a6, a6, a5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a6, a7, a6
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a6
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a5, a5, a6
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.x v8, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a4, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 26(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, a7, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a6, t0, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a5, a5, a6
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a6, fa5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 8(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a6, a6, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a7, t1, a5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a6, a6, a7
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a7, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a7, a7, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a4, t3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, t2, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a7, t1, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h t0, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a5, a5, a7
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a4, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a5, fa5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 12(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a4, a7, a4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.x v9, a4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v9, v9, a6
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a4, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a4, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a6, a6, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a4, a4, a6
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a5, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a5, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a6, t0, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a6, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.x v9, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a1, fa5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 14(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a2, a5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a2, a4, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v9, v9, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a2, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a1, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v9, v9, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, a5, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a1, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a4, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 30(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a5, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a4, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a1, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a1, a3
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a2, a1
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.i v0, 15
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v9, v9, a1
@@ -2219,83 +2136,76 @@ define void @copysign_neg_v8f16(ptr %x, ptr %y) {
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, mu
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vle16.v v8, (a1)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vle16.v v9, (a0)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a1, sp, 16
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a1)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 30(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa4, 28(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 24(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a2, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa4, 26(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa3, 22(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lui a1, 8
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vxor.vx v8, v8, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    mv a2, sp
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v9, (a2)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a2, sp, 16
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a2)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa4, 18(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 20(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a5, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a6, fa3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a4, fa4
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa4, 16(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a7, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    not a1, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    not a2, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h t0, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 18(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    not a4, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    not t1, a5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    not a6, a6
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    mv a5, sp
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v9, (a5)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 2(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    not a7, a7
-; ZVFHMIN-ZFHIN-RV64-NEXT:    not t0, t0
-; ZVFHMIN-ZFHIN-RV64-NEXT:    not t2, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa3, 4(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a5, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    lui t3, 8
-; ZVFHMIN-ZFHIN-RV64-NEXT:    addiw a3, t3, -1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and t4, a5, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 0(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    lui a5, 1048568
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and t2, t2, a5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or t2, t4, t2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h t4, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and t4, t4, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and t0, t0, t3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 4(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or t0, t4, t0
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.x v8, t0
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, t2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a6, fa4
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a7, fa3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addiw a2, a1, -1
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h t0, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and t0, t0, a3
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 6(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a7, a7, a5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a7, t0, a7
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a7
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a7, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a7, a7, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and t1, a3, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lui a3, 1048568
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a4, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h t2, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 22(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a4, t1, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, a5, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a6, a6, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h t1, fa5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 10(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a6, a6, a5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a6, a7, a6
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a6
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a5, a5, a6
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.x v8, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a4, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 26(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, a7, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a6, t0, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a5, a5, a6
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a6, fa5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 8(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a6, a6, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a7, t1, a5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a6, a6, a7
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a7, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a7, a7, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a4, t3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, t2, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a7, t1, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h t0, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a5, a5, a7
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a4, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a5, fa5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 12(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a4, a7, a4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.x v9, a4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v9, v9, a6
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a4, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a4, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a6, a6, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a4, a4, a6
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a5, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a5, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a6, t0, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a6, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.x v9, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a1, fa5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 14(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a2, a5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a2, a4, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v9, v9, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a2, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a1, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v9, v9, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, a5, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a1, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a4, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 30(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a5, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a4, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a1, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a1, a3
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a2, a1
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.i v0, 15
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v9, v9, a1
@@ -2328,49 +2238,51 @@ define void @copysign_neg_v6f16(ptr %x, ptr %y) {
 ; ZVFHMIN-ZFH-RV32-NEXT:    vsetivli zero, 6, e16, mf2, ta, ma
 ; ZVFHMIN-ZFH-RV32-NEXT:    vle16.v v8, (a1)
 ; ZVFHMIN-ZFH-RV32-NEXT:    vle16.v v9, (a0)
-; ZVFHMIN-ZFH-RV32-NEXT:    addi a1, sp, 16
+; ZVFHMIN-ZFH-RV32-NEXT:    lui a1, 8
 ; ZVFHMIN-ZFH-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v8, (a1)
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 30(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 28(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa3, 24(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa2, 26(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa1, 22(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa0, 20(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    flh ft0, 16(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    flh ft1, 18(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    vxor.vx v8, v8, a1
 ; ZVFHMIN-ZFH-RV32-NEXT:    mv a1, sp
 ; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v9, (a1)
-; ZVFHMIN-ZFH-RV32-NEXT:    flh ft2, 2(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    flh ft3, 0(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h ft1, ft2, ft1
-; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a1, ft1
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h ft0, ft3, ft0
-; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a2, ft0
-; ZVFHMIN-ZFH-RV32-NEXT:    flh ft0, 4(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    addi a1, sp, 16
+; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 18(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa3, 0(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa2, 16(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a1, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa3, fa2
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 4(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa3, 20(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa2, 6(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa1, 22(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a2, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa4, fa3
+; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a3, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa2, fa1
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 10(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa3, 26(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a4, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa2, 24(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa4, fa4, fa3
+; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a5, fa4
 ; ZVFHMIN-ZFH-RV32-NEXT:    vmv.v.x v8, a2
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa5, fa2
+; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a2, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 28(sp)
 ; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFH-RV32-NEXT:    flh ft1, 6(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa0, ft0, fa0
-; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa1, ft1, fa1
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa0, 10(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a1, fa1
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa1, 8(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa2, fa0, fa2
-; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a1, fa2
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa3, fa1, fa3
-; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a2, fa3
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa3, 12(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v8, v8, a3
+; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v8, v8, a4
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a1, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 30(sp)
 ; ZVFHMIN-ZFH-RV32-NEXT:    vmv.v.x v9, a2
+; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v9, v9, a5
 ; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v9, v9, a1
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa2, 14(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa4, fa3, fa4
-; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a1, fa4
-; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v9, v9, a1
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa5, fa2, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa5, fa4
 ; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a1, fa5
 ; ZVFHMIN-ZFH-RV32-NEXT:    vmv.v.i v0, 15
 ; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v9, v9, a1
@@ -2387,49 +2299,51 @@ define void @copysign_neg_v6f16(ptr %x, ptr %y) {
 ; ZVFHMIN-ZFH-RV64-NEXT:    vsetivli zero, 6, e16, mf2, ta, ma
 ; ZVFHMIN-ZFH-RV64-NEXT:    vle16.v v8, (a1)
 ; ZVFHMIN-ZFH-RV64-NEXT:    vle16.v v9, (a0)
-; ZVFHMIN-ZFH-RV64-NEXT:    addi a1, sp, 16
+; ZVFHMIN-ZFH-RV64-NEXT:    lui a1, 8
 ; ZVFHMIN-ZFH-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v8, (a1)
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 30(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 28(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa3, 24(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa2, 26(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa1, 22(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa0, 20(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    flh ft0, 16(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    flh ft1, 18(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    vxor.vx v8, v8, a1
 ; ZVFHMIN-ZFH-RV64-NEXT:    mv a1, sp
 ; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v9, (a1)
-; ZVFHMIN-ZFH-RV64-NEXT:    flh ft2, 2(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    flh ft3, 0(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h ft1, ft2, ft1
-; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a1, ft1
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h ft0, ft3, ft0
-; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a2, ft0
-; ZVFHMIN-ZFH-RV64-NEXT:    flh ft0, 4(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    addi a1, sp, 16
+; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 18(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa3, 0(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa2, 16(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a1, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa3, fa2
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 4(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa3, 20(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa2, 6(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa1, 22(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a2, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa4, fa3
+; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a3, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa2, fa1
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 10(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa3, 26(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a4, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa2, 24(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa4, fa4, fa3
+; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a5, fa4
 ; ZVFHMIN-ZFH-RV64-NEXT:    vmv.v.x v8, a2
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa5, fa2
+; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a2, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 28(sp)
 ; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFH-RV64-NEXT:    flh ft1, 6(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa0, ft0, fa0
-; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa1, ft1, fa1
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa0, 10(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a1, fa1
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa1, 8(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa2, fa0, fa2
-; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a1, fa2
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa3, fa1, fa3
-; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a2, fa3
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa3, 12(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v8, v8, a3
+; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v8, v8, a4
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a1, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 30(sp)
 ; ZVFHMIN-ZFH-RV64-NEXT:    vmv.v.x v9, a2
+; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v9, v9, a5
 ; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v9, v9, a1
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa2, 14(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa4, fa3, fa4
-; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a1, fa4
-; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v9, v9, a1
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa5, fa2, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa5, fa4
 ; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a1, fa5
 ; ZVFHMIN-ZFH-RV64-NEXT:    vmv.v.i v0, 15
 ; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v9, v9, a1
@@ -2446,84 +2360,77 @@ define void @copysign_neg_v6f16(ptr %x, ptr %y) {
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetivli zero, 6, e16, mf2, ta, ma
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v8, (a1)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v9, (a0)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a1, sp, 16
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lui a1, 8
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a1)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 30(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa4, 28(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 24(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a2, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa4, 26(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa3, 22(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vxor.vx v8, v8, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    mv a2, sp
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v9, (a2)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a2, sp, 16
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a2)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa4, 18(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 20(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a5, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a6, fa3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a4, fa4
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa4, 16(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a7, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    not a1, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    not a2, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h t0, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 18(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    not a4, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    not t1, a5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    not a6, a6
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    mv a5, sp
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v9, (a5)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 2(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    not a7, a7
-; ZVFHMIN-ZFHIN-RV32-NEXT:    not t0, t0
-; ZVFHMIN-ZFHIN-RV32-NEXT:    not t2, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa3, 4(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a5, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    lui t3, 8
-; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a3, t3, -1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and t4, a5, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 0(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    lui a5, 1048568
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and t2, t2, a5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or t2, t4, t2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h t4, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and t4, t4, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and t0, t0, t3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 4(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or t0, t4, t0
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.x v8, t0
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, t2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a6, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a7, fa3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a2, a1, -1
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h t0, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and t0, t0, a3
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 6(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a7, a7, a5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a7, t0, a7
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a7
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a7, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a7, a7, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and t1, a3, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lui a3, 1048568
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a4, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h t2, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 22(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a4, t1, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, a5, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a6, a6, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h t1, fa5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 10(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a6, a6, a5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a6, a7, a6
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a6
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a5, a5, a6
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.x v8, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a4, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 26(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, a7, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a6, t0, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a5, a5, a6
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a6, fa5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 8(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a6, a6, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a7, t1, a5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a6, a6, a7
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a7, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a7, a7, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a4, t3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, t2, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a7, t1, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h t0, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a5, a5, a7
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a4, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a5, fa5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 12(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a4, a7, a4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.x v9, a4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v9, v9, a6
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a4, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a4, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a6, a6, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a4, a4, a6
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a5, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a5, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a6, t0, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a6, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.x v9, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a1, fa5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 14(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a2, a5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a2, a4, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v9, v9, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a2, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a1, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v9, v9, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, a5, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a1, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a4, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 30(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a5, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a4, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a1, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a1, a3
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a2, a1
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.i v0, 15
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v9, v9, a1
@@ -2540,84 +2447,77 @@ define void @copysign_neg_v6f16(ptr %x, ptr %y) {
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vsetivli zero, 6, e16, mf2, ta, ma
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vle16.v v8, (a1)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vle16.v v9, (a0)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a1, sp, 16
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lui a1, 8
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a1)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 30(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa4, 28(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 24(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a2, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa4, 26(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa3, 22(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vxor.vx v8, v8, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    mv a2, sp
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v9, (a2)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a2, sp, 16
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a2)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa4, 18(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 20(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a5, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a6, fa3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a4, fa4
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa4, 16(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a7, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    not a1, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    not a2, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h t0, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 18(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    not a4, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    not t1, a5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    not a6, a6
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    mv a5, sp
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v9, (a5)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 2(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    not a7, a7
-; ZVFHMIN-ZFHIN-RV64-NEXT:    not t0, t0
-; ZVFHMIN-ZFHIN-RV64-NEXT:    not t2, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa3, 4(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a5, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    lui t3, 8
-; ZVFHMIN-ZFHIN-RV64-NEXT:    addiw a3, t3, -1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and t4, a5, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 0(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    lui a5, 1048568
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and t2, t2, a5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or t2, t4, t2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h t4, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and t4, t4, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and t0, t0, t3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 4(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or t0, t4, t0
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.x v8, t0
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, t2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a6, fa4
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a7, fa3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addiw a2, a1, -1
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h t0, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and t0, t0, a3
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 6(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a7, a7, a5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a7, t0, a7
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a7
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a7, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a7, a7, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and t1, a3, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lui a3, 1048568
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a4, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h t2, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 22(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a4, t1, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, a5, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a6, a6, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h t1, fa5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 10(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a6, a6, a5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a6, a7, a6
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a6
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a5, a5, a6
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.x v8, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a4, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 26(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, a7, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a6, t0, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a5, a5, a6
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a6, fa5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 8(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a6, a6, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a7, t1, a5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a6, a6, a7
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a7, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a7, a7, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a4, t3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, t2, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a7, t1, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h t0, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a5, a5, a7
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a4, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a5, fa5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 12(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a4, a7, a4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.x v9, a4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v9, v9, a6
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a4, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a4, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a6, a6, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a4, a4, a6
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a5, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a5, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a6, t0, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a6, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.x v9, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a1, fa5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 14(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a2, a5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a2, a4, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v9, v9, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a2, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a1, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v9, v9, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, a5, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a1, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a4, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 30(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a5, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a4, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a1, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a1, a3
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a2, a1
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.i v0, 15
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v9, v9, a1
@@ -2698,26 +2598,28 @@ define void @copysign_neg_trunc_v4f16_v4f32(ptr %x, ptr %y) {
 ; ZVFHMIN-ZFH-RV32-NEXT:    mv a1, sp
 ; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v8, (a1)
 ; ZVFHMIN-ZFH-RV32-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-ZFH-RV32-NEXT:    lui a1, 8
+; ZVFHMIN-ZFH-RV32-NEXT:    vxor.vx v8, v8, a1
 ; ZVFHMIN-ZFH-RV32-NEXT:    addi a1, sp, 8
 ; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v8, (a1)
 ; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 2(sp)
 ; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 10(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa5, fa4
 ; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 0(sp)
 ; ZVFHMIN-ZFH-RV32-NEXT:    flh fa3, 8(sp)
 ; ZVFHMIN-ZFH-RV32-NEXT:    flh fa2, 4(sp)
 ; ZVFHMIN-ZFH-RV32-NEXT:    flh fa1, 12(sp)
 ; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa5, fa4, fa3
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa4, fa3
 ; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa5, fa2, fa1
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa2, fa1
 ; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a3, fa5
 ; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 6(sp)
 ; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 14(sp)
 ; ZVFHMIN-ZFH-RV32-NEXT:    vmv.v.x v8, a2
 ; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v8, v8, a1
 ; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v8, v8, a3
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa5, fa4
 ; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a1, fa5
 ; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v8, v8, a1
 ; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v8, (a0)
@@ -2734,26 +2636,28 @@ define void @copysign_neg_trunc_v4f16_v4f32(ptr %x, ptr %y) {
 ; ZVFHMIN-ZFH-RV64-NEXT:    mv a1, sp
 ; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v8, (a1)
 ; ZVFHMIN-ZFH-RV64-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-ZFH-RV64-NEXT:    lui a1, 8
+; ZVFHMIN-ZFH-RV64-NEXT:    vxor.vx v8, v8, a1
 ; ZVFHMIN-ZFH-RV64-NEXT:    addi a1, sp, 8
 ; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v8, (a1)
 ; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 2(sp)
 ; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 10(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa5, fa4
 ; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 0(sp)
 ; ZVFHMIN-ZFH-RV64-NEXT:    flh fa3, 8(sp)
 ; ZVFHMIN-ZFH-RV64-NEXT:    flh fa2, 4(sp)
 ; ZVFHMIN-ZFH-RV64-NEXT:    flh fa1, 12(sp)
 ; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa5, fa4, fa3
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa4, fa3
 ; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa5, fa2, fa1
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa2, fa1
 ; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a3, fa5
 ; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 6(sp)
 ; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 14(sp)
 ; ZVFHMIN-ZFH-RV64-NEXT:    vmv.v.x v8, a2
 ; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v8, v8, a1
 ; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v8, v8, a3
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa5, fa4
 ; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a1, fa5
 ; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v8, v8, a1
 ; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v8, (a0)
@@ -2770,44 +2674,41 @@ define void @copysign_neg_trunc_v4f16_v4f32(ptr %x, ptr %y) {
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    mv a1, sp
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a1)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vfncvt.f.f.w v8, v9
-; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a1, sp, 8
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lui a1, 8
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vxor.vx v8, v8, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a2, sp, 8
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a2)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 2(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa4, 10(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    lui a2, 8
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa3, 0(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a2, fa5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a3, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 0(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a4, a2, -1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a1, a4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    not a3, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a5, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a4, fa3
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a5, a1, -1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a2, a5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    lui a6, 1048568
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a3, a3, a6
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a1, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a3, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a7, fa5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 4(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, a5, a4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    not a3, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a3, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a3, a3, a6
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a2, a2, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a7, a1
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a3, fa5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 12(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a2, a5, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.x v8, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a4, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a4, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.x v8, a1
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a1, fa5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 6(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a3, a3, a4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    not a1, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a3, a3, a5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a1, a6
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a2, fa5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 14(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a3, a1
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a2, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a2, a5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    not a1, a1
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a1, a6
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a2, a1
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a1
@@ -2825,44 +2726,41 @@ define void @copysign_neg_trunc_v4f16_v4f32(ptr %x, ptr %y) {
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    mv a1, sp
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a1)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vfncvt.f.f.w v8, v9
-; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a1, sp, 8
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lui a1, 8
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vxor.vx v8, v8, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a2, sp, 8
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a2)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 2(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa4, 10(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    lui a2, 8
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa3, 0(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a2, fa5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a3, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 0(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    addiw a4, a2, -1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a1, a4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    not a3, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a5, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a4, fa3
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addiw a5, a1, -1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a2, a5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    lui a6, 1048568
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a3, a3, a6
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a1, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a3, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a7, fa5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 4(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, a5, a4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    not a3, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a3, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a3, a3, a6
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a2, a2, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a7, a1
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a3, fa5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 12(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a2, a5, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.x v8, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a4, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a4, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.x v8, a1
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a1, fa5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 6(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a3, a3, a4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    not a1, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a3, a3, a5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a1, a6
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a2, fa5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 14(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a3, a1
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a2, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a2, a5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    not a1, a1
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a1, a6
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a2, a1
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a1
@@ -2903,26 +2801,28 @@ define void @copysign_neg_trunc_v3f16_v3f32(ptr %x, ptr %y) {
 ; ZVFHMIN-ZFH-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
 ; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v8, (a1)
 ; ZVFHMIN-ZFH-RV32-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-ZFH-RV32-NEXT:    lui a1, 8
+; ZVFHMIN-ZFH-RV32-NEXT:    vxor.vx v8, v8, a1
 ; ZVFHMIN-ZFH-RV32-NEXT:    addi a1, sp, 8
 ; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v8, (a1)
 ; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 2(sp)
 ; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 10(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa5, fa4
 ; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 0(sp)
 ; ZVFHMIN-ZFH-RV32-NEXT:    flh fa3, 8(sp)
 ; ZVFHMIN-ZFH-RV32-NEXT:    flh fa2, 4(sp)
 ; ZVFHMIN-ZFH-RV32-NEXT:    flh fa1, 12(sp)
 ; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa5, fa4, fa3
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa4, fa3
 ; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa5, fa2, fa1
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa2, fa1
 ; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a3, fa5
 ; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 6(sp)
 ; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 14(sp)
 ; ZVFHMIN-ZFH-RV32-NEXT:    vmv.v.x v8, a2
 ; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v8, v8, a1
 ; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v8, v8, a3
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa5, fa4
 ; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a1, fa5
 ; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v8, v8, a1
 ; ZVFHMIN-ZFH-RV32-NEXT:    vsetivli zero, 3, e16, mf4, ta, ma
@@ -2941,26 +2841,28 @@ define void @copysign_neg_trunc_v3f16_v3f32(ptr %x, ptr %y) {
 ; ZVFHMIN-ZFH-RV64-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
 ; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v8, (a1)
 ; ZVFHMIN-ZFH-RV64-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-ZFH-RV64-NEXT:    lui a1, 8
+; ZVFHMIN-ZFH-RV64-NEXT:    vxor.vx v8, v8, a1
 ; ZVFHMIN-ZFH-RV64-NEXT:    addi a1, sp, 8
 ; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v8, (a1)
 ; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 2(sp)
 ; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 10(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa5, fa4
 ; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 0(sp)
 ; ZVFHMIN-ZFH-RV64-NEXT:    flh fa3, 8(sp)
 ; ZVFHMIN-ZFH-RV64-NEXT:    flh fa2, 4(sp)
 ; ZVFHMIN-ZFH-RV64-NEXT:    flh fa1, 12(sp)
 ; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa5, fa4, fa3
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa4, fa3
 ; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa5, fa2, fa1
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa2, fa1
 ; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a3, fa5
 ; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 6(sp)
 ; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 14(sp)
 ; ZVFHMIN-ZFH-RV64-NEXT:    vmv.v.x v8, a2
 ; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v8, v8, a1
 ; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v8, v8, a3
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa5, fa4
 ; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a1, fa5
 ; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v8, v8, a1
 ; ZVFHMIN-ZFH-RV64-NEXT:    vsetivli zero, 3, e16, mf4, ta, ma
@@ -2979,44 +2881,41 @@ define void @copysign_neg_trunc_v3f16_v3f32(ptr %x, ptr %y) {
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a1)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vfncvt.f.f.w v8, v9
-; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a1, sp, 8
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lui a1, 8
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vxor.vx v8, v8, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a2, sp, 8
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a2)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 2(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa4, 10(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    lui a2, 8
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa3, 0(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a2, fa5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a3, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 0(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a4, a2, -1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a1, a4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    not a3, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a5, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a4, fa3
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a5, a1, -1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a2, a5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    lui a6, 1048568
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a3, a3, a6
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a1, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a3, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a7, fa5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 4(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, a5, a4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    not a3, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a3, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a3, a3, a6
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a2, a2, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a7, a1
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a3, fa5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 12(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a2, a5, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.x v8, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a4, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a4, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.x v8, a1
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a1, fa5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 6(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a3, a3, a4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    not a1, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a3, a3, a5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a1, a6
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a2, fa5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 14(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a3, a1
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a2, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a2, a5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    not a1, a1
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a1, a6
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a2, a1
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a1
@@ -3036,44 +2935,41 @@ define void @copysign_neg_trunc_v3f16_v3f32(ptr %x, ptr %y) {
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a1)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vfncvt.f.f.w v8, v9
-; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a1, sp, 8
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lui a1, 8
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vxor.vx v8, v8, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a2, sp, 8
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a2)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 2(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa4, 10(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    lui a2, 8
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa3, 0(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a2, fa5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a3, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 0(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    addiw a4, a2, -1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a1, a4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    not a3, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a5, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a4, fa3
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addiw a5, a1, -1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a2, a5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    lui a6, 1048568
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a3, a3, a6
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a1, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a3, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a7, fa5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 4(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, a5, a4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    not a3, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a3, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a3, a3, a6
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a2, a2, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a7, a1
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a3, fa5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 12(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a2, a5, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.x v8, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a4, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a4, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.x v8, a1
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a1, fa5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 6(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a3, a3, a4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    not a1, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a3, a3, a5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a1, a6
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a2, fa5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 14(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a3, a1
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a2, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a2, a5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    not a1, a1
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a1, a6
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a2, a1
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a1
@@ -3336,59 +3232,20 @@ define void @fmsub_v8f16(ptr %x, ptr %y, ptr %z) {
 ;
 ; ZVFHMIN-LABEL: fmsub_v8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    addi sp, sp, -16
-; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, mu
+; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vle16.v v8, (a2)
 ; ZVFHMIN-NEXT:    vle16.v v9, (a0)
 ; ZVFHMIN-NEXT:    vle16.v v10, (a1)
-; ZVFHMIN-NEXT:    mv a1, sp
-; ZVFHMIN-NEXT:    vse16.v v8, (a1)
-; ZVFHMIN-NEXT:    flh fa5, 2(sp)
-; ZVFHMIN-NEXT:    flh fa4, 0(sp)
-; ZVFHMIN-NEXT:    flh fa3, 4(sp)
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-NEXT:    fmv.x.h a2, fa4
-; ZVFHMIN-NEXT:    lui a3, 1048568
-; ZVFHMIN-NEXT:    fmv.x.h a4, fa3
-; ZVFHMIN-NEXT:    flh fa5, 6(sp)
-; ZVFHMIN-NEXT:    lui a5, 8
-; ZVFHMIN-NEXT:    xor a2, a2, a5
-; ZVFHMIN-NEXT:    vmv.v.x v8, a2
-; ZVFHMIN-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-NEXT:    flh fa5, 10(sp)
-; ZVFHMIN-NEXT:    xor a1, a1, a3
-; ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a1
-; ZVFHMIN-NEXT:    xor a4, a4, a3
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-NEXT:    flh fa5, 8(sp)
-; ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a4
-; ZVFHMIN-NEXT:    xor a2, a2, a3
-; ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a2
-; ZVFHMIN-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-NEXT:    flh fa5, 12(sp)
-; ZVFHMIN-NEXT:    xor a1, a1, a3
-; ZVFHMIN-NEXT:    xor a2, a2, a5
-; ZVFHMIN-NEXT:    vmv.v.x v11, a2
-; ZVFHMIN-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-NEXT:    flh fa5, 14(sp)
-; ZVFHMIN-NEXT:    vslide1down.vx v11, v11, a1
-; ZVFHMIN-NEXT:    xor a2, a2, a3
-; ZVFHMIN-NEXT:    vslide1down.vx v11, v11, a2
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-NEXT:    xor a1, a1, a3
-; ZVFHMIN-NEXT:    vmv.v.i v0, 15
-; ZVFHMIN-NEXT:    vslide1down.vx v11, v11, a1
-; ZVFHMIN-NEXT:    vslidedown.vi v11, v8, 4, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v9
+; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    vxor.vx v8, v8, a1
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v9, v11, v8
+; ZVFHMIN-NEXT:    vfmadd.vv v9, v8, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    vse16.v v8, (a0)
-; ZVFHMIN-NEXT:    addi sp, sp, 16
 ; ZVFHMIN-NEXT:    ret
   %a = load <8 x half>, ptr %x
   %b = load <8 x half>, ptr %y
@@ -3412,60 +3269,21 @@ define void @fmsub_v6f16(ptr %x, ptr %y, ptr %z) {
 ;
 ; ZVFHMIN-LABEL: fmsub_v6f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    addi sp, sp, -16
-; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
 ; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vle16.v v8, (a2)
 ; ZVFHMIN-NEXT:    vle16.v v9, (a0)
 ; ZVFHMIN-NEXT:    vle16.v v10, (a1)
-; ZVFHMIN-NEXT:    mv a1, sp
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, mu
-; ZVFHMIN-NEXT:    vse16.v v8, (a1)
-; ZVFHMIN-NEXT:    flh fa5, 2(sp)
-; ZVFHMIN-NEXT:    flh fa4, 0(sp)
-; ZVFHMIN-NEXT:    flh fa3, 4(sp)
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-NEXT:    fmv.x.h a2, fa4
-; ZVFHMIN-NEXT:    lui a3, 1048568
-; ZVFHMIN-NEXT:    fmv.x.h a4, fa3
-; ZVFHMIN-NEXT:    flh fa5, 6(sp)
-; ZVFHMIN-NEXT:    lui a5, 8
-; ZVFHMIN-NEXT:    xor a2, a2, a5
-; ZVFHMIN-NEXT:    vmv.v.x v8, a2
-; ZVFHMIN-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-NEXT:    flh fa5, 10(sp)
-; ZVFHMIN-NEXT:    xor a1, a1, a3
-; ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a1
-; ZVFHMIN-NEXT:    xor a4, a4, a3
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-NEXT:    flh fa5, 8(sp)
-; ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a4
-; ZVFHMIN-NEXT:    xor a2, a2, a3
-; ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a2
-; ZVFHMIN-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-NEXT:    flh fa5, 12(sp)
-; ZVFHMIN-NEXT:    xor a1, a1, a3
-; ZVFHMIN-NEXT:    xor a2, a2, a5
-; ZVFHMIN-NEXT:    vmv.v.x v11, a2
-; ZVFHMIN-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-NEXT:    flh fa5, 14(sp)
-; ZVFHMIN-NEXT:    vslide1down.vx v11, v11, a1
-; ZVFHMIN-NEXT:    xor a2, a2, a3
-; ZVFHMIN-NEXT:    vslide1down.vx v11, v11, a2
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-NEXT:    xor a1, a1, a3
-; ZVFHMIN-NEXT:    vmv.v.i v0, 15
-; ZVFHMIN-NEXT:    vslide1down.vx v11, v11, a1
-; ZVFHMIN-NEXT:    vslidedown.vi v11, v8, 4, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v9
+; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vxor.vx v8, v8, a1
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v9, v11, v8
+; ZVFHMIN-NEXT:    vfmadd.vv v9, v8, v11
 ; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    vse16.v v8, (a0)
-; ZVFHMIN-NEXT:    addi sp, sp, 16
 ; ZVFHMIN-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = load <6 x half>, ptr %y
@@ -3854,187 +3672,14 @@ define void @fneg_v16f16(ptr %x) {
 ; ZVFH-NEXT:    vse16.v v8, (a0)
 ; ZVFH-NEXT:    ret
 ;
-; ZVFHMIN-RV32-LABEL: fneg_v16f16:
-; ZVFHMIN-RV32:       # %bb.0:
-; ZVFHMIN-RV32-NEXT:    addi sp, sp, -64
-; ZVFHMIN-RV32-NEXT:    .cfi_def_cfa_offset 64
-; ZVFHMIN-RV32-NEXT:    sw ra, 60(sp) # 4-byte Folded Spill
-; ZVFHMIN-RV32-NEXT:    sw s0, 56(sp) # 4-byte Folded Spill
-; ZVFHMIN-RV32-NEXT:    .cfi_offset ra, -4
-; ZVFHMIN-RV32-NEXT:    .cfi_offset s0, -8
-; ZVFHMIN-RV32-NEXT:    addi s0, sp, 64
-; ZVFHMIN-RV32-NEXT:    .cfi_def_cfa s0, 0
-; ZVFHMIN-RV32-NEXT:    andi sp, sp, -32
-; ZVFHMIN-RV32-NEXT:    vsetivli zero, 16, e16, m1, ta, mu
-; ZVFHMIN-RV32-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-RV32-NEXT:    mv a1, sp
-; ZVFHMIN-RV32-NEXT:    vse16.v v8, (a1)
-; ZVFHMIN-RV32-NEXT:    flh fa5, 2(sp)
-; ZVFHMIN-RV32-NEXT:    flh fa4, 0(sp)
-; ZVFHMIN-RV32-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-RV32-NEXT:    flh fa5, 4(sp)
-; ZVFHMIN-RV32-NEXT:    flh fa3, 6(sp)
-; ZVFHMIN-RV32-NEXT:    fmv.x.h a3, fa4
-; ZVFHMIN-RV32-NEXT:    flh fa4, 8(sp)
-; ZVFHMIN-RV32-NEXT:    fmv.x.h a4, fa5
-; ZVFHMIN-RV32-NEXT:    fmv.x.h a5, fa3
-; ZVFHMIN-RV32-NEXT:    flh fa5, 10(sp)
-; ZVFHMIN-RV32-NEXT:    fmv.x.h a6, fa4
-; ZVFHMIN-RV32-NEXT:    lui a1, 1048568
-; ZVFHMIN-RV32-NEXT:    xor a2, a2, a1
-; ZVFHMIN-RV32-NEXT:    fmv.x.h a7, fa5
-; ZVFHMIN-RV32-NEXT:    flh fa5, 12(sp)
-; ZVFHMIN-RV32-NEXT:    lui t0, 8
-; ZVFHMIN-RV32-NEXT:    xor a3, a3, t0
-; ZVFHMIN-RV32-NEXT:    vmv.v.x v8, a3
-; ZVFHMIN-RV32-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-RV32-NEXT:    flh fa5, 14(sp)
-; ZVFHMIN-RV32-NEXT:    vslide1down.vx v8, v8, a2
-; ZVFHMIN-RV32-NEXT:    xor a4, a4, a1
-; ZVFHMIN-RV32-NEXT:    vslide1down.vx v8, v8, a4
-; ZVFHMIN-RV32-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-RV32-NEXT:    flh fa5, 18(sp)
-; ZVFHMIN-RV32-NEXT:    xor a5, a5, a1
-; ZVFHMIN-RV32-NEXT:    vslide1down.vx v8, v8, a5
-; ZVFHMIN-RV32-NEXT:    xor a4, a6, a1
-; ZVFHMIN-RV32-NEXT:    fmv.x.h a5, fa5
-; ZVFHMIN-RV32-NEXT:    flh fa5, 16(sp)
-; ZVFHMIN-RV32-NEXT:    vslide1down.vx v8, v8, a4
-; ZVFHMIN-RV32-NEXT:    xor a4, a7, a1
-; ZVFHMIN-RV32-NEXT:    vslide1down.vx v8, v8, a4
-; ZVFHMIN-RV32-NEXT:    fmv.x.h a4, fa5
-; ZVFHMIN-RV32-NEXT:    flh fa5, 20(sp)
-; ZVFHMIN-RV32-NEXT:    xor a3, a3, a1
-; ZVFHMIN-RV32-NEXT:    vslide1down.vx v8, v8, a3
-; ZVFHMIN-RV32-NEXT:    xor a2, a2, a1
-; ZVFHMIN-RV32-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-RV32-NEXT:    flh fa5, 22(sp)
-; ZVFHMIN-RV32-NEXT:    vslide1down.vx v8, v8, a2
-; ZVFHMIN-RV32-NEXT:    xor a2, a4, t0
-; ZVFHMIN-RV32-NEXT:    vmv.v.x v9, a2
-; ZVFHMIN-RV32-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-RV32-NEXT:    flh fa5, 24(sp)
-; ZVFHMIN-RV32-NEXT:    xor a5, a5, a1
-; ZVFHMIN-RV32-NEXT:    vslide1down.vx v9, v9, a5
-; ZVFHMIN-RV32-NEXT:    xor a3, a3, a1
-; ZVFHMIN-RV32-NEXT:    fmv.x.h a4, fa5
-; ZVFHMIN-RV32-NEXT:    flh fa5, 26(sp)
-; ZVFHMIN-RV32-NEXT:    vslide1down.vx v9, v9, a3
-; ZVFHMIN-RV32-NEXT:    xor a2, a2, a1
-; ZVFHMIN-RV32-NEXT:    vslide1down.vx v9, v9, a2
-; ZVFHMIN-RV32-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-RV32-NEXT:    flh fa5, 28(sp)
-; ZVFHMIN-RV32-NEXT:    xor a4, a4, a1
-; ZVFHMIN-RV32-NEXT:    vslide1down.vx v9, v9, a4
-; ZVFHMIN-RV32-NEXT:    xor a2, a2, a1
-; ZVFHMIN-RV32-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-RV32-NEXT:    flh fa5, 30(sp)
-; ZVFHMIN-RV32-NEXT:    vslide1down.vx v9, v9, a2
-; ZVFHMIN-RV32-NEXT:    xor a3, a3, a1
-; ZVFHMIN-RV32-NEXT:    vslide1down.vx v9, v9, a3
-; ZVFHMIN-RV32-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-RV32-NEXT:    xor a1, a2, a1
-; ZVFHMIN-RV32-NEXT:    li a2, 255
-; ZVFHMIN-RV32-NEXT:    vmv.s.x v0, a2
-; ZVFHMIN-RV32-NEXT:    vslide1down.vx v9, v9, a1
-; ZVFHMIN-RV32-NEXT:    vslidedown.vi v9, v8, 8, v0.t
-; ZVFHMIN-RV32-NEXT:    vse16.v v9, (a0)
-; ZVFHMIN-RV32-NEXT:    addi sp, s0, -64
-; ZVFHMIN-RV32-NEXT:    lw ra, 60(sp) # 4-byte Folded Reload
-; ZVFHMIN-RV32-NEXT:    lw s0, 56(sp) # 4-byte Folded Reload
-; ZVFHMIN-RV32-NEXT:    addi sp, sp, 64
-; ZVFHMIN-RV32-NEXT:    ret
-;
-; ZVFHMIN-RV64-LABEL: fneg_v16f16:
-; ZVFHMIN-RV64:       # %bb.0:
-; ZVFHMIN-RV64-NEXT:    addi sp, sp, -64
-; ZVFHMIN-RV64-NEXT:    .cfi_def_cfa_offset 64
-; ZVFHMIN-RV64-NEXT:    sd ra, 56(sp) # 8-byte Folded Spill
-; ZVFHMIN-RV64-NEXT:    sd s0, 48(sp) # 8-byte Folded Spill
-; ZVFHMIN-RV64-NEXT:    .cfi_offset ra, -8
-; ZVFHMIN-RV64-NEXT:    .cfi_offset s0, -16
-; ZVFHMIN-RV64-NEXT:    addi s0, sp, 64
-; ZVFHMIN-RV64-NEXT:    .cfi_def_cfa s0, 0
-; ZVFHMIN-RV64-NEXT:    andi sp, sp, -32
-; ZVFHMIN-RV64-NEXT:    vsetivli zero, 16, e16, m1, ta, mu
-; ZVFHMIN-RV64-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-RV64-NEXT:    mv a1, sp
-; ZVFHMIN-RV64-NEXT:    vse16.v v8, (a1)
-; ZVFHMIN-RV64-NEXT:    flh fa5, 2(sp)
-; ZVFHMIN-RV64-NEXT:    flh fa4, 0(sp)
-; ZVFHMIN-RV64-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-RV64-NEXT:    flh fa5, 4(sp)
-; ZVFHMIN-RV64-NEXT:    flh fa3, 6(sp)
-; ZVFHMIN-RV64-NEXT:    fmv.x.h a3, fa4
-; ZVFHMIN-RV64-NEXT:    flh fa4, 8(sp)
-; ZVFHMIN-RV64-NEXT:    fmv.x.h a4, fa5
-; ZVFHMIN-RV64-NEXT:    fmv.x.h a5, fa3
-; ZVFHMIN-RV64-NEXT:    flh fa5, 10(sp)
-; ZVFHMIN-RV64-NEXT:    fmv.x.h a6, fa4
-; ZVFHMIN-RV64-NEXT:    lui a1, 1048568
-; ZVFHMIN-RV64-NEXT:    xor a2, a2, a1
-; ZVFHMIN-RV64-NEXT:    fmv.x.h a7, fa5
-; ZVFHMIN-RV64-NEXT:    flh fa5, 12(sp)
-; ZVFHMIN-RV64-NEXT:    lui t0, 8
-; ZVFHMIN-RV64-NEXT:    xor a3, a3, t0
-; ZVFHMIN-RV64-NEXT:    vmv.v.x v8, a3
-; ZVFHMIN-RV64-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-RV64-NEXT:    flh fa5, 14(sp)
-; ZVFHMIN-RV64-NEXT:    vslide1down.vx v8, v8, a2
-; ZVFHMIN-RV64-NEXT:    xor a4, a4, a1
-; ZVFHMIN-RV64-NEXT:    vslide1down.vx v8, v8, a4
-; ZVFHMIN-RV64-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-RV64-NEXT:    flh fa5, 18(sp)
-; ZVFHMIN-RV64-NEXT:    xor a5, a5, a1
-; ZVFHMIN-RV64-NEXT:    vslide1down.vx v8, v8, a5
-; ZVFHMIN-RV64-NEXT:    xor a4, a6, a1
-; ZVFHMIN-RV64-NEXT:    fmv.x.h a5, fa5
-; ZVFHMIN-RV64-NEXT:    flh fa5, 16(sp)
-; ZVFHMIN-RV64-NEXT:    vslide1down.vx v8, v8, a4
-; ZVFHMIN-RV64-NEXT:    xor a4, a7, a1
-; ZVFHMIN-RV64-NEXT:    vslide1down.vx v8, v8, a4
-; ZVFHMIN-RV64-NEXT:    fmv.x.h a4, fa5
-; ZVFHMIN-RV64-NEXT:    flh fa5, 20(sp)
-; ZVFHMIN-RV64-NEXT:    xor a3, a3, a1
-; ZVFHMIN-RV64-NEXT:    vslide1down.vx v8, v8, a3
-; ZVFHMIN-RV64-NEXT:    xor a2, a2, a1
-; ZVFHMIN-RV64-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-RV64-NEXT:    flh fa5, 22(sp)
-; ZVFHMIN-RV64-NEXT:    vslide1down.vx v8, v8, a2
-; ZVFHMIN-RV64-NEXT:    xor a2, a4, t0
-; ZVFHMIN-RV64-NEXT:    vmv.v.x v9, a2
-; ZVFHMIN-RV64-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-RV64-NEXT:    flh fa5, 24(sp)
-; ZVFHMIN-RV64-NEXT:    xor a5, a5, a1
-; ZVFHMIN-RV64-NEXT:    vslide1down.vx v9, v9, a5
-; ZVFHMIN-RV64-NEXT:    xor a3, a3, a1
-; ZVFHMIN-RV64-NEXT:    fmv.x.h a4, fa5
-; ZVFHMIN-RV64-NEXT:    flh fa5, 26(sp)
-; ZVFHMIN-RV64-NEXT:    vslide1down.vx v9, v9, a3
-; ZVFHMIN-RV64-NEXT:    xor a2, a2, a1
-; ZVFHMIN-RV64-NEXT:    vslide1down.vx v9, v9, a2
-; ZVFHMIN-RV64-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-RV64-NEXT:    flh fa5, 28(sp)
-; ZVFHMIN-RV64-NEXT:    xor a4, a4, a1
-; ZVFHMIN-RV64-NEXT:    vslide1down.vx v9, v9, a4
-; ZVFHMIN-RV64-NEXT:    xor a2, a2, a1
-; ZVFHMIN-RV64-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-RV64-NEXT:    flh fa5, 30(sp)
-; ZVFHMIN-RV64-NEXT:    vslide1down.vx v9, v9, a2
-; ZVFHMIN-RV64-NEXT:    xor a3, a3, a1
-; ZVFHMIN-RV64-NEXT:    vslide1down.vx v9, v9, a3
-; ZVFHMIN-RV64-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-RV64-NEXT:    xor a1, a2, a1
-; ZVFHMIN-RV64-NEXT:    li a2, 255
-; ZVFHMIN-RV64-NEXT:    vmv.s.x v0, a2
-; ZVFHMIN-RV64-NEXT:    vslide1down.vx v9, v9, a1
-; ZVFHMIN-RV64-NEXT:    vslidedown.vi v9, v8, 8, v0.t
-; ZVFHMIN-RV64-NEXT:    vse16.v v9, (a0)
-; ZVFHMIN-RV64-NEXT:    addi sp, s0, -64
-; ZVFHMIN-RV64-NEXT:    ld ra, 56(sp) # 8-byte Folded Reload
-; ZVFHMIN-RV64-NEXT:    ld s0, 48(sp) # 8-byte Folded Reload
-; ZVFHMIN-RV64-NEXT:    addi sp, sp, 64
-; ZVFHMIN-RV64-NEXT:    ret
+; ZVFHMIN-LABEL: fneg_v16f16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    vxor.vx v8, v8, a1
+; ZVFHMIN-NEXT:    vse16.v v8, (a0)
+; ZVFHMIN-NEXT:    ret
   %a = load <16 x half>, ptr %x
   %b = fneg <16 x half> %a
   store <16 x half> %b, ptr %x
@@ -5286,60 +4931,21 @@ define void @fmsub_vf_v8f16(ptr %x, ptr %y, half %z) {
 ;
 ; ZVFHMIN-LABEL: fmsub_vf_v8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    addi sp, sp, -16
-; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, mu
-; ZVFHMIN-NEXT:    vle16.v v9, (a1)
+; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-NEXT:    mv a1, sp
-; ZVFHMIN-NEXT:    vse16.v v9, (a1)
-; ZVFHMIN-NEXT:    flh fa5, 2(sp)
-; ZVFHMIN-NEXT:    flh fa4, 0(sp)
+; ZVFHMIN-NEXT:    vle16.v v9, (a1)
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-NEXT:    flh fa5, 4(sp)
-; ZVFHMIN-NEXT:    fmv.x.h a3, fa4
-; ZVFHMIN-NEXT:    vmv.v.x v9, a1
-; ZVFHMIN-NEXT:    lui a1, 1048568
-; ZVFHMIN-NEXT:    fmv.x.h a4, fa5
-; ZVFHMIN-NEXT:    flh fa5, 6(sp)
-; ZVFHMIN-NEXT:    lui a5, 8
-; ZVFHMIN-NEXT:    xor a3, a3, a5
-; ZVFHMIN-NEXT:    vmv.v.x v10, a3
-; ZVFHMIN-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-NEXT:    flh fa5, 10(sp)
-; ZVFHMIN-NEXT:    xor a2, a2, a1
-; ZVFHMIN-NEXT:    vslide1down.vx v10, v10, a2
-; ZVFHMIN-NEXT:    xor a4, a4, a1
-; ZVFHMIN-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-NEXT:    flh fa5, 8(sp)
-; ZVFHMIN-NEXT:    vslide1down.vx v10, v10, a4
-; ZVFHMIN-NEXT:    xor a3, a3, a1
-; ZVFHMIN-NEXT:    vslide1down.vx v10, v10, a3
-; ZVFHMIN-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-NEXT:    flh fa5, 12(sp)
-; ZVFHMIN-NEXT:    xor a2, a2, a1
-; ZVFHMIN-NEXT:    xor a3, a3, a5
-; ZVFHMIN-NEXT:    vmv.v.x v11, a3
-; ZVFHMIN-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-NEXT:    flh fa5, 14(sp)
-; ZVFHMIN-NEXT:    vslide1down.vx v11, v11, a2
-; ZVFHMIN-NEXT:    xor a3, a3, a1
-; ZVFHMIN-NEXT:    vslide1down.vx v11, v11, a3
-; ZVFHMIN-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-NEXT:    xor a1, a2, a1
-; ZVFHMIN-NEXT:    vmv.v.i v0, 15
-; ZVFHMIN-NEXT:    vslide1down.vx v11, v11, a1
-; ZVFHMIN-NEXT:    vslidedown.vi v11, v10, 4, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vmv.v.x v10, a1
+; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    vxor.vx v9, v9, a1
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v8, v11, v10
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v9, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v8
 ; ZVFHMIN-NEXT:    vse16.v v9, (a0)
-; ZVFHMIN-NEXT:    addi sp, sp, 16
 ; ZVFHMIN-NEXT:    ret
   %a = load <8 x half>, ptr %x
   %b = load <8 x half>, ptr %y
@@ -5363,65 +4969,26 @@ define void @fmsub_vf_v6f16(ptr %x, ptr %y, half %z) {
 ;
 ; ZVFHMIN-LABEL: fmsub_vf_v6f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    addi sp, sp, -16
-; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
 ; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v9, (a1)
 ; ZVFHMIN-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-NEXT:    mv a1, sp
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, mu
-; ZVFHMIN-NEXT:    vse16.v v9, (a1)
-; ZVFHMIN-NEXT:    flh fa5, 2(sp)
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-NEXT:    flh fa5, 0(sp)
-; ZVFHMIN-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-NEXT:    li a4, 192
-; ZVFHMIN-NEXT:    vmv.s.x v0, a4
-; ZVFHMIN-NEXT:    fmv.x.h a4, fa5
-; ZVFHMIN-NEXT:    flh fa5, 4(sp)
-; ZVFHMIN-NEXT:    vmv.v.x v9, a1
-; ZVFHMIN-NEXT:    vmerge.vxm v9, v9, a3, v0
-; ZVFHMIN-NEXT:    lui a1, 1048568
-; ZVFHMIN-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-NEXT:    flh fa5, 6(sp)
-; ZVFHMIN-NEXT:    lui a5, 8
-; ZVFHMIN-NEXT:    xor a4, a4, a5
-; ZVFHMIN-NEXT:    vmv.v.x v10, a4
-; ZVFHMIN-NEXT:    fmv.x.h a4, fa5
-; ZVFHMIN-NEXT:    flh fa5, 10(sp)
-; ZVFHMIN-NEXT:    xor a2, a2, a1
-; ZVFHMIN-NEXT:    vslide1down.vx v10, v10, a2
-; ZVFHMIN-NEXT:    xor a3, a3, a1
-; ZVFHMIN-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-NEXT:    flh fa5, 8(sp)
-; ZVFHMIN-NEXT:    vslide1down.vx v10, v10, a3
-; ZVFHMIN-NEXT:    xor a4, a4, a1
-; ZVFHMIN-NEXT:    vslide1down.vx v10, v10, a4
-; ZVFHMIN-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-NEXT:    flh fa5, 12(sp)
-; ZVFHMIN-NEXT:    xor a2, a2, a1
-; ZVFHMIN-NEXT:    xor a3, a3, a5
-; ZVFHMIN-NEXT:    vmv.v.x v11, a3
-; ZVFHMIN-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-NEXT:    flh fa5, 14(sp)
-; ZVFHMIN-NEXT:    vslide1down.vx v11, v11, a2
-; ZVFHMIN-NEXT:    xor a3, a3, a1
-; ZVFHMIN-NEXT:    vslide1down.vx v11, v11, a3
-; ZVFHMIN-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-NEXT:    xor a1, a2, a1
-; ZVFHMIN-NEXT:    vmv.v.i v0, 15
-; ZVFHMIN-NEXT:    vslide1down.vx v11, v11, a1
-; ZVFHMIN-NEXT:    vslidedown.vi v11, v10, 4, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v9
+; ZVFHMIN-NEXT:    vle16.v v9, (a1)
+; ZVFHMIN-NEXT:    fmv.x.h a1, fa5
+; ZVFHMIN-NEXT:    li a2, 192
+; ZVFHMIN-NEXT:    vmv.s.x v0, a2
+; ZVFHMIN-NEXT:    fmv.x.h a2, fa0
+; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v10, a2
+; ZVFHMIN-NEXT:    vmerge.vxm v10, v10, a1, v0
+; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    vxor.vx v9, v9, a1
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v9, v11, v10
 ; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    vse16.v v8, (a0)
-; ZVFHMIN-NEXT:    addi sp, sp, 16
 ; ZVFHMIN-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = load <6 x half>, ptr %y