[llvm] da46244 - Revert "[LegalizeVectorOps] Make the AArch64 hack in ExpandFNEG more specific."

Craig Topper via llvm-commits <llvm-commits at lists.llvm.org>
Tue Sep 17 09:08:33 PDT 2024


Author: Craig Topper
Date: 2024-09-17T09:04:43-07:00
New Revision: da46244e49b1e4b90e51635cff2134d1664841df

URL: https://github.com/llvm/llvm-project/commit/da46244e49b1e4b90e51635cff2134d1664841df
DIFF: https://github.com/llvm/llvm-project/commit/da46244e49b1e4b90e51635cff2134d1664841df.diff

LOG: Revert "[LegalizeVectorOps] Make the AArch64 hack in ExpandFNEG more specific."

This reverts commit 884ff9e3f9741ac282b6cf8087b8d3f62b8e138a.

A regression was reported in Halide for arm32.

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
    llvm/test/CodeGen/NVPTX/f16-instructions.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
    llvm/test/CodeGen/Thumb2/mve-fp-negabs.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index bef9cf8eb6b1ae..3dc5affacc5a76 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -1804,13 +1804,9 @@ SDValue VectorLegalizer::ExpandFNEG(SDNode *Node) {
   EVT VT = Node->getValueType(0);
   EVT IntVT = VT.changeVectorElementTypeToInteger();
 
-  if (!TLI.isOperationLegalOrCustom(ISD::XOR, IntVT))
-    return SDValue();
-
-  // FIXME: This is to force unrolling v1f64 vectors for AArch64.
-  if (VT.isFixedLengthVector() && VT.getVectorNumElements() == 1 &&
-      !TLI.isOperationLegalOrCustom(ISD::FSUB, VT) &&
-      TLI.isOperationLegal(ISD::FNEG, VT.getVectorElementType()))
+  // FIXME: The FSUB check is here to force unrolling v1f64 vectors on AArch64.
+  if (!TLI.isOperationLegalOrCustom(ISD::XOR, IntVT) ||
+      !(TLI.isOperationLegalOrCustom(ISD::FSUB, VT) || VT.isScalableVector()))
     return SDValue();
 
   SDLoc DL(Node);
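
For context on the hunk above: ExpandFNEG lowers a vector fneg to an integer XOR with the sign-bit mask when that XOR is legal or custom for the corresponding integer type, and the reverted change had narrowed when that path is taken (the FIXME about forcing v1f64 unrolling on AArch64). The bit identity it relies on can be sketched in a few lines of standalone C++; this is illustrative only, not LLVM internals, and the names are made up for the example:

    #include <cstdint>
    #include <cstdio>

    // fneg on an IEEE half is just flipping the sign bit of the 16-bit payload,
    // which is the per-element integer XOR that the expansion emits.
    static uint16_t fneg_half_bits(uint16_t bits) {
      return bits ^ 0x8000u;  // 0x8000 = f16 sign-bit mask
    }

    int main() {
      uint16_t one = 0x3C00;  // 1.0 in IEEE half
      std::printf("%#06x -> %#06x\n", one, fneg_half_bits(one));  // 0x3c00 -> 0xbc00 (-1.0)
      return 0;
    }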

diff --git a/llvm/test/CodeGen/NVPTX/f16-instructions.ll b/llvm/test/CodeGen/NVPTX/f16-instructions.ll
index 72e3bbfa91cd81..14e02a49f6e5e4 100644
--- a/llvm/test/CodeGen/NVPTX/f16-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/f16-instructions.ll
@@ -1192,7 +1192,8 @@ define half @test_neg_f16(half noundef %arg) #0 {
 ; CHECK-LABEL: test_neg_f16x2(
 ; CHECK-F16-NOFTZ: neg.f16x2
 ; CHECK-F16-FTZ: neg.ftz.f16x2
-; CHECK-NOF16: xor.b32 %r{{.*}}, %r{{.*}}, -2147450880
+; CHECK-NOF16: xor.b16  	%rs{{.*}}, %rs{{.*}}, -32768
+; CHECK-NOF16: xor.b16  	%rs{{.*}}, %rs{{.*}}, -32768
 define <2 x half> @test_neg_f16x2(<2 x half> noundef %arg) #0 {
   %res = fneg <2 x half> %arg
   ret <2 x half> %res
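
The immediates in the CHECK lines above are the same f16 sign-bit mask printed at two widths: 0x8000 is -32768 as a signed 16-bit value, and the packed pair 0x80008000 is -2147450880 as a signed 32-bit value. A small, hypothetical C++ check of that arithmetic (illustrative only):

    #include <cstdint>
    #include <cstdio>

    int main() {
      int16_t half_mask = (int16_t)0x8000;         // one f16 sign bit
      int32_t packed_mask = (int32_t)0x80008000u;  // two f16 sign bits packed into a b32 lane
      std::printf("%d %d\n", half_mask, packed_mask);  // prints: -32768 -2147450880
      return 0;
    }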

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
index 34a45c57441789..d996a9c05aca4d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
@@ -428,11 +428,50 @@ define void @fneg_v8f16(ptr %x) {
 ;
 ; ZVFHMIN-LABEL: fneg_v8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    addi sp, sp, -16
+; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
+; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, mu
 ; ZVFHMIN-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vxor.vx v8, v8, a1
-; ZVFHMIN-NEXT:    vse16.v v8, (a0)
+; ZVFHMIN-NEXT:    mv a1, sp
+; ZVFHMIN-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-NEXT:    flh fa4, 0(sp)
+; ZVFHMIN-NEXT:    flh fa3, 4(sp)
+; ZVFHMIN-NEXT:    fmv.x.h a1, fa5
+; ZVFHMIN-NEXT:    fmv.x.h a2, fa4
+; ZVFHMIN-NEXT:    lui a3, 1048568
+; ZVFHMIN-NEXT:    fmv.x.h a4, fa3
+; ZVFHMIN-NEXT:    flh fa5, 6(sp)
+; ZVFHMIN-NEXT:    lui a5, 8
+; ZVFHMIN-NEXT:    xor a2, a2, a5
+; ZVFHMIN-NEXT:    vmv.v.x v8, a2
+; ZVFHMIN-NEXT:    fmv.x.h a2, fa5
+; ZVFHMIN-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-NEXT:    xor a1, a1, a3
+; ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a1
+; ZVFHMIN-NEXT:    xor a4, a4, a3
+; ZVFHMIN-NEXT:    fmv.x.h a1, fa5
+; ZVFHMIN-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a4
+; ZVFHMIN-NEXT:    xor a2, a2, a3
+; ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a2
+; ZVFHMIN-NEXT:    fmv.x.h a2, fa5
+; ZVFHMIN-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-NEXT:    xor a1, a1, a3
+; ZVFHMIN-NEXT:    xor a2, a2, a5
+; ZVFHMIN-NEXT:    vmv.v.x v9, a2
+; ZVFHMIN-NEXT:    fmv.x.h a2, fa5
+; ZVFHMIN-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-NEXT:    vslide1down.vx v9, v9, a1
+; ZVFHMIN-NEXT:    xor a2, a2, a3
+; ZVFHMIN-NEXT:    vslide1down.vx v9, v9, a2
+; ZVFHMIN-NEXT:    fmv.x.h a1, fa5
+; ZVFHMIN-NEXT:    xor a1, a1, a3
+; ZVFHMIN-NEXT:    vmv.v.i v0, 15
+; ZVFHMIN-NEXT:    vslide1down.vx v9, v9, a1
+; ZVFHMIN-NEXT:    vslidedown.vi v9, v8, 4, v0.t
+; ZVFHMIN-NEXT:    vse16.v v9, (a0)
+; ZVFHMIN-NEXT:    addi sp, sp, 16
 ; ZVFHMIN-NEXT:    ret
   %a = load <8 x half>, ptr %x
   %b = fneg <8 x half> %a
@@ -451,11 +490,52 @@ define void @fneg_v6f16(ptr %x) {
 ;
 ; ZVFHMIN-LABEL: fneg_v6f16:
 ; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    addi sp, sp, -16
+; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
 ; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vxor.vx v8, v8, a1
-; ZVFHMIN-NEXT:    vse16.v v8, (a0)
+; ZVFHMIN-NEXT:    mv a1, sp
+; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-NEXT:    flh fa4, 0(sp)
+; ZVFHMIN-NEXT:    flh fa3, 4(sp)
+; ZVFHMIN-NEXT:    fmv.x.h a1, fa5
+; ZVFHMIN-NEXT:    fmv.x.h a2, fa4
+; ZVFHMIN-NEXT:    lui a3, 1048568
+; ZVFHMIN-NEXT:    fmv.x.h a4, fa3
+; ZVFHMIN-NEXT:    flh fa5, 6(sp)
+; ZVFHMIN-NEXT:    lui a5, 8
+; ZVFHMIN-NEXT:    xor a2, a2, a5
+; ZVFHMIN-NEXT:    vmv.v.x v8, a2
+; ZVFHMIN-NEXT:    fmv.x.h a2, fa5
+; ZVFHMIN-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-NEXT:    xor a1, a1, a3
+; ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a1
+; ZVFHMIN-NEXT:    xor a4, a4, a3
+; ZVFHMIN-NEXT:    fmv.x.h a1, fa5
+; ZVFHMIN-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a4
+; ZVFHMIN-NEXT:    xor a2, a2, a3
+; ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a2
+; ZVFHMIN-NEXT:    fmv.x.h a2, fa5
+; ZVFHMIN-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-NEXT:    xor a1, a1, a3
+; ZVFHMIN-NEXT:    xor a2, a2, a5
+; ZVFHMIN-NEXT:    vmv.v.x v9, a2
+; ZVFHMIN-NEXT:    fmv.x.h a2, fa5
+; ZVFHMIN-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-NEXT:    vslide1down.vx v9, v9, a1
+; ZVFHMIN-NEXT:    xor a2, a2, a3
+; ZVFHMIN-NEXT:    vslide1down.vx v9, v9, a2
+; ZVFHMIN-NEXT:    fmv.x.h a1, fa5
+; ZVFHMIN-NEXT:    xor a1, a1, a3
+; ZVFHMIN-NEXT:    vmv.v.i v0, 15
+; ZVFHMIN-NEXT:    vslide1down.vx v9, v9, a1
+; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, mf2, ta, mu
+; ZVFHMIN-NEXT:    vslidedown.vi v9, v8, 4, v0.t
+; ZVFHMIN-NEXT:    vse16.v v9, (a0)
+; ZVFHMIN-NEXT:    addi sp, sp, 16
 ; ZVFHMIN-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = fneg <6 x half> %a
@@ -1933,50 +2013,48 @@ define void @copysign_neg_v8f16(ptr %x, ptr %y) {
 ; ZVFHMIN-ZFH-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, mu
 ; ZVFHMIN-ZFH-RV32-NEXT:    vle16.v v8, (a1)
 ; ZVFHMIN-ZFH-RV32-NEXT:    vle16.v v9, (a0)
-; ZVFHMIN-ZFH-RV32-NEXT:    lui a1, 8
-; ZVFHMIN-ZFH-RV32-NEXT:    vxor.vx v8, v8, a1
-; ZVFHMIN-ZFH-RV32-NEXT:    mv a1, sp
-; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v9, (a1)
 ; ZVFHMIN-ZFH-RV32-NEXT:    addi a1, sp, 16
 ; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v8, (a1)
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 2(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 18(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa3, 0(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa2, 16(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa5, fa4
-; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa3, fa2
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 4(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa3, 20(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa2, 6(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 28(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa3, 24(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa2, 26(sp)
 ; ZVFHMIN-ZFH-RV32-NEXT:    flh fa1, 22(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa4, fa3
-; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa2, fa1
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 10(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa3, 26(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a4, fa5
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 8(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa2, 24(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa4, fa4, fa3
-; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a5, fa4
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa0, 20(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh ft0, 16(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh ft1, 18(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    mv a1, sp
+; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v9, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh ft2, 2(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh ft3, 0(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h ft1, ft2, ft1
+; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a1, ft1
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h ft0, ft3, ft0
+; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a2, ft0
+; ZVFHMIN-ZFH-RV32-NEXT:    flh ft0, 4(sp)
 ; ZVFHMIN-ZFH-RV32-NEXT:    vmv.v.x v8, a2
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa5, fa2
-; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 12(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 28(sp)
 ; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v8, v8, a3
-; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v8, v8, a4
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa5, fa4
-; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 14(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 30(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh ft1, 6(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa0, ft0, fa0
+; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa1, ft1, fa1
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa0, 10(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a1, fa1
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa1, 8(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa2, fa0, fa2
+; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a1, fa2
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa3, fa1, fa3
+; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a2, fa3
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa3, 12(sp)
 ; ZVFHMIN-ZFH-RV32-NEXT:    vmv.v.x v9, a2
-; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v9, v9, a5
 ; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v9, v9, a1
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa2, 14(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa4, fa3, fa4
+; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a1, fa4
+; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa5, fa2, fa5
 ; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a1, fa5
 ; ZVFHMIN-ZFH-RV32-NEXT:    vmv.v.i v0, 15
 ; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v9, v9, a1
@@ -1992,50 +2070,48 @@ define void @copysign_neg_v8f16(ptr %x, ptr %y) {
 ; ZVFHMIN-ZFH-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, mu
 ; ZVFHMIN-ZFH-RV64-NEXT:    vle16.v v8, (a1)
 ; ZVFHMIN-ZFH-RV64-NEXT:    vle16.v v9, (a0)
-; ZVFHMIN-ZFH-RV64-NEXT:    lui a1, 8
-; ZVFHMIN-ZFH-RV64-NEXT:    vxor.vx v8, v8, a1
-; ZVFHMIN-ZFH-RV64-NEXT:    mv a1, sp
-; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v9, (a1)
 ; ZVFHMIN-ZFH-RV64-NEXT:    addi a1, sp, 16
 ; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v8, (a1)
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 2(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 18(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa3, 0(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa2, 16(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa5, fa4
-; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa3, fa2
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 4(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa3, 20(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa2, 6(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 28(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa3, 24(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa2, 26(sp)
 ; ZVFHMIN-ZFH-RV64-NEXT:    flh fa1, 22(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa4, fa3
-; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa2, fa1
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 10(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa3, 26(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a4, fa5
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 8(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa2, 24(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa4, fa4, fa3
-; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a5, fa4
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa0, 20(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh ft0, 16(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh ft1, 18(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    mv a1, sp
+; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v9, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh ft2, 2(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh ft3, 0(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h ft1, ft2, ft1
+; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a1, ft1
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h ft0, ft3, ft0
+; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a2, ft0
+; ZVFHMIN-ZFH-RV64-NEXT:    flh ft0, 4(sp)
 ; ZVFHMIN-ZFH-RV64-NEXT:    vmv.v.x v8, a2
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa5, fa2
-; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 12(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 28(sp)
 ; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v8, v8, a3
-; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v8, v8, a4
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa5, fa4
-; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 14(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 30(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh ft1, 6(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa0, ft0, fa0
+; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa1, ft1, fa1
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa0, 10(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a1, fa1
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa1, 8(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa2, fa0, fa2
+; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a1, fa2
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa3, fa1, fa3
+; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a2, fa3
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa3, 12(sp)
 ; ZVFHMIN-ZFH-RV64-NEXT:    vmv.v.x v9, a2
-; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v9, v9, a5
 ; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v9, v9, a1
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa2, 14(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa4, fa3, fa4
+; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a1, fa4
+; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa5, fa2, fa5
 ; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a1, fa5
 ; ZVFHMIN-ZFH-RV64-NEXT:    vmv.v.i v0, 15
 ; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v9, v9, a1
@@ -2051,76 +2127,83 @@ define void @copysign_neg_v8f16(ptr %x, ptr %y) {
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, mu
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v8, (a1)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v9, (a0)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    lui a1, 8
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vxor.vx v8, v8, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    mv a2, sp
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v9, (a2)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a2, sp, 16
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a2)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 2(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa4, 18(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a1, sp, 16
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 30(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa4, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a1, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a2, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa4, 26(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa3, 22(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 0(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a4, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a5, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a6, fa3
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa4, 16(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa3, 4(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a7, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    not a1, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    not a2, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h t0, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 18(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    not a4, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    not t1, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    not a6, a6
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a3, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    mv a5, sp
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v9, (a5)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    not a7, a7
+; ZVFHMIN-ZFHIN-RV32-NEXT:    not t0, t0
+; ZVFHMIN-ZFHIN-RV32-NEXT:    not t2, a3
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a5, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 20(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a6, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a7, fa3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a2, a1, -1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lui t3, 8
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a3, t3, -1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and t4, a5, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lui a5, 1048568
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and t2, t2, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or t2, t4, t2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h t4, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and t4, t4, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and t0, t0, t3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or t0, t4, t0
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.x v8, t0
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, t2
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h t0, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and t0, t0, a3
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 6(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and t1, a3, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    lui a3, 1048568
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a4, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h t2, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 22(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a4, t1, a4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, a5, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a6, a6, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h t1, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a7, a7, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a7, t0, a7
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a7
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a7, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a7, a7, a3
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 10(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a5, a5, a6
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.x v8, a5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a4, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 26(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, a7, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a6, t0, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a5, a5, a6
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a6, a6, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a6, a7, a6
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a6
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a6, fa5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 8(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, t2, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a7, t1, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h t0, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 24(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a5, a5, a7
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a4, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a5, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 12(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    and a6, a6, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a4, a4, a6
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a5, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a5, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 28(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a6, t0, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a6, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.x v9, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 14(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v9, v9, a4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, a5, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a1, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a7, t1, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a6, a6, a7
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a7, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a7, a7, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a4, t3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a4, a7, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.x v9, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v9, v9, a6
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a4, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 30(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a5, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v9, v9, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a4, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a1, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a4, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a2, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a2, a4, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v9, v9, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a2, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a2, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a1, a5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a2, a1
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.i v0, 15
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v9, v9, a1
@@ -2136,76 +2219,83 @@ define void @copysign_neg_v8f16(ptr %x, ptr %y) {
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, mu
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vle16.v v8, (a1)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vle16.v v9, (a0)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    lui a1, 8
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vxor.vx v8, v8, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    mv a2, sp
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v9, (a2)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a2, sp, 16
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a2)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 2(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa4, 18(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a1, sp, 16
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 30(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa4, 28(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a1, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a2, fa4
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa4, 26(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa3, 22(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 0(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a4, fa4
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a5, fa4
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a6, fa3
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa4, 16(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa3, 4(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a7, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    not a1, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    not a2, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h t0, fa4
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 18(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    not a4, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    not t1, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    not a6, a6
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a3, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    mv a5, sp
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v9, (a5)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    not a7, a7
+; ZVFHMIN-ZFHIN-RV64-NEXT:    not t0, t0
+; ZVFHMIN-ZFHIN-RV64-NEXT:    not t2, a3
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a5, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 20(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a6, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a7, fa3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    addiw a2, a1, -1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lui t3, 8
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addiw a3, t3, -1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and t4, a5, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lui a5, 1048568
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and t2, t2, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or t2, t4, t2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h t4, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and t4, t4, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and t0, t0, t3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or t0, t4, t0
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.x v8, t0
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, t2
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h t0, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and t0, t0, a3
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 6(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and t1, a3, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    lui a3, 1048568
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a4, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h t2, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 22(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a4, t1, a4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, a5, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a6, a6, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h t1, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a7, a7, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a7, t0, a7
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a7
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a7, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a7, a7, a3
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 10(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a5, a5, a6
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.x v8, a5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a4, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 26(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, a7, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a6, t0, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a5, a5, a6
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a6, a6, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a6, a7, a6
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a6
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a6, fa5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 8(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, t2, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a7, t1, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h t0, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 24(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a5, a5, a7
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a4, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a5, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 12(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    and a6, a6, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a4, a4, a6
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a5, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a5, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 28(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a6, t0, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a6, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.x v9, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 14(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v9, v9, a4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, a5, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a1, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a7, t1, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a6, a6, a7
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a7, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a7, a7, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a4, t3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a4, a7, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.x v9, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v9, v9, a6
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a4, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 30(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a5, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v9, v9, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a4, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a1, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a4, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a2, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a2, a4, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v9, v9, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a2, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a2, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a1, a5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a2, a1
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.i v0, 15
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v9, v9, a1
@@ -2238,51 +2328,49 @@ define void @copysign_neg_v6f16(ptr %x, ptr %y) {
 ; ZVFHMIN-ZFH-RV32-NEXT:    vsetivli zero, 6, e16, mf2, ta, ma
 ; ZVFHMIN-ZFH-RV32-NEXT:    vle16.v v8, (a1)
 ; ZVFHMIN-ZFH-RV32-NEXT:    vle16.v v9, (a0)
-; ZVFHMIN-ZFH-RV32-NEXT:    lui a1, 8
-; ZVFHMIN-ZFH-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-ZFH-RV32-NEXT:    vxor.vx v8, v8, a1
-; ZVFHMIN-ZFH-RV32-NEXT:    mv a1, sp
-; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v9, (a1)
 ; ZVFHMIN-ZFH-RV32-NEXT:    addi a1, sp, 16
+; ZVFHMIN-ZFH-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
 ; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v8, (a1)
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 2(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 18(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa3, 0(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa2, 16(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa5, fa4
-; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa3, fa2
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 4(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa3, 20(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa2, 6(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 28(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa3, 24(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa2, 26(sp)
 ; ZVFHMIN-ZFH-RV32-NEXT:    flh fa1, 22(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa4, fa3
-; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa2, fa1
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 10(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa3, 26(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a4, fa5
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 8(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa2, 24(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa4, fa4, fa3
-; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a5, fa4
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa0, 20(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh ft0, 16(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh ft1, 18(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    mv a1, sp
+; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v9, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh ft2, 2(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh ft3, 0(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h ft1, ft2, ft1
+; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a1, ft1
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h ft0, ft3, ft0
+; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a2, ft0
+; ZVFHMIN-ZFH-RV32-NEXT:    flh ft0, 4(sp)
 ; ZVFHMIN-ZFH-RV32-NEXT:    vmv.v.x v8, a2
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa5, fa2
-; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 12(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 28(sp)
 ; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v8, v8, a3
-; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v8, v8, a4
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa5, fa4
-; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 14(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 30(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh ft1, 6(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa0, ft0, fa0
+; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa1, ft1, fa1
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa0, 10(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a1, fa1
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa1, 8(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa2, fa0, fa2
+; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a1, fa2
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa3, fa1, fa3
+; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a2, fa3
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa3, 12(sp)
 ; ZVFHMIN-ZFH-RV32-NEXT:    vmv.v.x v9, a2
-; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v9, v9, a5
 ; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v9, v9, a1
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa2, 14(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa4, fa3, fa4
+; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a1, fa4
+; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa5, fa2, fa5
 ; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a1, fa5
 ; ZVFHMIN-ZFH-RV32-NEXT:    vmv.v.i v0, 15
 ; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v9, v9, a1
@@ -2299,51 +2387,49 @@ define void @copysign_neg_v6f16(ptr %x, ptr %y) {
 ; ZVFHMIN-ZFH-RV64-NEXT:    vsetivli zero, 6, e16, mf2, ta, ma
 ; ZVFHMIN-ZFH-RV64-NEXT:    vle16.v v8, (a1)
 ; ZVFHMIN-ZFH-RV64-NEXT:    vle16.v v9, (a0)
-; ZVFHMIN-ZFH-RV64-NEXT:    lui a1, 8
-; ZVFHMIN-ZFH-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-ZFH-RV64-NEXT:    vxor.vx v8, v8, a1
-; ZVFHMIN-ZFH-RV64-NEXT:    mv a1, sp
-; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v9, (a1)
 ; ZVFHMIN-ZFH-RV64-NEXT:    addi a1, sp, 16
+; ZVFHMIN-ZFH-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
 ; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v8, (a1)
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 2(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 18(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa3, 0(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa2, 16(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa5, fa4
-; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa3, fa2
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 4(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa3, 20(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa2, 6(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 28(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa3, 24(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa2, 26(sp)
 ; ZVFHMIN-ZFH-RV64-NEXT:    flh fa1, 22(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa4, fa3
-; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa2, fa1
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 10(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa3, 26(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a4, fa5
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 8(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa2, 24(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa4, fa4, fa3
-; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a5, fa4
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa0, 20(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh ft0, 16(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh ft1, 18(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    mv a1, sp
+; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v9, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh ft2, 2(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh ft3, 0(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h ft1, ft2, ft1
+; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a1, ft1
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h ft0, ft3, ft0
+; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a2, ft0
+; ZVFHMIN-ZFH-RV64-NEXT:    flh ft0, 4(sp)
 ; ZVFHMIN-ZFH-RV64-NEXT:    vmv.v.x v8, a2
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa5, fa2
-; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 12(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 28(sp)
 ; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v8, v8, a3
-; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v8, v8, a4
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa5, fa4
-; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 14(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 30(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh ft1, 6(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa0, ft0, fa0
+; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa1, ft1, fa1
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa0, 10(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a1, fa1
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa1, 8(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa2, fa0, fa2
+; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a1, fa2
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa3, fa1, fa3
+; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a2, fa3
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa3, 12(sp)
 ; ZVFHMIN-ZFH-RV64-NEXT:    vmv.v.x v9, a2
-; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v9, v9, a5
 ; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v9, v9, a1
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa2, 14(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa4, fa3, fa4
+; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a1, fa4
+; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa5, fa2, fa5
 ; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a1, fa5
 ; ZVFHMIN-ZFH-RV64-NEXT:    vmv.v.i v0, 15
 ; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v9, v9, a1
@@ -2360,77 +2446,84 @@ define void @copysign_neg_v6f16(ptr %x, ptr %y) {
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetivli zero, 6, e16, mf2, ta, ma
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v8, (a1)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v9, (a0)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    lui a1, 8
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a1, sp, 16
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vxor.vx v8, v8, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    mv a2, sp
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v9, (a2)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a2, sp, 16
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a2)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 2(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa4, 18(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 30(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa4, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a1, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a2, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa4, 26(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa3, 22(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 0(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a4, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a5, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a6, fa3
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa4, 16(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa3, 4(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a7, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    not a1, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    not a2, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h t0, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 18(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    not a4, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    not t1, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    not a6, a6
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a3, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    mv a5, sp
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v9, (a5)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    not a7, a7
+; ZVFHMIN-ZFHIN-RV32-NEXT:    not t0, t0
+; ZVFHMIN-ZFHIN-RV32-NEXT:    not t2, a3
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a5, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 20(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a6, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a7, fa3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a2, a1, -1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lui t3, 8
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a3, t3, -1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and t4, a5, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lui a5, 1048568
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and t2, t2, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or t2, t4, t2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h t4, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and t4, t4, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and t0, t0, t3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or t0, t4, t0
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.x v8, t0
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, t2
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h t0, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and t0, t0, a3
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 6(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and t1, a3, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    lui a3, 1048568
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a4, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h t2, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 22(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a4, t1, a4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, a5, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a6, a6, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h t1, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a7, a7, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a7, t0, a7
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a7
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a7, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a7, a7, a3
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 10(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a5, a5, a6
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.x v8, a5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a4, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 26(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, a7, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a6, t0, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a5, a5, a6
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a6, a6, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a6, a7, a6
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a6
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a6, fa5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 8(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, t2, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a7, t1, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h t0, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 24(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a5, a5, a7
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a4, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a5, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 12(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    and a6, a6, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a4, a4, a6
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a5, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a5, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 28(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a6, t0, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a6, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.x v9, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 14(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v9, v9, a4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, a5, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a1, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a7, t1, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a6, a6, a7
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a7, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a7, a7, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a4, t3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a4, a7, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.x v9, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v9, v9, a6
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a4, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 30(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a5, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v9, v9, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a4, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a1, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a4, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a2, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a2, a4, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v9, v9, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a2, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a2, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a1, a5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a2, a1
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.i v0, 15
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v9, v9, a1
@@ -2447,77 +2540,84 @@ define void @copysign_neg_v6f16(ptr %x, ptr %y) {
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vsetivli zero, 6, e16, mf2, ta, ma
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vle16.v v8, (a1)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vle16.v v9, (a0)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    lui a1, 8
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a1, sp, 16
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vxor.vx v8, v8, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    mv a2, sp
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v9, (a2)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a2, sp, 16
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a2)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 2(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa4, 18(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 30(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa4, 28(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a1, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a2, fa4
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa4, 26(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa3, 22(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 0(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a4, fa4
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a5, fa4
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a6, fa3
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa4, 16(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa3, 4(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a7, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    not a1, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    not a2, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h t0, fa4
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 18(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    not a4, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    not t1, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    not a6, a6
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a3, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    mv a5, sp
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v9, (a5)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    not a7, a7
+; ZVFHMIN-ZFHIN-RV64-NEXT:    not t0, t0
+; ZVFHMIN-ZFHIN-RV64-NEXT:    not t2, a3
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a5, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 20(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a6, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a7, fa3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    addiw a2, a1, -1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lui t3, 8
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addiw a3, t3, -1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and t4, a5, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lui a5, 1048568
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and t2, t2, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or t2, t4, t2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h t4, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and t4, t4, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and t0, t0, t3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or t0, t4, t0
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.x v8, t0
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, t2
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h t0, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and t0, t0, a3
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 6(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and t1, a3, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    lui a3, 1048568
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a4, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h t2, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 22(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a4, t1, a4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, a5, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a6, a6, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h t1, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a7, a7, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a7, t0, a7
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a7
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a7, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a7, a7, a3
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 10(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a5, a5, a6
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.x v8, a5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a4, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 26(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, a7, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a6, t0, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a5, a5, a6
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a6, a6, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a6, a7, a6
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a6
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a6, fa5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 8(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, t2, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a7, t1, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h t0, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 24(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a5, a5, a7
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a4, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a5, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 12(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    and a6, a6, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a4, a4, a6
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a5, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a5, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 28(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a6, t0, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a6, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.x v9, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 14(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v9, v9, a4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, a5, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a1, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a7, t1, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a6, a6, a7
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a7, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a7, a7, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a4, t3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a4, a7, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.x v9, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v9, v9, a6
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a4, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 30(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a5, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v9, v9, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a4, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a1, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a4, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a2, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a2, a4, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v9, v9, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a2, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a2, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a1, a5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a2, a1
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.i v0, 15
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v9, v9, a1
@@ -2598,28 +2698,26 @@ define void @copysign_neg_trunc_v4f16_v4f32(ptr %x, ptr %y) {
 ; ZVFHMIN-ZFH-RV32-NEXT:    mv a1, sp
 ; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v8, (a1)
 ; ZVFHMIN-ZFH-RV32-NEXT:    vfncvt.f.f.w v8, v9
-; ZVFHMIN-ZFH-RV32-NEXT:    lui a1, 8
-; ZVFHMIN-ZFH-RV32-NEXT:    vxor.vx v8, v8, a1
 ; ZVFHMIN-ZFH-RV32-NEXT:    addi a1, sp, 8
 ; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v8, (a1)
 ; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 2(sp)
 ; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 10(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa5, fa5, fa4
 ; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 0(sp)
 ; ZVFHMIN-ZFH-RV32-NEXT:    flh fa3, 8(sp)
 ; ZVFHMIN-ZFH-RV32-NEXT:    flh fa2, 4(sp)
 ; ZVFHMIN-ZFH-RV32-NEXT:    flh fa1, 12(sp)
 ; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa4, fa3
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa5, fa4, fa3
 ; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa2, fa1
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa5, fa2, fa1
 ; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a3, fa5
 ; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 6(sp)
 ; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 14(sp)
 ; ZVFHMIN-ZFH-RV32-NEXT:    vmv.v.x v8, a2
 ; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v8, v8, a1
 ; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v8, v8, a3
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa5, fa5, fa4
 ; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a1, fa5
 ; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v8, v8, a1
 ; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v8, (a0)
@@ -2636,28 +2734,26 @@ define void @copysign_neg_trunc_v4f16_v4f32(ptr %x, ptr %y) {
 ; ZVFHMIN-ZFH-RV64-NEXT:    mv a1, sp
 ; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v8, (a1)
 ; ZVFHMIN-ZFH-RV64-NEXT:    vfncvt.f.f.w v8, v9
-; ZVFHMIN-ZFH-RV64-NEXT:    lui a1, 8
-; ZVFHMIN-ZFH-RV64-NEXT:    vxor.vx v8, v8, a1
 ; ZVFHMIN-ZFH-RV64-NEXT:    addi a1, sp, 8
 ; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v8, (a1)
 ; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 2(sp)
 ; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 10(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa5, fa5, fa4
 ; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 0(sp)
 ; ZVFHMIN-ZFH-RV64-NEXT:    flh fa3, 8(sp)
 ; ZVFHMIN-ZFH-RV64-NEXT:    flh fa2, 4(sp)
 ; ZVFHMIN-ZFH-RV64-NEXT:    flh fa1, 12(sp)
 ; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa4, fa3
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa5, fa4, fa3
 ; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa2, fa1
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa5, fa2, fa1
 ; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a3, fa5
 ; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 6(sp)
 ; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 14(sp)
 ; ZVFHMIN-ZFH-RV64-NEXT:    vmv.v.x v8, a2
 ; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v8, v8, a1
 ; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v8, v8, a3
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa5, fa5, fa4
 ; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a1, fa5
 ; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v8, v8, a1
 ; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v8, (a0)
@@ -2674,41 +2770,44 @@ define void @copysign_neg_trunc_v4f16_v4f32(ptr %x, ptr %y) {
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    mv a1, sp
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a1)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vfncvt.f.f.w v8, v9
-; ZVFHMIN-ZFHIN-RV32-NEXT:    lui a1, 8
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vxor.vx v8, v8, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a2, sp, 8
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a2)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a1, sp, 8
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a1)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 2(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa4, 10(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa3, 0(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a2, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a1, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lui a2, 8
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a3, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a4, fa3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a4, a2, -1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a1, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    not a3, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a5, fa5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 8(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a5, a1, -1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a2, a5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    lui a6, 1048568
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a7, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 4(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    and a3, a3, a6
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a2, a2, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a7, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a1, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a3, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, a5, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    not a3, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a3, a2
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a3, fa5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 12(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a4, a5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a4, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.x v8, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a2, a5, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.x v8, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a1
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a1, fa5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 6(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a3, a3, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a3, a3, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    not a1, a1
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a1, a6
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a2, fa5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 14(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a3, a1
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a2, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a2, a4
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a1, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    not a1, a1
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a1, a6
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a2, a1
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a1
@@ -2726,41 +2825,44 @@ define void @copysign_neg_trunc_v4f16_v4f32(ptr %x, ptr %y) {
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    mv a1, sp
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a1)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vfncvt.f.f.w v8, v9
-; ZVFHMIN-ZFHIN-RV64-NEXT:    lui a1, 8
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vxor.vx v8, v8, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a2, sp, 8
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a2)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a1, sp, 8
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a1)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 2(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa4, 10(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa3, 0(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a2, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a1, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lui a2, 8
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a3, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a4, fa3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addiw a4, a2, -1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a1, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT:    not a3, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a5, fa5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 8(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    addiw a5, a1, -1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a2, a5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    lui a6, 1048568
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a7, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 4(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    and a3, a3, a6
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a2, a2, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a7, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a1, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a3, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, a5, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT:    not a3, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a3, a2
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a3, fa5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 12(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a4, a5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a4, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.x v8, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a2, a5, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.x v8, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a1
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a1, fa5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 6(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a3, a3, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a3, a3, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT:    not a1, a1
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a1, a6
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a2, fa5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 14(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a3, a1
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a2, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a2, a4
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a1, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    not a1, a1
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a1, a6
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a2, a1
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a1
@@ -2801,28 +2903,26 @@ define void @copysign_neg_trunc_v3f16_v3f32(ptr %x, ptr %y) {
 ; ZVFHMIN-ZFH-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
 ; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v8, (a1)
 ; ZVFHMIN-ZFH-RV32-NEXT:    vfncvt.f.f.w v8, v9
-; ZVFHMIN-ZFH-RV32-NEXT:    lui a1, 8
-; ZVFHMIN-ZFH-RV32-NEXT:    vxor.vx v8, v8, a1
 ; ZVFHMIN-ZFH-RV32-NEXT:    addi a1, sp, 8
 ; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v8, (a1)
 ; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 2(sp)
 ; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 10(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa5, fa5, fa4
 ; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 0(sp)
 ; ZVFHMIN-ZFH-RV32-NEXT:    flh fa3, 8(sp)
 ; ZVFHMIN-ZFH-RV32-NEXT:    flh fa2, 4(sp)
 ; ZVFHMIN-ZFH-RV32-NEXT:    flh fa1, 12(sp)
 ; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa4, fa3
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa5, fa4, fa3
 ; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa2, fa1
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa5, fa2, fa1
 ; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a3, fa5
 ; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 6(sp)
 ; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 14(sp)
 ; ZVFHMIN-ZFH-RV32-NEXT:    vmv.v.x v8, a2
 ; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v8, v8, a1
 ; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v8, v8, a3
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa5, fa5, fa4
 ; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a1, fa5
 ; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v8, v8, a1
 ; ZVFHMIN-ZFH-RV32-NEXT:    vsetivli zero, 3, e16, mf4, ta, ma
@@ -2841,28 +2941,26 @@ define void @copysign_neg_trunc_v3f16_v3f32(ptr %x, ptr %y) {
 ; ZVFHMIN-ZFH-RV64-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
 ; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v8, (a1)
 ; ZVFHMIN-ZFH-RV64-NEXT:    vfncvt.f.f.w v8, v9
-; ZVFHMIN-ZFH-RV64-NEXT:    lui a1, 8
-; ZVFHMIN-ZFH-RV64-NEXT:    vxor.vx v8, v8, a1
 ; ZVFHMIN-ZFH-RV64-NEXT:    addi a1, sp, 8
 ; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v8, (a1)
 ; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 2(sp)
 ; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 10(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa5, fa5, fa4
 ; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 0(sp)
 ; ZVFHMIN-ZFH-RV64-NEXT:    flh fa3, 8(sp)
 ; ZVFHMIN-ZFH-RV64-NEXT:    flh fa2, 4(sp)
 ; ZVFHMIN-ZFH-RV64-NEXT:    flh fa1, 12(sp)
 ; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa4, fa3
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa5, fa4, fa3
 ; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa2, fa1
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa5, fa2, fa1
 ; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a3, fa5
 ; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 6(sp)
 ; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 14(sp)
 ; ZVFHMIN-ZFH-RV64-NEXT:    vmv.v.x v8, a2
 ; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v8, v8, a1
 ; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v8, v8, a3
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa5, fa5, fa4
 ; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a1, fa5
 ; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v8, v8, a1
 ; ZVFHMIN-ZFH-RV64-NEXT:    vsetivli zero, 3, e16, mf4, ta, ma
@@ -2881,41 +2979,44 @@ define void @copysign_neg_trunc_v3f16_v3f32(ptr %x, ptr %y) {
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a1)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vfncvt.f.f.w v8, v9
-; ZVFHMIN-ZFHIN-RV32-NEXT:    lui a1, 8
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vxor.vx v8, v8, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a2, sp, 8
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a2)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a1, sp, 8
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a1)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 2(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa4, 10(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa3, 0(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a2, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a1, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lui a2, 8
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a3, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a4, fa3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a4, a2, -1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a1, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    not a3, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a5, fa5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 8(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a5, a1, -1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a2, a5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    lui a6, 1048568
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a7, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 4(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    and a3, a3, a6
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a2, a2, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a7, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a1, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a3, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, a5, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    not a3, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a3, a2
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a3, fa5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 12(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a4, a5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a4, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.x v8, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a2, a5, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.x v8, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a1
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a1, fa5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 6(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a3, a3, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a3, a3, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    not a1, a1
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a1, a6
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a2, fa5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 14(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a3, a1
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a2, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a2, a4
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a1, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    not a1, a1
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a1, a6
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a2, a1
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a1
@@ -2935,41 +3036,44 @@ define void @copysign_neg_trunc_v3f16_v3f32(ptr %x, ptr %y) {
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a1)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vfncvt.f.f.w v8, v9
-; ZVFHMIN-ZFHIN-RV64-NEXT:    lui a1, 8
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vxor.vx v8, v8, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a2, sp, 8
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a2)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a1, sp, 8
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a1)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 2(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa4, 10(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa3, 0(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a2, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a1, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lui a2, 8
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a3, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a4, fa3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addiw a4, a2, -1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a1, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT:    not a3, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a5, fa5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 8(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    addiw a5, a1, -1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a2, a5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    lui a6, 1048568
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a7, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 4(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    and a3, a3, a6
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a2, a2, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a7, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a1, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a3, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 4(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, a5, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT:    not a3, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a3, a2
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a3, fa5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 12(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a4, a5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a4, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.x v8, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a2, a5, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.x v8, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a1
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a1, fa5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 6(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a3, a3, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a3, a3, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT:    not a1, a1
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a1, a6
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a2, fa5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 14(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a3, a1
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a2, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a2, a4
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a1, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    not a1, a1
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a1, a6
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a2, a1
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a1
@@ -3232,20 +3336,59 @@ define void @fmsub_v8f16(ptr %x, ptr %y, ptr %z) {
 ;
 ; ZVFHMIN-LABEL: fmsub_v8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    addi sp, sp, -16
+; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
+; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, mu
 ; ZVFHMIN-NEXT:    vle16.v v8, (a2)
 ; ZVFHMIN-NEXT:    vle16.v v9, (a0)
 ; ZVFHMIN-NEXT:    vle16.v v10, (a1)
-; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vxor.vx v8, v8, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    mv a1, sp
+; ZVFHMIN-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-NEXT:    flh fa4, 0(sp)
+; ZVFHMIN-NEXT:    flh fa3, 4(sp)
+; ZVFHMIN-NEXT:    fmv.x.h a1, fa5
+; ZVFHMIN-NEXT:    fmv.x.h a2, fa4
+; ZVFHMIN-NEXT:    lui a3, 1048568
+; ZVFHMIN-NEXT:    fmv.x.h a4, fa3
+; ZVFHMIN-NEXT:    flh fa5, 6(sp)
+; ZVFHMIN-NEXT:    lui a5, 8
+; ZVFHMIN-NEXT:    xor a2, a2, a5
+; ZVFHMIN-NEXT:    vmv.v.x v8, a2
+; ZVFHMIN-NEXT:    fmv.x.h a2, fa5
+; ZVFHMIN-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-NEXT:    xor a1, a1, a3
+; ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a1
+; ZVFHMIN-NEXT:    xor a4, a4, a3
+; ZVFHMIN-NEXT:    fmv.x.h a1, fa5
+; ZVFHMIN-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a4
+; ZVFHMIN-NEXT:    xor a2, a2, a3
+; ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a2
+; ZVFHMIN-NEXT:    fmv.x.h a2, fa5
+; ZVFHMIN-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-NEXT:    xor a1, a1, a3
+; ZVFHMIN-NEXT:    xor a2, a2, a5
+; ZVFHMIN-NEXT:    vmv.v.x v11, a2
+; ZVFHMIN-NEXT:    fmv.x.h a2, fa5
+; ZVFHMIN-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-NEXT:    vslide1down.vx v11, v11, a1
+; ZVFHMIN-NEXT:    xor a2, a2, a3
+; ZVFHMIN-NEXT:    vslide1down.vx v11, v11, a2
+; ZVFHMIN-NEXT:    fmv.x.h a1, fa5
+; ZVFHMIN-NEXT:    xor a1, a1, a3
+; ZVFHMIN-NEXT:    vmv.v.i v0, 15
+; ZVFHMIN-NEXT:    vslide1down.vx v11, v11, a1
+; ZVFHMIN-NEXT:    vslidedown.vi v11, v8, 4, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v9, v8, v11
+; ZVFHMIN-NEXT:    vfmadd.vv v9, v11, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    vse16.v v8, (a0)
+; ZVFHMIN-NEXT:    addi sp, sp, 16
 ; ZVFHMIN-NEXT:    ret
   %a = load <8 x half>, ptr %x
   %b = load <8 x half>, ptr %y
@@ -3269,21 +3412,60 @@ define void @fmsub_v6f16(ptr %x, ptr %y, ptr %z) {
 ;
 ; ZVFHMIN-LABEL: fmsub_v6f16:
 ; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    addi sp, sp, -16
+; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
 ; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vle16.v v8, (a2)
 ; ZVFHMIN-NEXT:    vle16.v v9, (a0)
 ; ZVFHMIN-NEXT:    vle16.v v10, (a1)
-; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vxor.vx v8, v8, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    mv a1, sp
+; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, mu
+; ZVFHMIN-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-NEXT:    flh fa4, 0(sp)
+; ZVFHMIN-NEXT:    flh fa3, 4(sp)
+; ZVFHMIN-NEXT:    fmv.x.h a1, fa5
+; ZVFHMIN-NEXT:    fmv.x.h a2, fa4
+; ZVFHMIN-NEXT:    lui a3, 1048568
+; ZVFHMIN-NEXT:    fmv.x.h a4, fa3
+; ZVFHMIN-NEXT:    flh fa5, 6(sp)
+; ZVFHMIN-NEXT:    lui a5, 8
+; ZVFHMIN-NEXT:    xor a2, a2, a5
+; ZVFHMIN-NEXT:    vmv.v.x v8, a2
+; ZVFHMIN-NEXT:    fmv.x.h a2, fa5
+; ZVFHMIN-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-NEXT:    xor a1, a1, a3
+; ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a1
+; ZVFHMIN-NEXT:    xor a4, a4, a3
+; ZVFHMIN-NEXT:    fmv.x.h a1, fa5
+; ZVFHMIN-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a4
+; ZVFHMIN-NEXT:    xor a2, a2, a3
+; ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a2
+; ZVFHMIN-NEXT:    fmv.x.h a2, fa5
+; ZVFHMIN-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-NEXT:    xor a1, a1, a3
+; ZVFHMIN-NEXT:    xor a2, a2, a5
+; ZVFHMIN-NEXT:    vmv.v.x v11, a2
+; ZVFHMIN-NEXT:    fmv.x.h a2, fa5
+; ZVFHMIN-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-NEXT:    vslide1down.vx v11, v11, a1
+; ZVFHMIN-NEXT:    xor a2, a2, a3
+; ZVFHMIN-NEXT:    vslide1down.vx v11, v11, a2
+; ZVFHMIN-NEXT:    fmv.x.h a1, fa5
+; ZVFHMIN-NEXT:    xor a1, a1, a3
+; ZVFHMIN-NEXT:    vmv.v.i v0, 15
+; ZVFHMIN-NEXT:    vslide1down.vx v11, v11, a1
+; ZVFHMIN-NEXT:    vslidedown.vi v11, v8, 4, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v9, v8, v11
+; ZVFHMIN-NEXT:    vfmadd.vv v9, v11, v8
 ; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    vse16.v v8, (a0)
+; ZVFHMIN-NEXT:    addi sp, sp, 16
 ; ZVFHMIN-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = load <6 x half>, ptr %y
@@ -3672,14 +3854,187 @@ define void @fneg_v16f16(ptr %x) {
 ; ZVFH-NEXT:    vse16.v v8, (a0)
 ; ZVFH-NEXT:    ret
 ;
-; ZVFHMIN-LABEL: fneg_v16f16:
-; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m1, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vxor.vx v8, v8, a1
-; ZVFHMIN-NEXT:    vse16.v v8, (a0)
-; ZVFHMIN-NEXT:    ret
+; ZVFHMIN-RV32-LABEL: fneg_v16f16:
+; ZVFHMIN-RV32:       # %bb.0:
+; ZVFHMIN-RV32-NEXT:    addi sp, sp, -64
+; ZVFHMIN-RV32-NEXT:    .cfi_def_cfa_offset 64
+; ZVFHMIN-RV32-NEXT:    sw ra, 60(sp) # 4-byte Folded Spill
+; ZVFHMIN-RV32-NEXT:    sw s0, 56(sp) # 4-byte Folded Spill
+; ZVFHMIN-RV32-NEXT:    .cfi_offset ra, -4
+; ZVFHMIN-RV32-NEXT:    .cfi_offset s0, -8
+; ZVFHMIN-RV32-NEXT:    addi s0, sp, 64
+; ZVFHMIN-RV32-NEXT:    .cfi_def_cfa s0, 0
+; ZVFHMIN-RV32-NEXT:    andi sp, sp, -32
+; ZVFHMIN-RV32-NEXT:    vsetivli zero, 16, e16, m1, ta, mu
+; ZVFHMIN-RV32-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-RV32-NEXT:    mv a1, sp
+; ZVFHMIN-RV32-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-RV32-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-RV32-NEXT:    flh fa4, 0(sp)
+; ZVFHMIN-RV32-NEXT:    fmv.x.h a2, fa5
+; ZVFHMIN-RV32-NEXT:    flh fa5, 4(sp)
+; ZVFHMIN-RV32-NEXT:    flh fa3, 6(sp)
+; ZVFHMIN-RV32-NEXT:    fmv.x.h a3, fa4
+; ZVFHMIN-RV32-NEXT:    flh fa4, 8(sp)
+; ZVFHMIN-RV32-NEXT:    fmv.x.h a4, fa5
+; ZVFHMIN-RV32-NEXT:    fmv.x.h a5, fa3
+; ZVFHMIN-RV32-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-RV32-NEXT:    fmv.x.h a6, fa4
+; ZVFHMIN-RV32-NEXT:    lui a1, 1048568
+; ZVFHMIN-RV32-NEXT:    xor a2, a2, a1
+; ZVFHMIN-RV32-NEXT:    fmv.x.h a7, fa5
+; ZVFHMIN-RV32-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-RV32-NEXT:    lui t0, 8
+; ZVFHMIN-RV32-NEXT:    xor a3, a3, t0
+; ZVFHMIN-RV32-NEXT:    vmv.v.x v8, a3
+; ZVFHMIN-RV32-NEXT:    fmv.x.h a3, fa5
+; ZVFHMIN-RV32-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-RV32-NEXT:    vslide1down.vx v8, v8, a2
+; ZVFHMIN-RV32-NEXT:    xor a4, a4, a1
+; ZVFHMIN-RV32-NEXT:    vslide1down.vx v8, v8, a4
+; ZVFHMIN-RV32-NEXT:    fmv.x.h a2, fa5
+; ZVFHMIN-RV32-NEXT:    flh fa5, 18(sp)
+; ZVFHMIN-RV32-NEXT:    xor a5, a5, a1
+; ZVFHMIN-RV32-NEXT:    vslide1down.vx v8, v8, a5
+; ZVFHMIN-RV32-NEXT:    xor a4, a6, a1
+; ZVFHMIN-RV32-NEXT:    fmv.x.h a5, fa5
+; ZVFHMIN-RV32-NEXT:    flh fa5, 16(sp)
+; ZVFHMIN-RV32-NEXT:    vslide1down.vx v8, v8, a4
+; ZVFHMIN-RV32-NEXT:    xor a4, a7, a1
+; ZVFHMIN-RV32-NEXT:    vslide1down.vx v8, v8, a4
+; ZVFHMIN-RV32-NEXT:    fmv.x.h a4, fa5
+; ZVFHMIN-RV32-NEXT:    flh fa5, 20(sp)
+; ZVFHMIN-RV32-NEXT:    xor a3, a3, a1
+; ZVFHMIN-RV32-NEXT:    vslide1down.vx v8, v8, a3
+; ZVFHMIN-RV32-NEXT:    xor a2, a2, a1
+; ZVFHMIN-RV32-NEXT:    fmv.x.h a3, fa5
+; ZVFHMIN-RV32-NEXT:    flh fa5, 22(sp)
+; ZVFHMIN-RV32-NEXT:    vslide1down.vx v8, v8, a2
+; ZVFHMIN-RV32-NEXT:    xor a2, a4, t0
+; ZVFHMIN-RV32-NEXT:    vmv.v.x v9, a2
+; ZVFHMIN-RV32-NEXT:    fmv.x.h a2, fa5
+; ZVFHMIN-RV32-NEXT:    flh fa5, 24(sp)
+; ZVFHMIN-RV32-NEXT:    xor a5, a5, a1
+; ZVFHMIN-RV32-NEXT:    vslide1down.vx v9, v9, a5
+; ZVFHMIN-RV32-NEXT:    xor a3, a3, a1
+; ZVFHMIN-RV32-NEXT:    fmv.x.h a4, fa5
+; ZVFHMIN-RV32-NEXT:    flh fa5, 26(sp)
+; ZVFHMIN-RV32-NEXT:    vslide1down.vx v9, v9, a3
+; ZVFHMIN-RV32-NEXT:    xor a2, a2, a1
+; ZVFHMIN-RV32-NEXT:    vslide1down.vx v9, v9, a2
+; ZVFHMIN-RV32-NEXT:    fmv.x.h a2, fa5
+; ZVFHMIN-RV32-NEXT:    flh fa5, 28(sp)
+; ZVFHMIN-RV32-NEXT:    xor a4, a4, a1
+; ZVFHMIN-RV32-NEXT:    vslide1down.vx v9, v9, a4
+; ZVFHMIN-RV32-NEXT:    xor a2, a2, a1
+; ZVFHMIN-RV32-NEXT:    fmv.x.h a3, fa5
+; ZVFHMIN-RV32-NEXT:    flh fa5, 30(sp)
+; ZVFHMIN-RV32-NEXT:    vslide1down.vx v9, v9, a2
+; ZVFHMIN-RV32-NEXT:    xor a3, a3, a1
+; ZVFHMIN-RV32-NEXT:    vslide1down.vx v9, v9, a3
+; ZVFHMIN-RV32-NEXT:    fmv.x.h a2, fa5
+; ZVFHMIN-RV32-NEXT:    xor a1, a2, a1
+; ZVFHMIN-RV32-NEXT:    li a2, 255
+; ZVFHMIN-RV32-NEXT:    vmv.s.x v0, a2
+; ZVFHMIN-RV32-NEXT:    vslide1down.vx v9, v9, a1
+; ZVFHMIN-RV32-NEXT:    vslidedown.vi v9, v8, 8, v0.t
+; ZVFHMIN-RV32-NEXT:    vse16.v v9, (a0)
+; ZVFHMIN-RV32-NEXT:    addi sp, s0, -64
+; ZVFHMIN-RV32-NEXT:    lw ra, 60(sp) # 4-byte Folded Reload
+; ZVFHMIN-RV32-NEXT:    lw s0, 56(sp) # 4-byte Folded Reload
+; ZVFHMIN-RV32-NEXT:    addi sp, sp, 64
+; ZVFHMIN-RV32-NEXT:    ret
+;
+; ZVFHMIN-RV64-LABEL: fneg_v16f16:
+; ZVFHMIN-RV64:       # %bb.0:
+; ZVFHMIN-RV64-NEXT:    addi sp, sp, -64
+; ZVFHMIN-RV64-NEXT:    .cfi_def_cfa_offset 64
+; ZVFHMIN-RV64-NEXT:    sd ra, 56(sp) # 8-byte Folded Spill
+; ZVFHMIN-RV64-NEXT:    sd s0, 48(sp) # 8-byte Folded Spill
+; ZVFHMIN-RV64-NEXT:    .cfi_offset ra, -8
+; ZVFHMIN-RV64-NEXT:    .cfi_offset s0, -16
+; ZVFHMIN-RV64-NEXT:    addi s0, sp, 64
+; ZVFHMIN-RV64-NEXT:    .cfi_def_cfa s0, 0
+; ZVFHMIN-RV64-NEXT:    andi sp, sp, -32
+; ZVFHMIN-RV64-NEXT:    vsetivli zero, 16, e16, m1, ta, mu
+; ZVFHMIN-RV64-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-RV64-NEXT:    mv a1, sp
+; ZVFHMIN-RV64-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-RV64-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-RV64-NEXT:    flh fa4, 0(sp)
+; ZVFHMIN-RV64-NEXT:    fmv.x.h a2, fa5
+; ZVFHMIN-RV64-NEXT:    flh fa5, 4(sp)
+; ZVFHMIN-RV64-NEXT:    flh fa3, 6(sp)
+; ZVFHMIN-RV64-NEXT:    fmv.x.h a3, fa4
+; ZVFHMIN-RV64-NEXT:    flh fa4, 8(sp)
+; ZVFHMIN-RV64-NEXT:    fmv.x.h a4, fa5
+; ZVFHMIN-RV64-NEXT:    fmv.x.h a5, fa3
+; ZVFHMIN-RV64-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-RV64-NEXT:    fmv.x.h a6, fa4
+; ZVFHMIN-RV64-NEXT:    lui a1, 1048568
+; ZVFHMIN-RV64-NEXT:    xor a2, a2, a1
+; ZVFHMIN-RV64-NEXT:    fmv.x.h a7, fa5
+; ZVFHMIN-RV64-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-RV64-NEXT:    lui t0, 8
+; ZVFHMIN-RV64-NEXT:    xor a3, a3, t0
+; ZVFHMIN-RV64-NEXT:    vmv.v.x v8, a3
+; ZVFHMIN-RV64-NEXT:    fmv.x.h a3, fa5
+; ZVFHMIN-RV64-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-RV64-NEXT:    vslide1down.vx v8, v8, a2
+; ZVFHMIN-RV64-NEXT:    xor a4, a4, a1
+; ZVFHMIN-RV64-NEXT:    vslide1down.vx v8, v8, a4
+; ZVFHMIN-RV64-NEXT:    fmv.x.h a2, fa5
+; ZVFHMIN-RV64-NEXT:    flh fa5, 18(sp)
+; ZVFHMIN-RV64-NEXT:    xor a5, a5, a1
+; ZVFHMIN-RV64-NEXT:    vslide1down.vx v8, v8, a5
+; ZVFHMIN-RV64-NEXT:    xor a4, a6, a1
+; ZVFHMIN-RV64-NEXT:    fmv.x.h a5, fa5
+; ZVFHMIN-RV64-NEXT:    flh fa5, 16(sp)
+; ZVFHMIN-RV64-NEXT:    vslide1down.vx v8, v8, a4
+; ZVFHMIN-RV64-NEXT:    xor a4, a7, a1
+; ZVFHMIN-RV64-NEXT:    vslide1down.vx v8, v8, a4
+; ZVFHMIN-RV64-NEXT:    fmv.x.h a4, fa5
+; ZVFHMIN-RV64-NEXT:    flh fa5, 20(sp)
+; ZVFHMIN-RV64-NEXT:    xor a3, a3, a1
+; ZVFHMIN-RV64-NEXT:    vslide1down.vx v8, v8, a3
+; ZVFHMIN-RV64-NEXT:    xor a2, a2, a1
+; ZVFHMIN-RV64-NEXT:    fmv.x.h a3, fa5
+; ZVFHMIN-RV64-NEXT:    flh fa5, 22(sp)
+; ZVFHMIN-RV64-NEXT:    vslide1down.vx v8, v8, a2
+; ZVFHMIN-RV64-NEXT:    xor a2, a4, t0
+; ZVFHMIN-RV64-NEXT:    vmv.v.x v9, a2
+; ZVFHMIN-RV64-NEXT:    fmv.x.h a2, fa5
+; ZVFHMIN-RV64-NEXT:    flh fa5, 24(sp)
+; ZVFHMIN-RV64-NEXT:    xor a5, a5, a1
+; ZVFHMIN-RV64-NEXT:    vslide1down.vx v9, v9, a5
+; ZVFHMIN-RV64-NEXT:    xor a3, a3, a1
+; ZVFHMIN-RV64-NEXT:    fmv.x.h a4, fa5
+; ZVFHMIN-RV64-NEXT:    flh fa5, 26(sp)
+; ZVFHMIN-RV64-NEXT:    vslide1down.vx v9, v9, a3
+; ZVFHMIN-RV64-NEXT:    xor a2, a2, a1
+; ZVFHMIN-RV64-NEXT:    vslide1down.vx v9, v9, a2
+; ZVFHMIN-RV64-NEXT:    fmv.x.h a2, fa5
+; ZVFHMIN-RV64-NEXT:    flh fa5, 28(sp)
+; ZVFHMIN-RV64-NEXT:    xor a4, a4, a1
+; ZVFHMIN-RV64-NEXT:    vslide1down.vx v9, v9, a4
+; ZVFHMIN-RV64-NEXT:    xor a2, a2, a1
+; ZVFHMIN-RV64-NEXT:    fmv.x.h a3, fa5
+; ZVFHMIN-RV64-NEXT:    flh fa5, 30(sp)
+; ZVFHMIN-RV64-NEXT:    vslide1down.vx v9, v9, a2
+; ZVFHMIN-RV64-NEXT:    xor a3, a3, a1
+; ZVFHMIN-RV64-NEXT:    vslide1down.vx v9, v9, a3
+; ZVFHMIN-RV64-NEXT:    fmv.x.h a2, fa5
+; ZVFHMIN-RV64-NEXT:    xor a1, a2, a1
+; ZVFHMIN-RV64-NEXT:    li a2, 255
+; ZVFHMIN-RV64-NEXT:    vmv.s.x v0, a2
+; ZVFHMIN-RV64-NEXT:    vslide1down.vx v9, v9, a1
+; ZVFHMIN-RV64-NEXT:    vslidedown.vi v9, v8, 8, v0.t
+; ZVFHMIN-RV64-NEXT:    vse16.v v9, (a0)
+; ZVFHMIN-RV64-NEXT:    addi sp, s0, -64
+; ZVFHMIN-RV64-NEXT:    ld ra, 56(sp) # 8-byte Folded Reload
+; ZVFHMIN-RV64-NEXT:    ld s0, 48(sp) # 8-byte Folded Reload
+; ZVFHMIN-RV64-NEXT:    addi sp, sp, 64
+; ZVFHMIN-RV64-NEXT:    ret
   %a = load <16 x half>, ptr %x
   %b = fneg <16 x half> %a
   store <16 x half> %b, ptr %x
@@ -4931,21 +5286,60 @@ define void @fmsub_vf_v8f16(ptr %x, ptr %y, half %z) {
 ;
 ; ZVFHMIN-LABEL: fmsub_vf_v8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-NEXT:    addi sp, sp, -16
+; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
+; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, mu
 ; ZVFHMIN-NEXT:    vle16.v v9, (a1)
+; ZVFHMIN-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-NEXT:    mv a1, sp
+; ZVFHMIN-NEXT:    vse16.v v9, (a1)
+; ZVFHMIN-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-NEXT:    flh fa4, 0(sp)
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    vmv.v.x v10, a1
-; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vxor.vx v9, v9, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v9
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT:    fmv.x.h a2, fa5
+; ZVFHMIN-NEXT:    flh fa5, 4(sp)
+; ZVFHMIN-NEXT:    fmv.x.h a3, fa4
+; ZVFHMIN-NEXT:    vmv.v.x v9, a1
+; ZVFHMIN-NEXT:    lui a1, 1048568
+; ZVFHMIN-NEXT:    fmv.x.h a4, fa5
+; ZVFHMIN-NEXT:    flh fa5, 6(sp)
+; ZVFHMIN-NEXT:    lui a5, 8
+; ZVFHMIN-NEXT:    xor a3, a3, a5
+; ZVFHMIN-NEXT:    vmv.v.x v10, a3
+; ZVFHMIN-NEXT:    fmv.x.h a3, fa5
+; ZVFHMIN-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-NEXT:    xor a2, a2, a1
+; ZVFHMIN-NEXT:    vslide1down.vx v10, v10, a2
+; ZVFHMIN-NEXT:    xor a4, a4, a1
+; ZVFHMIN-NEXT:    fmv.x.h a2, fa5
+; ZVFHMIN-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-NEXT:    vslide1down.vx v10, v10, a4
+; ZVFHMIN-NEXT:    xor a3, a3, a1
+; ZVFHMIN-NEXT:    vslide1down.vx v10, v10, a3
+; ZVFHMIN-NEXT:    fmv.x.h a3, fa5
+; ZVFHMIN-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-NEXT:    xor a2, a2, a1
+; ZVFHMIN-NEXT:    xor a3, a3, a5
+; ZVFHMIN-NEXT:    vmv.v.x v11, a3
+; ZVFHMIN-NEXT:    fmv.x.h a3, fa5
+; ZVFHMIN-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-NEXT:    vslide1down.vx v11, v11, a2
+; ZVFHMIN-NEXT:    xor a3, a3, a1
+; ZVFHMIN-NEXT:    vslide1down.vx v11, v11, a3
+; ZVFHMIN-NEXT:    fmv.x.h a2, fa5
+; ZVFHMIN-NEXT:    xor a1, a2, a1
+; ZVFHMIN-NEXT:    vmv.v.i v0, 15
+; ZVFHMIN-NEXT:    vslide1down.vx v11, v11, a1
+; ZVFHMIN-NEXT:    vslidedown.vi v11, v10, 4, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v8, v9, v11
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v11, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v8
 ; ZVFHMIN-NEXT:    vse16.v v9, (a0)
+; ZVFHMIN-NEXT:    addi sp, sp, 16
 ; ZVFHMIN-NEXT:    ret
   %a = load <8 x half>, ptr %x
   %b = load <8 x half>, ptr %y
@@ -4969,26 +5363,65 @@ define void @fmsub_vf_v6f16(ptr %x, ptr %y, half %z) {
 ;
 ; ZVFHMIN-LABEL: fmsub_vf_v6f16:
 ; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    addi sp, sp, -16
+; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
 ; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v8, (a0)
 ; ZVFHMIN-NEXT:    vle16.v v9, (a1)
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-NEXT:    li a2, 192
-; ZVFHMIN-NEXT:    vmv.s.x v0, a2
-; ZVFHMIN-NEXT:    fmv.x.h a2, fa0
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vmv.v.x v10, a2
-; ZVFHMIN-NEXT:    vmerge.vxm v10, v10, a1, v0
-; ZVFHMIN-NEXT:    lui a1, 8
-; ZVFHMIN-NEXT:    vxor.vx v9, v9, a1
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v10
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-NEXT:    mv a1, sp
+; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, mu
+; ZVFHMIN-NEXT:    vse16.v v9, (a1)
+; ZVFHMIN-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
+; ZVFHMIN-NEXT:    fmv.x.h a2, fa5
+; ZVFHMIN-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-NEXT:    fmv.x.h a3, fa5
+; ZVFHMIN-NEXT:    li a4, 192
+; ZVFHMIN-NEXT:    vmv.s.x v0, a4
+; ZVFHMIN-NEXT:    fmv.x.h a4, fa5
+; ZVFHMIN-NEXT:    flh fa5, 4(sp)
+; ZVFHMIN-NEXT:    vmv.v.x v9, a1
+; ZVFHMIN-NEXT:    vmerge.vxm v9, v9, a3, v0
+; ZVFHMIN-NEXT:    lui a1, 1048568
+; ZVFHMIN-NEXT:    fmv.x.h a3, fa5
+; ZVFHMIN-NEXT:    flh fa5, 6(sp)
+; ZVFHMIN-NEXT:    lui a5, 8
+; ZVFHMIN-NEXT:    xor a4, a4, a5
+; ZVFHMIN-NEXT:    vmv.v.x v10, a4
+; ZVFHMIN-NEXT:    fmv.x.h a4, fa5
+; ZVFHMIN-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-NEXT:    xor a2, a2, a1
+; ZVFHMIN-NEXT:    vslide1down.vx v10, v10, a2
+; ZVFHMIN-NEXT:    xor a3, a3, a1
+; ZVFHMIN-NEXT:    fmv.x.h a2, fa5
+; ZVFHMIN-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-NEXT:    vslide1down.vx v10, v10, a3
+; ZVFHMIN-NEXT:    xor a4, a4, a1
+; ZVFHMIN-NEXT:    vslide1down.vx v10, v10, a4
+; ZVFHMIN-NEXT:    fmv.x.h a3, fa5
+; ZVFHMIN-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-NEXT:    xor a2, a2, a1
+; ZVFHMIN-NEXT:    xor a3, a3, a5
+; ZVFHMIN-NEXT:    vmv.v.x v11, a3
+; ZVFHMIN-NEXT:    fmv.x.h a3, fa5
+; ZVFHMIN-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-NEXT:    vslide1down.vx v11, v11, a2
+; ZVFHMIN-NEXT:    xor a3, a3, a1
+; ZVFHMIN-NEXT:    vslide1down.vx v11, v11, a3
+; ZVFHMIN-NEXT:    fmv.x.h a2, fa5
+; ZVFHMIN-NEXT:    xor a1, a2, a1
+; ZVFHMIN-NEXT:    vmv.v.i v0, 15
+; ZVFHMIN-NEXT:    vslide1down.vx v11, v11, a1
+; ZVFHMIN-NEXT:    vslidedown.vi v11, v10, 4, v0.t
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v9, v11, v10
 ; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    vse16.v v8, (a0)
+; ZVFHMIN-NEXT:    addi sp, sp, 16
 ; ZVFHMIN-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = load <6 x half>, ptr %y

diff  --git a/llvm/test/CodeGen/Thumb2/mve-fp-negabs.ll b/llvm/test/CodeGen/Thumb2/mve-fp-negabs.ll
index 8b8b897a32eda4..0e993f35ce85df 100644
--- a/llvm/test/CodeGen/Thumb2/mve-fp-negabs.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-fp-negabs.ll
@@ -5,8 +5,22 @@
 define arm_aapcs_vfpcc <8 x half> @fneg_float16_t(<8 x half> %src) {
 ; CHECK-MVE-LABEL: fneg_float16_t:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vmov.i16 q1, #0x8000
-; CHECK-MVE-NEXT:    veor q0, q0, q1
+; CHECK-MVE-NEXT:    vmovx.f16 s4, s0
+; CHECK-MVE-NEXT:    vneg.f16 s0, s0
+; CHECK-MVE-NEXT:    vneg.f16 s4, s4
+; CHECK-MVE-NEXT:    vins.f16 s0, s4
+; CHECK-MVE-NEXT:    vmovx.f16 s4, s1
+; CHECK-MVE-NEXT:    vneg.f16 s4, s4
+; CHECK-MVE-NEXT:    vneg.f16 s1, s1
+; CHECK-MVE-NEXT:    vins.f16 s1, s4
+; CHECK-MVE-NEXT:    vmovx.f16 s4, s2
+; CHECK-MVE-NEXT:    vneg.f16 s4, s4
+; CHECK-MVE-NEXT:    vneg.f16 s2, s2
+; CHECK-MVE-NEXT:    vins.f16 s2, s4
+; CHECK-MVE-NEXT:    vmovx.f16 s4, s3
+; CHECK-MVE-NEXT:    vneg.f16 s4, s4
+; CHECK-MVE-NEXT:    vneg.f16 s3, s3
+; CHECK-MVE-NEXT:    vins.f16 s3, s4
 ; CHECK-MVE-NEXT:    bx lr
 ;
 ; CHECK-MVEFP-LABEL: fneg_float16_t:
@@ -21,8 +35,10 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @fneg_float32_t(<4 x float> %src) {
 ; CHECK-MVE-LABEL: fneg_float32_t:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vmov.i32 q1, #0x80000000
-; CHECK-MVE-NEXT:    veor q0, q0, q1
+; CHECK-MVE-NEXT:    vneg.f32 s3, s3
+; CHECK-MVE-NEXT:    vneg.f32 s2, s2
+; CHECK-MVE-NEXT:    vneg.f32 s1, s1
+; CHECK-MVE-NEXT:    vneg.f32 s0, s0
 ; CHECK-MVE-NEXT:    bx lr
 ;
 ; CHECK-MVEFP-LABEL: fneg_float32_t:
@@ -37,17 +53,20 @@ entry:
 define arm_aapcs_vfpcc <2 x double> @fneg_float64_t(<2 x double> %src) {
 ; CHECK-LABEL: fneg_float64_t:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    adr r0, .LCPI2_0
-; CHECK-NEXT:    vldrw.u32 q1, [r0]
-; CHECK-NEXT:    veor q0, q0, q1
+; CHECK-NEXT:    .pad #16
+; CHECK-NEXT:    sub sp, #16
+; CHECK-NEXT:    vstr d1, [sp]
+; CHECK-NEXT:    ldrb.w r0, [sp, #7]
+; CHECK-NEXT:    vstr d0, [sp, #8]
+; CHECK-NEXT:    ldrb.w r1, [sp, #15]
+; CHECK-NEXT:    eor r0, r0, #128
+; CHECK-NEXT:    strb.w r0, [sp, #7]
+; CHECK-NEXT:    vldr d1, [sp]
+; CHECK-NEXT:    eor r0, r1, #128
+; CHECK-NEXT:    strb.w r0, [sp, #15]
+; CHECK-NEXT:    vldr d0, [sp, #8]
+; CHECK-NEXT:    add sp, #16
 ; CHECK-NEXT:    bx lr
-; CHECK-NEXT:    .p2align 4
-; CHECK-NEXT:  @ %bb.1:
-; CHECK-NEXT:  .LCPI2_0:
-; CHECK-NEXT:    .long 0 @ 0x0
-; CHECK-NEXT:    .long 2147483648 @ 0x80000000
-; CHECK-NEXT:    .long 0 @ 0x0
-; CHECK-NEXT:    .long 2147483648 @ 0x80000000
 entry:
   %0 = fsub nnan ninf nsz <2 x double> <double 0.0e0, double 0.0e0>, %src
   ret <2 x double> %0


        

