[llvm] 884ff9e - [LegalizeVectorOps] Make the AArch64 hack in ExpandFNEG more specific.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 16 21:49:17 PDT 2024


Author: Craig Topper
Date: 2024-09-16T21:48:42-07:00
New Revision: 884ff9e3f9741ac282b6cf8087b8d3f62b8e138a

URL: https://github.com/llvm/llvm-project/commit/884ff9e3f9741ac282b6cf8087b8d3f62b8e138a
DIFF: https://github.com/llvm/llvm-project/commit/884ff9e3f9741ac282b6cf8087b8d3f62b8e138a.diff

LOG: [LegalizeVectorOps] Make the AArch64 hack in ExpandFNEG more specific.

Only scalarize single-element vectors when vector FSUB is not
supported and scalar FNEG is supported.
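
A minimal IR sketch of the case the stricter check targets (a reduced
example for illustration, not taken from this commit's tests): a
single-element vector fneg that AArch64 prefers to unroll to a scalar
fneg rather than expand to an integer xor.

    ; Reduced illustration (hypothetical test, not from this commit):
    ; a v1f64 fneg that should be unrolled to scalar fneg on AArch64.
    define <1 x double> @fneg_v1f64(<1 x double> %v) {
      %r = fneg <1 x double> %v
      ret <1 x double> %r
    }

Multi-element vectors on the other affected targets (NVPTX, RISC-V with
Zvfhmin, MVE) no longer take the unrolling path and keep the vector xor
expansion, which is what the updated test checks below reflect.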

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
    llvm/test/CodeGen/NVPTX/f16-instructions.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
    llvm/test/CodeGen/Thumb2/mve-fp-negabs.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 29dae4e27c7689..b8ec162895105d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -1793,9 +1793,13 @@ SDValue VectorLegalizer::ExpandFNEG(SDNode *Node) {
   EVT VT = Node->getValueType(0);
   EVT IntVT = VT.changeVectorElementTypeToInteger();
 
-  // FIXME: The FSUB check is here to force unrolling v1f64 vectors on AArch64.
-  if (!TLI.isOperationLegalOrCustom(ISD::XOR, IntVT) ||
-      !(TLI.isOperationLegalOrCustom(ISD::FSUB, VT) || VT.isScalableVector()))
+  if (!TLI.isOperationLegalOrCustom(ISD::XOR, IntVT))
+    return SDValue();
+
+  // FIXME: This is to force unrolling v1f64 vectors for AArch64.
+  if (VT.isFixedLengthVector() && VT.getVectorNumElements() == 1 &&
+      !TLI.isOperationLegalOrCustom(ISD::FSUB, VT) &&
+      TLI.isOperationLegal(ISD::FNEG, VT.getVectorElementType()))
     return SDValue();
 
   SDLoc DL(Node);

diff --git a/llvm/test/CodeGen/NVPTX/f16-instructions.ll b/llvm/test/CodeGen/NVPTX/f16-instructions.ll
index 14e02a49f6e5e4..72e3bbfa91cd81 100644
--- a/llvm/test/CodeGen/NVPTX/f16-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/f16-instructions.ll
@@ -1192,8 +1192,7 @@ define half @test_neg_f16(half noundef %arg) #0 {
 ; CHECK-LABEL: test_neg_f16x2(
 ; CHECK-F16-NOFTZ: neg.f16x2
 ; CHECK-F16-FTZ: neg.ftz.f16x2
-; CHECK-NOF16: xor.b16  	%rs{{.*}}, %rs{{.*}}, -32768
-; CHECK-NOF16: xor.b16  	%rs{{.*}}, %rs{{.*}}, -32768
+; CHECK-NOF16: xor.b32 %r{{.*}}, %r{{.*}}, -2147450880
 define <2 x half> @test_neg_f16x2(<2 x half> noundef %arg) #0 {
   %res = fneg <2 x half> %arg
   ret <2 x half> %res

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
index d996a9c05aca4d..34a45c57441789 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
@@ -428,50 +428,11 @@ define void @fneg_v8f16(ptr %x) {
 ;
 ; ZVFHMIN-LABEL: fneg_v8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    addi sp, sp, -16
-; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, mu
+; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-NEXT:    mv a1, sp
-; ZVFHMIN-NEXT:    vse16.v v8, (a1)
-; ZVFHMIN-NEXT:    flh fa5, 2(sp)
-; ZVFHMIN-NEXT:    flh fa4, 0(sp)
-; ZVFHMIN-NEXT:    flh fa3, 4(sp)
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-NEXT:    fmv.x.h a2, fa4
-; ZVFHMIN-NEXT:    lui a3, 1048568
-; ZVFHMIN-NEXT:    fmv.x.h a4, fa3
-; ZVFHMIN-NEXT:    flh fa5, 6(sp)
-; ZVFHMIN-NEXT:    lui a5, 8
-; ZVFHMIN-NEXT:    xor a2, a2, a5
-; ZVFHMIN-NEXT:    vmv.v.x v8, a2
-; ZVFHMIN-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-NEXT:    flh fa5, 10(sp)
-; ZVFHMIN-NEXT:    xor a1, a1, a3
-; ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a1
-; ZVFHMIN-NEXT:    xor a4, a4, a3
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-NEXT:    flh fa5, 8(sp)
-; ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a4
-; ZVFHMIN-NEXT:    xor a2, a2, a3
-; ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a2
-; ZVFHMIN-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-NEXT:    flh fa5, 12(sp)
-; ZVFHMIN-NEXT:    xor a1, a1, a3
-; ZVFHMIN-NEXT:    xor a2, a2, a5
-; ZVFHMIN-NEXT:    vmv.v.x v9, a2
-; ZVFHMIN-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-NEXT:    flh fa5, 14(sp)
-; ZVFHMIN-NEXT:    vslide1down.vx v9, v9, a1
-; ZVFHMIN-NEXT:    xor a2, a2, a3
-; ZVFHMIN-NEXT:    vslide1down.vx v9, v9, a2
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-NEXT:    xor a1, a1, a3
-; ZVFHMIN-NEXT:    vmv.v.i v0, 15
-; ZVFHMIN-NEXT:    vslide1down.vx v9, v9, a1
-; ZVFHMIN-NEXT:    vslidedown.vi v9, v8, 4, v0.t
-; ZVFHMIN-NEXT:    vse16.v v9, (a0)
-; ZVFHMIN-NEXT:    addi sp, sp, 16
+; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    vxor.vx v8, v8, a1
+; ZVFHMIN-NEXT:    vse16.v v8, (a0)
 ; ZVFHMIN-NEXT:    ret
   %a = load <8 x half>, ptr %x
   %b = fneg <8 x half> %a
@@ -490,52 +451,11 @@ define void @fneg_v6f16(ptr %x) {
 ;
 ; ZVFHMIN-LABEL: fneg_v6f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    addi sp, sp, -16
-; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
 ; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-NEXT:    mv a1, sp
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vse16.v v8, (a1)
-; ZVFHMIN-NEXT:    flh fa5, 2(sp)
-; ZVFHMIN-NEXT:    flh fa4, 0(sp)
-; ZVFHMIN-NEXT:    flh fa3, 4(sp)
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-NEXT:    fmv.x.h a2, fa4
-; ZVFHMIN-NEXT:    lui a3, 1048568
-; ZVFHMIN-NEXT:    fmv.x.h a4, fa3
-; ZVFHMIN-NEXT:    flh fa5, 6(sp)
-; ZVFHMIN-NEXT:    lui a5, 8
-; ZVFHMIN-NEXT:    xor a2, a2, a5
-; ZVFHMIN-NEXT:    vmv.v.x v8, a2
-; ZVFHMIN-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-NEXT:    flh fa5, 10(sp)
-; ZVFHMIN-NEXT:    xor a1, a1, a3
-; ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a1
-; ZVFHMIN-NEXT:    xor a4, a4, a3
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-NEXT:    flh fa5, 8(sp)
-; ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a4
-; ZVFHMIN-NEXT:    xor a2, a2, a3
-; ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a2
-; ZVFHMIN-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-NEXT:    flh fa5, 12(sp)
-; ZVFHMIN-NEXT:    xor a1, a1, a3
-; ZVFHMIN-NEXT:    xor a2, a2, a5
-; ZVFHMIN-NEXT:    vmv.v.x v9, a2
-; ZVFHMIN-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-NEXT:    flh fa5, 14(sp)
-; ZVFHMIN-NEXT:    vslide1down.vx v9, v9, a1
-; ZVFHMIN-NEXT:    xor a2, a2, a3
-; ZVFHMIN-NEXT:    vslide1down.vx v9, v9, a2
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-NEXT:    xor a1, a1, a3
-; ZVFHMIN-NEXT:    vmv.v.i v0, 15
-; ZVFHMIN-NEXT:    vslide1down.vx v9, v9, a1
-; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, mf2, ta, mu
-; ZVFHMIN-NEXT:    vslidedown.vi v9, v8, 4, v0.t
-; ZVFHMIN-NEXT:    vse16.v v9, (a0)
-; ZVFHMIN-NEXT:    addi sp, sp, 16
+; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    vxor.vx v8, v8, a1
+; ZVFHMIN-NEXT:    vse16.v v8, (a0)
 ; ZVFHMIN-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = fneg <6 x half> %a
@@ -2013,48 +1933,50 @@ define void @copysign_neg_v8f16(ptr %x, ptr %y) {
 ; ZVFHMIN-ZFH-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, mu
 ; ZVFHMIN-ZFH-RV32-NEXT:    vle16.v v8, (a1)
 ; ZVFHMIN-ZFH-RV32-NEXT:    vle16.v v9, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT:    lui a1, 8
+; ZVFHMIN-ZFH-RV32-NEXT:    vxor.vx v8, v8, a1
+; ZVFHMIN-ZFH-RV32-NEXT:    mv a1, sp
+; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v9, (a1)
 ; ZVFHMIN-ZFH-RV32-NEXT:    addi a1, sp, 16
 ; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v8, (a1)
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 30(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 28(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa3, 24(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa2, 26(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 18(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa3, 0(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa2, 16(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a1, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa3, fa2
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 4(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa3, 20(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa2, 6(sp)
 ; ZVFHMIN-ZFH-RV32-NEXT:    flh fa1, 22(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa0, 20(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    flh ft0, 16(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    flh ft1, 18(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    mv a1, sp
-; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v9, (a1)
-; ZVFHMIN-ZFH-RV32-NEXT:    flh ft2, 2(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    flh ft3, 0(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h ft1, ft2, ft1
-; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a1, ft1
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h ft0, ft3, ft0
-; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a2, ft0
-; ZVFHMIN-ZFH-RV32-NEXT:    flh ft0, 4(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a2, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa4, fa3
+; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a3, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa2, fa1
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 10(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa3, 26(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a4, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa2, 24(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa4, fa4, fa3
+; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a5, fa4
 ; ZVFHMIN-ZFH-RV32-NEXT:    vmv.v.x v8, a2
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa5, fa2
+; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a2, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 28(sp)
 ; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFH-RV32-NEXT:    flh ft1, 6(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa0, ft0, fa0
-; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa1, ft1, fa1
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa0, 10(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a1, fa1
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa1, 8(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa2, fa0, fa2
-; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a1, fa2
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa3, fa1, fa3
-; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a2, fa3
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa3, 12(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v8, v8, a3
+; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v8, v8, a4
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a1, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 30(sp)
 ; ZVFHMIN-ZFH-RV32-NEXT:    vmv.v.x v9, a2
+; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v9, v9, a5
 ; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v9, v9, a1
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa2, 14(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa4, fa3, fa4
-; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a1, fa4
-; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v9, v9, a1
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa5, fa2, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa5, fa4
 ; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a1, fa5
 ; ZVFHMIN-ZFH-RV32-NEXT:    vmv.v.i v0, 15
 ; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v9, v9, a1
@@ -2070,48 +1992,50 @@ define void @copysign_neg_v8f16(ptr %x, ptr %y) {
 ; ZVFHMIN-ZFH-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, mu
 ; ZVFHMIN-ZFH-RV64-NEXT:    vle16.v v8, (a1)
 ; ZVFHMIN-ZFH-RV64-NEXT:    vle16.v v9, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT:    lui a1, 8
+; ZVFHMIN-ZFH-RV64-NEXT:    vxor.vx v8, v8, a1
+; ZVFHMIN-ZFH-RV64-NEXT:    mv a1, sp
+; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v9, (a1)
 ; ZVFHMIN-ZFH-RV64-NEXT:    addi a1, sp, 16
 ; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v8, (a1)
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 30(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 28(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa3, 24(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa2, 26(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 18(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa3, 0(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa2, 16(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a1, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa3, fa2
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 4(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa3, 20(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa2, 6(sp)
 ; ZVFHMIN-ZFH-RV64-NEXT:    flh fa1, 22(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa0, 20(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    flh ft0, 16(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    flh ft1, 18(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    mv a1, sp
-; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v9, (a1)
-; ZVFHMIN-ZFH-RV64-NEXT:    flh ft2, 2(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    flh ft3, 0(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h ft1, ft2, ft1
-; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a1, ft1
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h ft0, ft3, ft0
-; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a2, ft0
-; ZVFHMIN-ZFH-RV64-NEXT:    flh ft0, 4(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a2, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa4, fa3
+; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a3, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa2, fa1
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 10(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa3, 26(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a4, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa2, 24(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa4, fa4, fa3
+; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a5, fa4
 ; ZVFHMIN-ZFH-RV64-NEXT:    vmv.v.x v8, a2
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa5, fa2
+; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a2, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 28(sp)
 ; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFH-RV64-NEXT:    flh ft1, 6(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa0, ft0, fa0
-; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa1, ft1, fa1
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa0, 10(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a1, fa1
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa1, 8(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa2, fa0, fa2
-; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a1, fa2
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa3, fa1, fa3
-; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a2, fa3
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa3, 12(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v8, v8, a3
+; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v8, v8, a4
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a1, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 30(sp)
 ; ZVFHMIN-ZFH-RV64-NEXT:    vmv.v.x v9, a2
+; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v9, v9, a5
 ; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v9, v9, a1
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa2, 14(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa4, fa3, fa4
-; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a1, fa4
-; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v9, v9, a1
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa5, fa2, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa5, fa4
 ; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a1, fa5
 ; ZVFHMIN-ZFH-RV64-NEXT:    vmv.v.i v0, 15
 ; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v9, v9, a1
@@ -2127,83 +2051,76 @@ define void @copysign_neg_v8f16(ptr %x, ptr %y) {
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, mu
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v8, (a1)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v9, (a0)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a1, sp, 16
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a1)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 30(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa4, 28(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 24(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a2, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa4, 26(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa3, 22(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lui a1, 8
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vxor.vx v8, v8, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    mv a2, sp
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v9, (a2)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a2, sp, 16
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a2)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa4, 18(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 20(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a5, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a6, fa3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a4, fa4
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa4, 16(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a7, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    not a1, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    not a2, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h t0, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 18(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    not a4, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    not t1, a5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    not a6, a6
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    mv a5, sp
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v9, (a5)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 2(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    not a7, a7
-; ZVFHMIN-ZFHIN-RV32-NEXT:    not t0, t0
-; ZVFHMIN-ZFHIN-RV32-NEXT:    not t2, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa3, 4(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a5, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    lui t3, 8
-; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a3, t3, -1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and t4, a5, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 0(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    lui a5, 1048568
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and t2, t2, a5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or t2, t4, t2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h t4, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and t4, t4, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and t0, t0, t3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 4(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or t0, t4, t0
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.x v8, t0
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, t2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a6, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a7, fa3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a2, a1, -1
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h t0, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and t0, t0, a3
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 6(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a7, a7, a5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a7, t0, a7
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a7
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a7, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a7, a7, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and t1, a3, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lui a3, 1048568
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a4, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h t2, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 22(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a4, t1, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, a5, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a6, a6, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h t1, fa5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 10(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a6, a6, a5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a6, a7, a6
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a6
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a5, a5, a6
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.x v8, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a4, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 26(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, a7, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a6, t0, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a5, a5, a6
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a6, fa5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 8(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a6, a6, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a7, t1, a5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a6, a6, a7
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a7, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a7, a7, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a4, t3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, t2, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a7, t1, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h t0, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a5, a5, a7
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a4, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a5, fa5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 12(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a4, a7, a4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.x v9, a4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v9, v9, a6
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a4, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a4, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a6, a6, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a4, a4, a6
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a5, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a5, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a6, t0, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a6, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.x v9, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a1, fa5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 14(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a2, a5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a2, a4, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v9, v9, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a2, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a1, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v9, v9, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, a5, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a1, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a4, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 30(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a5, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a4, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a1, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a1, a3
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a2, a1
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.i v0, 15
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v9, v9, a1
@@ -2219,83 +2136,76 @@ define void @copysign_neg_v8f16(ptr %x, ptr %y) {
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, mu
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vle16.v v8, (a1)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vle16.v v9, (a0)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a1, sp, 16
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a1)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 30(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa4, 28(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 24(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a2, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa4, 26(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa3, 22(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lui a1, 8
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vxor.vx v8, v8, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    mv a2, sp
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v9, (a2)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a2, sp, 16
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a2)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa4, 18(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 20(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a5, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a6, fa3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a4, fa4
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa4, 16(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a7, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    not a1, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    not a2, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h t0, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 18(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    not a4, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    not t1, a5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    not a6, a6
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    mv a5, sp
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v9, (a5)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 2(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    not a7, a7
-; ZVFHMIN-ZFHIN-RV64-NEXT:    not t0, t0
-; ZVFHMIN-ZFHIN-RV64-NEXT:    not t2, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa3, 4(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a5, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    lui t3, 8
-; ZVFHMIN-ZFHIN-RV64-NEXT:    addiw a3, t3, -1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and t4, a5, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 0(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    lui a5, 1048568
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and t2, t2, a5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or t2, t4, t2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h t4, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and t4, t4, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and t0, t0, t3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 4(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or t0, t4, t0
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.x v8, t0
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, t2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a6, fa4
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a7, fa3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addiw a2, a1, -1
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h t0, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and t0, t0, a3
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 6(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a7, a7, a5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a7, t0, a7
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a7
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a7, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a7, a7, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and t1, a3, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lui a3, 1048568
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a4, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h t2, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 22(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a4, t1, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, a5, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a6, a6, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h t1, fa5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 10(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a6, a6, a5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a6, a7, a6
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a6
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a5, a5, a6
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.x v8, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a4, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 26(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, a7, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a6, t0, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a5, a5, a6
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a6, fa5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 8(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a6, a6, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a7, t1, a5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a6, a6, a7
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a7, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a7, a7, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a4, t3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, t2, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a7, t1, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h t0, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a5, a5, a7
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a4, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a5, fa5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 12(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a4, a7, a4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.x v9, a4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v9, v9, a6
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a4, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a4, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a6, a6, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a4, a4, a6
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a5, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a5, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a6, t0, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a6, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.x v9, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a1, fa5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 14(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a2, a5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a2, a4, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v9, v9, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a2, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a1, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v9, v9, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, a5, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a1, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a4, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 30(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a5, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a4, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a1, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a1, a3
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a2, a1
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.i v0, 15
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v9, v9, a1
@@ -2328,49 +2238,51 @@ define void @copysign_neg_v6f16(ptr %x, ptr %y) {
 ; ZVFHMIN-ZFH-RV32-NEXT:    vsetivli zero, 6, e16, mf2, ta, ma
 ; ZVFHMIN-ZFH-RV32-NEXT:    vle16.v v8, (a1)
 ; ZVFHMIN-ZFH-RV32-NEXT:    vle16.v v9, (a0)
-; ZVFHMIN-ZFH-RV32-NEXT:    addi a1, sp, 16
+; ZVFHMIN-ZFH-RV32-NEXT:    lui a1, 8
 ; ZVFHMIN-ZFH-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v8, (a1)
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 30(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 28(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa3, 24(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa2, 26(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa1, 22(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa0, 20(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    flh ft0, 16(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    flh ft1, 18(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    vxor.vx v8, v8, a1
 ; ZVFHMIN-ZFH-RV32-NEXT:    mv a1, sp
 ; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v9, (a1)
-; ZVFHMIN-ZFH-RV32-NEXT:    flh ft2, 2(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    flh ft3, 0(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h ft1, ft2, ft1
-; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a1, ft1
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h ft0, ft3, ft0
-; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a2, ft0
-; ZVFHMIN-ZFH-RV32-NEXT:    flh ft0, 4(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    addi a1, sp, 16
+; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 18(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa3, 0(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa2, 16(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a1, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa3, fa2
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 4(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa3, 20(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa2, 6(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa1, 22(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a2, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa4, fa3
+; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a3, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa2, fa1
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 10(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa3, 26(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a4, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa2, 24(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa4, fa4, fa3
+; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a5, fa4
 ; ZVFHMIN-ZFH-RV32-NEXT:    vmv.v.x v8, a2
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa5, fa2
+; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a2, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 28(sp)
 ; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFH-RV32-NEXT:    flh ft1, 6(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa0, ft0, fa0
-; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa1, ft1, fa1
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa0, 10(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a1, fa1
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa1, 8(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa2, fa0, fa2
-; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a1, fa2
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa3, fa1, fa3
-; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a2, fa3
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa3, 12(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v8, v8, a3
+; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v8, v8, a4
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a1, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 30(sp)
 ; ZVFHMIN-ZFH-RV32-NEXT:    vmv.v.x v9, a2
+; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v9, v9, a5
 ; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v9, v9, a1
-; ZVFHMIN-ZFH-RV32-NEXT:    flh fa2, 14(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa4, fa3, fa4
-; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a1, fa4
-; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v9, v9, a1
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa5, fa2, fa5
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa5, fa4
 ; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a1, fa5
 ; ZVFHMIN-ZFH-RV32-NEXT:    vmv.v.i v0, 15
 ; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v9, v9, a1
@@ -2387,49 +2299,51 @@ define void @copysign_neg_v6f16(ptr %x, ptr %y) {
 ; ZVFHMIN-ZFH-RV64-NEXT:    vsetivli zero, 6, e16, mf2, ta, ma
 ; ZVFHMIN-ZFH-RV64-NEXT:    vle16.v v8, (a1)
 ; ZVFHMIN-ZFH-RV64-NEXT:    vle16.v v9, (a0)
-; ZVFHMIN-ZFH-RV64-NEXT:    addi a1, sp, 16
+; ZVFHMIN-ZFH-RV64-NEXT:    lui a1, 8
 ; ZVFHMIN-ZFH-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v8, (a1)
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 30(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 28(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa3, 24(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa2, 26(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa1, 22(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa0, 20(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    flh ft0, 16(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    flh ft1, 18(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    vxor.vx v8, v8, a1
 ; ZVFHMIN-ZFH-RV64-NEXT:    mv a1, sp
 ; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v9, (a1)
-; ZVFHMIN-ZFH-RV64-NEXT:    flh ft2, 2(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    flh ft3, 0(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h ft1, ft2, ft1
-; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a1, ft1
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h ft0, ft3, ft0
-; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a2, ft0
-; ZVFHMIN-ZFH-RV64-NEXT:    flh ft0, 4(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    addi a1, sp, 16
+; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 18(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa3, 0(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa2, 16(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a1, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa3, fa2
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 4(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa3, 20(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa2, 6(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa1, 22(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a2, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa4, fa3
+; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a3, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa2, fa1
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 10(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa3, 26(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a4, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa2, 24(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa4, fa4, fa3
+; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a5, fa4
 ; ZVFHMIN-ZFH-RV64-NEXT:    vmv.v.x v8, a2
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa5, fa2
+; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a2, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 28(sp)
 ; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFH-RV64-NEXT:    flh ft1, 6(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa0, ft0, fa0
-; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa1, ft1, fa1
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa0, 10(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a1, fa1
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa1, 8(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa2, fa0, fa2
-; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a1, fa2
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa3, fa1, fa3
-; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a2, fa3
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa3, 12(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v8, v8, a3
+; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v8, v8, a4
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a1, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 30(sp)
 ; ZVFHMIN-ZFH-RV64-NEXT:    vmv.v.x v9, a2
+; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v9, v9, a5
 ; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v9, v9, a1
-; ZVFHMIN-ZFH-RV64-NEXT:    flh fa2, 14(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa4, fa3, fa4
-; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a1, fa4
-; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v9, v9, a1
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa5, fa2, fa5
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa5, fa4
 ; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a1, fa5
 ; ZVFHMIN-ZFH-RV64-NEXT:    vmv.v.i v0, 15
 ; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v9, v9, a1
@@ -2446,84 +2360,77 @@ define void @copysign_neg_v6f16(ptr %x, ptr %y) {
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetivli zero, 6, e16, mf2, ta, ma
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v8, (a1)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v9, (a0)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a1, sp, 16
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lui a1, 8
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a1)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 30(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa4, 28(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 24(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a2, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa4, 26(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa3, 22(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vxor.vx v8, v8, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    mv a2, sp
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v9, (a2)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a2, sp, 16
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a2)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa4, 18(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 20(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a5, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a6, fa3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a4, fa4
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa4, 16(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a7, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    not a1, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    not a2, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h t0, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 18(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    not a4, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    not t1, a5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    not a6, a6
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    mv a5, sp
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v9, (a5)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 2(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    not a7, a7
-; ZVFHMIN-ZFHIN-RV32-NEXT:    not t0, t0
-; ZVFHMIN-ZFHIN-RV32-NEXT:    not t2, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa3, 4(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a5, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    lui t3, 8
-; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a3, t3, -1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and t4, a5, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 0(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    lui a5, 1048568
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and t2, t2, a5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or t2, t4, t2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h t4, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and t4, t4, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and t0, t0, t3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 4(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or t0, t4, t0
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.x v8, t0
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, t2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a6, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a7, fa3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a2, a1, -1
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h t0, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and t0, t0, a3
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 6(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a7, a7, a5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a7, t0, a7
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a7
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a7, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a7, a7, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and t1, a3, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lui a3, 1048568
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a4, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h t2, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 22(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a4, t1, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, a5, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a6, a6, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h t1, fa5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 10(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a6, a6, a5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a6, a7, a6
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a6
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a5, a5, a6
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.x v8, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a4, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 26(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, a7, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a6, t0, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a5, a5, a6
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a6, fa5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 8(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a6, a6, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a7, t1, a5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a6, a6, a7
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a7, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a7, a7, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a4, t3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, t2, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a7, t1, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h t0, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a5, a5, a7
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a4, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a5, fa5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 12(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a4, a7, a4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.x v9, a4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v9, v9, a6
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a4, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a4, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a6, a6, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a4, a4, a6
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a5, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a5, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a6, t0, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a6, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.x v9, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a1, fa5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 14(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a2, a5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a2, a4, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v9, v9, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a2, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a1, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v9, v9, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, a5, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a1, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a4, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 30(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a5, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a4, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a1, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a1, a3
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a2, a1
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.i v0, 15
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v9, v9, a1
@@ -2540,84 +2447,77 @@ define void @copysign_neg_v6f16(ptr %x, ptr %y) {
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vsetivli zero, 6, e16, mf2, ta, ma
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vle16.v v8, (a1)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vle16.v v9, (a0)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a1, sp, 16
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lui a1, 8
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a1)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 30(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa4, 28(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 24(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a2, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa4, 26(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa3, 22(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vxor.vx v8, v8, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    mv a2, sp
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v9, (a2)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a2, sp, 16
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a2)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 2(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa4, 18(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 20(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a5, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a6, fa3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a4, fa4
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa4, 16(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a7, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    not a1, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    not a2, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h t0, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 18(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    not a4, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    not t1, a5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    not a6, a6
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    mv a5, sp
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v9, (a5)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 2(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    not a7, a7
-; ZVFHMIN-ZFHIN-RV64-NEXT:    not t0, t0
-; ZVFHMIN-ZFHIN-RV64-NEXT:    not t2, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa3, 4(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a5, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    lui t3, 8
-; ZVFHMIN-ZFHIN-RV64-NEXT:    addiw a3, t3, -1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and t4, a5, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 0(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    lui a5, 1048568
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and t2, t2, a5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or t2, t4, t2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h t4, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and t4, t4, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and t0, t0, t3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 4(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or t0, t4, t0
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.x v8, t0
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, t2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a6, fa4
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a7, fa3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addiw a2, a1, -1
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h t0, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and t0, t0, a3
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 6(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a7, a7, a5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a7, t0, a7
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a7
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a7, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a7, a7, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and t1, a3, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lui a3, 1048568
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a4, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h t2, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 22(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a4, t1, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, a5, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a6, a6, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h t1, fa5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 10(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a6, a6, a5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a6, a7, a6
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a6
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a5, a5, a6
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.x v8, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a4, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 26(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, a7, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a6, t0, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a5, a5, a6
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a6, fa5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 8(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a6, a6, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a7, t1, a5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a6, a6, a7
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a7, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a7, a7, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a4, t3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, t2, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a7, t1, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h t0, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a5, a5, a7
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a4, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a5, fa5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 12(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a4, a7, a4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.x v9, a4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v9, v9, a6
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a4, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a4, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a6, a6, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a4, a4, a6
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a5, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a5, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a6, t0, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a6, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.x v9, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a1, fa5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 14(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a2, a5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a2, a4, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v9, v9, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a2, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a1, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v9, v9, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, a5, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a1, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a4, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 30(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a5, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a4, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a1, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a1, a3
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a2, a1
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.i v0, 15
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v9, v9, a1
@@ -2698,26 +2598,28 @@ define void @copysign_neg_trunc_v4f16_v4f32(ptr %x, ptr %y) {
 ; ZVFHMIN-ZFH-RV32-NEXT:    mv a1, sp
 ; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v8, (a1)
 ; ZVFHMIN-ZFH-RV32-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-ZFH-RV32-NEXT:    lui a1, 8
+; ZVFHMIN-ZFH-RV32-NEXT:    vxor.vx v8, v8, a1
 ; ZVFHMIN-ZFH-RV32-NEXT:    addi a1, sp, 8
 ; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v8, (a1)
 ; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 2(sp)
 ; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 10(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa5, fa4
 ; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 0(sp)
 ; ZVFHMIN-ZFH-RV32-NEXT:    flh fa3, 8(sp)
 ; ZVFHMIN-ZFH-RV32-NEXT:    flh fa2, 4(sp)
 ; ZVFHMIN-ZFH-RV32-NEXT:    flh fa1, 12(sp)
 ; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa5, fa4, fa3
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa4, fa3
 ; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa5, fa2, fa1
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa2, fa1
 ; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a3, fa5
 ; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 6(sp)
 ; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 14(sp)
 ; ZVFHMIN-ZFH-RV32-NEXT:    vmv.v.x v8, a2
 ; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v8, v8, a1
 ; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v8, v8, a3
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa5, fa4
 ; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a1, fa5
 ; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v8, v8, a1
 ; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v8, (a0)
@@ -2734,26 +2636,28 @@ define void @copysign_neg_trunc_v4f16_v4f32(ptr %x, ptr %y) {
 ; ZVFHMIN-ZFH-RV64-NEXT:    mv a1, sp
 ; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v8, (a1)
 ; ZVFHMIN-ZFH-RV64-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-ZFH-RV64-NEXT:    lui a1, 8
+; ZVFHMIN-ZFH-RV64-NEXT:    vxor.vx v8, v8, a1
 ; ZVFHMIN-ZFH-RV64-NEXT:    addi a1, sp, 8
 ; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v8, (a1)
 ; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 2(sp)
 ; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 10(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa5, fa4
 ; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 0(sp)
 ; ZVFHMIN-ZFH-RV64-NEXT:    flh fa3, 8(sp)
 ; ZVFHMIN-ZFH-RV64-NEXT:    flh fa2, 4(sp)
 ; ZVFHMIN-ZFH-RV64-NEXT:    flh fa1, 12(sp)
 ; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa5, fa4, fa3
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa4, fa3
 ; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa5, fa2, fa1
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa2, fa1
 ; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a3, fa5
 ; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 6(sp)
 ; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 14(sp)
 ; ZVFHMIN-ZFH-RV64-NEXT:    vmv.v.x v8, a2
 ; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v8, v8, a1
 ; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v8, v8, a3
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa5, fa4
 ; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a1, fa5
 ; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v8, v8, a1
 ; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v8, (a0)
@@ -2770,44 +2674,41 @@ define void @copysign_neg_trunc_v4f16_v4f32(ptr %x, ptr %y) {
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    mv a1, sp
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a1)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vfncvt.f.f.w v8, v9
-; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a1, sp, 8
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lui a1, 8
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vxor.vx v8, v8, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a2, sp, 8
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a2)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 2(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa4, 10(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    lui a2, 8
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa3, 0(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a2, fa5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a3, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 0(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a4, a2, -1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a1, a4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    not a3, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a5, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a4, fa3
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a5, a1, -1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a2, a5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    lui a6, 1048568
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a3, a3, a6
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a1, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a3, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a7, fa5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 4(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, a5, a4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    not a3, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a3, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a3, a3, a6
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a2, a2, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a7, a1
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a3, fa5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 12(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a2, a5, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.x v8, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a4, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a4, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.x v8, a1
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a1, fa5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 6(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a3, a3, a4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    not a1, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a3, a3, a5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a1, a6
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a2, fa5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 14(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a3, a1
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a2, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a2, a5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    not a1, a1
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a1, a6
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a2, a1
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a1
@@ -2825,44 +2726,41 @@ define void @copysign_neg_trunc_v4f16_v4f32(ptr %x, ptr %y) {
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    mv a1, sp
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a1)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vfncvt.f.f.w v8, v9
-; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a1, sp, 8
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lui a1, 8
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vxor.vx v8, v8, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a2, sp, 8
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a2)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 2(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa4, 10(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    lui a2, 8
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa3, 0(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a2, fa5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a3, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 0(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    addiw a4, a2, -1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a1, a4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    not a3, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a5, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a4, fa3
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addiw a5, a1, -1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a2, a5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    lui a6, 1048568
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a3, a3, a6
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a1, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a3, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a7, fa5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 4(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, a5, a4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    not a3, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a3, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a3, a3, a6
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a2, a2, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a7, a1
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a3, fa5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 12(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a2, a5, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.x v8, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a4, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a4, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.x v8, a1
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a1, fa5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 6(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a3, a3, a4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    not a1, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a3, a3, a5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a1, a6
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a2, fa5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 14(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a3, a1
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a2, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a2, a5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    not a1, a1
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a1, a6
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a2, a1
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a1
@@ -2903,26 +2801,28 @@ define void @copysign_neg_trunc_v3f16_v3f32(ptr %x, ptr %y) {
 ; ZVFHMIN-ZFH-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
 ; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v8, (a1)
 ; ZVFHMIN-ZFH-RV32-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-ZFH-RV32-NEXT:    lui a1, 8
+; ZVFHMIN-ZFH-RV32-NEXT:    vxor.vx v8, v8, a1
 ; ZVFHMIN-ZFH-RV32-NEXT:    addi a1, sp, 8
 ; ZVFHMIN-ZFH-RV32-NEXT:    vse16.v v8, (a1)
 ; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 2(sp)
 ; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 10(sp)
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa5, fa4
 ; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 0(sp)
 ; ZVFHMIN-ZFH-RV32-NEXT:    flh fa3, 8(sp)
 ; ZVFHMIN-ZFH-RV32-NEXT:    flh fa2, 4(sp)
 ; ZVFHMIN-ZFH-RV32-NEXT:    flh fa1, 12(sp)
 ; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa5, fa4, fa3
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa4, fa3
 ; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa5, fa2, fa1
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa2, fa1
 ; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a3, fa5
 ; ZVFHMIN-ZFH-RV32-NEXT:    flh fa5, 6(sp)
 ; ZVFHMIN-ZFH-RV32-NEXT:    flh fa4, 14(sp)
 ; ZVFHMIN-ZFH-RV32-NEXT:    vmv.v.x v8, a2
 ; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v8, v8, a1
 ; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v8, v8, a3
-; ZVFHMIN-ZFH-RV32-NEXT:    fsgnjn.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV32-NEXT:    fsgnj.h fa5, fa5, fa4
 ; ZVFHMIN-ZFH-RV32-NEXT:    fmv.x.h a1, fa5
 ; ZVFHMIN-ZFH-RV32-NEXT:    vslide1down.vx v8, v8, a1
 ; ZVFHMIN-ZFH-RV32-NEXT:    vsetivli zero, 3, e16, mf4, ta, ma
@@ -2941,26 +2841,28 @@ define void @copysign_neg_trunc_v3f16_v3f32(ptr %x, ptr %y) {
 ; ZVFHMIN-ZFH-RV64-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
 ; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v8, (a1)
 ; ZVFHMIN-ZFH-RV64-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-ZFH-RV64-NEXT:    lui a1, 8
+; ZVFHMIN-ZFH-RV64-NEXT:    vxor.vx v8, v8, a1
 ; ZVFHMIN-ZFH-RV64-NEXT:    addi a1, sp, 8
 ; ZVFHMIN-ZFH-RV64-NEXT:    vse16.v v8, (a1)
 ; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 2(sp)
 ; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 10(sp)
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa5, fa4
 ; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 0(sp)
 ; ZVFHMIN-ZFH-RV64-NEXT:    flh fa3, 8(sp)
 ; ZVFHMIN-ZFH-RV64-NEXT:    flh fa2, 4(sp)
 ; ZVFHMIN-ZFH-RV64-NEXT:    flh fa1, 12(sp)
 ; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa5, fa4, fa3
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa4, fa3
 ; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa5, fa2, fa1
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa2, fa1
 ; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a3, fa5
 ; ZVFHMIN-ZFH-RV64-NEXT:    flh fa5, 6(sp)
 ; ZVFHMIN-ZFH-RV64-NEXT:    flh fa4, 14(sp)
 ; ZVFHMIN-ZFH-RV64-NEXT:    vmv.v.x v8, a2
 ; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v8, v8, a1
 ; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v8, v8, a3
-; ZVFHMIN-ZFH-RV64-NEXT:    fsgnjn.h fa5, fa5, fa4
+; ZVFHMIN-ZFH-RV64-NEXT:    fsgnj.h fa5, fa5, fa4
 ; ZVFHMIN-ZFH-RV64-NEXT:    fmv.x.h a1, fa5
 ; ZVFHMIN-ZFH-RV64-NEXT:    vslide1down.vx v8, v8, a1
 ; ZVFHMIN-ZFH-RV64-NEXT:    vsetivli zero, 3, e16, mf4, ta, ma
@@ -2979,44 +2881,41 @@ define void @copysign_neg_trunc_v3f16_v3f32(ptr %x, ptr %y) {
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a1)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vfncvt.f.f.w v8, v9
-; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a1, sp, 8
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lui a1, 8
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vxor.vx v8, v8, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a2, sp, 8
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a2)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 2(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa4, 10(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    lui a2, 8
+; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa3, 0(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a2, fa5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a3, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 0(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a4, a2, -1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a1, a4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    not a3, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a5, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a4, fa3
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a5, a1, -1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a2, a5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    lui a6, 1048568
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a3, a3, a6
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a1, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a3, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a7, fa5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 4(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, a5, a4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    not a3, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a3, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a3, a3, a6
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a2, a2, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a7, a1
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a3, fa5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 12(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a2, a5, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.x v8, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a4, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a4, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.x v8, a1
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a1, fa5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 6(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a3, a3, a4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    not a1, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a3, a3, a5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a1, a6
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a2, fa5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 14(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a3, a1
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a2, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a2, a5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    not a1, a1
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a1, a6
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a2, a1
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a1
@@ -3036,44 +2935,41 @@ define void @copysign_neg_trunc_v3f16_v3f32(ptr %x, ptr %y) {
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a1)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vfncvt.f.f.w v8, v9
-; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a1, sp, 8
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lui a1, 8
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vxor.vx v8, v8, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a2, sp, 8
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a2)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 2(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa4, 10(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    lui a2, 8
+; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa3, 0(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a2, fa5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a3, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 0(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    addiw a4, a2, -1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a1, a4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    not a3, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a5, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a4, fa3
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addiw a5, a1, -1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a2, a5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    lui a6, 1048568
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a3, a3, a6
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a1, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a3, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a7, fa5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 4(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, a5, a4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    not a3, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a3, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a3, a3, a6
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a2, a2, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a7, a1
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a3, fa5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 12(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a2, a5, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.x v8, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a4, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a4, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.x v8, a1
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a1, fa5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 6(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a3, a3, a4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    not a1, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a3, a3, a5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a1, a6
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a2, fa5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 14(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a3, a1
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a2, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a2, a5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    not a1, a1
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a1, a6
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a2, a1
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a1
@@ -3336,59 +3232,20 @@ define void @fmsub_v8f16(ptr %x, ptr %y, ptr %z) {
 ;
 ; ZVFHMIN-LABEL: fmsub_v8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    addi sp, sp, -16
-; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, mu
+; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vle16.v v8, (a2)
 ; ZVFHMIN-NEXT:    vle16.v v9, (a0)
 ; ZVFHMIN-NEXT:    vle16.v v10, (a1)
-; ZVFHMIN-NEXT:    mv a1, sp
-; ZVFHMIN-NEXT:    vse16.v v8, (a1)
-; ZVFHMIN-NEXT:    flh fa5, 2(sp)
-; ZVFHMIN-NEXT:    flh fa4, 0(sp)
-; ZVFHMIN-NEXT:    flh fa3, 4(sp)
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-NEXT:    fmv.x.h a2, fa4
-; ZVFHMIN-NEXT:    lui a3, 1048568
-; ZVFHMIN-NEXT:    fmv.x.h a4, fa3
-; ZVFHMIN-NEXT:    flh fa5, 6(sp)
-; ZVFHMIN-NEXT:    lui a5, 8
-; ZVFHMIN-NEXT:    xor a2, a2, a5
-; ZVFHMIN-NEXT:    vmv.v.x v8, a2
-; ZVFHMIN-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-NEXT:    flh fa5, 10(sp)
-; ZVFHMIN-NEXT:    xor a1, a1, a3
-; ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a1
-; ZVFHMIN-NEXT:    xor a4, a4, a3
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-NEXT:    flh fa5, 8(sp)
-; ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a4
-; ZVFHMIN-NEXT:    xor a2, a2, a3
-; ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a2
-; ZVFHMIN-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-NEXT:    flh fa5, 12(sp)
-; ZVFHMIN-NEXT:    xor a1, a1, a3
-; ZVFHMIN-NEXT:    xor a2, a2, a5
-; ZVFHMIN-NEXT:    vmv.v.x v11, a2
-; ZVFHMIN-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-NEXT:    flh fa5, 14(sp)
-; ZVFHMIN-NEXT:    vslide1down.vx v11, v11, a1
-; ZVFHMIN-NEXT:    xor a2, a2, a3
-; ZVFHMIN-NEXT:    vslide1down.vx v11, v11, a2
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-NEXT:    xor a1, a1, a3
-; ZVFHMIN-NEXT:    vmv.v.i v0, 15
-; ZVFHMIN-NEXT:    vslide1down.vx v11, v11, a1
-; ZVFHMIN-NEXT:    vslidedown.vi v11, v8, 4, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v9
+; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    vxor.vx v8, v8, a1
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v9, v11, v8
+; ZVFHMIN-NEXT:    vfmadd.vv v9, v8, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    vse16.v v8, (a0)
-; ZVFHMIN-NEXT:    addi sp, sp, 16
 ; ZVFHMIN-NEXT:    ret
   %a = load <8 x half>, ptr %x
   %b = load <8 x half>, ptr %y
@@ -3412,60 +3269,21 @@ define void @fmsub_v6f16(ptr %x, ptr %y, ptr %z) {
 ;
 ; ZVFHMIN-LABEL: fmsub_v6f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    addi sp, sp, -16
-; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
 ; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vle16.v v8, (a2)
 ; ZVFHMIN-NEXT:    vle16.v v9, (a0)
 ; ZVFHMIN-NEXT:    vle16.v v10, (a1)
-; ZVFHMIN-NEXT:    mv a1, sp
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, mu
-; ZVFHMIN-NEXT:    vse16.v v8, (a1)
-; ZVFHMIN-NEXT:    flh fa5, 2(sp)
-; ZVFHMIN-NEXT:    flh fa4, 0(sp)
-; ZVFHMIN-NEXT:    flh fa3, 4(sp)
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-NEXT:    fmv.x.h a2, fa4
-; ZVFHMIN-NEXT:    lui a3, 1048568
-; ZVFHMIN-NEXT:    fmv.x.h a4, fa3
-; ZVFHMIN-NEXT:    flh fa5, 6(sp)
-; ZVFHMIN-NEXT:    lui a5, 8
-; ZVFHMIN-NEXT:    xor a2, a2, a5
-; ZVFHMIN-NEXT:    vmv.v.x v8, a2
-; ZVFHMIN-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-NEXT:    flh fa5, 10(sp)
-; ZVFHMIN-NEXT:    xor a1, a1, a3
-; ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a1
-; ZVFHMIN-NEXT:    xor a4, a4, a3
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-NEXT:    flh fa5, 8(sp)
-; ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a4
-; ZVFHMIN-NEXT:    xor a2, a2, a3
-; ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a2
-; ZVFHMIN-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-NEXT:    flh fa5, 12(sp)
-; ZVFHMIN-NEXT:    xor a1, a1, a3
-; ZVFHMIN-NEXT:    xor a2, a2, a5
-; ZVFHMIN-NEXT:    vmv.v.x v11, a2
-; ZVFHMIN-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-NEXT:    flh fa5, 14(sp)
-; ZVFHMIN-NEXT:    vslide1down.vx v11, v11, a1
-; ZVFHMIN-NEXT:    xor a2, a2, a3
-; ZVFHMIN-NEXT:    vslide1down.vx v11, v11, a2
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-NEXT:    xor a1, a1, a3
-; ZVFHMIN-NEXT:    vmv.v.i v0, 15
-; ZVFHMIN-NEXT:    vslide1down.vx v11, v11, a1
-; ZVFHMIN-NEXT:    vslidedown.vi v11, v8, 4, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v9
+; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vxor.vx v8, v8, a1
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v9, v11, v8
+; ZVFHMIN-NEXT:    vfmadd.vv v9, v8, v11
 ; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    vse16.v v8, (a0)
-; ZVFHMIN-NEXT:    addi sp, sp, 16
 ; ZVFHMIN-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = load <6 x half>, ptr %y
@@ -3854,187 +3672,14 @@ define void @fneg_v16f16(ptr %x) {
 ; ZVFH-NEXT:    vse16.v v8, (a0)
 ; ZVFH-NEXT:    ret
 ;
-; ZVFHMIN-RV32-LABEL: fneg_v16f16:
-; ZVFHMIN-RV32:       # %bb.0:
-; ZVFHMIN-RV32-NEXT:    addi sp, sp, -64
-; ZVFHMIN-RV32-NEXT:    .cfi_def_cfa_offset 64
-; ZVFHMIN-RV32-NEXT:    sw ra, 60(sp) # 4-byte Folded Spill
-; ZVFHMIN-RV32-NEXT:    sw s0, 56(sp) # 4-byte Folded Spill
-; ZVFHMIN-RV32-NEXT:    .cfi_offset ra, -4
-; ZVFHMIN-RV32-NEXT:    .cfi_offset s0, -8
-; ZVFHMIN-RV32-NEXT:    addi s0, sp, 64
-; ZVFHMIN-RV32-NEXT:    .cfi_def_cfa s0, 0
-; ZVFHMIN-RV32-NEXT:    andi sp, sp, -32
-; ZVFHMIN-RV32-NEXT:    vsetivli zero, 16, e16, m1, ta, mu
-; ZVFHMIN-RV32-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-RV32-NEXT:    mv a1, sp
-; ZVFHMIN-RV32-NEXT:    vse16.v v8, (a1)
-; ZVFHMIN-RV32-NEXT:    flh fa5, 2(sp)
-; ZVFHMIN-RV32-NEXT:    flh fa4, 0(sp)
-; ZVFHMIN-RV32-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-RV32-NEXT:    flh fa5, 4(sp)
-; ZVFHMIN-RV32-NEXT:    flh fa3, 6(sp)
-; ZVFHMIN-RV32-NEXT:    fmv.x.h a3, fa4
-; ZVFHMIN-RV32-NEXT:    flh fa4, 8(sp)
-; ZVFHMIN-RV32-NEXT:    fmv.x.h a4, fa5
-; ZVFHMIN-RV32-NEXT:    fmv.x.h a5, fa3
-; ZVFHMIN-RV32-NEXT:    flh fa5, 10(sp)
-; ZVFHMIN-RV32-NEXT:    fmv.x.h a6, fa4
-; ZVFHMIN-RV32-NEXT:    lui a1, 1048568
-; ZVFHMIN-RV32-NEXT:    xor a2, a2, a1
-; ZVFHMIN-RV32-NEXT:    fmv.x.h a7, fa5
-; ZVFHMIN-RV32-NEXT:    flh fa5, 12(sp)
-; ZVFHMIN-RV32-NEXT:    lui t0, 8
-; ZVFHMIN-RV32-NEXT:    xor a3, a3, t0
-; ZVFHMIN-RV32-NEXT:    vmv.v.x v8, a3
-; ZVFHMIN-RV32-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-RV32-NEXT:    flh fa5, 14(sp)
-; ZVFHMIN-RV32-NEXT:    vslide1down.vx v8, v8, a2
-; ZVFHMIN-RV32-NEXT:    xor a4, a4, a1
-; ZVFHMIN-RV32-NEXT:    vslide1down.vx v8, v8, a4
-; ZVFHMIN-RV32-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-RV32-NEXT:    flh fa5, 18(sp)
-; ZVFHMIN-RV32-NEXT:    xor a5, a5, a1
-; ZVFHMIN-RV32-NEXT:    vslide1down.vx v8, v8, a5
-; ZVFHMIN-RV32-NEXT:    xor a4, a6, a1
-; ZVFHMIN-RV32-NEXT:    fmv.x.h a5, fa5
-; ZVFHMIN-RV32-NEXT:    flh fa5, 16(sp)
-; ZVFHMIN-RV32-NEXT:    vslide1down.vx v8, v8, a4
-; ZVFHMIN-RV32-NEXT:    xor a4, a7, a1
-; ZVFHMIN-RV32-NEXT:    vslide1down.vx v8, v8, a4
-; ZVFHMIN-RV32-NEXT:    fmv.x.h a4, fa5
-; ZVFHMIN-RV32-NEXT:    flh fa5, 20(sp)
-; ZVFHMIN-RV32-NEXT:    xor a3, a3, a1
-; ZVFHMIN-RV32-NEXT:    vslide1down.vx v8, v8, a3
-; ZVFHMIN-RV32-NEXT:    xor a2, a2, a1
-; ZVFHMIN-RV32-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-RV32-NEXT:    flh fa5, 22(sp)
-; ZVFHMIN-RV32-NEXT:    vslide1down.vx v8, v8, a2
-; ZVFHMIN-RV32-NEXT:    xor a2, a4, t0
-; ZVFHMIN-RV32-NEXT:    vmv.v.x v9, a2
-; ZVFHMIN-RV32-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-RV32-NEXT:    flh fa5, 24(sp)
-; ZVFHMIN-RV32-NEXT:    xor a5, a5, a1
-; ZVFHMIN-RV32-NEXT:    vslide1down.vx v9, v9, a5
-; ZVFHMIN-RV32-NEXT:    xor a3, a3, a1
-; ZVFHMIN-RV32-NEXT:    fmv.x.h a4, fa5
-; ZVFHMIN-RV32-NEXT:    flh fa5, 26(sp)
-; ZVFHMIN-RV32-NEXT:    vslide1down.vx v9, v9, a3
-; ZVFHMIN-RV32-NEXT:    xor a2, a2, a1
-; ZVFHMIN-RV32-NEXT:    vslide1down.vx v9, v9, a2
-; ZVFHMIN-RV32-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-RV32-NEXT:    flh fa5, 28(sp)
-; ZVFHMIN-RV32-NEXT:    xor a4, a4, a1
-; ZVFHMIN-RV32-NEXT:    vslide1down.vx v9, v9, a4
-; ZVFHMIN-RV32-NEXT:    xor a2, a2, a1
-; ZVFHMIN-RV32-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-RV32-NEXT:    flh fa5, 30(sp)
-; ZVFHMIN-RV32-NEXT:    vslide1down.vx v9, v9, a2
-; ZVFHMIN-RV32-NEXT:    xor a3, a3, a1
-; ZVFHMIN-RV32-NEXT:    vslide1down.vx v9, v9, a3
-; ZVFHMIN-RV32-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-RV32-NEXT:    xor a1, a2, a1
-; ZVFHMIN-RV32-NEXT:    li a2, 255
-; ZVFHMIN-RV32-NEXT:    vmv.s.x v0, a2
-; ZVFHMIN-RV32-NEXT:    vslide1down.vx v9, v9, a1
-; ZVFHMIN-RV32-NEXT:    vslidedown.vi v9, v8, 8, v0.t
-; ZVFHMIN-RV32-NEXT:    vse16.v v9, (a0)
-; ZVFHMIN-RV32-NEXT:    addi sp, s0, -64
-; ZVFHMIN-RV32-NEXT:    lw ra, 60(sp) # 4-byte Folded Reload
-; ZVFHMIN-RV32-NEXT:    lw s0, 56(sp) # 4-byte Folded Reload
-; ZVFHMIN-RV32-NEXT:    addi sp, sp, 64
-; ZVFHMIN-RV32-NEXT:    ret
-;
-; ZVFHMIN-RV64-LABEL: fneg_v16f16:
-; ZVFHMIN-RV64:       # %bb.0:
-; ZVFHMIN-RV64-NEXT:    addi sp, sp, -64
-; ZVFHMIN-RV64-NEXT:    .cfi_def_cfa_offset 64
-; ZVFHMIN-RV64-NEXT:    sd ra, 56(sp) # 8-byte Folded Spill
-; ZVFHMIN-RV64-NEXT:    sd s0, 48(sp) # 8-byte Folded Spill
-; ZVFHMIN-RV64-NEXT:    .cfi_offset ra, -8
-; ZVFHMIN-RV64-NEXT:    .cfi_offset s0, -16
-; ZVFHMIN-RV64-NEXT:    addi s0, sp, 64
-; ZVFHMIN-RV64-NEXT:    .cfi_def_cfa s0, 0
-; ZVFHMIN-RV64-NEXT:    andi sp, sp, -32
-; ZVFHMIN-RV64-NEXT:    vsetivli zero, 16, e16, m1, ta, mu
-; ZVFHMIN-RV64-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-RV64-NEXT:    mv a1, sp
-; ZVFHMIN-RV64-NEXT:    vse16.v v8, (a1)
-; ZVFHMIN-RV64-NEXT:    flh fa5, 2(sp)
-; ZVFHMIN-RV64-NEXT:    flh fa4, 0(sp)
-; ZVFHMIN-RV64-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-RV64-NEXT:    flh fa5, 4(sp)
-; ZVFHMIN-RV64-NEXT:    flh fa3, 6(sp)
-; ZVFHMIN-RV64-NEXT:    fmv.x.h a3, fa4
-; ZVFHMIN-RV64-NEXT:    flh fa4, 8(sp)
-; ZVFHMIN-RV64-NEXT:    fmv.x.h a4, fa5
-; ZVFHMIN-RV64-NEXT:    fmv.x.h a5, fa3
-; ZVFHMIN-RV64-NEXT:    flh fa5, 10(sp)
-; ZVFHMIN-RV64-NEXT:    fmv.x.h a6, fa4
-; ZVFHMIN-RV64-NEXT:    lui a1, 1048568
-; ZVFHMIN-RV64-NEXT:    xor a2, a2, a1
-; ZVFHMIN-RV64-NEXT:    fmv.x.h a7, fa5
-; ZVFHMIN-RV64-NEXT:    flh fa5, 12(sp)
-; ZVFHMIN-RV64-NEXT:    lui t0, 8
-; ZVFHMIN-RV64-NEXT:    xor a3, a3, t0
-; ZVFHMIN-RV64-NEXT:    vmv.v.x v8, a3
-; ZVFHMIN-RV64-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-RV64-NEXT:    flh fa5, 14(sp)
-; ZVFHMIN-RV64-NEXT:    vslide1down.vx v8, v8, a2
-; ZVFHMIN-RV64-NEXT:    xor a4, a4, a1
-; ZVFHMIN-RV64-NEXT:    vslide1down.vx v8, v8, a4
-; ZVFHMIN-RV64-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-RV64-NEXT:    flh fa5, 18(sp)
-; ZVFHMIN-RV64-NEXT:    xor a5, a5, a1
-; ZVFHMIN-RV64-NEXT:    vslide1down.vx v8, v8, a5
-; ZVFHMIN-RV64-NEXT:    xor a4, a6, a1
-; ZVFHMIN-RV64-NEXT:    fmv.x.h a5, fa5
-; ZVFHMIN-RV64-NEXT:    flh fa5, 16(sp)
-; ZVFHMIN-RV64-NEXT:    vslide1down.vx v8, v8, a4
-; ZVFHMIN-RV64-NEXT:    xor a4, a7, a1
-; ZVFHMIN-RV64-NEXT:    vslide1down.vx v8, v8, a4
-; ZVFHMIN-RV64-NEXT:    fmv.x.h a4, fa5
-; ZVFHMIN-RV64-NEXT:    flh fa5, 20(sp)
-; ZVFHMIN-RV64-NEXT:    xor a3, a3, a1
-; ZVFHMIN-RV64-NEXT:    vslide1down.vx v8, v8, a3
-; ZVFHMIN-RV64-NEXT:    xor a2, a2, a1
-; ZVFHMIN-RV64-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-RV64-NEXT:    flh fa5, 22(sp)
-; ZVFHMIN-RV64-NEXT:    vslide1down.vx v8, v8, a2
-; ZVFHMIN-RV64-NEXT:    xor a2, a4, t0
-; ZVFHMIN-RV64-NEXT:    vmv.v.x v9, a2
-; ZVFHMIN-RV64-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-RV64-NEXT:    flh fa5, 24(sp)
-; ZVFHMIN-RV64-NEXT:    xor a5, a5, a1
-; ZVFHMIN-RV64-NEXT:    vslide1down.vx v9, v9, a5
-; ZVFHMIN-RV64-NEXT:    xor a3, a3, a1
-; ZVFHMIN-RV64-NEXT:    fmv.x.h a4, fa5
-; ZVFHMIN-RV64-NEXT:    flh fa5, 26(sp)
-; ZVFHMIN-RV64-NEXT:    vslide1down.vx v9, v9, a3
-; ZVFHMIN-RV64-NEXT:    xor a2, a2, a1
-; ZVFHMIN-RV64-NEXT:    vslide1down.vx v9, v9, a2
-; ZVFHMIN-RV64-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-RV64-NEXT:    flh fa5, 28(sp)
-; ZVFHMIN-RV64-NEXT:    xor a4, a4, a1
-; ZVFHMIN-RV64-NEXT:    vslide1down.vx v9, v9, a4
-; ZVFHMIN-RV64-NEXT:    xor a2, a2, a1
-; ZVFHMIN-RV64-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-RV64-NEXT:    flh fa5, 30(sp)
-; ZVFHMIN-RV64-NEXT:    vslide1down.vx v9, v9, a2
-; ZVFHMIN-RV64-NEXT:    xor a3, a3, a1
-; ZVFHMIN-RV64-NEXT:    vslide1down.vx v9, v9, a3
-; ZVFHMIN-RV64-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-RV64-NEXT:    xor a1, a2, a1
-; ZVFHMIN-RV64-NEXT:    li a2, 255
-; ZVFHMIN-RV64-NEXT:    vmv.s.x v0, a2
-; ZVFHMIN-RV64-NEXT:    vslide1down.vx v9, v9, a1
-; ZVFHMIN-RV64-NEXT:    vslidedown.vi v9, v8, 8, v0.t
-; ZVFHMIN-RV64-NEXT:    vse16.v v9, (a0)
-; ZVFHMIN-RV64-NEXT:    addi sp, s0, -64
-; ZVFHMIN-RV64-NEXT:    ld ra, 56(sp) # 8-byte Folded Reload
-; ZVFHMIN-RV64-NEXT:    ld s0, 48(sp) # 8-byte Folded Reload
-; ZVFHMIN-RV64-NEXT:    addi sp, sp, 64
-; ZVFHMIN-RV64-NEXT:    ret
+; ZVFHMIN-LABEL: fneg_v16f16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    vxor.vx v8, v8, a1
+; ZVFHMIN-NEXT:    vse16.v v8, (a0)
+; ZVFHMIN-NEXT:    ret
   %a = load <16 x half>, ptr %x
   %b = fneg <16 x half> %a
   store <16 x half> %b, ptr %x
@@ -5286,60 +4931,21 @@ define void @fmsub_vf_v8f16(ptr %x, ptr %y, half %z) {
 ;
 ; ZVFHMIN-LABEL: fmsub_vf_v8f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    addi sp, sp, -16
-; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, mu
-; ZVFHMIN-NEXT:    vle16.v v9, (a1)
+; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-NEXT:    mv a1, sp
-; ZVFHMIN-NEXT:    vse16.v v9, (a1)
-; ZVFHMIN-NEXT:    flh fa5, 2(sp)
-; ZVFHMIN-NEXT:    flh fa4, 0(sp)
+; ZVFHMIN-NEXT:    vle16.v v9, (a1)
 ; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-NEXT:    flh fa5, 4(sp)
-; ZVFHMIN-NEXT:    fmv.x.h a3, fa4
-; ZVFHMIN-NEXT:    vmv.v.x v9, a1
-; ZVFHMIN-NEXT:    lui a1, 1048568
-; ZVFHMIN-NEXT:    fmv.x.h a4, fa5
-; ZVFHMIN-NEXT:    flh fa5, 6(sp)
-; ZVFHMIN-NEXT:    lui a5, 8
-; ZVFHMIN-NEXT:    xor a3, a3, a5
-; ZVFHMIN-NEXT:    vmv.v.x v10, a3
-; ZVFHMIN-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-NEXT:    flh fa5, 10(sp)
-; ZVFHMIN-NEXT:    xor a2, a2, a1
-; ZVFHMIN-NEXT:    vslide1down.vx v10, v10, a2
-; ZVFHMIN-NEXT:    xor a4, a4, a1
-; ZVFHMIN-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-NEXT:    flh fa5, 8(sp)
-; ZVFHMIN-NEXT:    vslide1down.vx v10, v10, a4
-; ZVFHMIN-NEXT:    xor a3, a3, a1
-; ZVFHMIN-NEXT:    vslide1down.vx v10, v10, a3
-; ZVFHMIN-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-NEXT:    flh fa5, 12(sp)
-; ZVFHMIN-NEXT:    xor a2, a2, a1
-; ZVFHMIN-NEXT:    xor a3, a3, a5
-; ZVFHMIN-NEXT:    vmv.v.x v11, a3
-; ZVFHMIN-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-NEXT:    flh fa5, 14(sp)
-; ZVFHMIN-NEXT:    vslide1down.vx v11, v11, a2
-; ZVFHMIN-NEXT:    xor a3, a3, a1
-; ZVFHMIN-NEXT:    vslide1down.vx v11, v11, a3
-; ZVFHMIN-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-NEXT:    xor a1, a2, a1
-; ZVFHMIN-NEXT:    vmv.v.i v0, 15
-; ZVFHMIN-NEXT:    vslide1down.vx v11, v11, a1
-; ZVFHMIN-NEXT:    vslidedown.vi v11, v10, 4, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vmv.v.x v10, a1
+; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    vxor.vx v9, v9, a1
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v9
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT:    vfmadd.vv v8, v11, v10
+; ZVFHMIN-NEXT:    vfmadd.vv v8, v9, v11
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v8
 ; ZVFHMIN-NEXT:    vse16.v v9, (a0)
-; ZVFHMIN-NEXT:    addi sp, sp, 16
 ; ZVFHMIN-NEXT:    ret
   %a = load <8 x half>, ptr %x
   %b = load <8 x half>, ptr %y
@@ -5363,65 +4969,26 @@ define void @fmsub_vf_v6f16(ptr %x, ptr %y, half %z) {
 ;
 ; ZVFHMIN-LABEL: fmsub_vf_v6f16:
 ; ZVFHMIN:       # %bb.0:
-; ZVFHMIN-NEXT:    addi sp, sp, -16
-; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
 ; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, mf2, ta, ma
-; ZVFHMIN-NEXT:    vle16.v v9, (a1)
 ; ZVFHMIN-NEXT:    vle16.v v8, (a0)
-; ZVFHMIN-NEXT:    mv a1, sp
-; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, mu
-; ZVFHMIN-NEXT:    vse16.v v9, (a1)
-; ZVFHMIN-NEXT:    flh fa5, 2(sp)
-; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
-; ZVFHMIN-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-NEXT:    flh fa5, 0(sp)
-; ZVFHMIN-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-NEXT:    li a4, 192
-; ZVFHMIN-NEXT:    vmv.s.x v0, a4
-; ZVFHMIN-NEXT:    fmv.x.h a4, fa5
-; ZVFHMIN-NEXT:    flh fa5, 4(sp)
-; ZVFHMIN-NEXT:    vmv.v.x v9, a1
-; ZVFHMIN-NEXT:    vmerge.vxm v9, v9, a3, v0
-; ZVFHMIN-NEXT:    lui a1, 1048568
-; ZVFHMIN-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-NEXT:    flh fa5, 6(sp)
-; ZVFHMIN-NEXT:    lui a5, 8
-; ZVFHMIN-NEXT:    xor a4, a4, a5
-; ZVFHMIN-NEXT:    vmv.v.x v10, a4
-; ZVFHMIN-NEXT:    fmv.x.h a4, fa5
-; ZVFHMIN-NEXT:    flh fa5, 10(sp)
-; ZVFHMIN-NEXT:    xor a2, a2, a1
-; ZVFHMIN-NEXT:    vslide1down.vx v10, v10, a2
-; ZVFHMIN-NEXT:    xor a3, a3, a1
-; ZVFHMIN-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-NEXT:    flh fa5, 8(sp)
-; ZVFHMIN-NEXT:    vslide1down.vx v10, v10, a3
-; ZVFHMIN-NEXT:    xor a4, a4, a1
-; ZVFHMIN-NEXT:    vslide1down.vx v10, v10, a4
-; ZVFHMIN-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-NEXT:    flh fa5, 12(sp)
-; ZVFHMIN-NEXT:    xor a2, a2, a1
-; ZVFHMIN-NEXT:    xor a3, a3, a5
-; ZVFHMIN-NEXT:    vmv.v.x v11, a3
-; ZVFHMIN-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-NEXT:    flh fa5, 14(sp)
-; ZVFHMIN-NEXT:    vslide1down.vx v11, v11, a2
-; ZVFHMIN-NEXT:    xor a3, a3, a1
-; ZVFHMIN-NEXT:    vslide1down.vx v11, v11, a3
-; ZVFHMIN-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-NEXT:    xor a1, a2, a1
-; ZVFHMIN-NEXT:    vmv.v.i v0, 15
-; ZVFHMIN-NEXT:    vslide1down.vx v11, v11, a1
-; ZVFHMIN-NEXT:    vslidedown.vi v11, v10, 4, v0.t
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v11
-; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v9
+; ZVFHMIN-NEXT:    vle16.v v9, (a1)
+; ZVFHMIN-NEXT:    fmv.x.h a1, fa5
+; ZVFHMIN-NEXT:    li a2, 192
+; ZVFHMIN-NEXT:    vmv.s.x v0, a2
+; ZVFHMIN-NEXT:    fmv.x.h a2, fa0
+; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v10, a2
+; ZVFHMIN-NEXT:    vmerge.vxm v10, v10, a1, v0
+; ZVFHMIN-NEXT:    lui a1, 8
+; ZVFHMIN-NEXT:    vxor.vx v9, v9, a1
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v10
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
 ; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
 ; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
 ; ZVFHMIN-NEXT:    vfmadd.vv v9, v11, v10
 ; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    vse16.v v8, (a0)
-; ZVFHMIN-NEXT:    addi sp, sp, 16
 ; ZVFHMIN-NEXT:    ret
   %a = load <6 x half>, ptr %x
   %b = load <6 x half>, ptr %y

diff  --git a/llvm/test/CodeGen/Thumb2/mve-fp-negabs.ll b/llvm/test/CodeGen/Thumb2/mve-fp-negabs.ll
index 0e993f35ce85df..8b8b897a32eda4 100644
--- a/llvm/test/CodeGen/Thumb2/mve-fp-negabs.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-fp-negabs.ll
@@ -5,22 +5,8 @@
 define arm_aapcs_vfpcc <8 x half> @fneg_float16_t(<8 x half> %src) {
 ; CHECK-MVE-LABEL: fneg_float16_t:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vmovx.f16 s4, s0
-; CHECK-MVE-NEXT:    vneg.f16 s0, s0
-; CHECK-MVE-NEXT:    vneg.f16 s4, s4
-; CHECK-MVE-NEXT:    vins.f16 s0, s4
-; CHECK-MVE-NEXT:    vmovx.f16 s4, s1
-; CHECK-MVE-NEXT:    vneg.f16 s4, s4
-; CHECK-MVE-NEXT:    vneg.f16 s1, s1
-; CHECK-MVE-NEXT:    vins.f16 s1, s4
-; CHECK-MVE-NEXT:    vmovx.f16 s4, s2
-; CHECK-MVE-NEXT:    vneg.f16 s4, s4
-; CHECK-MVE-NEXT:    vneg.f16 s2, s2
-; CHECK-MVE-NEXT:    vins.f16 s2, s4
-; CHECK-MVE-NEXT:    vmovx.f16 s4, s3
-; CHECK-MVE-NEXT:    vneg.f16 s4, s4
-; CHECK-MVE-NEXT:    vneg.f16 s3, s3
-; CHECK-MVE-NEXT:    vins.f16 s3, s4
+; CHECK-MVE-NEXT:    vmov.i16 q1, #0x8000
+; CHECK-MVE-NEXT:    veor q0, q0, q1
 ; CHECK-MVE-NEXT:    bx lr
 ;
 ; CHECK-MVEFP-LABEL: fneg_float16_t:
@@ -35,10 +21,8 @@ entry:
 define arm_aapcs_vfpcc <4 x float> @fneg_float32_t(<4 x float> %src) {
 ; CHECK-MVE-LABEL: fneg_float32_t:
 ; CHECK-MVE:       @ %bb.0: @ %entry
-; CHECK-MVE-NEXT:    vneg.f32 s3, s3
-; CHECK-MVE-NEXT:    vneg.f32 s2, s2
-; CHECK-MVE-NEXT:    vneg.f32 s1, s1
-; CHECK-MVE-NEXT:    vneg.f32 s0, s0
+; CHECK-MVE-NEXT:    vmov.i32 q1, #0x80000000
+; CHECK-MVE-NEXT:    veor q0, q0, q1
 ; CHECK-MVE-NEXT:    bx lr
 ;
 ; CHECK-MVEFP-LABEL: fneg_float32_t:
@@ -53,20 +37,17 @@ entry:
 define arm_aapcs_vfpcc <2 x double> @fneg_float64_t(<2 x double> %src) {
 ; CHECK-LABEL: fneg_float64_t:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    .pad #16
-; CHECK-NEXT:    sub sp, #16
-; CHECK-NEXT:    vstr d1, [sp]
-; CHECK-NEXT:    ldrb.w r0, [sp, #7]
-; CHECK-NEXT:    vstr d0, [sp, #8]
-; CHECK-NEXT:    ldrb.w r1, [sp, #15]
-; CHECK-NEXT:    eor r0, r0, #128
-; CHECK-NEXT:    strb.w r0, [sp, #7]
-; CHECK-NEXT:    vldr d1, [sp]
-; CHECK-NEXT:    eor r0, r1, #128
-; CHECK-NEXT:    strb.w r0, [sp, #15]
-; CHECK-NEXT:    vldr d0, [sp, #8]
-; CHECK-NEXT:    add sp, #16
+; CHECK-NEXT:    adr r0, .LCPI2_0
+; CHECK-NEXT:    vldrw.u32 q1, [r0]
+; CHECK-NEXT:    veor q0, q0, q1
 ; CHECK-NEXT:    bx lr
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  @ %bb.1:
+; CHECK-NEXT:  .LCPI2_0:
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 2147483648 @ 0x80000000
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 2147483648 @ 0x80000000
 entry:
   %0 = fsub nnan ninf nsz <2 x double> <double 0.0e0, double 0.0e0>, %src
   ret <2 x double> %0
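
A short note on the regenerated CHECK lines above: in every updated case the output now relies on a single integer XOR against the sign-bit mask. On RISC-V, "lui a1, 8" materializes the f16 mask 0x8000 and vxor.vx applies it to each element; the MVE output splats 0x8000 or 0x80000000 into q1 and applies it with veor. As a rough illustration only (standalone C++, not LLVM code; the function name and the float/uint32_t types are assumptions made for this sketch), the scalar equivalent of that expansion is:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // Sketch: negate a float by flipping its IEEE-754 sign bit with an
    // integer XOR, mirroring the fneg -> xor pattern the updated CHECK
    // lines exercise. For f16 the mask would be 0x8000, for f64 it is
    // 0x8000000000000000.
    static float fnegViaXor(float X) {
      uint32_t Bits;
      std::memcpy(&Bits, &X, sizeof(Bits)); // bitcast float -> i32
      Bits ^= 0x80000000u;                  // flip the sign bit
      std::memcpy(&X, &Bits, sizeof(Bits)); // bitcast i32 -> float
      return X;
    }

    int main() {
      std::printf("%f\n", fnegViaXor(1.5f)); // prints -1.500000
      return 0;
    }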

