[llvm] [RISCV] Fold (fmv_x_h/w (load)) to an integer load. (PR #109900)

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 24 20:38:39 PDT 2024


https://github.com/topperc created https://github.com/llvm/llvm-project/pull/109900

(No description was provided for this pull request.)

>From b030b966c126cf5b477a0a20ee649b4d3b21281b Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Tue, 24 Sep 2024 20:37:10 -0700
Subject: [PATCH] [RISCV] Fold (fmv_x_h/w (load)) to an integer load.

---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   |   11 +
 .../CodeGen/RISCV/fastcc-without-f-reg.ll     |  620 ++++----
 llvm/test/CodeGen/RISCV/half-arith.ll         |   30 +-
 .../rvv/fixed-vectors-fp-buildvec-bf16.ll     |    6 +-
 .../RISCV/rvv/fixed-vectors-fp-buildvec.ll    |    6 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-fp.ll     | 1408 +++++++----------
 6 files changed, 911 insertions(+), 1170 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 7b00b2514c4ef1..56c9ba67bb35e0 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -16984,6 +16984,17 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
       return Op0.getOperand(0);
     }
 
+    if (ISD::isNormalLoad(Op0.getNode()) && Op0.hasOneUse() &&
+        cast<LoadSDNode>(Op0)->isSimple()) {
+      MVT IVT = MVT::getIntegerVT(Op0.getValueSizeInBits());
+      auto *LN0 = cast<LoadSDNode>(Op0);
+      SDValue Load =
+          DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, LN0->getChain(),
+                         LN0->getBasePtr(), IVT, LN0->getMemOperand());
+      DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), Load.getValue(1));
+      return Load;
+    }
+
     // This is a target-specific version of a DAGCombine performed in
     // DAGCombiner::visitBITCAST. It performs the equivalent of:
     // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
diff --git a/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll b/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll
index 8e2fdfc4ba94c3..ca40ba03999739 100644
--- a/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll
+++ b/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll
@@ -246,32 +246,28 @@ define fastcc half @callee_half_32(<32 x half> %A) nounwind {
 define half @caller_half_32(<32 x half> %A) nounwind {
 ; ZHINX32-LABEL: caller_half_32:
 ; ZHINX32:       # %bb.0:
-; ZHINX32-NEXT:    addi sp, sp, -112
-; ZHINX32-NEXT:    sw ra, 108(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    sw s0, 104(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    sw s1, 100(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    sw s2, 96(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    sw s3, 92(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    sw s4, 88(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    sw s5, 84(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    sw s6, 80(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    sw s7, 76(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    sw s8, 72(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    sw s9, 68(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    sw s10, 64(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    sw s11, 60(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    lh t0, 124(sp)
-; ZHINX32-NEXT:    sw t0, 56(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    lh t0, 120(sp)
-; ZHINX32-NEXT:    sw t0, 52(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    lh t0, 116(sp)
-; ZHINX32-NEXT:    sw t0, 48(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    addi sp, sp, -96
+; ZHINX32-NEXT:    sw ra, 92(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s0, 88(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s1, 84(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s2, 80(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s3, 76(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s4, 72(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s5, 68(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s6, 64(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s7, 60(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s8, 56(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s9, 52(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s10, 48(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s11, 44(sp) # 4-byte Folded Spill
 ; ZHINX32-NEXT:    lh t0, 112(sp)
-; ZHINX32-NEXT:    sw t0, 44(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    lh t6, 128(sp)
-; ZHINX32-NEXT:    lh t5, 132(sp)
-; ZHINX32-NEXT:    lh t4, 136(sp)
-; ZHINX32-NEXT:    lh s0, 140(sp)
+; ZHINX32-NEXT:    lh t1, 116(sp)
+; ZHINX32-NEXT:    lh t2, 120(sp)
+; ZHINX32-NEXT:    lh s0, 124(sp)
+; ZHINX32-NEXT:    lh t3, 128(sp)
+; ZHINX32-NEXT:    lh t4, 132(sp)
+; ZHINX32-NEXT:    lh t5, 136(sp)
+; ZHINX32-NEXT:    lh t6, 140(sp)
 ; ZHINX32-NEXT:    lh s1, 144(sp)
 ; ZHINX32-NEXT:    lh s2, 148(sp)
 ; ZHINX32-NEXT:    lh s3, 152(sp)
@@ -284,79 +280,71 @@ define half @caller_half_32(<32 x half> %A) nounwind {
 ; ZHINX32-NEXT:    lh s10, 180(sp)
 ; ZHINX32-NEXT:    lh s11, 184(sp)
 ; ZHINX32-NEXT:    lh ra, 188(sp)
-; ZHINX32-NEXT:    lh t3, 192(sp)
-; ZHINX32-NEXT:    lh t2, 196(sp)
-; ZHINX32-NEXT:    lh t1, 200(sp)
-; ZHINX32-NEXT:    lh t0, 204(sp)
-; ZHINX32-NEXT:    sh t0, 38(sp)
-; ZHINX32-NEXT:    sh t1, 36(sp)
-; ZHINX32-NEXT:    sh t2, 34(sp)
-; ZHINX32-NEXT:    sh t3, 32(sp)
-; ZHINX32-NEXT:    sh ra, 30(sp)
-; ZHINX32-NEXT:    sh s11, 28(sp)
-; ZHINX32-NEXT:    sh s10, 26(sp)
-; ZHINX32-NEXT:    sh s9, 24(sp)
-; ZHINX32-NEXT:    sh s8, 22(sp)
-; ZHINX32-NEXT:    sh s7, 20(sp)
-; ZHINX32-NEXT:    sh s6, 18(sp)
-; ZHINX32-NEXT:    sh s5, 16(sp)
-; ZHINX32-NEXT:    sh s4, 14(sp)
-; ZHINX32-NEXT:    sh s3, 12(sp)
-; ZHINX32-NEXT:    sh s2, 10(sp)
-; ZHINX32-NEXT:    sh s1, 8(sp)
+; ZHINX32-NEXT:    sh ra, 38(sp)
+; ZHINX32-NEXT:    sh s11, 36(sp)
+; ZHINX32-NEXT:    sh s10, 34(sp)
+; ZHINX32-NEXT:    sh s9, 32(sp)
+; ZHINX32-NEXT:    sh s8, 30(sp)
+; ZHINX32-NEXT:    sh s7, 28(sp)
+; ZHINX32-NEXT:    sh s6, 26(sp)
+; ZHINX32-NEXT:    sh s5, 24(sp)
+; ZHINX32-NEXT:    sh s4, 22(sp)
+; ZHINX32-NEXT:    sh s3, 20(sp)
+; ZHINX32-NEXT:    sh s2, 18(sp)
+; ZHINX32-NEXT:    sh s1, 16(sp)
+; ZHINX32-NEXT:    sh t6, 14(sp)
+; ZHINX32-NEXT:    sh t5, 12(sp)
+; ZHINX32-NEXT:    sh t4, 10(sp)
+; ZHINX32-NEXT:    sh t3, 8(sp)
+; ZHINX32-NEXT:    lh t3, 96(sp)
+; ZHINX32-NEXT:    lh t4, 100(sp)
+; ZHINX32-NEXT:    lh t5, 104(sp)
+; ZHINX32-NEXT:    lh t6, 108(sp)
 ; ZHINX32-NEXT:    sh s0, 6(sp)
-; ZHINX32-NEXT:    sh t4, 4(sp)
-; ZHINX32-NEXT:    sh t5, 2(sp)
-; ZHINX32-NEXT:    sh t6, 0(sp)
-; ZHINX32-NEXT:    lw t3, 44(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw t4, 48(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw t5, 52(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw t6, 56(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    sh t2, 4(sp)
+; ZHINX32-NEXT:    sh t1, 2(sp)
+; ZHINX32-NEXT:    sh t0, 0(sp)
 ; ZHINX32-NEXT:    call callee_half_32
-; ZHINX32-NEXT:    lw ra, 108(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw s0, 104(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw s1, 100(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw s2, 96(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw s3, 92(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw s4, 88(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw s5, 84(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw s6, 80(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw s7, 76(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw s8, 72(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw s9, 68(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw s10, 64(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw s11, 60(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    addi sp, sp, 112
+; ZHINX32-NEXT:    lw ra, 92(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s0, 88(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s1, 84(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s2, 80(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s3, 76(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s4, 72(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s5, 68(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s6, 64(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s7, 60(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s8, 56(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s9, 52(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s10, 48(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s11, 44(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    addi sp, sp, 96
 ; ZHINX32-NEXT:    ret
 ;
 ; ZHINX64-LABEL: caller_half_32:
 ; ZHINX64:       # %bb.0:
-; ZHINX64-NEXT:    addi sp, sp, -176
-; ZHINX64-NEXT:    sd ra, 168(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT:    sd s0, 160(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT:    sd s1, 152(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT:    sd s2, 144(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT:    sd s3, 136(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT:    sd s4, 128(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT:    sd s5, 120(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT:    sd s6, 112(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT:    sd s7, 104(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT:    sd s8, 96(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT:    sd s9, 88(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT:    sd s10, 80(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT:    sd s11, 72(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT:    lh t0, 200(sp)
-; ZHINX64-NEXT:    sd t0, 64(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT:    lh t0, 192(sp)
-; ZHINX64-NEXT:    sd t0, 56(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT:    lh t0, 184(sp)
-; ZHINX64-NEXT:    sd t0, 48(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    addi sp, sp, -144
+; ZHINX64-NEXT:    sd ra, 136(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s0, 128(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s1, 120(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s2, 112(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s3, 104(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s4, 96(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s5, 88(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s6, 80(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s7, 72(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s8, 64(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s9, 56(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s10, 48(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s11, 40(sp) # 8-byte Folded Spill
 ; ZHINX64-NEXT:    lh t0, 176(sp)
-; ZHINX64-NEXT:    sd t0, 40(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT:    lh t6, 208(sp)
-; ZHINX64-NEXT:    lh t5, 216(sp)
-; ZHINX64-NEXT:    lh t4, 224(sp)
-; ZHINX64-NEXT:    lh s0, 232(sp)
+; ZHINX64-NEXT:    lh t1, 184(sp)
+; ZHINX64-NEXT:    lh t2, 192(sp)
+; ZHINX64-NEXT:    lh s0, 200(sp)
+; ZHINX64-NEXT:    lh t3, 208(sp)
+; ZHINX64-NEXT:    lh t4, 216(sp)
+; ZHINX64-NEXT:    lh t5, 224(sp)
+; ZHINX64-NEXT:    lh t6, 232(sp)
 ; ZHINX64-NEXT:    lh s1, 240(sp)
 ; ZHINX64-NEXT:    lh s2, 248(sp)
 ; ZHINX64-NEXT:    lh s3, 256(sp)
@@ -369,49 +357,45 @@ define half @caller_half_32(<32 x half> %A) nounwind {
 ; ZHINX64-NEXT:    lh s10, 312(sp)
 ; ZHINX64-NEXT:    lh s11, 320(sp)
 ; ZHINX64-NEXT:    lh ra, 328(sp)
-; ZHINX64-NEXT:    lh t3, 336(sp)
-; ZHINX64-NEXT:    lh t2, 344(sp)
-; ZHINX64-NEXT:    lh t1, 352(sp)
-; ZHINX64-NEXT:    lh t0, 360(sp)
-; ZHINX64-NEXT:    sh t0, 38(sp)
-; ZHINX64-NEXT:    sh t1, 36(sp)
-; ZHINX64-NEXT:    sh t2, 34(sp)
-; ZHINX64-NEXT:    sh t3, 32(sp)
-; ZHINX64-NEXT:    sh ra, 30(sp)
-; ZHINX64-NEXT:    sh s11, 28(sp)
-; ZHINX64-NEXT:    sh s10, 26(sp)
-; ZHINX64-NEXT:    sh s9, 24(sp)
-; ZHINX64-NEXT:    sh s8, 22(sp)
-; ZHINX64-NEXT:    sh s7, 20(sp)
-; ZHINX64-NEXT:    sh s6, 18(sp)
-; ZHINX64-NEXT:    sh s5, 16(sp)
-; ZHINX64-NEXT:    sh s4, 14(sp)
-; ZHINX64-NEXT:    sh s3, 12(sp)
-; ZHINX64-NEXT:    sh s2, 10(sp)
-; ZHINX64-NEXT:    sh s1, 8(sp)
+; ZHINX64-NEXT:    sh ra, 38(sp)
+; ZHINX64-NEXT:    sh s11, 36(sp)
+; ZHINX64-NEXT:    sh s10, 34(sp)
+; ZHINX64-NEXT:    sh s9, 32(sp)
+; ZHINX64-NEXT:    sh s8, 30(sp)
+; ZHINX64-NEXT:    sh s7, 28(sp)
+; ZHINX64-NEXT:    sh s6, 26(sp)
+; ZHINX64-NEXT:    sh s5, 24(sp)
+; ZHINX64-NEXT:    sh s4, 22(sp)
+; ZHINX64-NEXT:    sh s3, 20(sp)
+; ZHINX64-NEXT:    sh s2, 18(sp)
+; ZHINX64-NEXT:    sh s1, 16(sp)
+; ZHINX64-NEXT:    sh t6, 14(sp)
+; ZHINX64-NEXT:    sh t5, 12(sp)
+; ZHINX64-NEXT:    sh t4, 10(sp)
+; ZHINX64-NEXT:    sh t3, 8(sp)
+; ZHINX64-NEXT:    lh t3, 144(sp)
+; ZHINX64-NEXT:    lh t4, 152(sp)
+; ZHINX64-NEXT:    lh t5, 160(sp)
+; ZHINX64-NEXT:    lh t6, 168(sp)
 ; ZHINX64-NEXT:    sh s0, 6(sp)
-; ZHINX64-NEXT:    sh t4, 4(sp)
-; ZHINX64-NEXT:    sh t5, 2(sp)
-; ZHINX64-NEXT:    sh t6, 0(sp)
-; ZHINX64-NEXT:    ld t3, 40(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld t4, 48(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld t5, 56(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld t6, 64(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    sh t2, 4(sp)
+; ZHINX64-NEXT:    sh t1, 2(sp)
+; ZHINX64-NEXT:    sh t0, 0(sp)
 ; ZHINX64-NEXT:    call callee_half_32
-; ZHINX64-NEXT:    ld ra, 168(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld s0, 160(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld s1, 152(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld s2, 144(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld s3, 136(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld s4, 128(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld s5, 120(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld s6, 112(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld s7, 104(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld s8, 96(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld s9, 88(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld s10, 80(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld s11, 72(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    addi sp, sp, 176
+; ZHINX64-NEXT:    ld ra, 136(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s0, 128(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s1, 120(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s2, 112(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s3, 104(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s4, 96(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s5, 88(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s6, 80(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s7, 72(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s8, 64(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s9, 56(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s10, 48(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s11, 40(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    addi sp, sp, 144
 ; ZHINX64-NEXT:    ret
 ;
 ; ZFINX32-LABEL: caller_half_32:
@@ -917,32 +901,28 @@ define float @caller_float_32(<32 x float> %A) nounwind {
 ;
 ; ZHINX64-LABEL: caller_float_32:
 ; ZHINX64:       # %bb.0:
-; ZHINX64-NEXT:    addi sp, sp, -224
-; ZHINX64-NEXT:    sd ra, 216(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT:    sd s0, 208(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT:    sd s1, 200(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT:    sd s2, 192(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT:    sd s3, 184(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT:    sd s4, 176(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT:    sd s5, 168(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT:    sd s6, 160(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT:    sd s7, 152(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT:    sd s8, 144(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT:    sd s9, 136(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT:    sd s10, 128(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT:    sd s11, 120(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT:    lw t0, 248(sp)
-; ZHINX64-NEXT:    sd t0, 112(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT:    lw t0, 240(sp)
-; ZHINX64-NEXT:    sd t0, 104(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT:    lw t0, 232(sp)
-; ZHINX64-NEXT:    sd t0, 96(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    addi sp, sp, -192
+; ZHINX64-NEXT:    sd ra, 184(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s0, 176(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s1, 168(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s2, 160(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s3, 152(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s4, 144(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s5, 136(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s6, 128(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s7, 120(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s8, 112(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s9, 104(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s10, 96(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s11, 88(sp) # 8-byte Folded Spill
 ; ZHINX64-NEXT:    lw t0, 224(sp)
-; ZHINX64-NEXT:    sd t0, 88(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT:    lw t6, 256(sp)
-; ZHINX64-NEXT:    lw t5, 264(sp)
-; ZHINX64-NEXT:    lw t4, 272(sp)
-; ZHINX64-NEXT:    lw s0, 280(sp)
+; ZHINX64-NEXT:    lw t1, 232(sp)
+; ZHINX64-NEXT:    lw t2, 240(sp)
+; ZHINX64-NEXT:    lw s0, 248(sp)
+; ZHINX64-NEXT:    lw t3, 256(sp)
+; ZHINX64-NEXT:    lw t4, 264(sp)
+; ZHINX64-NEXT:    lw t5, 272(sp)
+; ZHINX64-NEXT:    lw t6, 280(sp)
 ; ZHINX64-NEXT:    lw s1, 288(sp)
 ; ZHINX64-NEXT:    lw s2, 296(sp)
 ; ZHINX64-NEXT:    lw s3, 304(sp)
@@ -955,49 +935,45 @@ define float @caller_float_32(<32 x float> %A) nounwind {
 ; ZHINX64-NEXT:    lw s10, 360(sp)
 ; ZHINX64-NEXT:    lw s11, 368(sp)
 ; ZHINX64-NEXT:    lw ra, 376(sp)
-; ZHINX64-NEXT:    lw t3, 384(sp)
-; ZHINX64-NEXT:    lw t2, 392(sp)
-; ZHINX64-NEXT:    lw t1, 400(sp)
-; ZHINX64-NEXT:    lw t0, 408(sp)
-; ZHINX64-NEXT:    sw t0, 76(sp)
-; ZHINX64-NEXT:    sw t1, 72(sp)
-; ZHINX64-NEXT:    sw t2, 68(sp)
-; ZHINX64-NEXT:    sw t3, 64(sp)
-; ZHINX64-NEXT:    sw ra, 60(sp)
-; ZHINX64-NEXT:    sw s11, 56(sp)
-; ZHINX64-NEXT:    sw s10, 52(sp)
-; ZHINX64-NEXT:    sw s9, 48(sp)
-; ZHINX64-NEXT:    sw s8, 44(sp)
-; ZHINX64-NEXT:    sw s7, 40(sp)
-; ZHINX64-NEXT:    sw s6, 36(sp)
-; ZHINX64-NEXT:    sw s5, 32(sp)
-; ZHINX64-NEXT:    sw s4, 28(sp)
-; ZHINX64-NEXT:    sw s3, 24(sp)
-; ZHINX64-NEXT:    sw s2, 20(sp)
-; ZHINX64-NEXT:    sw s1, 16(sp)
+; ZHINX64-NEXT:    sw ra, 76(sp)
+; ZHINX64-NEXT:    sw s11, 72(sp)
+; ZHINX64-NEXT:    sw s10, 68(sp)
+; ZHINX64-NEXT:    sw s9, 64(sp)
+; ZHINX64-NEXT:    sw s8, 60(sp)
+; ZHINX64-NEXT:    sw s7, 56(sp)
+; ZHINX64-NEXT:    sw s6, 52(sp)
+; ZHINX64-NEXT:    sw s5, 48(sp)
+; ZHINX64-NEXT:    sw s4, 44(sp)
+; ZHINX64-NEXT:    sw s3, 40(sp)
+; ZHINX64-NEXT:    sw s2, 36(sp)
+; ZHINX64-NEXT:    sw s1, 32(sp)
+; ZHINX64-NEXT:    sw t6, 28(sp)
+; ZHINX64-NEXT:    sw t5, 24(sp)
+; ZHINX64-NEXT:    sw t4, 20(sp)
+; ZHINX64-NEXT:    sw t3, 16(sp)
+; ZHINX64-NEXT:    lw t3, 192(sp)
+; ZHINX64-NEXT:    lw t4, 200(sp)
+; ZHINX64-NEXT:    lw t5, 208(sp)
+; ZHINX64-NEXT:    lw t6, 216(sp)
 ; ZHINX64-NEXT:    sw s0, 12(sp)
-; ZHINX64-NEXT:    sw t4, 8(sp)
-; ZHINX64-NEXT:    sw t5, 4(sp)
-; ZHINX64-NEXT:    sw t6, 0(sp)
-; ZHINX64-NEXT:    ld t3, 88(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld t4, 96(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld t5, 104(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld t6, 112(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    sw t2, 8(sp)
+; ZHINX64-NEXT:    sw t1, 4(sp)
+; ZHINX64-NEXT:    sw t0, 0(sp)
 ; ZHINX64-NEXT:    call callee_float_32
-; ZHINX64-NEXT:    ld ra, 216(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld s0, 208(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld s1, 200(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld s2, 192(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld s3, 184(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld s4, 176(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld s5, 168(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld s6, 160(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld s7, 152(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld s8, 144(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld s9, 136(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld s10, 128(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld s11, 120(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    addi sp, sp, 224
+; ZHINX64-NEXT:    ld ra, 184(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s0, 176(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s1, 168(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s2, 160(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s3, 152(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s4, 144(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s5, 136(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s6, 128(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s7, 120(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s8, 112(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s9, 104(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s10, 96(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s11, 88(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    addi sp, sp, 192
 ; ZHINX64-NEXT:    ret
 ;
 ; ZFINX32-LABEL: caller_float_32:
@@ -1087,32 +1063,28 @@ define float @caller_float_32(<32 x float> %A) nounwind {
 ;
 ; ZFINX64-LABEL: caller_float_32:
 ; ZFINX64:       # %bb.0:
-; ZFINX64-NEXT:    addi sp, sp, -224
-; ZFINX64-NEXT:    sd ra, 216(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT:    sd s0, 208(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT:    sd s1, 200(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT:    sd s2, 192(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT:    sd s3, 184(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT:    sd s4, 176(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT:    sd s5, 168(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT:    sd s6, 160(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT:    sd s7, 152(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT:    sd s8, 144(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT:    sd s9, 136(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT:    sd s10, 128(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT:    sd s11, 120(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT:    lw t0, 248(sp)
-; ZFINX64-NEXT:    sd t0, 112(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT:    lw t0, 240(sp)
-; ZFINX64-NEXT:    sd t0, 104(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT:    lw t0, 232(sp)
-; ZFINX64-NEXT:    sd t0, 96(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    addi sp, sp, -192
+; ZFINX64-NEXT:    sd ra, 184(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    sd s0, 176(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    sd s1, 168(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    sd s2, 160(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    sd s3, 152(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    sd s4, 144(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    sd s5, 136(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    sd s6, 128(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    sd s7, 120(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    sd s8, 112(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    sd s9, 104(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    sd s10, 96(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    sd s11, 88(sp) # 8-byte Folded Spill
 ; ZFINX64-NEXT:    lw t0, 224(sp)
-; ZFINX64-NEXT:    sd t0, 88(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT:    lw t6, 256(sp)
-; ZFINX64-NEXT:    lw t5, 264(sp)
-; ZFINX64-NEXT:    lw t4, 272(sp)
-; ZFINX64-NEXT:    lw s0, 280(sp)
+; ZFINX64-NEXT:    lw t1, 232(sp)
+; ZFINX64-NEXT:    lw t2, 240(sp)
+; ZFINX64-NEXT:    lw s0, 248(sp)
+; ZFINX64-NEXT:    lw t3, 256(sp)
+; ZFINX64-NEXT:    lw t4, 264(sp)
+; ZFINX64-NEXT:    lw t5, 272(sp)
+; ZFINX64-NEXT:    lw t6, 280(sp)
 ; ZFINX64-NEXT:    lw s1, 288(sp)
 ; ZFINX64-NEXT:    lw s2, 296(sp)
 ; ZFINX64-NEXT:    lw s3, 304(sp)
@@ -1125,49 +1097,45 @@ define float @caller_float_32(<32 x float> %A) nounwind {
 ; ZFINX64-NEXT:    lw s10, 360(sp)
 ; ZFINX64-NEXT:    lw s11, 368(sp)
 ; ZFINX64-NEXT:    lw ra, 376(sp)
-; ZFINX64-NEXT:    lw t3, 384(sp)
-; ZFINX64-NEXT:    lw t2, 392(sp)
-; ZFINX64-NEXT:    lw t1, 400(sp)
-; ZFINX64-NEXT:    lw t0, 408(sp)
-; ZFINX64-NEXT:    sw t0, 76(sp)
-; ZFINX64-NEXT:    sw t1, 72(sp)
-; ZFINX64-NEXT:    sw t2, 68(sp)
-; ZFINX64-NEXT:    sw t3, 64(sp)
-; ZFINX64-NEXT:    sw ra, 60(sp)
-; ZFINX64-NEXT:    sw s11, 56(sp)
-; ZFINX64-NEXT:    sw s10, 52(sp)
-; ZFINX64-NEXT:    sw s9, 48(sp)
-; ZFINX64-NEXT:    sw s8, 44(sp)
-; ZFINX64-NEXT:    sw s7, 40(sp)
-; ZFINX64-NEXT:    sw s6, 36(sp)
-; ZFINX64-NEXT:    sw s5, 32(sp)
-; ZFINX64-NEXT:    sw s4, 28(sp)
-; ZFINX64-NEXT:    sw s3, 24(sp)
-; ZFINX64-NEXT:    sw s2, 20(sp)
-; ZFINX64-NEXT:    sw s1, 16(sp)
+; ZFINX64-NEXT:    sw ra, 76(sp)
+; ZFINX64-NEXT:    sw s11, 72(sp)
+; ZFINX64-NEXT:    sw s10, 68(sp)
+; ZFINX64-NEXT:    sw s9, 64(sp)
+; ZFINX64-NEXT:    sw s8, 60(sp)
+; ZFINX64-NEXT:    sw s7, 56(sp)
+; ZFINX64-NEXT:    sw s6, 52(sp)
+; ZFINX64-NEXT:    sw s5, 48(sp)
+; ZFINX64-NEXT:    sw s4, 44(sp)
+; ZFINX64-NEXT:    sw s3, 40(sp)
+; ZFINX64-NEXT:    sw s2, 36(sp)
+; ZFINX64-NEXT:    sw s1, 32(sp)
+; ZFINX64-NEXT:    sw t6, 28(sp)
+; ZFINX64-NEXT:    sw t5, 24(sp)
+; ZFINX64-NEXT:    sw t4, 20(sp)
+; ZFINX64-NEXT:    sw t3, 16(sp)
+; ZFINX64-NEXT:    lw t3, 192(sp)
+; ZFINX64-NEXT:    lw t4, 200(sp)
+; ZFINX64-NEXT:    lw t5, 208(sp)
+; ZFINX64-NEXT:    lw t6, 216(sp)
 ; ZFINX64-NEXT:    sw s0, 12(sp)
-; ZFINX64-NEXT:    sw t4, 8(sp)
-; ZFINX64-NEXT:    sw t5, 4(sp)
-; ZFINX64-NEXT:    sw t6, 0(sp)
-; ZFINX64-NEXT:    ld t3, 88(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT:    ld t4, 96(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT:    ld t5, 104(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT:    ld t6, 112(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    sw t2, 8(sp)
+; ZFINX64-NEXT:    sw t1, 4(sp)
+; ZFINX64-NEXT:    sw t0, 0(sp)
 ; ZFINX64-NEXT:    call callee_float_32
-; ZFINX64-NEXT:    ld ra, 216(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT:    ld s0, 208(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT:    ld s1, 200(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT:    ld s2, 192(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT:    ld s3, 184(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT:    ld s4, 176(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT:    ld s5, 168(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT:    ld s6, 160(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT:    ld s7, 152(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT:    ld s8, 144(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT:    ld s9, 136(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT:    ld s10, 128(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT:    ld s11, 120(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT:    addi sp, sp, 224
+; ZFINX64-NEXT:    ld ra, 184(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld s0, 176(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld s1, 168(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld s2, 160(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld s3, 152(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld s4, 144(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld s5, 136(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld s6, 128(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld s7, 120(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld s8, 112(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld s9, 104(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld s10, 96(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld s11, 88(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    addi sp, sp, 192
 ; ZFINX64-NEXT:    ret
 ;
 ; ZDINX32-LABEL: caller_float_32:
@@ -1257,32 +1225,28 @@ define float @caller_float_32(<32 x float> %A) nounwind {
 ;
 ; ZDINX64-LABEL: caller_float_32:
 ; ZDINX64:       # %bb.0:
-; ZDINX64-NEXT:    addi sp, sp, -224
-; ZDINX64-NEXT:    sd ra, 216(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT:    sd s0, 208(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT:    sd s1, 200(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT:    sd s2, 192(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT:    sd s3, 184(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT:    sd s4, 176(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT:    sd s5, 168(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT:    sd s6, 160(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT:    sd s7, 152(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT:    sd s8, 144(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT:    sd s9, 136(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT:    sd s10, 128(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT:    sd s11, 120(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT:    lw t0, 248(sp)
-; ZDINX64-NEXT:    sd t0, 112(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT:    lw t0, 240(sp)
-; ZDINX64-NEXT:    sd t0, 104(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT:    lw t0, 232(sp)
-; ZDINX64-NEXT:    sd t0, 96(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    addi sp, sp, -192
+; ZDINX64-NEXT:    sd ra, 184(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    sd s0, 176(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    sd s1, 168(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    sd s2, 160(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    sd s3, 152(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    sd s4, 144(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    sd s5, 136(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    sd s6, 128(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    sd s7, 120(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    sd s8, 112(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    sd s9, 104(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    sd s10, 96(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    sd s11, 88(sp) # 8-byte Folded Spill
 ; ZDINX64-NEXT:    lw t0, 224(sp)
-; ZDINX64-NEXT:    sd t0, 88(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT:    lw t6, 256(sp)
-; ZDINX64-NEXT:    lw t5, 264(sp)
-; ZDINX64-NEXT:    lw t4, 272(sp)
-; ZDINX64-NEXT:    lw s0, 280(sp)
+; ZDINX64-NEXT:    lw t1, 232(sp)
+; ZDINX64-NEXT:    lw t2, 240(sp)
+; ZDINX64-NEXT:    lw s0, 248(sp)
+; ZDINX64-NEXT:    lw t3, 256(sp)
+; ZDINX64-NEXT:    lw t4, 264(sp)
+; ZDINX64-NEXT:    lw t5, 272(sp)
+; ZDINX64-NEXT:    lw t6, 280(sp)
 ; ZDINX64-NEXT:    lw s1, 288(sp)
 ; ZDINX64-NEXT:    lw s2, 296(sp)
 ; ZDINX64-NEXT:    lw s3, 304(sp)
@@ -1295,49 +1259,45 @@ define float @caller_float_32(<32 x float> %A) nounwind {
 ; ZDINX64-NEXT:    lw s10, 360(sp)
 ; ZDINX64-NEXT:    lw s11, 368(sp)
 ; ZDINX64-NEXT:    lw ra, 376(sp)
-; ZDINX64-NEXT:    lw t3, 384(sp)
-; ZDINX64-NEXT:    lw t2, 392(sp)
-; ZDINX64-NEXT:    lw t1, 400(sp)
-; ZDINX64-NEXT:    lw t0, 408(sp)
-; ZDINX64-NEXT:    sw t0, 76(sp)
-; ZDINX64-NEXT:    sw t1, 72(sp)
-; ZDINX64-NEXT:    sw t2, 68(sp)
-; ZDINX64-NEXT:    sw t3, 64(sp)
-; ZDINX64-NEXT:    sw ra, 60(sp)
-; ZDINX64-NEXT:    sw s11, 56(sp)
-; ZDINX64-NEXT:    sw s10, 52(sp)
-; ZDINX64-NEXT:    sw s9, 48(sp)
-; ZDINX64-NEXT:    sw s8, 44(sp)
-; ZDINX64-NEXT:    sw s7, 40(sp)
-; ZDINX64-NEXT:    sw s6, 36(sp)
-; ZDINX64-NEXT:    sw s5, 32(sp)
-; ZDINX64-NEXT:    sw s4, 28(sp)
-; ZDINX64-NEXT:    sw s3, 24(sp)
-; ZDINX64-NEXT:    sw s2, 20(sp)
-; ZDINX64-NEXT:    sw s1, 16(sp)
+; ZDINX64-NEXT:    sw ra, 76(sp)
+; ZDINX64-NEXT:    sw s11, 72(sp)
+; ZDINX64-NEXT:    sw s10, 68(sp)
+; ZDINX64-NEXT:    sw s9, 64(sp)
+; ZDINX64-NEXT:    sw s8, 60(sp)
+; ZDINX64-NEXT:    sw s7, 56(sp)
+; ZDINX64-NEXT:    sw s6, 52(sp)
+; ZDINX64-NEXT:    sw s5, 48(sp)
+; ZDINX64-NEXT:    sw s4, 44(sp)
+; ZDINX64-NEXT:    sw s3, 40(sp)
+; ZDINX64-NEXT:    sw s2, 36(sp)
+; ZDINX64-NEXT:    sw s1, 32(sp)
+; ZDINX64-NEXT:    sw t6, 28(sp)
+; ZDINX64-NEXT:    sw t5, 24(sp)
+; ZDINX64-NEXT:    sw t4, 20(sp)
+; ZDINX64-NEXT:    sw t3, 16(sp)
+; ZDINX64-NEXT:    lw t3, 192(sp)
+; ZDINX64-NEXT:    lw t4, 200(sp)
+; ZDINX64-NEXT:    lw t5, 208(sp)
+; ZDINX64-NEXT:    lw t6, 216(sp)
 ; ZDINX64-NEXT:    sw s0, 12(sp)
-; ZDINX64-NEXT:    sw t4, 8(sp)
-; ZDINX64-NEXT:    sw t5, 4(sp)
-; ZDINX64-NEXT:    sw t6, 0(sp)
-; ZDINX64-NEXT:    ld t3, 88(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT:    ld t4, 96(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT:    ld t5, 104(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT:    ld t6, 112(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    sw t2, 8(sp)
+; ZDINX64-NEXT:    sw t1, 4(sp)
+; ZDINX64-NEXT:    sw t0, 0(sp)
 ; ZDINX64-NEXT:    call callee_float_32
-; ZDINX64-NEXT:    ld ra, 216(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT:    ld s0, 208(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT:    ld s1, 200(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT:    ld s2, 192(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT:    ld s3, 184(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT:    ld s4, 176(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT:    ld s5, 168(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT:    ld s6, 160(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT:    ld s7, 152(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT:    ld s8, 144(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT:    ld s9, 136(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT:    ld s10, 128(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT:    ld s11, 120(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT:    addi sp, sp, 224
+; ZDINX64-NEXT:    ld ra, 184(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld s0, 176(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld s1, 168(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld s2, 160(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld s3, 152(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld s4, 144(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld s5, 136(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld s6, 128(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld s7, 120(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld s8, 112(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld s9, 104(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld s10, 96(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld s11, 88(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    addi sp, sp, 192
 ; ZDINX64-NEXT:    ret
 	%C = call fastcc float @callee_float_32(<32 x float> %A)
 	ret float %C
diff --git a/llvm/test/CodeGen/RISCV/half-arith.ll b/llvm/test/CodeGen/RISCV/half-arith.ll
index b033c75eeadd8b..27829f2b657593 100644
--- a/llvm/test/CodeGen/RISCV/half-arith.ll
+++ b/llvm/test/CodeGen/RISCV/half-arith.ll
@@ -2877,14 +2877,13 @@ define half @fsgnjx_f16(half %x, half %y) nounwind {
 ; RV32IZFHMIN-LABEL: fsgnjx_f16:
 ; RV32IZFHMIN:       # %bb.0:
 ; RV32IZFHMIN-NEXT:    lui a0, %hi(.LCPI23_0)
-; RV32IZFHMIN-NEXT:    flh fa5, %lo(.LCPI23_0)(a0)
-; RV32IZFHMIN-NEXT:    fmv.x.h a0, fa0
-; RV32IZFHMIN-NEXT:    lui a1, 1048568
-; RV32IZFHMIN-NEXT:    and a0, a0, a1
-; RV32IZFHMIN-NEXT:    fmv.x.h a1, fa5
-; RV32IZFHMIN-NEXT:    slli a1, a1, 17
-; RV32IZFHMIN-NEXT:    srli a1, a1, 17
-; RV32IZFHMIN-NEXT:    or a0, a1, a0
+; RV32IZFHMIN-NEXT:    lhu a0, %lo(.LCPI23_0)(a0)
+; RV32IZFHMIN-NEXT:    fmv.x.h a1, fa0
+; RV32IZFHMIN-NEXT:    lui a2, 1048568
+; RV32IZFHMIN-NEXT:    and a1, a1, a2
+; RV32IZFHMIN-NEXT:    slli a0, a0, 17
+; RV32IZFHMIN-NEXT:    srli a0, a0, 17
+; RV32IZFHMIN-NEXT:    or a0, a0, a1
 ; RV32IZFHMIN-NEXT:    fmv.h.x fa5, a0
 ; RV32IZFHMIN-NEXT:    fcvt.s.h fa5, fa5
 ; RV32IZFHMIN-NEXT:    fcvt.s.h fa4, fa1
@@ -2895,14 +2894,13 @@ define half @fsgnjx_f16(half %x, half %y) nounwind {
 ; RV64IZFHMIN-LABEL: fsgnjx_f16:
 ; RV64IZFHMIN:       # %bb.0:
 ; RV64IZFHMIN-NEXT:    lui a0, %hi(.LCPI23_0)
-; RV64IZFHMIN-NEXT:    flh fa5, %lo(.LCPI23_0)(a0)
-; RV64IZFHMIN-NEXT:    fmv.x.h a0, fa0
-; RV64IZFHMIN-NEXT:    lui a1, 1048568
-; RV64IZFHMIN-NEXT:    and a0, a0, a1
-; RV64IZFHMIN-NEXT:    fmv.x.h a1, fa5
-; RV64IZFHMIN-NEXT:    slli a1, a1, 49
-; RV64IZFHMIN-NEXT:    srli a1, a1, 49
-; RV64IZFHMIN-NEXT:    or a0, a1, a0
+; RV64IZFHMIN-NEXT:    lhu a0, %lo(.LCPI23_0)(a0)
+; RV64IZFHMIN-NEXT:    fmv.x.h a1, fa0
+; RV64IZFHMIN-NEXT:    lui a2, 1048568
+; RV64IZFHMIN-NEXT:    and a1, a1, a2
+; RV64IZFHMIN-NEXT:    slli a0, a0, 49
+; RV64IZFHMIN-NEXT:    srli a0, a0, 49
+; RV64IZFHMIN-NEXT:    or a0, a0, a1
 ; RV64IZFHMIN-NEXT:    fmv.h.x fa5, a0
 ; RV64IZFHMIN-NEXT:    fcvt.s.h fa5, fa5
 ; RV64IZFHMIN-NEXT:    fcvt.s.h fa4, fa1
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec-bf16.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec-bf16.ll
index 170e71af09b49d..727e03125176aa 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec-bf16.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec-bf16.ll
@@ -40,8 +40,7 @@ define <4 x bfloat> @splat_idx_v4bf16(<4 x bfloat> %v, i64 %idx) {
 ; RV32-ZFBFMIN-NEXT:    vl1r.v v8, (a2) # Unknown-size Folded Reload
 ; RV32-ZFBFMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
 ; RV32-ZFBFMIN-NEXT:    vse16.v v8, (a1)
-; RV32-ZFBFMIN-NEXT:    flh fa5, 0(a0)
-; RV32-ZFBFMIN-NEXT:    fmv.x.h a0, fa5
+; RV32-ZFBFMIN-NEXT:    lh a0, 0(a0)
 ; RV32-ZFBFMIN-NEXT:    vmv.v.x v8, a0
 ; RV32-ZFBFMIN-NEXT:    csrr a0, vlenb
 ; RV32-ZFBFMIN-NEXT:    slli a0, a0, 1
@@ -71,8 +70,7 @@ define <4 x bfloat> @splat_idx_v4bf16(<4 x bfloat> %v, i64 %idx) {
 ; RV64-ZFBFMIN-NEXT:    vl1r.v v8, (a2) # Unknown-size Folded Reload
 ; RV64-ZFBFMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
 ; RV64-ZFBFMIN-NEXT:    vse16.v v8, (a1)
-; RV64-ZFBFMIN-NEXT:    flh fa5, 0(a0)
-; RV64-ZFBFMIN-NEXT:    fmv.x.h a0, fa5
+; RV64-ZFBFMIN-NEXT:    lh a0, 0(a0)
 ; RV64-ZFBFMIN-NEXT:    vmv.v.x v8, a0
 ; RV64-ZFBFMIN-NEXT:    csrr a0, vlenb
 ; RV64-ZFBFMIN-NEXT:    slli a0, a0, 1
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
index b5d3e2cd776f27..bf2eb3ff0261a5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
@@ -220,8 +220,7 @@ define <4 x half> @splat_idx_v4f16(<4 x half> %v, i64 %idx) {
 ; RV32-ZFHMIN-NEXT:    vl1r.v v8, (a2) # Unknown-size Folded Reload
 ; RV32-ZFHMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
 ; RV32-ZFHMIN-NEXT:    vse16.v v8, (a1)
-; RV32-ZFHMIN-NEXT:    flh fa5, 0(a0)
-; RV32-ZFHMIN-NEXT:    fmv.x.h a0, fa5
+; RV32-ZFHMIN-NEXT:    lh a0, 0(a0)
 ; RV32-ZFHMIN-NEXT:    vmv.v.x v8, a0
 ; RV32-ZFHMIN-NEXT:    csrr a0, vlenb
 ; RV32-ZFHMIN-NEXT:    slli a0, a0, 1
@@ -251,8 +250,7 @@ define <4 x half> @splat_idx_v4f16(<4 x half> %v, i64 %idx) {
 ; RV64-ZFHMIN-NEXT:    vl1r.v v8, (a2) # Unknown-size Folded Reload
 ; RV64-ZFHMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
 ; RV64-ZFHMIN-NEXT:    vse16.v v8, (a1)
-; RV64-ZFHMIN-NEXT:    flh fa5, 0(a0)
-; RV64-ZFHMIN-NEXT:    fmv.x.h a0, fa5
+; RV64-ZFHMIN-NEXT:    lh a0, 0(a0)
 ; RV64-ZFHMIN-NEXT:    vmv.v.x v8, a0
 ; RV64-ZFHMIN-NEXT:    csrr a0, vlenb
 ; RV64-ZFHMIN-NEXT:    slli a0, a0, 1
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
index 5ab8eab091c2e4..d665d23dec68a4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
@@ -516,41 +516,33 @@ define void @fabs_v8f16(ptr %x) {
 ; ZVFHMIN-RV32-NEXT:    vle16.v v8, (a0)
 ; ZVFHMIN-RV32-NEXT:    mv a1, sp
 ; ZVFHMIN-RV32-NEXT:    vse16.v v8, (a1)
-; ZVFHMIN-RV32-NEXT:    flh fa5, 2(sp)
-; ZVFHMIN-RV32-NEXT:    flh fa4, 0(sp)
-; ZVFHMIN-RV32-NEXT:    flh fa3, 4(sp)
-; ZVFHMIN-RV32-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-RV32-NEXT:    fmv.x.h a2, fa4
-; ZVFHMIN-RV32-NEXT:    lui a3, 8
-; ZVFHMIN-RV32-NEXT:    fmv.x.h a4, fa3
-; ZVFHMIN-RV32-NEXT:    flh fa5, 6(sp)
-; ZVFHMIN-RV32-NEXT:    addi a3, a3, -1
-; ZVFHMIN-RV32-NEXT:    and a2, a2, a3
-; ZVFHMIN-RV32-NEXT:    vmv.v.x v8, a2
-; ZVFHMIN-RV32-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-RV32-NEXT:    flh fa5, 10(sp)
-; ZVFHMIN-RV32-NEXT:    and a1, a1, a3
+; ZVFHMIN-RV32-NEXT:    lhu a1, 2(sp)
+; ZVFHMIN-RV32-NEXT:    lui a2, 8
+; ZVFHMIN-RV32-NEXT:    lhu a3, 0(sp)
+; ZVFHMIN-RV32-NEXT:    addi a2, a2, -1
+; ZVFHMIN-RV32-NEXT:    and a1, a1, a2
+; ZVFHMIN-RV32-NEXT:    lhu a4, 4(sp)
+; ZVFHMIN-RV32-NEXT:    and a3, a3, a2
+; ZVFHMIN-RV32-NEXT:    vmv.v.x v8, a3
 ; ZVFHMIN-RV32-NEXT:    vslide1down.vx v8, v8, a1
-; ZVFHMIN-RV32-NEXT:    and a4, a4, a3
-; ZVFHMIN-RV32-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-RV32-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-RV32-NEXT:    and a4, a4, a2
+; ZVFHMIN-RV32-NEXT:    lhu a1, 6(sp)
 ; ZVFHMIN-RV32-NEXT:    vslide1down.vx v8, v8, a4
-; ZVFHMIN-RV32-NEXT:    and a2, a2, a3
-; ZVFHMIN-RV32-NEXT:    vslide1down.vx v8, v8, a2
-; ZVFHMIN-RV32-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-RV32-NEXT:    flh fa5, 12(sp)
-; ZVFHMIN-RV32-NEXT:    and a1, a1, a3
-; ZVFHMIN-RV32-NEXT:    and a2, a2, a3
-; ZVFHMIN-RV32-NEXT:    vmv.v.x v9, a2
-; ZVFHMIN-RV32-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-RV32-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-RV32-NEXT:    lhu a3, 10(sp)
+; ZVFHMIN-RV32-NEXT:    lhu a4, 8(sp)
+; ZVFHMIN-RV32-NEXT:    and a1, a1, a2
+; ZVFHMIN-RV32-NEXT:    vslide1down.vx v8, v8, a1
+; ZVFHMIN-RV32-NEXT:    and a3, a3, a2
+; ZVFHMIN-RV32-NEXT:    and a4, a4, a2
+; ZVFHMIN-RV32-NEXT:    lhu a1, 12(sp)
+; ZVFHMIN-RV32-NEXT:    vmv.v.x v9, a4
+; ZVFHMIN-RV32-NEXT:    lhu a4, 14(sp)
+; ZVFHMIN-RV32-NEXT:    vslide1down.vx v9, v9, a3
+; ZVFHMIN-RV32-NEXT:    and a1, a1, a2
 ; ZVFHMIN-RV32-NEXT:    vslide1down.vx v9, v9, a1
-; ZVFHMIN-RV32-NEXT:    and a2, a2, a3
-; ZVFHMIN-RV32-NEXT:    vslide1down.vx v9, v9, a2
-; ZVFHMIN-RV32-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-RV32-NEXT:    and a1, a1, a3
+; ZVFHMIN-RV32-NEXT:    and a2, a4, a2
 ; ZVFHMIN-RV32-NEXT:    vmv.v.i v0, 15
-; ZVFHMIN-RV32-NEXT:    vslide1down.vx v9, v9, a1
+; ZVFHMIN-RV32-NEXT:    vslide1down.vx v9, v9, a2
 ; ZVFHMIN-RV32-NEXT:    vslidedown.vi v9, v8, 4, v0.t
 ; ZVFHMIN-RV32-NEXT:    vse16.v v9, (a0)
 ; ZVFHMIN-RV32-NEXT:    addi sp, sp, 16
@@ -564,41 +556,33 @@ define void @fabs_v8f16(ptr %x) {
 ; ZVFHMIN-RV64-NEXT:    vle16.v v8, (a0)
 ; ZVFHMIN-RV64-NEXT:    mv a1, sp
 ; ZVFHMIN-RV64-NEXT:    vse16.v v8, (a1)
-; ZVFHMIN-RV64-NEXT:    flh fa5, 2(sp)
-; ZVFHMIN-RV64-NEXT:    flh fa4, 0(sp)
-; ZVFHMIN-RV64-NEXT:    flh fa3, 4(sp)
-; ZVFHMIN-RV64-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-RV64-NEXT:    fmv.x.h a2, fa4
-; ZVFHMIN-RV64-NEXT:    lui a3, 8
-; ZVFHMIN-RV64-NEXT:    fmv.x.h a4, fa3
-; ZVFHMIN-RV64-NEXT:    flh fa5, 6(sp)
-; ZVFHMIN-RV64-NEXT:    addiw a3, a3, -1
-; ZVFHMIN-RV64-NEXT:    and a2, a2, a3
-; ZVFHMIN-RV64-NEXT:    vmv.v.x v8, a2
-; ZVFHMIN-RV64-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-RV64-NEXT:    flh fa5, 10(sp)
-; ZVFHMIN-RV64-NEXT:    and a1, a1, a3
+; ZVFHMIN-RV64-NEXT:    lhu a1, 2(sp)
+; ZVFHMIN-RV64-NEXT:    lui a2, 8
+; ZVFHMIN-RV64-NEXT:    lhu a3, 0(sp)
+; ZVFHMIN-RV64-NEXT:    addiw a2, a2, -1
+; ZVFHMIN-RV64-NEXT:    and a1, a1, a2
+; ZVFHMIN-RV64-NEXT:    lhu a4, 4(sp)
+; ZVFHMIN-RV64-NEXT:    and a3, a3, a2
+; ZVFHMIN-RV64-NEXT:    vmv.v.x v8, a3
 ; ZVFHMIN-RV64-NEXT:    vslide1down.vx v8, v8, a1
-; ZVFHMIN-RV64-NEXT:    and a4, a4, a3
-; ZVFHMIN-RV64-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-RV64-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-RV64-NEXT:    and a4, a4, a2
+; ZVFHMIN-RV64-NEXT:    lhu a1, 6(sp)
 ; ZVFHMIN-RV64-NEXT:    vslide1down.vx v8, v8, a4
-; ZVFHMIN-RV64-NEXT:    and a2, a2, a3
-; ZVFHMIN-RV64-NEXT:    vslide1down.vx v8, v8, a2
-; ZVFHMIN-RV64-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-RV64-NEXT:    flh fa5, 12(sp)
-; ZVFHMIN-RV64-NEXT:    and a1, a1, a3
-; ZVFHMIN-RV64-NEXT:    and a2, a2, a3
-; ZVFHMIN-RV64-NEXT:    vmv.v.x v9, a2
-; ZVFHMIN-RV64-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-RV64-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-RV64-NEXT:    lhu a3, 10(sp)
+; ZVFHMIN-RV64-NEXT:    lhu a4, 8(sp)
+; ZVFHMIN-RV64-NEXT:    and a1, a1, a2
+; ZVFHMIN-RV64-NEXT:    vslide1down.vx v8, v8, a1
+; ZVFHMIN-RV64-NEXT:    and a3, a3, a2
+; ZVFHMIN-RV64-NEXT:    and a4, a4, a2
+; ZVFHMIN-RV64-NEXT:    lhu a1, 12(sp)
+; ZVFHMIN-RV64-NEXT:    vmv.v.x v9, a4
+; ZVFHMIN-RV64-NEXT:    lhu a4, 14(sp)
+; ZVFHMIN-RV64-NEXT:    vslide1down.vx v9, v9, a3
+; ZVFHMIN-RV64-NEXT:    and a1, a1, a2
 ; ZVFHMIN-RV64-NEXT:    vslide1down.vx v9, v9, a1
-; ZVFHMIN-RV64-NEXT:    and a2, a2, a3
-; ZVFHMIN-RV64-NEXT:    vslide1down.vx v9, v9, a2
-; ZVFHMIN-RV64-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-RV64-NEXT:    and a1, a1, a3
+; ZVFHMIN-RV64-NEXT:    and a2, a4, a2
 ; ZVFHMIN-RV64-NEXT:    vmv.v.i v0, 15
-; ZVFHMIN-RV64-NEXT:    vslide1down.vx v9, v9, a1
+; ZVFHMIN-RV64-NEXT:    vslide1down.vx v9, v9, a2
 ; ZVFHMIN-RV64-NEXT:    vslidedown.vi v9, v8, 4, v0.t
 ; ZVFHMIN-RV64-NEXT:    vse16.v v9, (a0)
 ; ZVFHMIN-RV64-NEXT:    addi sp, sp, 16
@@ -628,41 +612,33 @@ define void @fabs_v6f16(ptr %x) {
 ; ZVFHMIN-RV32-NEXT:    mv a1, sp
 ; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
 ; ZVFHMIN-RV32-NEXT:    vse16.v v8, (a1)
-; ZVFHMIN-RV32-NEXT:    flh fa5, 2(sp)
-; ZVFHMIN-RV32-NEXT:    flh fa4, 0(sp)
-; ZVFHMIN-RV32-NEXT:    flh fa3, 4(sp)
-; ZVFHMIN-RV32-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-RV32-NEXT:    fmv.x.h a2, fa4
-; ZVFHMIN-RV32-NEXT:    lui a3, 8
-; ZVFHMIN-RV32-NEXT:    fmv.x.h a4, fa3
-; ZVFHMIN-RV32-NEXT:    flh fa5, 6(sp)
-; ZVFHMIN-RV32-NEXT:    addi a3, a3, -1
-; ZVFHMIN-RV32-NEXT:    and a2, a2, a3
-; ZVFHMIN-RV32-NEXT:    vmv.v.x v8, a2
-; ZVFHMIN-RV32-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-RV32-NEXT:    flh fa5, 10(sp)
-; ZVFHMIN-RV32-NEXT:    and a1, a1, a3
+; ZVFHMIN-RV32-NEXT:    lhu a1, 2(sp)
+; ZVFHMIN-RV32-NEXT:    lui a2, 8
+; ZVFHMIN-RV32-NEXT:    lhu a3, 0(sp)
+; ZVFHMIN-RV32-NEXT:    addi a2, a2, -1
+; ZVFHMIN-RV32-NEXT:    and a1, a1, a2
+; ZVFHMIN-RV32-NEXT:    lhu a4, 4(sp)
+; ZVFHMIN-RV32-NEXT:    and a3, a3, a2
+; ZVFHMIN-RV32-NEXT:    vmv.v.x v8, a3
 ; ZVFHMIN-RV32-NEXT:    vslide1down.vx v8, v8, a1
-; ZVFHMIN-RV32-NEXT:    and a4, a4, a3
-; ZVFHMIN-RV32-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-RV32-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-RV32-NEXT:    and a4, a4, a2
+; ZVFHMIN-RV32-NEXT:    lhu a1, 6(sp)
 ; ZVFHMIN-RV32-NEXT:    vslide1down.vx v8, v8, a4
-; ZVFHMIN-RV32-NEXT:    and a2, a2, a3
-; ZVFHMIN-RV32-NEXT:    vslide1down.vx v8, v8, a2
-; ZVFHMIN-RV32-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-RV32-NEXT:    flh fa5, 12(sp)
-; ZVFHMIN-RV32-NEXT:    and a1, a1, a3
-; ZVFHMIN-RV32-NEXT:    and a2, a2, a3
-; ZVFHMIN-RV32-NEXT:    vmv.v.x v9, a2
-; ZVFHMIN-RV32-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-RV32-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-RV32-NEXT:    lhu a3, 10(sp)
+; ZVFHMIN-RV32-NEXT:    lhu a4, 8(sp)
+; ZVFHMIN-RV32-NEXT:    and a1, a1, a2
+; ZVFHMIN-RV32-NEXT:    vslide1down.vx v8, v8, a1
+; ZVFHMIN-RV32-NEXT:    and a3, a3, a2
+; ZVFHMIN-RV32-NEXT:    and a4, a4, a2
+; ZVFHMIN-RV32-NEXT:    lhu a1, 12(sp)
+; ZVFHMIN-RV32-NEXT:    vmv.v.x v9, a4
+; ZVFHMIN-RV32-NEXT:    lhu a4, 14(sp)
+; ZVFHMIN-RV32-NEXT:    vslide1down.vx v9, v9, a3
+; ZVFHMIN-RV32-NEXT:    and a1, a1, a2
 ; ZVFHMIN-RV32-NEXT:    vslide1down.vx v9, v9, a1
-; ZVFHMIN-RV32-NEXT:    and a2, a2, a3
-; ZVFHMIN-RV32-NEXT:    vslide1down.vx v9, v9, a2
-; ZVFHMIN-RV32-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-RV32-NEXT:    and a1, a1, a3
+; ZVFHMIN-RV32-NEXT:    and a2, a4, a2
 ; ZVFHMIN-RV32-NEXT:    vmv.v.i v0, 15
-; ZVFHMIN-RV32-NEXT:    vslide1down.vx v9, v9, a1
+; ZVFHMIN-RV32-NEXT:    vslide1down.vx v9, v9, a2
 ; ZVFHMIN-RV32-NEXT:    vsetivli zero, 6, e16, mf2, ta, mu
 ; ZVFHMIN-RV32-NEXT:    vslidedown.vi v9, v8, 4, v0.t
 ; ZVFHMIN-RV32-NEXT:    vse16.v v9, (a0)
@@ -678,41 +654,33 @@ define void @fabs_v6f16(ptr %x) {
 ; ZVFHMIN-RV64-NEXT:    mv a1, sp
 ; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
 ; ZVFHMIN-RV64-NEXT:    vse16.v v8, (a1)
-; ZVFHMIN-RV64-NEXT:    flh fa5, 2(sp)
-; ZVFHMIN-RV64-NEXT:    flh fa4, 0(sp)
-; ZVFHMIN-RV64-NEXT:    flh fa3, 4(sp)
-; ZVFHMIN-RV64-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-RV64-NEXT:    fmv.x.h a2, fa4
-; ZVFHMIN-RV64-NEXT:    lui a3, 8
-; ZVFHMIN-RV64-NEXT:    fmv.x.h a4, fa3
-; ZVFHMIN-RV64-NEXT:    flh fa5, 6(sp)
-; ZVFHMIN-RV64-NEXT:    addiw a3, a3, -1
-; ZVFHMIN-RV64-NEXT:    and a2, a2, a3
-; ZVFHMIN-RV64-NEXT:    vmv.v.x v8, a2
-; ZVFHMIN-RV64-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-RV64-NEXT:    flh fa5, 10(sp)
-; ZVFHMIN-RV64-NEXT:    and a1, a1, a3
+; ZVFHMIN-RV64-NEXT:    lhu a1, 2(sp)
+; ZVFHMIN-RV64-NEXT:    lui a2, 8
+; ZVFHMIN-RV64-NEXT:    lhu a3, 0(sp)
+; ZVFHMIN-RV64-NEXT:    addiw a2, a2, -1
+; ZVFHMIN-RV64-NEXT:    and a1, a1, a2
+; ZVFHMIN-RV64-NEXT:    lhu a4, 4(sp)
+; ZVFHMIN-RV64-NEXT:    and a3, a3, a2
+; ZVFHMIN-RV64-NEXT:    vmv.v.x v8, a3
 ; ZVFHMIN-RV64-NEXT:    vslide1down.vx v8, v8, a1
-; ZVFHMIN-RV64-NEXT:    and a4, a4, a3
-; ZVFHMIN-RV64-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-RV64-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-RV64-NEXT:    and a4, a4, a2
+; ZVFHMIN-RV64-NEXT:    lhu a1, 6(sp)
 ; ZVFHMIN-RV64-NEXT:    vslide1down.vx v8, v8, a4
-; ZVFHMIN-RV64-NEXT:    and a2, a2, a3
-; ZVFHMIN-RV64-NEXT:    vslide1down.vx v8, v8, a2
-; ZVFHMIN-RV64-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-RV64-NEXT:    flh fa5, 12(sp)
-; ZVFHMIN-RV64-NEXT:    and a1, a1, a3
-; ZVFHMIN-RV64-NEXT:    and a2, a2, a3
-; ZVFHMIN-RV64-NEXT:    vmv.v.x v9, a2
-; ZVFHMIN-RV64-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-RV64-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-RV64-NEXT:    lhu a3, 10(sp)
+; ZVFHMIN-RV64-NEXT:    lhu a4, 8(sp)
+; ZVFHMIN-RV64-NEXT:    and a1, a1, a2
+; ZVFHMIN-RV64-NEXT:    vslide1down.vx v8, v8, a1
+; ZVFHMIN-RV64-NEXT:    and a3, a3, a2
+; ZVFHMIN-RV64-NEXT:    and a4, a4, a2
+; ZVFHMIN-RV64-NEXT:    lhu a1, 12(sp)
+; ZVFHMIN-RV64-NEXT:    vmv.v.x v9, a4
+; ZVFHMIN-RV64-NEXT:    lhu a4, 14(sp)
+; ZVFHMIN-RV64-NEXT:    vslide1down.vx v9, v9, a3
+; ZVFHMIN-RV64-NEXT:    and a1, a1, a2
 ; ZVFHMIN-RV64-NEXT:    vslide1down.vx v9, v9, a1
-; ZVFHMIN-RV64-NEXT:    and a2, a2, a3
-; ZVFHMIN-RV64-NEXT:    vslide1down.vx v9, v9, a2
-; ZVFHMIN-RV64-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-RV64-NEXT:    and a1, a1, a3
+; ZVFHMIN-RV64-NEXT:    and a2, a4, a2
 ; ZVFHMIN-RV64-NEXT:    vmv.v.i v0, 15
-; ZVFHMIN-RV64-NEXT:    vslide1down.vx v9, v9, a1
+; ZVFHMIN-RV64-NEXT:    vslide1down.vx v9, v9, a2
 ; ZVFHMIN-RV64-NEXT:    vsetivli zero, 6, e16, mf2, ta, mu
 ; ZVFHMIN-RV64-NEXT:    vslidedown.vi v9, v8, 4, v0.t
 ; ZVFHMIN-RV64-NEXT:    vse16.v v9, (a0)
@@ -898,71 +866,55 @@ define void @copysign_v8f16(ptr %x, ptr %y) {
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a1)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    mv a1, sp
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v9, (a1)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 18(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa4, 2(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 16(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa3, 0(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a3, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa4, 20(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a4, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a5, fa3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 4(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a6, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lh a2, 18(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    lui a1, 1048568
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a7, a2, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h t0, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 22(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    lui t1, 8
-; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a2, t1, -1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a3, a3, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h t2, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 6(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a3, a3, a7
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a4, t1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, a5, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a7, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 26(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a4, a5, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a3, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a4, 2(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lui a5, 8
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a2, a5, -1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a6, 16(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a7, 0(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a4, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a3, a4, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a6, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a6, a7, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a4, a6, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lh a6, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a7, 4(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.x v8, a4
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a3, a6, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a7, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lh a6, 22(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a7, 6(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a3, a4, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a3, a6, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a7, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lh a6, 26(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a3, a4, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a3, 10(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a6, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, t0, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a6, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a7, 8(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a3, a3, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a3, a3, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a6, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, a7, a2
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    or a4, a5, a4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a5, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 24(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, t2, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a6, a7, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a7, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 8(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a4, a6, a4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a3, a3, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a4, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 28(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, a5, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a3, a5, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, a7, t1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a6, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 12(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a4, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a4, a4, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lh a5, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a6, 12(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.x v9, a4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a4, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 30(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v9, v9, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a3, a6, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a4, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a5, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 14(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a3, a4, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, a5, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a3, a6, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lh a4, 30(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a6, 14(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a3, a3, a5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v9, v9, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a5, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a3, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a4, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a6, a2
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a2, a1
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.i v0, 15
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v9, v9, a1
@@ -982,71 +934,55 @@ define void @copysign_v8f16(ptr %x, ptr %y) {
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a1)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    mv a1, sp
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v9, (a1)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 18(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa4, 2(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 16(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa3, 0(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a3, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa4, 20(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a4, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a5, fa3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 4(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a6, fa4
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lh a2, 18(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    lui a1, 1048568
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a7, a2, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h t0, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 22(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    lui t1, 8
-; ZVFHMIN-ZFHIN-RV64-NEXT:    addiw a2, t1, -1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a3, a3, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h t2, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 6(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a3, a3, a7
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a4, t1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, a5, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a7, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 26(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a4, a5, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a3, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a4, 2(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lui a5, 8
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addiw a2, a5, -1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a6, 16(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a7, 0(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a4, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a3, a4, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a6, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a6, a7, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a4, a6, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lh a6, 20(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a7, 4(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.x v8, a4
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a3, a6, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a7, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lh a6, 22(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a7, 6(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a3, a4, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a3, a6, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a7, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lh a6, 26(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a3, a4, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a3, 10(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a6, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, t0, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a6, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a7, 8(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a3, a3, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a3, a3, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a6, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, a7, a2
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    or a4, a5, a4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a5, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 24(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, t2, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a6, a7, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a7, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 8(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a4, a6, a4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a3, a3, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a4, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 28(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, a5, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a3, a5, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, a7, t1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a6, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 12(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a4, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a4, a4, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lh a5, 28(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a6, 12(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.x v9, a4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a4, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 30(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v9, v9, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a3, a6, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a4, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a5, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 14(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a3, a4, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, a5, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a3, a6, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lh a4, 30(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a6, 14(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a3, a3, a5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v9, v9, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a5, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a3, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a4, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a6, a2
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a2, a1
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.i v0, 15
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v9, v9, a1
@@ -1202,71 +1138,55 @@ define void @copysign_v6f16(ptr %x, ptr %y) {
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a1)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    mv a1, sp
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v9, (a1)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 18(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa4, 2(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 16(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa3, 0(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a3, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa4, 20(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a4, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a5, fa3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 4(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a6, fa4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lh a2, 18(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    lui a1, 1048568
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a7, a2, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h t0, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 22(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    lui t1, 8
-; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a2, t1, -1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a3, a3, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h t2, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 6(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a3, a3, a7
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a4, t1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, a5, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a7, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 26(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a4, a5, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a3, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a4, 2(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lui a5, 8
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a2, a5, -1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a6, 16(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a7, 0(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a4, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a3, a4, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a6, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a6, a7, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a4, a6, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lh a6, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a7, 4(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.x v8, a4
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a3, a6, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a7, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lh a6, 22(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a7, 6(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a3, a4, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a3, a6, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a7, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lh a6, 26(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a3, a4, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a3, 10(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a6, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, t0, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a6, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a7, 8(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a3, a3, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a3, a3, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a6, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, a7, a2
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    or a4, a5, a4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a5, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 24(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, t2, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a6, a7, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a7, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 8(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a4, a6, a4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a3, a3, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a4, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 28(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, a5, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a3, a5, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, a7, t1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a6, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 12(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a4, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a4, a4, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lh a5, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a6, 12(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.x v9, a4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a4, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 30(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v9, v9, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a3, a6, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a4, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a5, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 14(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a3, a4, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, a5, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a3, a6, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lh a4, 30(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a6, 14(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a3, a3, a5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v9, v9, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a5, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a3, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a4, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a6, a2
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a2, a1
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.i v0, 15
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v9, v9, a1
@@ -1288,71 +1208,55 @@ define void @copysign_v6f16(ptr %x, ptr %y) {
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a1)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    mv a1, sp
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v9, (a1)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 18(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa4, 2(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 16(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa3, 0(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a3, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa4, 20(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a4, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a5, fa3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 4(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a6, fa4
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lh a2, 18(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    lui a1, 1048568
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a7, a2, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h t0, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 22(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    lui t1, 8
-; ZVFHMIN-ZFHIN-RV64-NEXT:    addiw a2, t1, -1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a3, a3, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h t2, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 6(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a3, a3, a7
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a4, t1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, a5, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a7, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 26(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a4, a5, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a3, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a4, 2(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lui a5, 8
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addiw a2, a5, -1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a6, 16(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a7, 0(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a4, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a3, a4, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a6, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a6, a7, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a4, a6, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lh a6, 20(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a7, 4(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.x v8, a4
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a3, a6, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a7, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lh a6, 22(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a7, 6(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a3, a4, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a3, a6, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a7, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lh a6, 26(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a3, a4, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a3, 10(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a6, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, t0, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a6, 24(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a7, 8(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a3, a3, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a3, a3, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a6, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, a7, a2
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    or a4, a5, a4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a5, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 24(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, t2, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a6, a7, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a7, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 8(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a4, a6, a4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a3, a3, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a4, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 28(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, a5, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a3, a5, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, a7, t1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a6, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 12(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a4, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a4, a4, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lh a5, 28(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a6, 12(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.x v9, a4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a4, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 30(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v9, v9, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a3, a6, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a4, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a5, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 14(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a3, a4, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, a5, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a3, a6, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lh a4, 30(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a6, 14(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a3, a3, a5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v9, v9, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a5, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a3, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a4, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a6, a2
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a2, a1
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.i v0, 15
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v9, v9, a1
@@ -1521,50 +1425,42 @@ define void @copysign_vf_v8f16(ptr %x, half %y) {
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v8, (a0)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    mv a1, sp
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a1)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 2(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    lui a2, 1048568
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a1, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a2, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a2, 2(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    lui a3, 8
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a4, 0(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a3, a3, -1
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a2, a3
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    or a2, a2, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a4, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 4(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a4, a3
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    or a4, a4, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a5, 4(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.x v8, a4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a4, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 6(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a4, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a4, a4, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 10(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a2, 6(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, a5, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a5, a5, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a2, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a4, 10(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a5, 8(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    or a2, a2, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a4, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 8(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a2
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a4, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, a5, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a5, a5, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.x v9, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a2, 12(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    or a4, a4, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a2, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 12(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a2, a2, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.x v9, a2
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v9, v9, a4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a4, 14(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a2, a3
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    or a2, a2, a1
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v9, v9, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a2, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a3, a4, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a3, a1
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.i v0, 15
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v9, v9, a1
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vslidedown.vi v9, v8, 4, v0.t
@@ -1580,50 +1476,42 @@ define void @copysign_vf_v8f16(ptr %x, half %y) {
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vle16.v v8, (a0)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    mv a1, sp
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a1)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 2(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    lui a2, 1048568
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a1, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a2, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a2, 2(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    lui a3, 8
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a4, 0(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    addiw a3, a3, -1
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a2, a3
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    or a2, a2, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a4, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 4(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a4, a3
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    or a4, a4, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a5, 4(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.x v8, a4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a4, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 6(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a4, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a4, a4, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 10(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a2, 6(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, a5, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a5, a5, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a2, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a4, 10(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a5, 8(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    or a2, a2, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a4, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 8(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a2
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a4, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, a5, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a5, a5, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.x v9, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a2, 12(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    or a4, a4, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a2, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 12(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a2, a2, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.x v9, a2
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v9, v9, a4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a4, 14(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a2, a3
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    or a2, a2, a1
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v9, v9, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a2, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a3, a4, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a3, a1
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.i v0, 15
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v9, v9, a1
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vslidedown.vi v9, v8, 4, v0.t
@@ -1752,54 +1640,46 @@ define void @copysign_vf_v6f16(ptr %x, half %y) {
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    mv a1, sp
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a1)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 2(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    lui a2, 1048568
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a1, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a3, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a3, 2(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    lui a4, 8
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a5, 0(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a4, a4, -1
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    and a3, a3, a4
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    or a3, a3, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a5, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 4(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, a5, a4
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    or a5, a5, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a6, 4(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.x v8, a5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a5, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 6(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, a5, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a3, 6(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, a6, a4
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    or a5, a5, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 10(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    and a3, a3, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a5, 10(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    or a3, a3, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a5, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a6, 8(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a3
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, a5, a4
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    or a5, a5, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a3, a3, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a3, a6, a4
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a3, a1
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.x v9, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 12(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v9, v9, a5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a1, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 14(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a2, a4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a2, a2, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v9, v9, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a2, a4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a2, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.i v0, 15
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a1, 12(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a3, fa5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a3, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a3, 14(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a1, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a1, a2
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a3, a3, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a2, a3, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.i v0, 15
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v9, v9, a2
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetivli zero, 6, e16, mf2, ta, mu
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vslidedown.vi v9, v8, 4, v0.t
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v9, (a0)
@@ -1815,54 +1695,46 @@ define void @copysign_vf_v6f16(ptr %x, half %y) {
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    mv a1, sp
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a1)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 2(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a1, fa0
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    lui a2, 1048568
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a1, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a3, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a3, 2(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    lui a4, 8
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 0(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a5, 0(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    addiw a4, a4, -1
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    and a3, a3, a4
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    or a3, a3, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a5, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 4(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, a5, a4
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    or a5, a5, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a6, 4(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.x v8, a5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a5, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 6(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, a5, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a3, 6(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, a6, a4
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    or a5, a5, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 10(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    and a3, a3, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a5, 10(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    or a3, a3, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a5, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 8(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a6, 8(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a3
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, a5, a4
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    or a5, a5, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a3, a3, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a3, a6, a4
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a3, a1
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.x v9, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 12(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v9, v9, a5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a1, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 14(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a2, a4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a2, a2, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v9, v9, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a2, a4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a2, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.i v0, 15
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a1, 12(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a3, fa5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a3, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a3, 14(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a1, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a1, a2
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v9, v9, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a3, a3, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a2, a3, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.i v0, 15
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v9, v9, a2
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vsetivli zero, 6, e16, mf2, ta, mu
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vslidedown.vi v9, v8, 4, v0.t
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v9, (a0)
@@ -2051,77 +1923,61 @@ define void @copysign_neg_v8f16(ptr %x, ptr %y) {
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, mu
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v8, (a1)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v9, (a0)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    lui a1, 8
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vxor.vx v8, v8, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    mv a2, sp
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v9, (a2)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a2, sp, 16
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a2)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 2(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa4, 18(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 0(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a4, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa4, 16(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa3, 4(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a5, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 20(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a6, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a7, fa3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a2, a1, -1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h t0, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 6(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and t1, a3, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    lui a3, 1048568
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a4, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h t2, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 22(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a4, t1, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lui a3, 8
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vxor.vx v8, v8, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    mv a1, sp
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a1, sp, 16
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a2, 2(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a1, a3, -1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lh a5, 18(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lui a2, 1048568
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a6, 0(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a7, 16(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, a5, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a6, a6, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h t1, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a4, a4, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, a6, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a6, a7, a3
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    or a5, a5, a6
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a6, 4(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lh a7, 20(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.x v8, a5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a4, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 26(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a6, a1
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, a7, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a6, t0, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a5, a5, a6
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a6, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 8(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, t2, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a7, t1, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h t0, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 24(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a5, a5, a7
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a6, 6(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lh a7, 22(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a4, a4, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a6, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, a7, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a6, 10(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a4, a4, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lh a4, 26(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, a6, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a6, 8(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a7, 24(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a4, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a5, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 12(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a6, a6, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a4, a4, a6
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a5, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a5, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 28(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a6, t0, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a6, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.x v9, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a4, a5, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, a6, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a3, a7, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a3, a5, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a5, 12(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lh a6, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.x v9, a3
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v9, v9, a4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, a5, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a1, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a4, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 30(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a5, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v9, v9, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a4, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a1, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, a5, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a3, a6, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a4, 14(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lh a6, 30(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a3, a5, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v9, v9, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a4, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a6, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a1, a2
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.i v0, 15
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v9, v9, a1
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vslidedown.vi v9, v8, 4, v0.t
@@ -2136,77 +1992,61 @@ define void @copysign_neg_v8f16(ptr %x, ptr %y) {
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, mu
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vle16.v v8, (a1)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vle16.v v9, (a0)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    lui a1, 8
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vxor.vx v8, v8, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    mv a2, sp
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v9, (a2)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a2, sp, 16
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a2)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 2(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa4, 18(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 0(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a4, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa4, 16(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa3, 4(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a5, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 20(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a6, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a7, fa3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    addiw a2, a1, -1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h t0, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 6(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and t1, a3, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    lui a3, 1048568
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a4, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h t2, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 22(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a4, t1, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lui a3, 8
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vxor.vx v8, v8, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    mv a1, sp
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a1, sp, 16
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a2, 2(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addiw a1, a3, -1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lh a5, 18(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lui a2, 1048568
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a6, 0(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a7, 16(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, a5, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a6, a6, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h t1, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a4, a4, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, a6, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a6, a7, a3
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    or a5, a5, a6
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a6, 4(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lh a7, 20(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.x v8, a5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a4, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 26(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a6, a1
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, a7, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a6, t0, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a5, a5, a6
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a6, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 8(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, t2, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a7, t1, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h t0, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 24(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a5, a5, a7
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a6, 6(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lh a7, 22(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a4, a4, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a6, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, a7, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a6, 10(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a4, a4, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lh a4, 26(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, a6, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a6, 8(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a7, 24(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a4, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a5, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 12(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a6, a6, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a4, a4, a6
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a5, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a5, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 28(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a6, t0, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a6, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.x v9, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a4, a5, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, a6, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a3, a7, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a3, a5, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a5, 12(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lh a6, 28(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.x v9, a3
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v9, v9, a4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, a5, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a1, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a4, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 30(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a5, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v9, v9, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a4, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a1, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, a5, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a3, a6, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a4, 14(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lh a6, 30(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a3, a5, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v9, v9, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a4, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a6, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a1, a2
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.i v0, 15
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v9, v9, a1
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vslidedown.vi v9, v8, 4, v0.t
@@ -2360,78 +2200,62 @@ define void @copysign_neg_v6f16(ptr %x, ptr %y) {
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetivli zero, 6, e16, mf2, ta, ma
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v8, (a1)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vle16.v v9, (a0)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    lui a1, 8
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lui a3, 8
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vxor.vx v8, v8, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    mv a2, sp
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v9, (a2)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a2, sp, 16
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a2)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 2(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa4, 18(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 0(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a4, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa4, 16(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa3, 4(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a5, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 20(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a6, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a7, fa3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a2, a1, -1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h t0, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 6(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and t1, a3, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    lui a3, 1048568
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a4, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h t2, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 22(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a4, t1, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vxor.vx v8, v8, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    mv a1, sp
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a1, sp, 16
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a2, 2(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a1, a3, -1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lh a5, 18(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lui a2, 1048568
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a6, 0(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a7, 16(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, a5, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a6, a6, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h t1, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a4, a4, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, a6, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a6, a7, a3
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    or a5, a5, a6
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a6, 4(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lh a7, 20(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.x v8, a5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a4, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 26(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a6, a1
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, a7, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a6, t0, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a5, a5, a6
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a6, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 8(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, t2, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a7, t1, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h t0, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 24(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a5, a5, a7
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a6, 6(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lh a7, 22(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a4, a4, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a6, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, a7, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a6, 10(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a4, a4, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lh a4, 26(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, a6, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a6, 8(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a7, 24(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a4, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a5, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 12(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a6, a6, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a4, a4, a6
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a5, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a5, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 28(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a6, t0, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a6, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.x v9, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a4, a5, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, a6, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a3, a7, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a3, a5, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a5, 12(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lh a6, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.x v9, a3
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v9, v9, a4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, a5, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a1, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a4, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 30(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a5, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v9, v9, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a4, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a1, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a5, a5, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a3, a6, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a4, 14(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lh a6, 30(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a3, a5, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v9, v9, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a4, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a6, a2
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a1, a2
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.i v0, 15
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v9, v9, a1
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetivli zero, 6, e16, mf2, ta, mu
@@ -2447,78 +2271,62 @@ define void @copysign_neg_v6f16(ptr %x, ptr %y) {
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vsetivli zero, 6, e16, mf2, ta, ma
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vle16.v v8, (a1)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vle16.v v9, (a0)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    lui a1, 8
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lui a3, 8
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vxor.vx v8, v8, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    mv a2, sp
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v9, (a2)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a2, sp, 16
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a2)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 2(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa4, 18(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 0(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a4, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa4, 16(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa3, 4(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a5, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 20(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a6, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a7, fa3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    addiw a2, a1, -1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h t0, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 6(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and t1, a3, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    lui a3, 1048568
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a4, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h t2, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 22(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a4, t1, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vxor.vx v8, v8, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    mv a1, sp
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v9, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a1, sp, 16
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a2, 2(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addiw a1, a3, -1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lh a5, 18(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lui a2, 1048568
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a6, 0(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a7, 16(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, a5, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a6, a6, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h t1, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 10(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a4, a4, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, a6, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a6, a7, a3
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    or a5, a5, a6
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a6, 4(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lh a7, 20(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.x v8, a5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a4, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 26(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a6, a1
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, a7, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a6, t0, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a5, a5, a6
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a6, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 8(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, t2, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a7, t1, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h t0, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 24(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a5, a5, a7
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a6, 6(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lh a7, 22(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a4, a4, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a6, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, a7, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a6, 10(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a4, a4, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lh a4, 26(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, a6, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a6, 8(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a7, 24(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a4, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a5, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 12(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a6, a6, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a4, a4, a6
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a5, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a5, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 28(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a6, t0, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a6, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.x v9, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a4, a5, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, a6, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a3, a7, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a3, a5, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a5, 12(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lh a6, 28(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.x v9, a3
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v9, v9, a4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, a5, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a1, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a4, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 30(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a5, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v9, v9, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a4, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a1, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a2, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a5, a5, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a3, a6, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a4, 14(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lh a6, 30(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a3, a5, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v9, v9, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a4, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a6, a2
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a1, a2
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.i v0, 15
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v9, v9, a1
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vsetivli zero, 6, e16, mf2, ta, mu
@@ -2678,38 +2486,30 @@ define void @copysign_neg_trunc_v4f16_v4f32(ptr %x, ptr %y) {
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vxor.vx v8, v8, a1
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a2, sp, 8
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a2)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 2(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa4, 10(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa3, 0(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a3, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a4, fa3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 8(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a5, a1, -1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a2, a5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    lui a6, 1048568
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a7, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 4(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a3, a3, a6
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a2, a2, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a7, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a2, 2(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a3, a1, -1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a2, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lh a4, 10(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lui a5, 1048568
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a6, 0(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a7, 8(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a4, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a2, a2, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a6, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a7, a1
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a4, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a4, 4(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lh a6, 12(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.x v8, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 6(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a3, a3, a5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a1, a6
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 14(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a3, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a4, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a6, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a2, 6(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lh a6, 14(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a4, a1
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a2, a5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a1, a6
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a2, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a6, a5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a2, a1
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a1
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a0)
@@ -2730,38 +2530,30 @@ define void @copysign_neg_trunc_v4f16_v4f32(ptr %x, ptr %y) {
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vxor.vx v8, v8, a1
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a2, sp, 8
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a2)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 2(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa4, 10(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa3, 0(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a3, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a4, fa3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 8(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    addiw a5, a1, -1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a2, a5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    lui a6, 1048568
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a7, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 4(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a3, a3, a6
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a2, a2, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a7, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a2, 2(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addiw a3, a1, -1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a2, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lh a4, 10(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lui a5, 1048568
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a6, 0(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a7, 8(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a4, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a2, a2, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a6, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a7, a1
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a4, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a4, 4(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lh a6, 12(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.x v8, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 6(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a3, a3, a5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a1, a6
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 14(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a3, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a4, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a6, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a2, 6(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lh a6, 14(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a4, a1
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a2, a5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a1, a6
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a2, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a6, a5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a2, a1
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a1
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a0)
@@ -2885,38 +2677,30 @@ define void @copysign_neg_trunc_v3f16_v3f32(ptr %x, ptr %y) {
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vxor.vx v8, v8, a1
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a2, sp, 8
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vse16.v v8, (a2)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 2(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa4, 10(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa3, 0(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a3, fa4
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a4, fa3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 8(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a5, a1, -1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a2, a5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    lui a6, 1048568
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a7, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 4(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a3, a3, a6
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a2, a2, a3
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a7, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a2, 2(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    addi a3, a1, -1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a2, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lh a4, 10(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lui a5, 1048568
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a6, 0(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a7, 8(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a4, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a2, a2, a4
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a6, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a7, a1
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a4, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a4, 4(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lh a6, 12(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vmv.v.x v8, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 6(sp)
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a2
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a3, a3, a5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a1, a6
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    flh fa5, 14(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a3, a1
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a4, a4, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a6, a5
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lhu a2, 6(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    lh a6, 14(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a4, a1
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a2, a5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a1, a6
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a2, a2, a3
+; ZVFHMIN-ZFHIN-RV32-NEXT:    and a1, a6, a5
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    or a1, a2, a1
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vslide1down.vx v8, v8, a1
 ; ZVFHMIN-ZFHIN-RV32-NEXT:    vsetivli zero, 3, e16, mf4, ta, ma
@@ -2939,38 +2723,30 @@ define void @copysign_neg_trunc_v3f16_v3f32(ptr %x, ptr %y) {
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vxor.vx v8, v8, a1
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    addi a2, sp, 8
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vse16.v v8, (a2)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 2(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa4, 10(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa3, 0(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a3, fa4
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a4, fa3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 8(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    addiw a5, a1, -1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a2, a5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    lui a6, 1048568
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a7, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 4(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a3, a3, a6
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a2, a2, a3
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a7, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a3, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a2, 2(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    addiw a3, a1, -1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a2, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lh a4, 10(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lui a5, 1048568
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a6, 0(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a7, 8(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a4, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a2, a2, a4
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a6, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a7, a1
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a4, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a4, 4(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lh a6, 12(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vmv.v.x v8, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 6(sp)
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a2
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a3, a3, a5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a1, a6
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a2, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    flh fa5, 14(sp)
-; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a3, a1
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a4, a4, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a6, a5
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lhu a2, 6(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    lh a6, 14(sp)
+; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a4, a1
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a1
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a2, a5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    fmv.x.h a1, fa5
-; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a1, a6
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a2, a2, a3
+; ZVFHMIN-ZFHIN-RV64-NEXT:    and a1, a6, a5
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    or a1, a2, a1
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vslide1down.vx v8, v8, a1
 ; ZVFHMIN-ZFHIN-RV64-NEXT:    vsetivli zero, 3, e16, mf4, ta, ma



More information about the llvm-commits mailing list