[llvm] 5ba40c7 - [RISCV] Custom lower FP_TO_FP16 and FP16_TO_FP to correct ABI of libcall
Alex Bradbury via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 30 08:42:18 PDT 2023
Author: Alex Bradbury
Date: 2023-06-30T16:41:49+01:00
New Revision: 5ba40c7be35679a26d7b2c29affaad09aabf5e22
URL: https://github.com/llvm/llvm-project/commit/5ba40c7be35679a26d7b2c29affaad09aabf5e22
DIFF: https://github.com/llvm/llvm-project/commit/5ba40c7be35679a26d7b2c29affaad09aabf5e22.diff
LOG: [RISCV] Custom lower FP_TO_FP16 and FP16_TO_FP to correct ABI of libcall
As introduced in D99148, RISC-V uses the softPromoteHalf legalisation
for fp16 values without zfh, with logic ensuring that f16 values are
passed in the lower bits of FPRs (see D98670) when F or D support is
present. This legalisation produces ISD::FP_TO_FP16 and ISD::FP16_TO_FP
nodes which (as described in ISDOpcodes.h) provide a "semi-softened
interface for dealing with f16 (as an i16)", i.e. the return type of
FP_TO_FP16 is an integer rather than a float (and the argument of
FP16_TO_FP is an integer). The remainder of this description focuses
primarily on FP_TO_FP16 for ease of explanation.
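To illustrate, the semi-softened interface can be written as C-style
signatures (a sketch only: these name SelectionDAG opcodes, not real
symbols; the half value is carried as an integer on both sides):

  unsigned short fp_to_fp16(float);            /* ISD::FP_TO_FP16 */
  float fp16_to_fp(unsigned short);            /* ISD::FP16_TO_FP */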
FP_TO_FP16 is lowered to a libcall to `__truncsfhf2 (float)` or
`__truncdfhf2 (double)`. As of D92241, `_Float16` is used as the return
type of these libcalls if the host compiler accepts `_Float16` in a test
input (i.e. dst_t is set to `_Float16`). `_Float16` is enabled for the
RISC-V target as of D105001 and so the return value should be passed in
an FPR on hard float ABIs.
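Concretely, the relevant libcall prototypes look like this when
compiler-rt is built with `_Float16` support (a sketch based on the
description above; under the ilp32f/ilp32d/lp64f/lp64d ABIs the
`_Float16` value is passed and returned in fa0 rather than in an
integer register):

  _Float16 __truncsfhf2(float a);
  _Float16 __truncdfhf2(double a);
  float __extendhfsf2(_Float16 a);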
This patch fixes the ABI issue in what appears to be a minimally
invasive way: the softPromoteHalf logic is left undisturbed, and
FP_TO_FP16 is lowered to an f32-returning libcall whose result is then
converted to an XLen integer value.
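A rough C analogue of the new FP_TO_FP16 lowering (assuming the
`_Float16`-returning `__truncsfhf2` above; the real DAG code is in the
RISCVISelLowering.cpp hunk below):

  #include <stdint.h>
  #include <string.h>

  extern _Float16 __truncsfhf2(float);

  uint32_t lower_fp_to_fp16(float x) {
    _Float16 h = __truncsfhf2(x);   /* result returned in fa0 */
    uint16_t bits;
    memcpy(&bits, &h, sizeof bits); /* bit move; fmv.x.w in the real code */
    return bits;                    /* i16 carried in an XLen integer reg */
  }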
As can be seen in the test changes, the custom lowering for FP16_TO_FP
means the libcall is no longer tail-callable.
Although this patch fixes the issue, there are two open items:
* Redundant fmv.x.w and fmv.w.x pairs are now sometimes produced during
lowering (not a correctness issue).
* No coverage for STRICT variants of the FP16 conversion opcodes.
Differential Revision: https://reviews.llvm.org/D151284
Added:
Modified:
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/test/CodeGen/RISCV/calling-conv-half.ll
llvm/test/CodeGen/RISCV/copysign-casts.ll
llvm/test/CodeGen/RISCV/fp16-promote.ll
llvm/test/CodeGen/RISCV/fpclamptosat.ll
llvm/test/CodeGen/RISCV/half-convert.ll
llvm/test/CodeGen/RISCV/libcall-tail-calls.ll
llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
llvm/test/CodeGen/RISCV/rvv/pr63596.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 539aa2e5cce7e7..78e00fc06b6d66 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -352,7 +352,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
static const unsigned FPOpToExpand[] = {
ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW,
- ISD::FREM, ISD::FP16_TO_FP, ISD::FP_TO_FP16};
+ ISD::FREM};
static const unsigned FPRndMode[] = {
ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FRINT, ISD::FROUND,
@@ -430,6 +430,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BF16_TO_FP, MVT::f32, Custom);
setOperationAction(ISD::FP_TO_BF16, MVT::f32,
Subtarget.isSoftFPABI() ? LibCall : Custom);
+ setOperationAction(ISD::FP_TO_FP16, MVT::f32, Custom);
+ setOperationAction(ISD::FP16_TO_FP, MVT::f32, Custom);
if (Subtarget.hasStdExtZfa())
setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
@@ -467,6 +469,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BF16_TO_FP, MVT::f64, Custom);
setOperationAction(ISD::FP_TO_BF16, MVT::f64,
Subtarget.isSoftFPABI() ? LibCall : Custom);
+ setOperationAction(ISD::FP_TO_FP16, MVT::f64, Custom);
+ setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
}
if (Subtarget.is64Bit()) {
@@ -4960,6 +4964,35 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
return Res;
}
+ case ISD::FP_TO_FP16: {
+ // Custom lower to ensure the libcall return is passed in an FPR on hard
+ // float ABIs.
+ assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
+ SDLoc DL(Op);
+ MakeLibCallOptions CallOptions;
+ RTLIB::Libcall LC =
+ RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::f16);
+ SDValue Res =
+ makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
+ if (Subtarget.is64Bit())
+ return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
+ return DAG.getBitcast(MVT::i32, Res);
+ }
+ case ISD::FP16_TO_FP: {
+ // Custom lower to ensure the libcall argument is passed in an FPR on hard
+ // float ABIs.
+ assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
+ SDLoc DL(Op);
+ MakeLibCallOptions CallOptions;
+ SDValue Arg = Subtarget.is64Bit()
+ ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32,
+ Op.getOperand(0))
+ : DAG.getBitcast(MVT::f32, Op.getOperand(0));
+ SDValue Res =
+ makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg, CallOptions, DL)
+ .first;
+ return Res;
+ }
case ISD::FTRUNC:
case ISD::FCEIL:
case ISD::FFLOOR:
diff --git a/llvm/test/CodeGen/RISCV/calling-conv-half.ll b/llvm/test/CodeGen/RISCV/calling-conv-half.ll
index 3b89158d7bfef2..dfc91ca145ad5e 100644
--- a/llvm/test/CodeGen/RISCV/calling-conv-half.ll
+++ b/llvm/test/CodeGen/RISCV/calling-conv-half.ll
@@ -83,7 +83,6 @@ define i32 @callee_half_in_regs(i32 %a, half %b) nounwind {
; RV32-ILP32F-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32-ILP32F-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32-ILP32F-NEXT: mv s0, a0
-; RV32-ILP32F-NEXT: fmv.x.w a0, fa0
; RV32-ILP32F-NEXT: call __extendhfsf2 at plt
; RV32-ILP32F-NEXT: fcvt.w.s a0, fa0, rtz
; RV32-ILP32F-NEXT: add a0, s0, a0
@@ -99,6 +98,7 @@ define i32 @callee_half_in_regs(i32 %a, half %b) nounwind {
; RV64-LP64F-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
; RV64-LP64F-NEXT: mv s0, a0
; RV64-LP64F-NEXT: fmv.x.w a0, fa0
+; RV64-LP64F-NEXT: fmv.w.x fa0, a0
; RV64-LP64F-NEXT: call __extendhfsf2 at plt
; RV64-LP64F-NEXT: fcvt.l.s a0, fa0, rtz
; RV64-LP64F-NEXT: addw a0, s0, a0
@@ -292,7 +292,6 @@ define i32 @callee_half_on_stack(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f,
; RV32-ILP32F-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32-ILP32F-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32-ILP32F-NEXT: mv s0, a7
-; RV32-ILP32F-NEXT: fmv.x.w a0, fa0
; RV32-ILP32F-NEXT: call __extendhfsf2 at plt
; RV32-ILP32F-NEXT: fcvt.w.s a0, fa0, rtz
; RV32-ILP32F-NEXT: add a0, s0, a0
@@ -308,6 +307,7 @@ define i32 @callee_half_on_stack(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f,
; RV64-LP64F-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
; RV64-LP64F-NEXT: mv s0, a7
; RV64-LP64F-NEXT: fmv.x.w a0, fa0
+; RV64-LP64F-NEXT: fmv.w.x fa0, a0
; RV64-LP64F-NEXT: call __extendhfsf2 at plt
; RV64-LP64F-NEXT: fcvt.l.s a0, fa0, rtz
; RV64-LP64F-NEXT: addw a0, s0, a0
@@ -602,7 +602,6 @@ define i32 @caller_half_ret() nounwind {
; RV32-ILP32F-NEXT: addi sp, sp, -16
; RV32-ILP32F-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32-ILP32F-NEXT: call callee_half_ret at plt
-; RV32-ILP32F-NEXT: fmv.x.w a0, fa0
; RV32-ILP32F-NEXT: call __extendhfsf2 at plt
; RV32-ILP32F-NEXT: fcvt.w.s a0, fa0, rtz
; RV32-ILP32F-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
@@ -615,6 +614,7 @@ define i32 @caller_half_ret() nounwind {
; RV64-LP64F-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-LP64F-NEXT: call callee_half_ret at plt
; RV64-LP64F-NEXT: fmv.x.w a0, fa0
+; RV64-LP64F-NEXT: fmv.w.x fa0, a0
; RV64-LP64F-NEXT: call __extendhfsf2 at plt
; RV64-LP64F-NEXT: fcvt.l.s a0, fa0, rtz
; RV64-LP64F-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/copysign-casts.ll b/llvm/test/CodeGen/RISCV/copysign-casts.ll
index 373edfaae782fe..d93f998eb3c165 100644
--- a/llvm/test/CodeGen/RISCV/copysign-casts.ll
+++ b/llvm/test/CodeGen/RISCV/copysign-casts.ll
@@ -163,7 +163,7 @@ define double @fold_promote_d_h(double %a, half %b) nounwind {
; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill
; RV32IFD-NEXT: fmv.d fs0, fa0
-; RV32IFD-NEXT: fmv.x.w a0, fa1
+; RV32IFD-NEXT: fmv.s fa0, fa1
; RV32IFD-NEXT: call __extendhfsf2 at plt
; RV32IFD-NEXT: fcvt.d.s fa5, fa0
; RV32IFD-NEXT: fsgnj.d fa0, fs0, fa5
@@ -179,6 +179,7 @@ define double @fold_promote_d_h(double %a, half %b) nounwind {
; RV64IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill
; RV64IFD-NEXT: fmv.d fs0, fa0
; RV64IFD-NEXT: fmv.x.w a0, fa1
+; RV64IFD-NEXT: fmv.w.x fa0, a0
; RV64IFD-NEXT: call __extendhfsf2 at plt
; RV64IFD-NEXT: fcvt.d.s fa5, fa0
; RV64IFD-NEXT: fsgnj.d fa0, fs0, fa5
@@ -264,7 +265,7 @@ define float @fold_promote_f_h(float %a, half %b) nounwind {
; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IF-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill
; RV32IF-NEXT: fmv.s fs0, fa0
-; RV32IF-NEXT: fmv.x.w a0, fa1
+; RV32IF-NEXT: fmv.s fa0, fa1
; RV32IF-NEXT: call __extendhfsf2 at plt
; RV32IF-NEXT: fsgnj.s fa0, fs0, fa0
; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
@@ -278,7 +279,7 @@ define float @fold_promote_f_h(float %a, half %b) nounwind {
; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill
; RV32IFD-NEXT: fmv.s fs0, fa0
-; RV32IFD-NEXT: fmv.x.w a0, fa1
+; RV32IFD-NEXT: fmv.s fa0, fa1
; RV32IFD-NEXT: call __extendhfsf2 at plt
; RV32IFD-NEXT: fsgnj.s fa0, fs0, fa0
; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
@@ -293,6 +294,7 @@ define float @fold_promote_f_h(float %a, half %b) nounwind {
; RV64IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill
; RV64IFD-NEXT: fmv.s fs0, fa0
; RV64IFD-NEXT: fmv.x.w a0, fa1
+; RV64IFD-NEXT: fmv.w.x fa0, a0
; RV64IFD-NEXT: call __extendhfsf2 at plt
; RV64IFD-NEXT: fsgnj.s fa0, fs0, fa0
; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/fp16-promote.ll b/llvm/test/CodeGen/RISCV/fp16-promote.ll
index ca15974b4101d0..2a03746b1f7eda 100644
--- a/llvm/test/CodeGen/RISCV/fp16-promote.ll
+++ b/llvm/test/CodeGen/RISCV/fp16-promote.ll
@@ -15,8 +15,14 @@ define void @test_load_store(ptr %p, ptr %q) nounwind {
define float @test_fpextend_float(ptr %p) nounwind {
; CHECK-LABEL: test_fpextend_float:
; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; CHECK-NEXT: lhu a0, 0(a0)
-; CHECK-NEXT: tail __extendhfsf2 at plt
+; CHECK-NEXT: fmv.w.x fa0, a0
+; CHECK-NEXT: call __extendhfsf2 at plt
+; CHECK-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
%a = load half, ptr %p
%r = fpext half %a to float
ret float %r
@@ -28,6 +34,7 @@ define double @test_fpextend_double(ptr %p) nounwind {
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; CHECK-NEXT: lhu a0, 0(a0)
+; CHECK-NEXT: fmv.w.x fa0, a0
; CHECK-NEXT: call __extendhfsf2 at plt
; CHECK-NEXT: fcvt.d.s fa0, fa0
; CHECK-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
@@ -46,6 +53,7 @@ define void @test_fptrunc_float(float %f, ptr %p) nounwind {
; CHECK-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; CHECK-NEXT: mv s0, a0
; CHECK-NEXT: call __truncsfhf2 at plt
+; CHECK-NEXT: fmv.x.w a0, fa0
; CHECK-NEXT: sh a0, 0(s0)
; CHECK-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; CHECK-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
@@ -64,6 +72,7 @@ define void @test_fptrunc_double(double %d, ptr %p) nounwind {
; CHECK-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; CHECK-NEXT: mv s0, a0
; CHECK-NEXT: call __truncdfhf2 at plt
+; CHECK-NEXT: fmv.x.w a0, fa0
; CHECK-NEXT: sh a0, 0(s0)
; CHECK-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; CHECK-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
@@ -80,22 +89,25 @@ define void @test_fadd(ptr %p, ptr %q) nounwind {
; CHECK-NEXT: addi sp, sp, -32
; CHECK-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; CHECK-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; CHECK-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; CHECK-NEXT: fsd fs0, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: fsd fs1, 8(sp) # 8-byte Folded Spill
; CHECK-NEXT: mv s0, a0
-; CHECK-NEXT: lhu s1, 0(a0)
-; CHECK-NEXT: lhu a0, 0(a1)
+; CHECK-NEXT: lhu a0, 0(a0)
+; CHECK-NEXT: lhu a1, 0(a1)
+; CHECK-NEXT: fmv.w.x fs0, a0
+; CHECK-NEXT: fmv.w.x fa0, a1
; CHECK-NEXT: call __extendhfsf2 at plt
-; CHECK-NEXT: fmv.s fs0, fa0
-; CHECK-NEXT: mv a0, s1
+; CHECK-NEXT: fmv.s fs1, fa0
+; CHECK-NEXT: fmv.s fa0, fs0
; CHECK-NEXT: call __extendhfsf2 at plt
-; CHECK-NEXT: fadd.s fa0, fa0, fs0
+; CHECK-NEXT: fadd.s fa0, fa0, fs1
; CHECK-NEXT: call __truncsfhf2 at plt
+; CHECK-NEXT: fmv.x.w a0, fa0
; CHECK-NEXT: sh a0, 0(s0)
; CHECK-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; CHECK-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; CHECK-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; CHECK-NEXT: fld fs0, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: fld fs1, 8(sp) # 8-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 32
; CHECK-NEXT: ret
%a = load half, ptr %p
@@ -111,22 +123,25 @@ define void @test_fmul(ptr %p, ptr %q) nounwind {
; CHECK-NEXT: addi sp, sp, -32
; CHECK-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; CHECK-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; CHECK-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; CHECK-NEXT: fsd fs0, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: fsd fs1, 8(sp) # 8-byte Folded Spill
; CHECK-NEXT: mv s0, a0
-; CHECK-NEXT: lhu s1, 0(a0)
-; CHECK-NEXT: lhu a0, 0(a1)
+; CHECK-NEXT: lhu a0, 0(a0)
+; CHECK-NEXT: lhu a1, 0(a1)
+; CHECK-NEXT: fmv.w.x fs0, a0
+; CHECK-NEXT: fmv.w.x fa0, a1
; CHECK-NEXT: call __extendhfsf2 at plt
-; CHECK-NEXT: fmv.s fs0, fa0
-; CHECK-NEXT: mv a0, s1
+; CHECK-NEXT: fmv.s fs1, fa0
+; CHECK-NEXT: fmv.s fa0, fs0
; CHECK-NEXT: call __extendhfsf2 at plt
-; CHECK-NEXT: fmul.s fa0, fa0, fs0
+; CHECK-NEXT: fmul.s fa0, fa0, fs1
; CHECK-NEXT: call __truncsfhf2 at plt
+; CHECK-NEXT: fmv.x.w a0, fa0
; CHECK-NEXT: sh a0, 0(s0)
; CHECK-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; CHECK-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; CHECK-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; CHECK-NEXT: fld fs0, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: fld fs1, 8(sp) # 8-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 32
; CHECK-NEXT: ret
%a = load half, ptr %p
diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat.ll b/llvm/test/CodeGen/RISCV/fpclamptosat.ll
index ac395bab1c203e..854bfaa3e702a2 100644
--- a/llvm/test/CodeGen/RISCV/fpclamptosat.ll
+++ b/llvm/test/CodeGen/RISCV/fpclamptosat.ll
@@ -355,7 +355,6 @@ define i32 @stest_f16i32(half %x) {
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
-; RV32-NEXT: fmv.x.w a0, fa0
; RV32-NEXT: call __extendhfsf2 at plt
; RV32-NEXT: call __fixsfdi at plt
; RV32-NEXT: lui a2, 524288
@@ -397,6 +396,7 @@ define i32 @stest_f16i32(half %x) {
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: fmv.x.w a0, fa0
+; RV64-NEXT: fmv.w.x fa0, a0
; RV64-NEXT: call __extendhfsf2 at plt
; RV64-NEXT: fcvt.l.s a0, fa0, rtz
; RV64-NEXT: lui a1, 524288
@@ -429,7 +429,6 @@ define i32 @utesth_f16i32(half %x) {
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
-; RV32-NEXT: fmv.x.w a0, fa0
; RV32-NEXT: call __extendhfsf2 at plt
; RV32-NEXT: call __fixunssfdi at plt
; RV32-NEXT: sltiu a2, a0, -1
@@ -448,6 +447,7 @@ define i32 @utesth_f16i32(half %x) {
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: fmv.x.w a0, fa0
+; RV64-NEXT: fmv.w.x fa0, a0
; RV64-NEXT: call __extendhfsf2 at plt
; RV64-NEXT: fcvt.lu.s a0, fa0, rtz
; RV64-NEXT: li a1, -1
@@ -474,7 +474,6 @@ define i32 @ustest_f16i32(half %x) {
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
-; RV32-NEXT: fmv.x.w a0, fa0
; RV32-NEXT: call __extendhfsf2 at plt
; RV32-NEXT: call __fixsfdi at plt
; RV32-NEXT: beqz a1, .LBB8_2
@@ -508,6 +507,7 @@ define i32 @ustest_f16i32(half %x) {
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: fmv.x.w a0, fa0
+; RV64-NEXT: fmv.w.x fa0, a0
; RV64-NEXT: call __extendhfsf2 at plt
; RV64-NEXT: fcvt.l.s a0, fa0, rtz
; RV64-NEXT: li a1, -1
@@ -891,7 +891,6 @@ define i16 @stest_f16i16(half %x) {
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
-; RV32-NEXT: fmv.x.w a0, fa0
; RV32-NEXT: call __extendhfsf2 at plt
; RV32-NEXT: fcvt.w.s a0, fa0, rtz
; RV32-NEXT: lui a1, 8
@@ -916,6 +915,7 @@ define i16 @stest_f16i16(half %x) {
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: fmv.x.w a0, fa0
+; RV64-NEXT: fmv.w.x fa0, a0
; RV64-NEXT: call __extendhfsf2 at plt
; RV64-NEXT: fcvt.l.s a0, fa0, rtz
; RV64-NEXT: lui a1, 8
@@ -949,7 +949,6 @@ define i16 @utesth_f16i16(half %x) {
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
-; RV32-NEXT: fmv.x.w a0, fa0
; RV32-NEXT: call __extendhfsf2 at plt
; RV32-NEXT: fcvt.wu.s a0, fa0, rtz
; RV32-NEXT: lui a1, 16
@@ -969,6 +968,7 @@ define i16 @utesth_f16i16(half %x) {
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: fmv.x.w a0, fa0
+; RV64-NEXT: fmv.w.x fa0, a0
; RV64-NEXT: call __extendhfsf2 at plt
; RV64-NEXT: fcvt.lu.s a0, fa0, rtz
; RV64-NEXT: lui a1, 16
@@ -995,7 +995,6 @@ define i16 @ustest_f16i16(half %x) {
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
-; RV32-NEXT: fmv.x.w a0, fa0
; RV32-NEXT: call __extendhfsf2 at plt
; RV32-NEXT: fcvt.w.s a0, fa0, rtz
; RV32-NEXT: lui a1, 16
@@ -1018,6 +1017,7 @@ define i16 @ustest_f16i16(half %x) {
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: fmv.x.w a0, fa0
+; RV64-NEXT: fmv.w.x fa0, a0
; RV64-NEXT: call __extendhfsf2 at plt
; RV64-NEXT: fcvt.l.s a0, fa0, rtz
; RV64-NEXT: lui a1, 16
@@ -1666,7 +1666,6 @@ define i64 @stest_f16i64(half %x) {
; RV32-NEXT: .cfi_def_cfa_offset 32
; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
-; RV32-NEXT: fmv.x.w a0, fa0
; RV32-NEXT: call __extendhfsf2 at plt
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: call __fixsfti at plt
@@ -1729,6 +1728,7 @@ define i64 @stest_f16i64(half %x) {
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: fmv.x.w a0, fa0
+; RV64-NEXT: fmv.w.x fa0, a0
; RV64-NEXT: call __extendhfsf2 at plt
; RV64-NEXT: call __fixsfti at plt
; RV64-NEXT: li a2, -1
@@ -1779,7 +1779,6 @@ define i64 @utesth_f16i64(half %x) {
; RV32-NEXT: .cfi_def_cfa_offset 32
; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
-; RV32-NEXT: fmv.x.w a0, fa0
; RV32-NEXT: call __extendhfsf2 at plt
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: call __fixunssfti at plt
@@ -1808,6 +1807,7 @@ define i64 @utesth_f16i64(half %x) {
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: fmv.x.w a0, fa0
+; RV64-NEXT: fmv.w.x fa0, a0
; RV64-NEXT: call __extendhfsf2 at plt
; RV64-NEXT: call __fixunssfti at plt
; RV64-NEXT: snez a1, a1
@@ -1831,7 +1831,6 @@ define i64 @ustest_f16i64(half %x) {
; RV32-NEXT: .cfi_def_cfa_offset 32
; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
-; RV32-NEXT: fmv.x.w a0, fa0
; RV32-NEXT: call __extendhfsf2 at plt
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: call __fixsfti at plt
@@ -1886,6 +1885,7 @@ define i64 @ustest_f16i64(half %x) {
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: fmv.x.w a0, fa0
+; RV64-NEXT: fmv.w.x fa0, a0
; RV64-NEXT: call __extendhfsf2 at plt
; RV64-NEXT: call __fixsfti at plt
; RV64-NEXT: slti a2, a1, 1
@@ -2249,7 +2249,6 @@ define i32 @stest_f16i32_mm(half %x) {
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
-; RV32-NEXT: fmv.x.w a0, fa0
; RV32-NEXT: call __extendhfsf2 at plt
; RV32-NEXT: call __fixsfdi at plt
; RV32-NEXT: lui a2, 524288
@@ -2291,6 +2290,7 @@ define i32 @stest_f16i32_mm(half %x) {
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: fmv.x.w a0, fa0
+; RV64-NEXT: fmv.w.x fa0, a0
; RV64-NEXT: call __extendhfsf2 at plt
; RV64-NEXT: fcvt.l.s a0, fa0, rtz
; RV64-NEXT: lui a1, 524288
@@ -2321,7 +2321,6 @@ define i32 @utesth_f16i32_mm(half %x) {
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
-; RV32-NEXT: fmv.x.w a0, fa0
; RV32-NEXT: call __extendhfsf2 at plt
; RV32-NEXT: call __fixunssfdi at plt
; RV32-NEXT: seqz a1, a1
@@ -2338,6 +2337,7 @@ define i32 @utesth_f16i32_mm(half %x) {
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: fmv.x.w a0, fa0
+; RV64-NEXT: fmv.w.x fa0, a0
; RV64-NEXT: call __extendhfsf2 at plt
; RV64-NEXT: fcvt.lu.s a0, fa0, rtz
; RV64-NEXT: li a1, -1
@@ -2363,7 +2363,6 @@ define i32 @ustest_f16i32_mm(half %x) {
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
-; RV32-NEXT: fmv.x.w a0, fa0
; RV32-NEXT: call __extendhfsf2 at plt
; RV32-NEXT: call __fixsfdi at plt
; RV32-NEXT: bnez a1, .LBB35_2
@@ -2391,6 +2390,7 @@ define i32 @ustest_f16i32_mm(half %x) {
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: fmv.x.w a0, fa0
+; RV64-NEXT: fmv.w.x fa0, a0
; RV64-NEXT: call __extendhfsf2 at plt
; RV64-NEXT: fcvt.l.s a0, fa0, rtz
; RV64-NEXT: li a1, -1
@@ -2762,7 +2762,6 @@ define i16 @stest_f16i16_mm(half %x) {
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
-; RV32-NEXT: fmv.x.w a0, fa0
; RV32-NEXT: call __extendhfsf2 at plt
; RV32-NEXT: fcvt.w.s a0, fa0, rtz
; RV32-NEXT: lui a1, 8
@@ -2787,6 +2786,7 @@ define i16 @stest_f16i16_mm(half %x) {
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: fmv.x.w a0, fa0
+; RV64-NEXT: fmv.w.x fa0, a0
; RV64-NEXT: call __extendhfsf2 at plt
; RV64-NEXT: fcvt.l.s a0, fa0, rtz
; RV64-NEXT: lui a1, 8
@@ -2818,7 +2818,6 @@ define i16 @utesth_f16i16_mm(half %x) {
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
-; RV32-NEXT: fmv.x.w a0, fa0
; RV32-NEXT: call __extendhfsf2 at plt
; RV32-NEXT: fcvt.wu.s a0, fa0, rtz
; RV32-NEXT: lui a1, 16
@@ -2838,6 +2837,7 @@ define i16 @utesth_f16i16_mm(half %x) {
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: fmv.x.w a0, fa0
+; RV64-NEXT: fmv.w.x fa0, a0
; RV64-NEXT: call __extendhfsf2 at plt
; RV64-NEXT: fcvt.lu.s a0, fa0, rtz
; RV64-NEXT: sext.w a0, a0
@@ -2864,7 +2864,6 @@ define i16 @ustest_f16i16_mm(half %x) {
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
-; RV32-NEXT: fmv.x.w a0, fa0
; RV32-NEXT: call __extendhfsf2 at plt
; RV32-NEXT: fcvt.w.s a0, fa0, rtz
; RV32-NEXT: lui a1, 16
@@ -2887,6 +2886,7 @@ define i16 @ustest_f16i16_mm(half %x) {
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: fmv.x.w a0, fa0
+; RV64-NEXT: fmv.w.x fa0, a0
; RV64-NEXT: call __extendhfsf2 at plt
; RV64-NEXT: fcvt.l.s a0, fa0, rtz
; RV64-NEXT: lui a1, 16
@@ -3465,7 +3465,6 @@ define i64 @stest_f16i64_mm(half %x) {
; RV32-NEXT: .cfi_def_cfa_offset 32
; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
-; RV32-NEXT: fmv.x.w a0, fa0
; RV32-NEXT: call __extendhfsf2 at plt
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: call __fixsfti at plt
@@ -3528,6 +3527,7 @@ define i64 @stest_f16i64_mm(half %x) {
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: fmv.x.w a0, fa0
+; RV64-NEXT: fmv.w.x fa0, a0
; RV64-NEXT: call __extendhfsf2 at plt
; RV64-NEXT: call __fixsfti at plt
; RV64-NEXT: li a2, -1
@@ -3576,7 +3576,6 @@ define i64 @utesth_f16i64_mm(half %x) {
; RV32-NEXT: .cfi_def_cfa_offset 32
; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
-; RV32-NEXT: fmv.x.w a0, fa0
; RV32-NEXT: call __extendhfsf2 at plt
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: call __fixunssfti at plt
@@ -3605,6 +3604,7 @@ define i64 @utesth_f16i64_mm(half %x) {
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: fmv.x.w a0, fa0
+; RV64-NEXT: fmv.w.x fa0, a0
; RV64-NEXT: call __extendhfsf2 at plt
; RV64-NEXT: call __fixunssfti at plt
; RV64-NEXT: snez a1, a1
@@ -3627,7 +3627,6 @@ define i64 @ustest_f16i64_mm(half %x) {
; RV32-NEXT: .cfi_def_cfa_offset 32
; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
-; RV32-NEXT: fmv.x.w a0, fa0
; RV32-NEXT: call __extendhfsf2 at plt
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: call __fixsfti at plt
@@ -3666,6 +3665,7 @@ define i64 @ustest_f16i64_mm(half %x) {
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: fmv.x.w a0, fa0
+; RV64-NEXT: fmv.w.x fa0, a0
; RV64-NEXT: call __extendhfsf2 at plt
; RV64-NEXT: call __fixsfti at plt
; RV64-NEXT: mv a2, a1
diff --git a/llvm/test/CodeGen/RISCV/half-convert.ll b/llvm/test/CodeGen/RISCV/half-convert.ll
index afc579c5eb9e64..28cbbcaf87c56c 100644
--- a/llvm/test/CodeGen/RISCV/half-convert.ll
+++ b/llvm/test/CodeGen/RISCV/half-convert.ll
@@ -135,7 +135,6 @@ define i16 @fcvt_si_h(half %a) nounwind {
; RV32ID: # %bb.0:
; RV32ID-NEXT: addi sp, sp, -16
; RV32ID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32ID-NEXT: fmv.x.w a0, fa0
; RV32ID-NEXT: call __extendhfsf2 at plt
; RV32ID-NEXT: fcvt.w.s a0, fa0, rtz
; RV32ID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
@@ -147,6 +146,7 @@ define i16 @fcvt_si_h(half %a) nounwind {
; RV64ID-NEXT: addi sp, sp, -16
; RV64ID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-NEXT: fmv.x.w a0, fa0
+; RV64ID-NEXT: fmv.w.x fa0, a0
; RV64ID-NEXT: call __extendhfsf2 at plt
; RV64ID-NEXT: fcvt.l.s a0, fa0, rtz
; RV64ID-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
@@ -439,7 +439,6 @@ define i16 @fcvt_si_h_sat(half %a) nounwind {
; RV32ID: # %bb.0: # %start
; RV32ID-NEXT: addi sp, sp, -16
; RV32ID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32ID-NEXT: fmv.x.w a0, fa0
; RV32ID-NEXT: call __extendhfsf2 at plt
; RV32ID-NEXT: feq.s a0, fa0, fa0
; RV32ID-NEXT: neg a0, a0
@@ -460,6 +459,7 @@ define i16 @fcvt_si_h_sat(half %a) nounwind {
; RV64ID-NEXT: addi sp, sp, -16
; RV64ID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-NEXT: fmv.x.w a0, fa0
+; RV64ID-NEXT: fmv.w.x fa0, a0
; RV64ID-NEXT: call __extendhfsf2 at plt
; RV64ID-NEXT: feq.s a0, fa0, fa0
; RV64ID-NEXT: lui a1, %hi(.LCPI1_0)
@@ -657,7 +657,6 @@ define i16 @fcvt_ui_h(half %a) nounwind {
; RV32ID: # %bb.0:
; RV32ID-NEXT: addi sp, sp, -16
; RV32ID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32ID-NEXT: fmv.x.w a0, fa0
; RV32ID-NEXT: call __extendhfsf2 at plt
; RV32ID-NEXT: fcvt.wu.s a0, fa0, rtz
; RV32ID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
@@ -669,6 +668,7 @@ define i16 @fcvt_ui_h(half %a) nounwind {
; RV64ID-NEXT: addi sp, sp, -16
; RV64ID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-NEXT: fmv.x.w a0, fa0
+; RV64ID-NEXT: fmv.w.x fa0, a0
; RV64ID-NEXT: call __extendhfsf2 at plt
; RV64ID-NEXT: fcvt.lu.s a0, fa0, rtz
; RV64ID-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
@@ -911,7 +911,6 @@ define i16 @fcvt_ui_h_sat(half %a) nounwind {
; RV32ID: # %bb.0: # %start
; RV32ID-NEXT: addi sp, sp, -16
; RV32ID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32ID-NEXT: fmv.x.w a0, fa0
; RV32ID-NEXT: call __extendhfsf2 at plt
; RV32ID-NEXT: lui a0, %hi(.LCPI3_0)
; RV32ID-NEXT: flw fa5, %lo(.LCPI3_0)(a0)
@@ -928,6 +927,7 @@ define i16 @fcvt_ui_h_sat(half %a) nounwind {
; RV64ID-NEXT: addi sp, sp, -16
; RV64ID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-NEXT: fmv.x.w a0, fa0
+; RV64ID-NEXT: fmv.w.x fa0, a0
; RV64ID-NEXT: call __extendhfsf2 at plt
; RV64ID-NEXT: lui a0, %hi(.LCPI3_0)
; RV64ID-NEXT: flw fa5, %lo(.LCPI3_0)(a0)
@@ -1082,7 +1082,6 @@ define i32 @fcvt_w_h(half %a) nounwind {
; RV32ID: # %bb.0:
; RV32ID-NEXT: addi sp, sp, -16
; RV32ID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32ID-NEXT: fmv.x.w a0, fa0
; RV32ID-NEXT: call __extendhfsf2 at plt
; RV32ID-NEXT: fcvt.w.s a0, fa0, rtz
; RV32ID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
@@ -1094,6 +1093,7 @@ define i32 @fcvt_w_h(half %a) nounwind {
; RV64ID-NEXT: addi sp, sp, -16
; RV64ID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-NEXT: fmv.x.w a0, fa0
+; RV64ID-NEXT: fmv.w.x fa0, a0
; RV64ID-NEXT: call __extendhfsf2 at plt
; RV64ID-NEXT: fcvt.l.s a0, fa0, rtz
; RV64ID-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
@@ -1309,7 +1309,6 @@ define i32 @fcvt_w_h_sat(half %a) nounwind {
; RV32ID: # %bb.0: # %start
; RV32ID-NEXT: addi sp, sp, -16
; RV32ID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32ID-NEXT: fmv.x.w a0, fa0
; RV32ID-NEXT: call __extendhfsf2 at plt
; RV32ID-NEXT: fcvt.w.s a0, fa0, rtz
; RV32ID-NEXT: feq.s a1, fa0, fa0
@@ -1325,6 +1324,7 @@ define i32 @fcvt_w_h_sat(half %a) nounwind {
; RV64ID-NEXT: addi sp, sp, -16
; RV64ID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-NEXT: fmv.x.w a0, fa0
+; RV64ID-NEXT: fmv.w.x fa0, a0
; RV64ID-NEXT: call __extendhfsf2 at plt
; RV64ID-NEXT: fcvt.w.s a0, fa0, rtz
; RV64ID-NEXT: feq.s a1, fa0, fa0
@@ -1476,7 +1476,6 @@ define i32 @fcvt_wu_h(half %a) nounwind {
; RV32ID: # %bb.0:
; RV32ID-NEXT: addi sp, sp, -16
; RV32ID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32ID-NEXT: fmv.x.w a0, fa0
; RV32ID-NEXT: call __extendhfsf2 at plt
; RV32ID-NEXT: fcvt.wu.s a0, fa0, rtz
; RV32ID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
@@ -1488,6 +1487,7 @@ define i32 @fcvt_wu_h(half %a) nounwind {
; RV64ID-NEXT: addi sp, sp, -16
; RV64ID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-NEXT: fmv.x.w a0, fa0
+; RV64ID-NEXT: fmv.w.x fa0, a0
; RV64ID-NEXT: call __extendhfsf2 at plt
; RV64ID-NEXT: fcvt.lu.s a0, fa0, rtz
; RV64ID-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
@@ -1629,7 +1629,6 @@ define i32 @fcvt_wu_h_multiple_use(half %x, ptr %y) nounwind {
; RV32ID: # %bb.0:
; RV32ID-NEXT: addi sp, sp, -16
; RV32ID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32ID-NEXT: fmv.x.w a0, fa0
; RV32ID-NEXT: call __extendhfsf2 at plt
; RV32ID-NEXT: fcvt.wu.s a0, fa0, rtz
; RV32ID-NEXT: seqz a1, a0
@@ -1643,6 +1642,7 @@ define i32 @fcvt_wu_h_multiple_use(half %x, ptr %y) nounwind {
; RV64ID-NEXT: addi sp, sp, -16
; RV64ID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-NEXT: fmv.x.w a0, fa0
+; RV64ID-NEXT: fmv.w.x fa0, a0
; RV64ID-NEXT: call __extendhfsf2 at plt
; RV64ID-NEXT: fcvt.lu.s a0, fa0, rtz
; RV64ID-NEXT: seqz a1, a0
@@ -1891,7 +1891,6 @@ define i32 @fcvt_wu_h_sat(half %a) nounwind {
; RV32ID: # %bb.0: # %start
; RV32ID-NEXT: addi sp, sp, -16
; RV32ID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32ID-NEXT: fmv.x.w a0, fa0
; RV32ID-NEXT: call __extendhfsf2 at plt
; RV32ID-NEXT: fcvt.wu.s a0, fa0, rtz
; RV32ID-NEXT: feq.s a1, fa0, fa0
@@ -1907,6 +1906,7 @@ define i32 @fcvt_wu_h_sat(half %a) nounwind {
; RV64ID-NEXT: addi sp, sp, -16
; RV64ID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-NEXT: fmv.x.w a0, fa0
+; RV64ID-NEXT: fmv.w.x fa0, a0
; RV64ID-NEXT: call __extendhfsf2 at plt
; RV64ID-NEXT: fcvt.wu.s a0, fa0, rtz
; RV64ID-NEXT: feq.s a1, fa0, fa0
@@ -2096,7 +2096,6 @@ define i64 @fcvt_l_h(half %a) nounwind {
; RV32ID: # %bb.0:
; RV32ID-NEXT: addi sp, sp, -16
; RV32ID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32ID-NEXT: fmv.x.w a0, fa0
; RV32ID-NEXT: call __extendhfsf2 at plt
; RV32ID-NEXT: call __fixsfdi at plt
; RV32ID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
@@ -2108,6 +2107,7 @@ define i64 @fcvt_l_h(half %a) nounwind {
; RV64ID-NEXT: addi sp, sp, -16
; RV64ID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-NEXT: fmv.x.w a0, fa0
+; RV64ID-NEXT: fmv.w.x fa0, a0
; RV64ID-NEXT: call __extendhfsf2 at plt
; RV64ID-NEXT: fcvt.l.s a0, fa0, rtz
; RV64ID-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
@@ -2527,7 +2527,6 @@ define i64 @fcvt_l_h_sat(half %a) nounwind {
; RV32ID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32ID-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32ID-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill
-; RV32ID-NEXT: fmv.x.w a0, fa0
; RV32ID-NEXT: call __extendhfsf2 at plt
; RV32ID-NEXT: fmv.s fs0, fa0
; RV32ID-NEXT: lui a0, 913408
@@ -2566,6 +2565,7 @@ define i64 @fcvt_l_h_sat(half %a) nounwind {
; RV64ID-NEXT: addi sp, sp, -16
; RV64ID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-NEXT: fmv.x.w a0, fa0
+; RV64ID-NEXT: fmv.w.x fa0, a0
; RV64ID-NEXT: call __extendhfsf2 at plt
; RV64ID-NEXT: fcvt.l.s a0, fa0, rtz
; RV64ID-NEXT: feq.s a1, fa0, fa0
@@ -2883,7 +2883,6 @@ define i64 @fcvt_lu_h(half %a) nounwind {
; RV32ID: # %bb.0:
; RV32ID-NEXT: addi sp, sp, -16
; RV32ID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32ID-NEXT: fmv.x.w a0, fa0
; RV32ID-NEXT: call __extendhfsf2 at plt
; RV32ID-NEXT: call __fixunssfdi at plt
; RV32ID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
@@ -2895,6 +2894,7 @@ define i64 @fcvt_lu_h(half %a) nounwind {
; RV64ID-NEXT: addi sp, sp, -16
; RV64ID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-NEXT: fmv.x.w a0, fa0
+; RV64ID-NEXT: fmv.w.x fa0, a0
; RV64ID-NEXT: call __extendhfsf2 at plt
; RV64ID-NEXT: fcvt.lu.s a0, fa0, rtz
; RV64ID-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
@@ -3195,7 +3195,6 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind {
; RV32ID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32ID-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32ID-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32ID-NEXT: fmv.x.w a0, fa0
; RV32ID-NEXT: call __extendhfsf2 at plt
; RV32ID-NEXT: lui a0, %hi(.LCPI12_0)
; RV32ID-NEXT: flw fa5, %lo(.LCPI12_0)(a0)
@@ -3220,6 +3219,7 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind {
; RV64ID-NEXT: addi sp, sp, -16
; RV64ID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-NEXT: fmv.x.w a0, fa0
+; RV64ID-NEXT: fmv.w.x fa0, a0
; RV64ID-NEXT: call __extendhfsf2 at plt
; RV64ID-NEXT: fcvt.lu.s a0, fa0, rtz
; RV64ID-NEXT: feq.s a1, fa0, fa0
@@ -3457,6 +3457,7 @@ define half @fcvt_h_si(i16 %a) nounwind {
; RV32ID-NEXT: srai a0, a0, 16
; RV32ID-NEXT: fcvt.s.w fa0, a0
; RV32ID-NEXT: call __truncsfhf2 at plt
+; RV32ID-NEXT: fmv.x.w a0, fa0
; RV32ID-NEXT: lui a1, 1048560
; RV32ID-NEXT: or a0, a0, a1
; RV32ID-NEXT: fmv.w.x fa0, a0
@@ -3472,6 +3473,7 @@ define half @fcvt_h_si(i16 %a) nounwind {
; RV64ID-NEXT: srai a0, a0, 48
; RV64ID-NEXT: fcvt.s.w fa0, a0
; RV64ID-NEXT: call __truncsfhf2 at plt
+; RV64ID-NEXT: fmv.x.w a0, fa0
; RV64ID-NEXT: lui a1, 1048560
; RV64ID-NEXT: or a0, a0, a1
; RV64ID-NEXT: fmv.w.x fa0, a0
@@ -3608,6 +3610,7 @@ define half @fcvt_h_si_signext(i16 signext %a) nounwind {
; RV32ID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32ID-NEXT: fcvt.s.w fa0, a0
; RV32ID-NEXT: call __truncsfhf2 at plt
+; RV32ID-NEXT: fmv.x.w a0, fa0
; RV32ID-NEXT: lui a1, 1048560
; RV32ID-NEXT: or a0, a0, a1
; RV32ID-NEXT: fmv.w.x fa0, a0
@@ -3621,6 +3624,7 @@ define half @fcvt_h_si_signext(i16 signext %a) nounwind {
; RV64ID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-NEXT: fcvt.s.w fa0, a0
; RV64ID-NEXT: call __truncsfhf2 at plt
+; RV64ID-NEXT: fmv.x.w a0, fa0
; RV64ID-NEXT: lui a1, 1048560
; RV64ID-NEXT: or a0, a0, a1
; RV64ID-NEXT: fmv.w.x fa0, a0
@@ -3786,6 +3790,7 @@ define half @fcvt_h_ui(i16 %a) nounwind {
; RV32ID-NEXT: srli a0, a0, 16
; RV32ID-NEXT: fcvt.s.wu fa0, a0
; RV32ID-NEXT: call __truncsfhf2 at plt
+; RV32ID-NEXT: fmv.x.w a0, fa0
; RV32ID-NEXT: lui a1, 1048560
; RV32ID-NEXT: or a0, a0, a1
; RV32ID-NEXT: fmv.w.x fa0, a0
@@ -3801,6 +3806,7 @@ define half @fcvt_h_ui(i16 %a) nounwind {
; RV64ID-NEXT: srli a0, a0, 48
; RV64ID-NEXT: fcvt.s.wu fa0, a0
; RV64ID-NEXT: call __truncsfhf2 at plt
+; RV64ID-NEXT: fmv.x.w a0, fa0
; RV64ID-NEXT: lui a1, 1048560
; RV64ID-NEXT: or a0, a0, a1
; RV64ID-NEXT: fmv.w.x fa0, a0
@@ -3937,6 +3943,7 @@ define half @fcvt_h_ui_zeroext(i16 zeroext %a) nounwind {
; RV32ID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32ID-NEXT: fcvt.s.wu fa0, a0
; RV32ID-NEXT: call __truncsfhf2 at plt
+; RV32ID-NEXT: fmv.x.w a0, fa0
; RV32ID-NEXT: lui a1, 1048560
; RV32ID-NEXT: or a0, a0, a1
; RV32ID-NEXT: fmv.w.x fa0, a0
@@ -3950,6 +3957,7 @@ define half @fcvt_h_ui_zeroext(i16 zeroext %a) nounwind {
; RV64ID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-NEXT: fcvt.s.wu fa0, a0
; RV64ID-NEXT: call __truncsfhf2 at plt
+; RV64ID-NEXT: fmv.x.w a0, fa0
; RV64ID-NEXT: lui a1, 1048560
; RV64ID-NEXT: or a0, a0, a1
; RV64ID-NEXT: fmv.w.x fa0, a0
@@ -4075,6 +4083,7 @@ define half @fcvt_h_w(i32 %a) nounwind {
; RV32ID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32ID-NEXT: fcvt.s.w fa0, a0
; RV32ID-NEXT: call __truncsfhf2 at plt
+; RV32ID-NEXT: fmv.x.w a0, fa0
; RV32ID-NEXT: lui a1, 1048560
; RV32ID-NEXT: or a0, a0, a1
; RV32ID-NEXT: fmv.w.x fa0, a0
@@ -4088,6 +4097,7 @@ define half @fcvt_h_w(i32 %a) nounwind {
; RV64ID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-NEXT: fcvt.s.w fa0, a0
; RV64ID-NEXT: call __truncsfhf2 at plt
+; RV64ID-NEXT: fmv.x.w a0, fa0
; RV64ID-NEXT: lui a1, 1048560
; RV64ID-NEXT: or a0, a0, a1
; RV64ID-NEXT: fmv.w.x fa0, a0
@@ -4225,6 +4235,7 @@ define half @fcvt_h_w_load(ptr %p) nounwind {
; RV32ID-NEXT: lw a0, 0(a0)
; RV32ID-NEXT: fcvt.s.w fa0, a0
; RV32ID-NEXT: call __truncsfhf2 at plt
+; RV32ID-NEXT: fmv.x.w a0, fa0
; RV32ID-NEXT: lui a1, 1048560
; RV32ID-NEXT: or a0, a0, a1
; RV32ID-NEXT: fmv.w.x fa0, a0
@@ -4239,6 +4250,7 @@ define half @fcvt_h_w_load(ptr %p) nounwind {
; RV64ID-NEXT: lw a0, 0(a0)
; RV64ID-NEXT: fcvt.s.w fa0, a0
; RV64ID-NEXT: call __truncsfhf2 at plt
+; RV64ID-NEXT: fmv.x.w a0, fa0
; RV64ID-NEXT: lui a1, 1048560
; RV64ID-NEXT: or a0, a0, a1
; RV64ID-NEXT: fmv.w.x fa0, a0
@@ -4371,6 +4383,7 @@ define half @fcvt_h_wu(i32 %a) nounwind {
; RV32ID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32ID-NEXT: fcvt.s.wu fa0, a0
; RV32ID-NEXT: call __truncsfhf2 at plt
+; RV32ID-NEXT: fmv.x.w a0, fa0
; RV32ID-NEXT: lui a1, 1048560
; RV32ID-NEXT: or a0, a0, a1
; RV32ID-NEXT: fmv.w.x fa0, a0
@@ -4384,6 +4397,7 @@ define half @fcvt_h_wu(i32 %a) nounwind {
; RV64ID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-NEXT: fcvt.s.wu fa0, a0
; RV64ID-NEXT: call __truncsfhf2 at plt
+; RV64ID-NEXT: fmv.x.w a0, fa0
; RV64ID-NEXT: lui a1, 1048560
; RV64ID-NEXT: or a0, a0, a1
; RV64ID-NEXT: fmv.w.x fa0, a0
@@ -4542,6 +4556,7 @@ define half @fcvt_h_wu_load(ptr %p) nounwind {
; RV32ID-NEXT: lw a0, 0(a0)
; RV32ID-NEXT: fcvt.s.wu fa0, a0
; RV32ID-NEXT: call __truncsfhf2 at plt
+; RV32ID-NEXT: fmv.x.w a0, fa0
; RV32ID-NEXT: lui a1, 1048560
; RV32ID-NEXT: or a0, a0, a1
; RV32ID-NEXT: fmv.w.x fa0, a0
@@ -4556,6 +4571,7 @@ define half @fcvt_h_wu_load(ptr %p) nounwind {
; RV64ID-NEXT: lwu a0, 0(a0)
; RV64ID-NEXT: fcvt.s.wu fa0, a0
; RV64ID-NEXT: call __truncsfhf2 at plt
+; RV64ID-NEXT: fmv.x.w a0, fa0
; RV64ID-NEXT: lui a1, 1048560
; RV64ID-NEXT: or a0, a0, a1
; RV64ID-NEXT: fmv.w.x fa0, a0
@@ -4717,6 +4733,7 @@ define half @fcvt_h_l(i64 %a) nounwind {
; RV32ID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32ID-NEXT: call __floatdisf at plt
; RV32ID-NEXT: call __truncsfhf2 at plt
+; RV32ID-NEXT: fmv.x.w a0, fa0
; RV32ID-NEXT: lui a1, 1048560
; RV32ID-NEXT: or a0, a0, a1
; RV32ID-NEXT: fmv.w.x fa0, a0
@@ -4730,6 +4747,7 @@ define half @fcvt_h_l(i64 %a) nounwind {
; RV64ID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-NEXT: fcvt.s.l fa0, a0
; RV64ID-NEXT: call __truncsfhf2 at plt
+; RV64ID-NEXT: fmv.x.w a0, fa0
; RV64ID-NEXT: lui a1, 1048560
; RV64ID-NEXT: or a0, a0, a1
; RV64ID-NEXT: fmv.w.x fa0, a0
@@ -4893,6 +4911,7 @@ define half @fcvt_h_lu(i64 %a) nounwind {
; RV32ID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32ID-NEXT: call __floatundisf at plt
; RV32ID-NEXT: call __truncsfhf2 at plt
+; RV32ID-NEXT: fmv.x.w a0, fa0
; RV32ID-NEXT: lui a1, 1048560
; RV32ID-NEXT: or a0, a0, a1
; RV32ID-NEXT: fmv.w.x fa0, a0
@@ -4906,6 +4925,7 @@ define half @fcvt_h_lu(i64 %a) nounwind {
; RV64ID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-NEXT: fcvt.s.lu fa0, a0
; RV64ID-NEXT: call __truncsfhf2 at plt
+; RV64ID-NEXT: fmv.x.w a0, fa0
; RV64ID-NEXT: lui a1, 1048560
; RV64ID-NEXT: or a0, a0, a1
; RV64ID-NEXT: fmv.w.x fa0, a0
@@ -5032,6 +5052,7 @@ define half @fcvt_h_s(float %a) nounwind {
; RV32ID-NEXT: addi sp, sp, -16
; RV32ID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32ID-NEXT: call __truncsfhf2 at plt
+; RV32ID-NEXT: fmv.x.w a0, fa0
; RV32ID-NEXT: lui a1, 1048560
; RV32ID-NEXT: or a0, a0, a1
; RV32ID-NEXT: fmv.w.x fa0, a0
@@ -5044,6 +5065,7 @@ define half @fcvt_h_s(float %a) nounwind {
; RV64ID-NEXT: addi sp, sp, -16
; RV64ID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-NEXT: call __truncsfhf2 at plt
+; RV64ID-NEXT: fmv.x.w a0, fa0
; RV64ID-NEXT: lui a1, 1048560
; RV64ID-NEXT: or a0, a0, a1
; RV64ID-NEXT: fmv.w.x fa0, a0
@@ -5130,7 +5152,12 @@ define float @fcvt_s_h(half %a) nounwind {
;
; RV32ID-ILP32-LABEL: fcvt_s_h:
; RV32ID-ILP32: # %bb.0:
-; RV32ID-ILP32-NEXT: tail __extendhfsf2 at plt
+; RV32ID-ILP32-NEXT: addi sp, sp, -16
+; RV32ID-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32ID-ILP32-NEXT: call __extendhfsf2 at plt
+; RV32ID-ILP32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32ID-ILP32-NEXT: addi sp, sp, 16
+; RV32ID-ILP32-NEXT: ret
;
; RV64ID-LP64-LABEL: fcvt_s_h:
; RV64ID-LP64: # %bb.0:
@@ -5143,13 +5170,23 @@ define float @fcvt_s_h(half %a) nounwind {
;
; RV32ID-LABEL: fcvt_s_h:
; RV32ID: # %bb.0:
-; RV32ID-NEXT: fmv.x.w a0, fa0
-; RV32ID-NEXT: tail __extendhfsf2 at plt
+; RV32ID-NEXT: addi sp, sp, -16
+; RV32ID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32ID-NEXT: call __extendhfsf2 at plt
+; RV32ID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32ID-NEXT: addi sp, sp, 16
+; RV32ID-NEXT: ret
;
; RV64ID-LABEL: fcvt_s_h:
; RV64ID: # %bb.0:
+; RV64ID-NEXT: addi sp, sp, -16
+; RV64ID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-NEXT: fmv.x.w a0, fa0
-; RV64ID-NEXT: tail __extendhfsf2 at plt
+; RV64ID-NEXT: fmv.w.x fa0, a0
+; RV64ID-NEXT: call __extendhfsf2 at plt
+; RV64ID-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64ID-NEXT: addi sp, sp, 16
+; RV64ID-NEXT: ret
;
; CHECK32-IZFHMIN-LABEL: fcvt_s_h:
; CHECK32-IZFHMIN: # %bb.0:
@@ -5292,6 +5329,7 @@ define half @fcvt_h_d(double %a) nounwind {
; RV32ID-NEXT: addi sp, sp, -16
; RV32ID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32ID-NEXT: call __truncdfhf2 at plt
+; RV32ID-NEXT: fmv.x.w a0, fa0
; RV32ID-NEXT: lui a1, 1048560
; RV32ID-NEXT: or a0, a0, a1
; RV32ID-NEXT: fmv.w.x fa0, a0
@@ -5304,6 +5342,7 @@ define half @fcvt_h_d(double %a) nounwind {
; RV64ID-NEXT: addi sp, sp, -16
; RV64ID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-NEXT: call __truncdfhf2 at plt
+; RV64ID-NEXT: fmv.x.w a0, fa0
; RV64ID-NEXT: lui a1, 1048560
; RV64ID-NEXT: or a0, a0, a1
; RV64ID-NEXT: fmv.w.x fa0, a0
@@ -5493,7 +5532,6 @@ define double @fcvt_d_h(half %a) nounwind {
; RV32ID: # %bb.0:
; RV32ID-NEXT: addi sp, sp, -16
; RV32ID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32ID-NEXT: fmv.x.w a0, fa0
; RV32ID-NEXT: call __extendhfsf2 at plt
; RV32ID-NEXT: fcvt.d.s fa0, fa0
; RV32ID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
@@ -5505,6 +5543,7 @@ define double @fcvt_d_h(half %a) nounwind {
; RV64ID-NEXT: addi sp, sp, -16
; RV64ID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-NEXT: fmv.x.w a0, fa0
+; RV64ID-NEXT: fmv.w.x fa0, a0
; RV64ID-NEXT: call __extendhfsf2 at plt
; RV64ID-NEXT: fcvt.d.s fa0, fa0
; RV64ID-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
@@ -5894,6 +5933,7 @@ define signext i32 @fcvt_h_w_demanded_bits(i32 signext %0, ptr %1) nounwind {
; RV32ID-NEXT: addi s1, a0, 1
; RV32ID-NEXT: fcvt.s.w fa0, s1
; RV32ID-NEXT: call __truncsfhf2 at plt
+; RV32ID-NEXT: fmv.x.w a0, fa0
; RV32ID-NEXT: sh a0, 0(s0)
; RV32ID-NEXT: mv a0, s1
; RV32ID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
@@ -5912,6 +5952,7 @@ define signext i32 @fcvt_h_w_demanded_bits(i32 signext %0, ptr %1) nounwind {
; RV64ID-NEXT: addiw s1, a0, 1
; RV64ID-NEXT: fcvt.s.w fa0, s1
; RV64ID-NEXT: call __truncsfhf2 at plt
+; RV64ID-NEXT: fmv.x.w a0, fa0
; RV64ID-NEXT: sh a0, 0(s0)
; RV64ID-NEXT: mv a0, s1
; RV64ID-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
@@ -6117,6 +6158,7 @@ define signext i32 @fcvt_h_wu_demanded_bits(i32 signext %0, ptr %1) nounwind {
; RV32ID-NEXT: addi s1, a0, 1
; RV32ID-NEXT: fcvt.s.wu fa0, s1
; RV32ID-NEXT: call __truncsfhf2 at plt
+; RV32ID-NEXT: fmv.x.w a0, fa0
; RV32ID-NEXT: sh a0, 0(s0)
; RV32ID-NEXT: mv a0, s1
; RV32ID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
@@ -6135,6 +6177,7 @@ define signext i32 @fcvt_h_wu_demanded_bits(i32 signext %0, ptr %1) nounwind {
; RV64ID-NEXT: addiw s1, a0, 1
; RV64ID-NEXT: fcvt.s.wu fa0, s1
; RV64ID-NEXT: call __truncsfhf2 at plt
+; RV64ID-NEXT: fmv.x.w a0, fa0
; RV64ID-NEXT: sh a0, 0(s0)
; RV64ID-NEXT: mv a0, s1
; RV64ID-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
@@ -6293,7 +6336,6 @@ define signext i16 @fcvt_w_s_i16(half %a) nounwind {
; RV32ID: # %bb.0:
; RV32ID-NEXT: addi sp, sp, -16
; RV32ID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32ID-NEXT: fmv.x.w a0, fa0
; RV32ID-NEXT: call __extendhfsf2 at plt
; RV32ID-NEXT: fcvt.w.s a0, fa0, rtz
; RV32ID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
@@ -6305,6 +6347,7 @@ define signext i16 @fcvt_w_s_i16(half %a) nounwind {
; RV64ID-NEXT: addi sp, sp, -16
; RV64ID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-NEXT: fmv.x.w a0, fa0
+; RV64ID-NEXT: fmv.w.x fa0, a0
; RV64ID-NEXT: call __extendhfsf2 at plt
; RV64ID-NEXT: fcvt.l.s a0, fa0, rtz
; RV64ID-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
@@ -6601,7 +6644,6 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind {
; RV32ID: # %bb.0: # %start
; RV32ID-NEXT: addi sp, sp, -16
; RV32ID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32ID-NEXT: fmv.x.w a0, fa0
; RV32ID-NEXT: call __extendhfsf2 at plt
; RV32ID-NEXT: feq.s a0, fa0, fa0
; RV32ID-NEXT: neg a0, a0
@@ -6622,6 +6664,7 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind {
; RV64ID-NEXT: addi sp, sp, -16
; RV64ID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-NEXT: fmv.x.w a0, fa0
+; RV64ID-NEXT: fmv.w.x fa0, a0
; RV64ID-NEXT: call __extendhfsf2 at plt
; RV64ID-NEXT: feq.s a0, fa0, fa0
; RV64ID-NEXT: lui a1, %hi(.LCPI32_0)
@@ -6818,7 +6861,6 @@ define zeroext i16 @fcvt_wu_s_i16(half %a) nounwind {
; RV32ID: # %bb.0:
; RV32ID-NEXT: addi sp, sp, -16
; RV32ID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32ID-NEXT: fmv.x.w a0, fa0
; RV32ID-NEXT: call __extendhfsf2 at plt
; RV32ID-NEXT: fcvt.wu.s a0, fa0, rtz
; RV32ID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
@@ -6830,6 +6872,7 @@ define zeroext i16 @fcvt_wu_s_i16(half %a) nounwind {
; RV64ID-NEXT: addi sp, sp, -16
; RV64ID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-NEXT: fmv.x.w a0, fa0
+; RV64ID-NEXT: fmv.w.x fa0, a0
; RV64ID-NEXT: call __extendhfsf2 at plt
; RV64ID-NEXT: fcvt.lu.s a0, fa0, rtz
; RV64ID-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
@@ -7078,7 +7121,6 @@ define zeroext i16 @fcvt_wu_s_sat_i16(half %a) nounwind {
; RV32ID: # %bb.0: # %start
; RV32ID-NEXT: addi sp, sp, -16
; RV32ID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32ID-NEXT: fmv.x.w a0, fa0
; RV32ID-NEXT: call __extendhfsf2 at plt
; RV32ID-NEXT: lui a0, %hi(.LCPI34_0)
; RV32ID-NEXT: flw fa5, %lo(.LCPI34_0)(a0)
@@ -7095,6 +7137,7 @@ define zeroext i16 @fcvt_wu_s_sat_i16(half %a) nounwind {
; RV64ID-NEXT: addi sp, sp, -16
; RV64ID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-NEXT: fmv.x.w a0, fa0
+; RV64ID-NEXT: fmv.w.x fa0, a0
; RV64ID-NEXT: call __extendhfsf2 at plt
; RV64ID-NEXT: lui a0, %hi(.LCPI34_0)
; RV64ID-NEXT: flw fa5, %lo(.LCPI34_0)(a0)
@@ -7263,7 +7306,6 @@ define signext i8 @fcvt_w_s_i8(half %a) nounwind {
; RV32ID: # %bb.0:
; RV32ID-NEXT: addi sp, sp, -16
; RV32ID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32ID-NEXT: fmv.x.w a0, fa0
; RV32ID-NEXT: call __extendhfsf2 at plt
; RV32ID-NEXT: fcvt.w.s a0, fa0, rtz
; RV32ID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
@@ -7275,6 +7317,7 @@ define signext i8 @fcvt_w_s_i8(half %a) nounwind {
; RV64ID-NEXT: addi sp, sp, -16
; RV64ID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-NEXT: fmv.x.w a0, fa0
+; RV64ID-NEXT: fmv.w.x fa0, a0
; RV64ID-NEXT: call __extendhfsf2 at plt
; RV64ID-NEXT: fcvt.l.s a0, fa0, rtz
; RV64ID-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
@@ -7563,7 +7606,6 @@ define signext i8 @fcvt_w_s_sat_i8(half %a) nounwind {
; RV32ID: # %bb.0: # %start
; RV32ID-NEXT: addi sp, sp, -16
; RV32ID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32ID-NEXT: fmv.x.w a0, fa0
; RV32ID-NEXT: call __extendhfsf2 at plt
; RV32ID-NEXT: feq.s a0, fa0, fa0
; RV32ID-NEXT: neg a0, a0
@@ -7584,6 +7626,7 @@ define signext i8 @fcvt_w_s_sat_i8(half %a) nounwind {
; RV64ID-NEXT: addi sp, sp, -16
; RV64ID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-NEXT: fmv.x.w a0, fa0
+; RV64ID-NEXT: fmv.w.x fa0, a0
; RV64ID-NEXT: call __extendhfsf2 at plt
; RV64ID-NEXT: feq.s a0, fa0, fa0
; RV64ID-NEXT: neg a0, a0
@@ -7777,7 +7820,6 @@ define zeroext i8 @fcvt_wu_s_i8(half %a) nounwind {
; RV32ID: # %bb.0:
; RV32ID-NEXT: addi sp, sp, -16
; RV32ID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32ID-NEXT: fmv.x.w a0, fa0
; RV32ID-NEXT: call __extendhfsf2 at plt
; RV32ID-NEXT: fcvt.wu.s a0, fa0, rtz
; RV32ID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
@@ -7789,6 +7831,7 @@ define zeroext i8 @fcvt_wu_s_i8(half %a) nounwind {
; RV64ID-NEXT: addi sp, sp, -16
; RV64ID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-NEXT: fmv.x.w a0, fa0
+; RV64ID-NEXT: fmv.w.x fa0, a0
; RV64ID-NEXT: call __extendhfsf2 at plt
; RV64ID-NEXT: fcvt.lu.s a0, fa0, rtz
; RV64ID-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
@@ -8025,7 +8068,6 @@ define zeroext i8 @fcvt_wu_s_sat_i8(half %a) nounwind {
; RV32ID: # %bb.0: # %start
; RV32ID-NEXT: addi sp, sp, -16
; RV32ID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32ID-NEXT: fmv.x.w a0, fa0
; RV32ID-NEXT: call __extendhfsf2 at plt
; RV32ID-NEXT: fmv.w.x fa5, zero
; RV32ID-NEXT: fmax.s fa5, fa0, fa5
@@ -8042,6 +8084,7 @@ define zeroext i8 @fcvt_wu_s_sat_i8(half %a) nounwind {
; RV64ID-NEXT: addi sp, sp, -16
; RV64ID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-NEXT: fmv.x.w a0, fa0
+; RV64ID-NEXT: fmv.w.x fa0, a0
; RV64ID-NEXT: call __extendhfsf2 at plt
; RV64ID-NEXT: fmv.w.x fa5, zero
; RV64ID-NEXT: fmax.s fa5, fa0, fa5
@@ -8305,7 +8348,6 @@ define zeroext i32 @fcvt_wu_h_sat_zext(half %a) nounwind {
; RV32ID: # %bb.0: # %start
; RV32ID-NEXT: addi sp, sp, -16
; RV32ID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32ID-NEXT: fmv.x.w a0, fa0
; RV32ID-NEXT: call __extendhfsf2 at plt
; RV32ID-NEXT: fcvt.wu.s a0, fa0, rtz
; RV32ID-NEXT: feq.s a1, fa0, fa0
@@ -8321,6 +8363,7 @@ define zeroext i32 @fcvt_wu_h_sat_zext(half %a) nounwind {
; RV64ID-NEXT: addi sp, sp, -16
; RV64ID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-NEXT: fmv.x.w a0, fa0
+; RV64ID-NEXT: fmv.w.x fa0, a0
; RV64ID-NEXT: call __extendhfsf2 at plt
; RV64ID-NEXT: fcvt.wu.s a0, fa0, rtz
; RV64ID-NEXT: feq.s a1, fa0, fa0
@@ -8574,7 +8617,6 @@ define signext i32 @fcvt_w_h_sat_sext(half %a) nounwind {
; RV32ID: # %bb.0: # %start
; RV32ID-NEXT: addi sp, sp, -16
; RV32ID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32ID-NEXT: fmv.x.w a0, fa0
; RV32ID-NEXT: call __extendhfsf2 at plt
; RV32ID-NEXT: fcvt.w.s a0, fa0, rtz
; RV32ID-NEXT: feq.s a1, fa0, fa0
@@ -8590,6 +8632,7 @@ define signext i32 @fcvt_w_h_sat_sext(half %a) nounwind {
; RV64ID-NEXT: addi sp, sp, -16
; RV64ID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-NEXT: fmv.x.w a0, fa0
+; RV64ID-NEXT: fmv.w.x fa0, a0
; RV64ID-NEXT: call __extendhfsf2 at plt
; RV64ID-NEXT: fcvt.w.s a0, fa0, rtz
; RV64ID-NEXT: feq.s a1, fa0, fa0
diff --git a/llvm/test/CodeGen/RISCV/libcall-tail-calls.ll b/llvm/test/CodeGen/RISCV/libcall-tail-calls.ll
index 489d44256ba80c..35d75a02ddd554 100644
--- a/llvm/test/CodeGen/RISCV/libcall-tail-calls.ll
+++ b/llvm/test/CodeGen/RISCV/libcall-tail-calls.ll
@@ -120,10 +120,10 @@ define half @sin_f16(half %a) nounwind {
; RV32IFD-ILP32D: # %bb.0:
; RV32IFD-ILP32D-NEXT: addi sp, sp, -16
; RV32IFD-ILP32D-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IFD-ILP32D-NEXT: fmv.x.w a0, fa0
; RV32IFD-ILP32D-NEXT: call __extendhfsf2 at plt
; RV32IFD-ILP32D-NEXT: call sinf at plt
; RV32IFD-ILP32D-NEXT: call __truncsfhf2 at plt
+; RV32IFD-ILP32D-NEXT: fmv.x.w a0, fa0
; RV32IFD-ILP32D-NEXT: lui a1, 1048560
; RV32IFD-ILP32D-NEXT: or a0, a0, a1
; RV32IFD-ILP32D-NEXT: fmv.w.x fa0, a0
@@ -135,10 +135,10 @@ define half @sin_f16(half %a) nounwind {
; RV32IF-ILP32F: # %bb.0:
; RV32IF-ILP32F-NEXT: addi sp, sp, -16
; RV32IF-ILP32F-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IF-ILP32F-NEXT: fmv.x.w a0, fa0
; RV32IF-ILP32F-NEXT: call __extendhfsf2 at plt
; RV32IF-ILP32F-NEXT: call sinf at plt
; RV32IF-ILP32F-NEXT: call __truncsfhf2 at plt
+; RV32IF-ILP32F-NEXT: fmv.x.w a0, fa0
; RV32IF-ILP32F-NEXT: lui a1, 1048560
; RV32IF-ILP32F-NEXT: or a0, a0, a1
; RV32IF-ILP32F-NEXT: fmv.w.x fa0, a0
@@ -177,9 +177,11 @@ define half @sin_f16(half %a) nounwind {
; RV64IFD-LP64D-NEXT: addi sp, sp, -16
; RV64IFD-LP64D-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64IFD-LP64D-NEXT: fmv.x.w a0, fa0
+; RV64IFD-LP64D-NEXT: fmv.w.x fa0, a0
; RV64IFD-LP64D-NEXT: call __extendhfsf2 at plt
; RV64IFD-LP64D-NEXT: call sinf at plt
; RV64IFD-LP64D-NEXT: call __truncsfhf2 at plt
+; RV64IFD-LP64D-NEXT: fmv.x.w a0, fa0
; RV64IFD-LP64D-NEXT: lui a1, 1048560
; RV64IFD-LP64D-NEXT: or a0, a0, a1
; RV64IFD-LP64D-NEXT: fmv.w.x fa0, a0
@@ -192,9 +194,11 @@ define half @sin_f16(half %a) nounwind {
; RV64IF-LP64F-NEXT: addi sp, sp, -16
; RV64IF-LP64F-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64IF-LP64F-NEXT: fmv.x.w a0, fa0
+; RV64IF-LP64F-NEXT: fmv.w.x fa0, a0
; RV64IF-LP64F-NEXT: call __extendhfsf2 at plt
; RV64IF-LP64F-NEXT: call sinf at plt
; RV64IF-LP64F-NEXT: call __truncsfhf2 at plt
+; RV64IF-LP64F-NEXT: fmv.x.w a0, fa0
; RV64IF-LP64F-NEXT: lui a1, 1048560
; RV64IF-LP64F-NEXT: or a0, a0, a1
; RV64IF-LP64F-NEXT: fmv.w.x fa0, a0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
index daa2e911e13171..b64c24456caf30 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
@@ -369,17 +369,17 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) {
; CHECK-NOV-NEXT: lhu s3, 8(a1)
; CHECK-NOV-NEXT: lhu a1, 16(a1)
; CHECK-NOV-NEXT: mv s0, a0
-; CHECK-NOV-NEXT: mv a0, a1
+; CHECK-NOV-NEXT: fmv.w.x fa0, a1
; CHECK-NOV-NEXT: call __extendhfsf2 at plt
; CHECK-NOV-NEXT: fmv.s fs2, fa0
-; CHECK-NOV-NEXT: mv a0, s3
+; CHECK-NOV-NEXT: fmv.w.x fa0, s3
; CHECK-NOV-NEXT: call __extendhfsf2 at plt
; CHECK-NOV-NEXT: fmv.s fs1, fa0
-; CHECK-NOV-NEXT: mv a0, s2
+; CHECK-NOV-NEXT: fmv.w.x fa0, s2
; CHECK-NOV-NEXT: call __extendhfsf2 at plt
; CHECK-NOV-NEXT: fmv.s fs0, fa0
-; CHECK-NOV-NEXT: fcvt.l.s s2, fs2, rtz
-; CHECK-NOV-NEXT: mv a0, s1
+; CHECK-NOV-NEXT: fmv.w.x fa0, s1
+; CHECK-NOV-NEXT: fcvt.l.s s1, fs2, rtz
; CHECK-NOV-NEXT: call __extendhfsf2 at plt
; CHECK-NOV-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-NOV-NEXT: lui a1, 524288
@@ -387,7 +387,7 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) {
; CHECK-NOV-NEXT: bge a0, a4, .LBB6_10
; CHECK-NOV-NEXT: # %bb.1: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a2, fs1, rtz
-; CHECK-NOV-NEXT: bge s2, a4, .LBB6_11
+; CHECK-NOV-NEXT: bge s1, a4, .LBB6_11
; CHECK-NOV-NEXT: .LBB6_2: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a3, fs0, rtz
; CHECK-NOV-NEXT: bge a2, a4, .LBB6_12
@@ -398,14 +398,14 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) {
; CHECK-NOV-NEXT: .LBB6_5: # %entry
; CHECK-NOV-NEXT: bge a1, a2, .LBB6_15
; CHECK-NOV-NEXT: .LBB6_6: # %entry
-; CHECK-NOV-NEXT: bge a1, s2, .LBB6_16
+; CHECK-NOV-NEXT: bge a1, s1, .LBB6_16
; CHECK-NOV-NEXT: .LBB6_7: # %entry
; CHECK-NOV-NEXT: blt a1, a0, .LBB6_9
; CHECK-NOV-NEXT: .LBB6_8: # %entry
; CHECK-NOV-NEXT: lui a0, 524288
; CHECK-NOV-NEXT: .LBB6_9: # %entry
; CHECK-NOV-NEXT: sw a0, 12(s0)
-; CHECK-NOV-NEXT: sw s2, 8(s0)
+; CHECK-NOV-NEXT: sw s1, 8(s0)
; CHECK-NOV-NEXT: sw a2, 4(s0)
; CHECK-NOV-NEXT: sw a3, 0(s0)
; CHECK-NOV-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
@@ -421,9 +421,9 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) {
; CHECK-NOV-NEXT: .LBB6_10: # %entry
; CHECK-NOV-NEXT: mv a0, a4
; CHECK-NOV-NEXT: fcvt.l.s a2, fs1, rtz
-; CHECK-NOV-NEXT: blt s2, a4, .LBB6_2
+; CHECK-NOV-NEXT: blt s1, a4, .LBB6_2
; CHECK-NOV-NEXT: .LBB6_11: # %entry
-; CHECK-NOV-NEXT: mv s2, a4
+; CHECK-NOV-NEXT: mv s1, a4
; CHECK-NOV-NEXT: fcvt.l.s a3, fs0, rtz
; CHECK-NOV-NEXT: blt a2, a4, .LBB6_3
; CHECK-NOV-NEXT: .LBB6_12: # %entry
@@ -437,9 +437,9 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) {
; CHECK-NOV-NEXT: blt a1, a2, .LBB6_6
; CHECK-NOV-NEXT: .LBB6_15: # %entry
; CHECK-NOV-NEXT: lui a2, 524288
-; CHECK-NOV-NEXT: blt a1, s2, .LBB6_7
+; CHECK-NOV-NEXT: blt a1, s1, .LBB6_7
; CHECK-NOV-NEXT: .LBB6_16: # %entry
-; CHECK-NOV-NEXT: lui s2, 524288
+; CHECK-NOV-NEXT: lui s1, 524288
; CHECK-NOV-NEXT: bge a1, a0, .LBB6_8
; CHECK-NOV-NEXT: j .LBB6_9
;
@@ -463,13 +463,14 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) {
; CHECK-V-NEXT: lhu s1, 16(a0)
; CHECK-V-NEXT: lhu s2, 0(a0)
; CHECK-V-NEXT: lhu a0, 8(a0)
+; CHECK-V-NEXT: fmv.w.x fa0, a0
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-V-NEXT: vmv.s.x v8, a0
; CHECK-V-NEXT: addi a0, sp, 16
; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT: mv a0, s2
+; CHECK-V-NEXT: fmv.w.x fa0, s2
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 2, e64, m2, tu, ma
@@ -478,7 +479,7 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v8, v10, 1
; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT: mv a0, s1
+; CHECK-V-NEXT: fmv.w.x fa0, s1
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma
@@ -487,7 +488,7 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 2
; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT: mv a0, s0
+; CHECK-V-NEXT: fmv.w.x fa0, s0
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
@@ -546,17 +547,17 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) {
; CHECK-NOV-NEXT: lhu s3, 16(a1)
; CHECK-NOV-NEXT: lhu a1, 8(a1)
; CHECK-NOV-NEXT: mv s0, a0
-; CHECK-NOV-NEXT: mv a0, a1
+; CHECK-NOV-NEXT: fmv.w.x fa0, a1
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fmv.s fs2, fa0
-; CHECK-NOV-NEXT: mv a0, s3
+; CHECK-NOV-NEXT: fmv.w.x fa0, s3
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fmv.s fs1, fa0
-; CHECK-NOV-NEXT: mv a0, s2
+; CHECK-NOV-NEXT: fmv.w.x fa0, s2
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fmv.s fs0, fa0
-; CHECK-NOV-NEXT: fcvt.lu.s s2, fs2, rtz
-; CHECK-NOV-NEXT: mv a0, s1
+; CHECK-NOV-NEXT: fmv.w.x fa0, s1
+; CHECK-NOV-NEXT: fcvt.lu.s s1, fs2, rtz
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fcvt.lu.s a0, fa0, rtz
; CHECK-NOV-NEXT: li a1, -1
@@ -564,7 +565,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) {
; CHECK-NOV-NEXT: bgeu a0, a1, .LBB7_6
; CHECK-NOV-NEXT: # %bb.1: # %entry
; CHECK-NOV-NEXT: fcvt.lu.s a2, fs1, rtz
-; CHECK-NOV-NEXT: bgeu s2, a1, .LBB7_7
+; CHECK-NOV-NEXT: bgeu s1, a1, .LBB7_7
; CHECK-NOV-NEXT: .LBB7_2: # %entry
; CHECK-NOV-NEXT: fcvt.lu.s a3, fs0, rtz
; CHECK-NOV-NEXT: bgeu a2, a1, .LBB7_8
@@ -575,7 +576,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) {
; CHECK-NOV-NEXT: .LBB7_5: # %entry
; CHECK-NOV-NEXT: sw a3, 12(s0)
; CHECK-NOV-NEXT: sw a2, 8(s0)
-; CHECK-NOV-NEXT: sw s2, 4(s0)
+; CHECK-NOV-NEXT: sw s1, 4(s0)
; CHECK-NOV-NEXT: sw a0, 0(s0)
; CHECK-NOV-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
@@ -590,9 +591,9 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) {
; CHECK-NOV-NEXT: .LBB7_6: # %entry
; CHECK-NOV-NEXT: mv a0, a1
; CHECK-NOV-NEXT: fcvt.lu.s a2, fs1, rtz
-; CHECK-NOV-NEXT: bltu s2, a1, .LBB7_2
+; CHECK-NOV-NEXT: bltu s1, a1, .LBB7_2
; CHECK-NOV-NEXT: .LBB7_7: # %entry
-; CHECK-NOV-NEXT: mv s2, a1
+; CHECK-NOV-NEXT: mv s1, a1
; CHECK-NOV-NEXT: fcvt.lu.s a3, fs0, rtz
; CHECK-NOV-NEXT: bltu a2, a1, .LBB7_3
; CHECK-NOV-NEXT: .LBB7_8: # %entry
@@ -620,13 +621,14 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) {
; CHECK-V-NEXT: lhu s1, 16(a0)
; CHECK-V-NEXT: lhu s2, 0(a0)
; CHECK-V-NEXT: lhu a0, 8(a0)
+; CHECK-V-NEXT: fmv.w.x fa0, a0
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-V-NEXT: vmv.s.x v8, a0
; CHECK-V-NEXT: addi a0, sp, 16
; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT: mv a0, s2
+; CHECK-V-NEXT: fmv.w.x fa0, s2
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 2, e64, m2, tu, ma
@@ -635,7 +637,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v8, v10, 1
; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT: mv a0, s1
+; CHECK-V-NEXT: fmv.w.x fa0, s1
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma
@@ -644,7 +646,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 2
; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT: mv a0, s0
+; CHECK-V-NEXT: fmv.w.x fa0, s0
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
@@ -700,17 +702,17 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) {
; CHECK-NOV-NEXT: lhu s3, 16(a1)
; CHECK-NOV-NEXT: lhu a1, 8(a1)
; CHECK-NOV-NEXT: mv s0, a0
-; CHECK-NOV-NEXT: mv a0, a1
+; CHECK-NOV-NEXT: fmv.w.x fa0, a1
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fmv.s fs2, fa0
-; CHECK-NOV-NEXT: mv a0, s3
+; CHECK-NOV-NEXT: fmv.w.x fa0, s3
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fmv.s fs1, fa0
-; CHECK-NOV-NEXT: mv a0, s2
+; CHECK-NOV-NEXT: fmv.w.x fa0, s2
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fmv.s fs0, fa0
-; CHECK-NOV-NEXT: fcvt.l.s s2, fs2, rtz
-; CHECK-NOV-NEXT: mv a0, s1
+; CHECK-NOV-NEXT: fmv.w.x fa0, s1
+; CHECK-NOV-NEXT: fcvt.l.s s1, fs2, rtz
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-NOV-NEXT: li a2, -1
@@ -718,7 +720,7 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) {
; CHECK-NOV-NEXT: bge a0, a2, .LBB8_6
; CHECK-NOV-NEXT: # %bb.1: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a1, fs1, rtz
-; CHECK-NOV-NEXT: bge s2, a2, .LBB8_7
+; CHECK-NOV-NEXT: bge s1, a2, .LBB8_7
; CHECK-NOV-NEXT: .LBB8_2: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a3, fs0, rtz
; CHECK-NOV-NEXT: bge a1, a2, .LBB8_8
@@ -729,12 +731,12 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) {
; CHECK-NOV-NEXT: .LBB8_5: # %entry
; CHECK-NOV-NEXT: sgtz a2, a3
; CHECK-NOV-NEXT: sgtz a4, a1
-; CHECK-NOV-NEXT: sgtz a5, s2
+; CHECK-NOV-NEXT: sgtz a5, s1
; CHECK-NOV-NEXT: sgtz a6, a0
; CHECK-NOV-NEXT: negw a6, a6
; CHECK-NOV-NEXT: and a0, a6, a0
; CHECK-NOV-NEXT: negw a5, a5
-; CHECK-NOV-NEXT: and a5, a5, s2
+; CHECK-NOV-NEXT: and a5, a5, s1
; CHECK-NOV-NEXT: negw a4, a4
; CHECK-NOV-NEXT: and a1, a4, a1
; CHECK-NOV-NEXT: negw a2, a2
@@ -756,9 +758,9 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) {
; CHECK-NOV-NEXT: .LBB8_6: # %entry
; CHECK-NOV-NEXT: mv a0, a2
; CHECK-NOV-NEXT: fcvt.l.s a1, fs1, rtz
-; CHECK-NOV-NEXT: blt s2, a2, .LBB8_2
+; CHECK-NOV-NEXT: blt s1, a2, .LBB8_2
; CHECK-NOV-NEXT: .LBB8_7: # %entry
-; CHECK-NOV-NEXT: mv s2, a2
+; CHECK-NOV-NEXT: mv s1, a2
; CHECK-NOV-NEXT: fcvt.l.s a3, fs0, rtz
; CHECK-NOV-NEXT: blt a1, a2, .LBB8_3
; CHECK-NOV-NEXT: .LBB8_8: # %entry
@@ -786,13 +788,14 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) {
; CHECK-V-NEXT: lhu s1, 16(a0)
; CHECK-V-NEXT: lhu s2, 0(a0)
; CHECK-V-NEXT: lhu a0, 8(a0)
+; CHECK-V-NEXT: fmv.w.x fa0, a0
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-V-NEXT: vmv.s.x v8, a0
; CHECK-V-NEXT: addi a0, sp, 16
; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT: mv a0, s2
+; CHECK-V-NEXT: fmv.w.x fa0, s2
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 2, e64, m2, tu, ma
@@ -801,7 +804,7 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v8, v10, 1
; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT: mv a0, s1
+; CHECK-V-NEXT: fmv.w.x fa0, s1
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma
@@ -810,7 +813,7 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 2
; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT: mv a0, s0
+; CHECK-V-NEXT: fmv.w.x fa0, s0
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
@@ -1231,29 +1234,29 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) {
; CHECK-NOV-NEXT: lhu s7, 40(a1)
; CHECK-NOV-NEXT: lhu a1, 48(a1)
; CHECK-NOV-NEXT: mv s0, a0
-; CHECK-NOV-NEXT: mv a0, a1
+; CHECK-NOV-NEXT: fmv.w.x fa0, a1
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fmv.s fs6, fa0
-; CHECK-NOV-NEXT: mv a0, s7
+; CHECK-NOV-NEXT: fmv.w.x fa0, s7
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fmv.s fs5, fa0
-; CHECK-NOV-NEXT: mv a0, s6
+; CHECK-NOV-NEXT: fmv.w.x fa0, s6
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fmv.s fs4, fa0
-; CHECK-NOV-NEXT: mv a0, s5
+; CHECK-NOV-NEXT: fmv.w.x fa0, s5
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fmv.s fs3, fa0
-; CHECK-NOV-NEXT: mv a0, s4
+; CHECK-NOV-NEXT: fmv.w.x fa0, s4
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fmv.s fs2, fa0
-; CHECK-NOV-NEXT: mv a0, s3
+; CHECK-NOV-NEXT: fmv.w.x fa0, s3
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fmv.s fs1, fa0
-; CHECK-NOV-NEXT: mv a0, s2
+; CHECK-NOV-NEXT: fmv.w.x fa0, s2
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fmv.s fs0, fa0
-; CHECK-NOV-NEXT: fcvt.l.s s2, fs6, rtz
-; CHECK-NOV-NEXT: mv a0, s1
+; CHECK-NOV-NEXT: fmv.w.x fa0, s1
+; CHECK-NOV-NEXT: fcvt.l.s s1, fs6, rtz
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-NOV-NEXT: lui a7, 8
@@ -1261,7 +1264,7 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) {
; CHECK-NOV-NEXT: bge a0, a7, .LBB15_18
; CHECK-NOV-NEXT: # %bb.1: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a1, fs5, rtz
-; CHECK-NOV-NEXT: bge s2, a7, .LBB15_19
+; CHECK-NOV-NEXT: bge s1, a7, .LBB15_19
; CHECK-NOV-NEXT: .LBB15_2: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a2, fs4, rtz
; CHECK-NOV-NEXT: bge a1, a7, .LBB15_20
@@ -1293,14 +1296,14 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) {
; CHECK-NOV-NEXT: .LBB15_13: # %entry
; CHECK-NOV-NEXT: bge a7, a1, .LBB15_31
; CHECK-NOV-NEXT: .LBB15_14: # %entry
-; CHECK-NOV-NEXT: bge a7, s2, .LBB15_32
+; CHECK-NOV-NEXT: bge a7, s1, .LBB15_32
; CHECK-NOV-NEXT: .LBB15_15: # %entry
; CHECK-NOV-NEXT: blt a7, a0, .LBB15_17
; CHECK-NOV-NEXT: .LBB15_16: # %entry
; CHECK-NOV-NEXT: lui a0, 1048568
; CHECK-NOV-NEXT: .LBB15_17: # %entry
; CHECK-NOV-NEXT: sh a0, 14(s0)
-; CHECK-NOV-NEXT: sh s2, 12(s0)
+; CHECK-NOV-NEXT: sh s1, 12(s0)
; CHECK-NOV-NEXT: sh a1, 10(s0)
; CHECK-NOV-NEXT: sh a2, 8(s0)
; CHECK-NOV-NEXT: sh a3, 6(s0)
@@ -1328,9 +1331,9 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) {
; CHECK-NOV-NEXT: .LBB15_18: # %entry
; CHECK-NOV-NEXT: mv a0, a7
; CHECK-NOV-NEXT: fcvt.l.s a1, fs5, rtz
-; CHECK-NOV-NEXT: blt s2, a7, .LBB15_2
+; CHECK-NOV-NEXT: blt s1, a7, .LBB15_2
; CHECK-NOV-NEXT: .LBB15_19: # %entry
-; CHECK-NOV-NEXT: mv s2, a7
+; CHECK-NOV-NEXT: mv s1, a7
; CHECK-NOV-NEXT: fcvt.l.s a2, fs4, rtz
; CHECK-NOV-NEXT: blt a1, a7, .LBB15_3
; CHECK-NOV-NEXT: .LBB15_20: # %entry
@@ -1373,9 +1376,9 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) {
; CHECK-NOV-NEXT: blt a7, a1, .LBB15_14
; CHECK-NOV-NEXT: .LBB15_31: # %entry
; CHECK-NOV-NEXT: lui a1, 1048568
-; CHECK-NOV-NEXT: blt a7, s2, .LBB15_15
+; CHECK-NOV-NEXT: blt a7, s1, .LBB15_15
; CHECK-NOV-NEXT: .LBB15_32: # %entry
-; CHECK-NOV-NEXT: lui s2, 1048568
+; CHECK-NOV-NEXT: lui s1, 1048568
; CHECK-NOV-NEXT: bge a7, a0, .LBB15_16
; CHECK-NOV-NEXT: j .LBB15_17
;
@@ -1411,13 +1414,14 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) {
; CHECK-V-NEXT: lhu s5, 16(a0)
; CHECK-V-NEXT: lhu s6, 0(a0)
; CHECK-V-NEXT: lhu a0, 8(a0)
+; CHECK-V-NEXT: fmv.w.x fa0, a0
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; CHECK-V-NEXT: vmv.s.x v8, a0
; CHECK-V-NEXT: addi a0, sp, 16
; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT: mv a0, s6
+; CHECK-V-NEXT: fmv.w.x fa0, s6
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 2, e32, m2, tu, ma
@@ -1426,7 +1430,7 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v8, v10, 1
; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT: mv a0, s5
+; CHECK-V-NEXT: fmv.w.x fa0, s5
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 3, e32, m2, tu, ma
@@ -1435,7 +1439,7 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 2
; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT: mv a0, s4
+; CHECK-V-NEXT: fmv.w.x fa0, s4
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 4, e32, m2, tu, ma
@@ -1444,7 +1448,7 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 3
; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT: mv a0, s3
+; CHECK-V-NEXT: fmv.w.x fa0, s3
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 5, e32, m2, tu, ma
@@ -1453,7 +1457,7 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 4
; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT: mv a0, s2
+; CHECK-V-NEXT: fmv.w.x fa0, s2
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 6, e32, m2, tu, ma
@@ -1462,7 +1466,7 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 5
; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT: mv a0, s1
+; CHECK-V-NEXT: fmv.w.x fa0, s1
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 7, e32, m2, tu, ma
@@ -1471,7 +1475,7 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 6
; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT: mv a0, s0
+; CHECK-V-NEXT: fmv.w.x fa0, s0
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
@@ -1555,29 +1559,29 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) {
; CHECK-NOV-NEXT: lhu s7, 16(a1)
; CHECK-NOV-NEXT: lhu a1, 8(a1)
; CHECK-NOV-NEXT: mv s0, a0
-; CHECK-NOV-NEXT: mv a0, a1
+; CHECK-NOV-NEXT: fmv.w.x fa0, a1
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fmv.s fs6, fa0
-; CHECK-NOV-NEXT: mv a0, s7
+; CHECK-NOV-NEXT: fmv.w.x fa0, s7
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fmv.s fs5, fa0
-; CHECK-NOV-NEXT: mv a0, s6
+; CHECK-NOV-NEXT: fmv.w.x fa0, s6
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fmv.s fs4, fa0
-; CHECK-NOV-NEXT: mv a0, s5
+; CHECK-NOV-NEXT: fmv.w.x fa0, s5
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fmv.s fs3, fa0
-; CHECK-NOV-NEXT: mv a0, s4
+; CHECK-NOV-NEXT: fmv.w.x fa0, s4
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fmv.s fs2, fa0
-; CHECK-NOV-NEXT: mv a0, s3
+; CHECK-NOV-NEXT: fmv.w.x fa0, s3
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fmv.s fs1, fa0
-; CHECK-NOV-NEXT: mv a0, s2
+; CHECK-NOV-NEXT: fmv.w.x fa0, s2
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fmv.s fs0, fa0
-; CHECK-NOV-NEXT: fcvt.lu.s s2, fs6, rtz
-; CHECK-NOV-NEXT: mv a0, s1
+; CHECK-NOV-NEXT: fmv.w.x fa0, s1
+; CHECK-NOV-NEXT: fcvt.lu.s s1, fs6, rtz
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fcvt.lu.s a0, fa0, rtz
; CHECK-NOV-NEXT: lui a1, 16
@@ -1585,7 +1589,7 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) {
; CHECK-NOV-NEXT: bgeu a0, a1, .LBB16_10
; CHECK-NOV-NEXT: # %bb.1: # %entry
; CHECK-NOV-NEXT: fcvt.lu.s a2, fs5, rtz
-; CHECK-NOV-NEXT: bgeu s2, a1, .LBB16_11
+; CHECK-NOV-NEXT: bgeu s1, a1, .LBB16_11
; CHECK-NOV-NEXT: .LBB16_2: # %entry
; CHECK-NOV-NEXT: fcvt.lu.s a3, fs4, rtz
; CHECK-NOV-NEXT: bgeu a2, a1, .LBB16_12
@@ -1612,7 +1616,7 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) {
; CHECK-NOV-NEXT: sh a4, 8(s0)
; CHECK-NOV-NEXT: sh a3, 6(s0)
; CHECK-NOV-NEXT: sh a2, 4(s0)
-; CHECK-NOV-NEXT: sh s2, 2(s0)
+; CHECK-NOV-NEXT: sh s1, 2(s0)
; CHECK-NOV-NEXT: sh a0, 0(s0)
; CHECK-NOV-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s0, 112(sp) # 8-byte Folded Reload
@@ -1635,9 +1639,9 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) {
; CHECK-NOV-NEXT: .LBB16_10: # %entry
; CHECK-NOV-NEXT: mv a0, a1
; CHECK-NOV-NEXT: fcvt.lu.s a2, fs5, rtz
-; CHECK-NOV-NEXT: bltu s2, a1, .LBB16_2
+; CHECK-NOV-NEXT: bltu s1, a1, .LBB16_2
; CHECK-NOV-NEXT: .LBB16_11: # %entry
-; CHECK-NOV-NEXT: mv s2, a1
+; CHECK-NOV-NEXT: mv s1, a1
; CHECK-NOV-NEXT: fcvt.lu.s a3, fs4, rtz
; CHECK-NOV-NEXT: bltu a2, a1, .LBB16_3
; CHECK-NOV-NEXT: .LBB16_12: # %entry
@@ -1693,13 +1697,14 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) {
; CHECK-V-NEXT: lhu s5, 16(a0)
; CHECK-V-NEXT: lhu s6, 0(a0)
; CHECK-V-NEXT: lhu a0, 8(a0)
+; CHECK-V-NEXT: fmv.w.x fa0, a0
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; CHECK-V-NEXT: vmv.s.x v8, a0
; CHECK-V-NEXT: addi a0, sp, 16
; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT: mv a0, s6
+; CHECK-V-NEXT: fmv.w.x fa0, s6
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 2, e32, m2, tu, ma
@@ -1708,7 +1713,7 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v8, v10, 1
; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT: mv a0, s5
+; CHECK-V-NEXT: fmv.w.x fa0, s5
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 3, e32, m2, tu, ma
@@ -1717,7 +1722,7 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 2
; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT: mv a0, s4
+; CHECK-V-NEXT: fmv.w.x fa0, s4
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 4, e32, m2, tu, ma
@@ -1726,7 +1731,7 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 3
; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT: mv a0, s3
+; CHECK-V-NEXT: fmv.w.x fa0, s3
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 5, e32, m2, tu, ma
@@ -1735,7 +1740,7 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 4
; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT: mv a0, s2
+; CHECK-V-NEXT: fmv.w.x fa0, s2
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 6, e32, m2, tu, ma
@@ -1744,7 +1749,7 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 5
; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT: mv a0, s1
+; CHECK-V-NEXT: fmv.w.x fa0, s1
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 7, e32, m2, tu, ma
@@ -1753,7 +1758,7 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 6
; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT: mv a0, s0
+; CHECK-V-NEXT: fmv.w.x fa0, s0
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
@@ -1833,29 +1838,29 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) {
; CHECK-NOV-NEXT: lhu s7, 16(a1)
; CHECK-NOV-NEXT: lhu a1, 8(a1)
; CHECK-NOV-NEXT: mv s0, a0
-; CHECK-NOV-NEXT: mv a0, a1
+; CHECK-NOV-NEXT: fmv.w.x fa0, a1
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fmv.s fs6, fa0
-; CHECK-NOV-NEXT: mv a0, s7
+; CHECK-NOV-NEXT: fmv.w.x fa0, s7
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fmv.s fs5, fa0
-; CHECK-NOV-NEXT: mv a0, s6
+; CHECK-NOV-NEXT: fmv.w.x fa0, s6
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fmv.s fs4, fa0
-; CHECK-NOV-NEXT: mv a0, s5
+; CHECK-NOV-NEXT: fmv.w.x fa0, s5
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fmv.s fs3, fa0
-; CHECK-NOV-NEXT: mv a0, s4
+; CHECK-NOV-NEXT: fmv.w.x fa0, s4
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fmv.s fs2, fa0
-; CHECK-NOV-NEXT: mv a0, s3
+; CHECK-NOV-NEXT: fmv.w.x fa0, s3
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fmv.s fs1, fa0
-; CHECK-NOV-NEXT: mv a0, s2
+; CHECK-NOV-NEXT: fmv.w.x fa0, s2
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fmv.s fs0, fa0
-; CHECK-NOV-NEXT: fcvt.l.s s2, fs6, rtz
-; CHECK-NOV-NEXT: mv a0, s1
+; CHECK-NOV-NEXT: fmv.w.x fa0, s1
+; CHECK-NOV-NEXT: fcvt.l.s s1, fs6, rtz
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-NOV-NEXT: lui a3, 16
@@ -1863,7 +1868,7 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) {
; CHECK-NOV-NEXT: bge a0, a3, .LBB17_10
; CHECK-NOV-NEXT: # %bb.1: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a1, fs5, rtz
-; CHECK-NOV-NEXT: bge s2, a3, .LBB17_11
+; CHECK-NOV-NEXT: bge s1, a3, .LBB17_11
; CHECK-NOV-NEXT: .LBB17_2: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a2, fs4, rtz
; CHECK-NOV-NEXT: bge a1, a3, .LBB17_12
@@ -1890,12 +1895,12 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) {
; CHECK-NOV-NEXT: sgtz t2, a4
; CHECK-NOV-NEXT: sgtz t3, a2
; CHECK-NOV-NEXT: sgtz t4, a1
-; CHECK-NOV-NEXT: sgtz t5, s2
+; CHECK-NOV-NEXT: sgtz t5, s1
; CHECK-NOV-NEXT: sgtz t6, a0
; CHECK-NOV-NEXT: negw t6, t6
; CHECK-NOV-NEXT: and a0, t6, a0
; CHECK-NOV-NEXT: negw t5, t5
-; CHECK-NOV-NEXT: and t5, t5, s2
+; CHECK-NOV-NEXT: and t5, t5, s1
; CHECK-NOV-NEXT: negw t4, t4
; CHECK-NOV-NEXT: and a1, t4, a1
; CHECK-NOV-NEXT: negw t3, t3
@@ -1937,9 +1942,9 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) {
; CHECK-NOV-NEXT: .LBB17_10: # %entry
; CHECK-NOV-NEXT: mv a0, a3
; CHECK-NOV-NEXT: fcvt.l.s a1, fs5, rtz
-; CHECK-NOV-NEXT: blt s2, a3, .LBB17_2
+; CHECK-NOV-NEXT: blt s1, a3, .LBB17_2
; CHECK-NOV-NEXT: .LBB17_11: # %entry
-; CHECK-NOV-NEXT: mv s2, a3
+; CHECK-NOV-NEXT: mv s1, a3
; CHECK-NOV-NEXT: fcvt.l.s a2, fs4, rtz
; CHECK-NOV-NEXT: blt a1, a3, .LBB17_3
; CHECK-NOV-NEXT: .LBB17_12: # %entry
@@ -1995,13 +2000,14 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) {
; CHECK-V-NEXT: lhu s5, 16(a0)
; CHECK-V-NEXT: lhu s6, 0(a0)
; CHECK-V-NEXT: lhu a0, 8(a0)
+; CHECK-V-NEXT: fmv.w.x fa0, a0
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; CHECK-V-NEXT: vmv.s.x v8, a0
; CHECK-V-NEXT: addi a0, sp, 16
; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT: mv a0, s6
+; CHECK-V-NEXT: fmv.w.x fa0, s6
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 2, e32, m2, tu, ma
@@ -2010,7 +2016,7 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v8, v10, 1
; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT: mv a0, s5
+; CHECK-V-NEXT: fmv.w.x fa0, s5
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 3, e32, m2, tu, ma
@@ -2019,7 +2025,7 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 2
; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT: mv a0, s4
+; CHECK-V-NEXT: fmv.w.x fa0, s4
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 4, e32, m2, tu, ma
@@ -2028,7 +2034,7 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 3
; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT: mv a0, s3
+; CHECK-V-NEXT: fmv.w.x fa0, s3
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 5, e32, m2, tu, ma
@@ -2037,7 +2043,7 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 4
; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT: mv a0, s2
+; CHECK-V-NEXT: fmv.w.x fa0, s2
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 6, e32, m2, tu, ma
@@ -2046,7 +2052,7 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 5
; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT: mv a0, s1
+; CHECK-V-NEXT: fmv.w.x fa0, s1
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 7, e32, m2, tu, ma
@@ -2055,7 +2061,7 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 6
; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT: mv a0, s0
+; CHECK-V-NEXT: fmv.w.x fa0, s0
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
@@ -2924,11 +2930,12 @@ define <2 x i64> @stest_f16i64(<2 x half> %x) {
; CHECK-NOV-NEXT: .cfi_offset s1, -24
; CHECK-NOV-NEXT: .cfi_offset s2, -32
; CHECK-NOV-NEXT: mv s2, a1
+; CHECK-NOV-NEXT: fmv.w.x fa0, a0
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: call __fixsfti@plt
; CHECK-NOV-NEXT: mv s0, a0
; CHECK-NOV-NEXT: mv s1, a1
-; CHECK-NOV-NEXT: mv a0, s2
+; CHECK-NOV-NEXT: fmv.w.x fa0, s2
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: call __fixsfti@plt
; CHECK-NOV-NEXT: mv a2, a0
@@ -3005,11 +3012,12 @@ define <2 x i64> @stest_f16i64(<2 x half> %x) {
; CHECK-V-NEXT: .cfi_offset s1, -24
; CHECK-V-NEXT: .cfi_offset s2, -32
; CHECK-V-NEXT: mv s2, a1
+; CHECK-V-NEXT: fmv.w.x fa0, a0
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: call __fixsfti@plt
; CHECK-V-NEXT: mv s0, a0
; CHECK-V-NEXT: mv s1, a1
-; CHECK-V-NEXT: mv a0, s2
+; CHECK-V-NEXT: fmv.w.x fa0, s2
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: call __fixsfti@plt
; CHECK-V-NEXT: li a2, -1
@@ -3097,11 +3105,12 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) {
; CHECK-NOV-NEXT: .cfi_offset s1, -24
; CHECK-NOV-NEXT: .cfi_offset s2, -32
; CHECK-NOV-NEXT: mv s0, a1
+; CHECK-NOV-NEXT: fmv.w.x fa0, a0
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: call __fixunssfti@plt
; CHECK-NOV-NEXT: mv s1, a0
; CHECK-NOV-NEXT: mv s2, a1
-; CHECK-NOV-NEXT: mv a0, s0
+; CHECK-NOV-NEXT: fmv.w.x fa0, s0
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: call __fixunssfti@plt
; CHECK-NOV-NEXT: snez a1, a1
@@ -3131,11 +3140,12 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) {
; CHECK-V-NEXT: .cfi_offset s1, -24
; CHECK-V-NEXT: .cfi_offset s2, -32
; CHECK-V-NEXT: mv s0, a1
+; CHECK-V-NEXT: fmv.w.x fa0, a0
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: call __fixunssfti@plt
; CHECK-V-NEXT: mv s1, a0
; CHECK-V-NEXT: mv s2, a1
-; CHECK-V-NEXT: mv a0, s0
+; CHECK-V-NEXT: fmv.w.x fa0, s0
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: call __fixunssfti@plt
; CHECK-V-NEXT: snez a1, a1
@@ -3176,11 +3186,12 @@ define <2 x i64> @ustest_f16i64(<2 x half> %x) {
; CHECK-NOV-NEXT: .cfi_offset s1, -24
; CHECK-NOV-NEXT: .cfi_offset s2, -32
; CHECK-NOV-NEXT: mv s2, a1
+; CHECK-NOV-NEXT: fmv.w.x fa0, a0
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: call __fixsfti@plt
; CHECK-NOV-NEXT: mv s0, a0
; CHECK-NOV-NEXT: mv s1, a1
-; CHECK-NOV-NEXT: mv a0, s2
+; CHECK-NOV-NEXT: fmv.w.x fa0, s2
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: call __fixsfti@plt
; CHECK-NOV-NEXT: mv a2, s1
@@ -3236,11 +3247,12 @@ define <2 x i64> @ustest_f16i64(<2 x half> %x) {
; CHECK-V-NEXT: .cfi_offset s1, -24
; CHECK-V-NEXT: .cfi_offset s2, -32
; CHECK-V-NEXT: mv s2, a1
+; CHECK-V-NEXT: fmv.w.x fa0, a0
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: call __fixsfti@plt
; CHECK-V-NEXT: mv s0, a0
; CHECK-V-NEXT: mv s1, a1
-; CHECK-V-NEXT: mv a0, s2
+; CHECK-V-NEXT: fmv.w.x fa0, s2
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: call __fixsfti@plt
; CHECK-V-NEXT: mv a2, s1
@@ -3653,17 +3665,17 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) {
; CHECK-NOV-NEXT: lhu s3, 8(a1)
; CHECK-NOV-NEXT: lhu a1, 16(a1)
; CHECK-NOV-NEXT: mv s0, a0
-; CHECK-NOV-NEXT: mv a0, a1
+; CHECK-NOV-NEXT: fmv.w.x fa0, a1
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fmv.s fs2, fa0
-; CHECK-NOV-NEXT: mv a0, s3
+; CHECK-NOV-NEXT: fmv.w.x fa0, s3
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fmv.s fs1, fa0
-; CHECK-NOV-NEXT: mv a0, s2
+; CHECK-NOV-NEXT: fmv.w.x fa0, s2
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fmv.s fs0, fa0
-; CHECK-NOV-NEXT: fcvt.l.s s2, fs2, rtz
-; CHECK-NOV-NEXT: mv a0, s1
+; CHECK-NOV-NEXT: fmv.w.x fa0, s1
+; CHECK-NOV-NEXT: fcvt.l.s s1, fs2, rtz
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-NOV-NEXT: lui a1, 524288
@@ -3671,7 +3683,7 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) {
; CHECK-NOV-NEXT: bge a0, a4, .LBB33_10
; CHECK-NOV-NEXT: # %bb.1: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a2, fs1, rtz
-; CHECK-NOV-NEXT: bge s2, a4, .LBB33_11
+; CHECK-NOV-NEXT: bge s1, a4, .LBB33_11
; CHECK-NOV-NEXT: .LBB33_2: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a3, fs0, rtz
; CHECK-NOV-NEXT: bge a2, a4, .LBB33_12
@@ -3682,14 +3694,14 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) {
; CHECK-NOV-NEXT: .LBB33_5: # %entry
; CHECK-NOV-NEXT: bge a1, a2, .LBB33_15
; CHECK-NOV-NEXT: .LBB33_6: # %entry
-; CHECK-NOV-NEXT: bge a1, s2, .LBB33_16
+; CHECK-NOV-NEXT: bge a1, s1, .LBB33_16
; CHECK-NOV-NEXT: .LBB33_7: # %entry
; CHECK-NOV-NEXT: blt a1, a0, .LBB33_9
; CHECK-NOV-NEXT: .LBB33_8: # %entry
; CHECK-NOV-NEXT: lui a0, 524288
; CHECK-NOV-NEXT: .LBB33_9: # %entry
; CHECK-NOV-NEXT: sw a0, 12(s0)
-; CHECK-NOV-NEXT: sw s2, 8(s0)
+; CHECK-NOV-NEXT: sw s1, 8(s0)
; CHECK-NOV-NEXT: sw a2, 4(s0)
; CHECK-NOV-NEXT: sw a3, 0(s0)
; CHECK-NOV-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
@@ -3705,9 +3717,9 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) {
; CHECK-NOV-NEXT: .LBB33_10: # %entry
; CHECK-NOV-NEXT: mv a0, a4
; CHECK-NOV-NEXT: fcvt.l.s a2, fs1, rtz
-; CHECK-NOV-NEXT: blt s2, a4, .LBB33_2
+; CHECK-NOV-NEXT: blt s1, a4, .LBB33_2
; CHECK-NOV-NEXT: .LBB33_11: # %entry
-; CHECK-NOV-NEXT: mv s2, a4
+; CHECK-NOV-NEXT: mv s1, a4
; CHECK-NOV-NEXT: fcvt.l.s a3, fs0, rtz
; CHECK-NOV-NEXT: blt a2, a4, .LBB33_3
; CHECK-NOV-NEXT: .LBB33_12: # %entry
@@ -3721,9 +3733,9 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) {
; CHECK-NOV-NEXT: blt a1, a2, .LBB33_6
; CHECK-NOV-NEXT: .LBB33_15: # %entry
; CHECK-NOV-NEXT: lui a2, 524288
-; CHECK-NOV-NEXT: blt a1, s2, .LBB33_7
+; CHECK-NOV-NEXT: blt a1, s1, .LBB33_7
; CHECK-NOV-NEXT: .LBB33_16: # %entry
-; CHECK-NOV-NEXT: lui s2, 524288
+; CHECK-NOV-NEXT: lui s1, 524288
; CHECK-NOV-NEXT: bge a1, a0, .LBB33_8
; CHECK-NOV-NEXT: j .LBB33_9
;
@@ -3747,13 +3759,14 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) {
; CHECK-V-NEXT: lhu s1, 16(a0)
; CHECK-V-NEXT: lhu s2, 0(a0)
; CHECK-V-NEXT: lhu a0, 8(a0)
+; CHECK-V-NEXT: fmv.w.x fa0, a0
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-V-NEXT: vmv.s.x v8, a0
; CHECK-V-NEXT: addi a0, sp, 16
; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT: mv a0, s2
+; CHECK-V-NEXT: fmv.w.x fa0, s2
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 2, e64, m2, tu, ma
@@ -3762,7 +3775,7 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v8, v10, 1
; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT: mv a0, s1
+; CHECK-V-NEXT: fmv.w.x fa0, s1
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma
@@ -3771,7 +3784,7 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 2
; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT: mv a0, s0
+; CHECK-V-NEXT: fmv.w.x fa0, s0
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
@@ -3828,17 +3841,17 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
; CHECK-NOV-NEXT: lhu s3, 16(a1)
; CHECK-NOV-NEXT: lhu a1, 8(a1)
; CHECK-NOV-NEXT: mv s0, a0
-; CHECK-NOV-NEXT: mv a0, a1
+; CHECK-NOV-NEXT: fmv.w.x fa0, a1
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fmv.s fs2, fa0
-; CHECK-NOV-NEXT: mv a0, s3
+; CHECK-NOV-NEXT: fmv.w.x fa0, s3
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fmv.s fs1, fa0
-; CHECK-NOV-NEXT: mv a0, s2
+; CHECK-NOV-NEXT: fmv.w.x fa0, s2
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fmv.s fs0, fa0
-; CHECK-NOV-NEXT: fcvt.lu.s s2, fs2, rtz
-; CHECK-NOV-NEXT: mv a0, s1
+; CHECK-NOV-NEXT: fmv.w.x fa0, s1
+; CHECK-NOV-NEXT: fcvt.lu.s s1, fs2, rtz
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fcvt.lu.s a0, fa0, rtz
; CHECK-NOV-NEXT: li a1, -1
@@ -3846,7 +3859,7 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
; CHECK-NOV-NEXT: bgeu a0, a1, .LBB34_6
; CHECK-NOV-NEXT: # %bb.1: # %entry
; CHECK-NOV-NEXT: fcvt.lu.s a2, fs1, rtz
-; CHECK-NOV-NEXT: bgeu s2, a1, .LBB34_7
+; CHECK-NOV-NEXT: bgeu s1, a1, .LBB34_7
; CHECK-NOV-NEXT: .LBB34_2: # %entry
; CHECK-NOV-NEXT: fcvt.lu.s a3, fs0, rtz
; CHECK-NOV-NEXT: bgeu a2, a1, .LBB34_8
@@ -3857,7 +3870,7 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
; CHECK-NOV-NEXT: .LBB34_5: # %entry
; CHECK-NOV-NEXT: sw a3, 12(s0)
; CHECK-NOV-NEXT: sw a2, 8(s0)
-; CHECK-NOV-NEXT: sw s2, 4(s0)
+; CHECK-NOV-NEXT: sw s1, 4(s0)
; CHECK-NOV-NEXT: sw a0, 0(s0)
; CHECK-NOV-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
@@ -3872,9 +3885,9 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
; CHECK-NOV-NEXT: .LBB34_6: # %entry
; CHECK-NOV-NEXT: mv a0, a1
; CHECK-NOV-NEXT: fcvt.lu.s a2, fs1, rtz
-; CHECK-NOV-NEXT: bltu s2, a1, .LBB34_2
+; CHECK-NOV-NEXT: bltu s1, a1, .LBB34_2
; CHECK-NOV-NEXT: .LBB34_7: # %entry
-; CHECK-NOV-NEXT: mv s2, a1
+; CHECK-NOV-NEXT: mv s1, a1
; CHECK-NOV-NEXT: fcvt.lu.s a3, fs0, rtz
; CHECK-NOV-NEXT: bltu a2, a1, .LBB34_3
; CHECK-NOV-NEXT: .LBB34_8: # %entry
@@ -3902,13 +3915,14 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
; CHECK-V-NEXT: lhu s1, 16(a0)
; CHECK-V-NEXT: lhu s2, 0(a0)
; CHECK-V-NEXT: lhu a0, 8(a0)
+; CHECK-V-NEXT: fmv.w.x fa0, a0
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-V-NEXT: vmv.s.x v8, a0
; CHECK-V-NEXT: addi a0, sp, 16
; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT: mv a0, s2
+; CHECK-V-NEXT: fmv.w.x fa0, s2
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 2, e64, m2, tu, ma
@@ -3917,7 +3931,7 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v8, v10, 1
; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT: mv a0, s1
+; CHECK-V-NEXT: fmv.w.x fa0, s1
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma
@@ -3926,7 +3940,7 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 2
; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT: mv a0, s0
+; CHECK-V-NEXT: fmv.w.x fa0, s0
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
@@ -3981,17 +3995,17 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) {
; CHECK-NOV-NEXT: lhu s3, 8(a1)
; CHECK-NOV-NEXT: lhu a1, 16(a1)
; CHECK-NOV-NEXT: mv s0, a0
-; CHECK-NOV-NEXT: mv a0, a1
+; CHECK-NOV-NEXT: fmv.w.x fa0, a1
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fmv.s fs2, fa0
-; CHECK-NOV-NEXT: mv a0, s3
+; CHECK-NOV-NEXT: fmv.w.x fa0, s3
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fmv.s fs1, fa0
-; CHECK-NOV-NEXT: mv a0, s2
+; CHECK-NOV-NEXT: fmv.w.x fa0, s2
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fmv.s fs0, fa0
-; CHECK-NOV-NEXT: fcvt.l.s s2, fs2, rtz
-; CHECK-NOV-NEXT: mv a0, s1
+; CHECK-NOV-NEXT: fmv.w.x fa0, s1
+; CHECK-NOV-NEXT: fcvt.l.s s1, fs2, rtz
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-NOV-NEXT: li a2, -1
@@ -3999,7 +4013,7 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) {
; CHECK-NOV-NEXT: bge a0, a2, .LBB35_6
; CHECK-NOV-NEXT: # %bb.1: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a1, fs1, rtz
-; CHECK-NOV-NEXT: bge s2, a2, .LBB35_7
+; CHECK-NOV-NEXT: bge s1, a2, .LBB35_7
; CHECK-NOV-NEXT: .LBB35_2: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a3, fs0, rtz
; CHECK-NOV-NEXT: bge a1, a2, .LBB35_8
@@ -4014,9 +4028,9 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) {
; CHECK-NOV-NEXT: sgtz a3, a1
; CHECK-NOV-NEXT: negw a3, a3
; CHECK-NOV-NEXT: and a1, a3, a1
-; CHECK-NOV-NEXT: sgtz a3, s2
+; CHECK-NOV-NEXT: sgtz a3, s1
; CHECK-NOV-NEXT: negw a3, a3
-; CHECK-NOV-NEXT: and a3, a3, s2
+; CHECK-NOV-NEXT: and a3, a3, s1
; CHECK-NOV-NEXT: sgtz a4, a0
; CHECK-NOV-NEXT: negw a4, a4
; CHECK-NOV-NEXT: and a0, a4, a0
@@ -4037,9 +4051,9 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) {
; CHECK-NOV-NEXT: .LBB35_6: # %entry
; CHECK-NOV-NEXT: mv a0, a2
; CHECK-NOV-NEXT: fcvt.l.s a1, fs1, rtz
-; CHECK-NOV-NEXT: blt s2, a2, .LBB35_2
+; CHECK-NOV-NEXT: blt s1, a2, .LBB35_2
; CHECK-NOV-NEXT: .LBB35_7: # %entry
-; CHECK-NOV-NEXT: mv s2, a2
+; CHECK-NOV-NEXT: mv s1, a2
; CHECK-NOV-NEXT: fcvt.l.s a3, fs0, rtz
; CHECK-NOV-NEXT: blt a1, a2, .LBB35_3
; CHECK-NOV-NEXT: .LBB35_8: # %entry
@@ -4067,13 +4081,14 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) {
; CHECK-V-NEXT: lhu s1, 16(a0)
; CHECK-V-NEXT: lhu s2, 0(a0)
; CHECK-V-NEXT: lhu a0, 8(a0)
+; CHECK-V-NEXT: fmv.w.x fa0, a0
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-V-NEXT: vmv.s.x v8, a0
; CHECK-V-NEXT: addi a0, sp, 16
; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT: mv a0, s2
+; CHECK-V-NEXT: fmv.w.x fa0, s2
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 2, e64, m2, tu, ma
@@ -4082,7 +4097,7 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v8, v10, 1
; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT: mv a0, s1
+; CHECK-V-NEXT: fmv.w.x fa0, s1
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma
@@ -4091,7 +4106,7 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 2
; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT: mv a0, s0
+; CHECK-V-NEXT: fmv.w.x fa0, s0
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
@@ -4500,29 +4515,29 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
; CHECK-NOV-NEXT: lhu s7, 40(a1)
; CHECK-NOV-NEXT: lhu a1, 48(a1)
; CHECK-NOV-NEXT: mv s0, a0
-; CHECK-NOV-NEXT: mv a0, a1
+; CHECK-NOV-NEXT: fmv.w.x fa0, a1
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fmv.s fs6, fa0
-; CHECK-NOV-NEXT: mv a0, s7
+; CHECK-NOV-NEXT: fmv.w.x fa0, s7
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fmv.s fs5, fa0
-; CHECK-NOV-NEXT: mv a0, s6
+; CHECK-NOV-NEXT: fmv.w.x fa0, s6
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fmv.s fs4, fa0
-; CHECK-NOV-NEXT: mv a0, s5
+; CHECK-NOV-NEXT: fmv.w.x fa0, s5
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fmv.s fs3, fa0
-; CHECK-NOV-NEXT: mv a0, s4
+; CHECK-NOV-NEXT: fmv.w.x fa0, s4
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fmv.s fs2, fa0
-; CHECK-NOV-NEXT: mv a0, s3
+; CHECK-NOV-NEXT: fmv.w.x fa0, s3
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fmv.s fs1, fa0
-; CHECK-NOV-NEXT: mv a0, s2
+; CHECK-NOV-NEXT: fmv.w.x fa0, s2
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fmv.s fs0, fa0
-; CHECK-NOV-NEXT: fcvt.l.s s2, fs6, rtz
-; CHECK-NOV-NEXT: mv a0, s1
+; CHECK-NOV-NEXT: fmv.w.x fa0, s1
+; CHECK-NOV-NEXT: fcvt.l.s s1, fs6, rtz
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-NOV-NEXT: lui a7, 8
@@ -4530,7 +4545,7 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
; CHECK-NOV-NEXT: bge a0, a7, .LBB42_18
; CHECK-NOV-NEXT: # %bb.1: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a1, fs5, rtz
-; CHECK-NOV-NEXT: bge s2, a7, .LBB42_19
+; CHECK-NOV-NEXT: bge s1, a7, .LBB42_19
; CHECK-NOV-NEXT: .LBB42_2: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a2, fs4, rtz
; CHECK-NOV-NEXT: bge a1, a7, .LBB42_20
@@ -4562,14 +4577,14 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
; CHECK-NOV-NEXT: .LBB42_13: # %entry
; CHECK-NOV-NEXT: bge a7, a1, .LBB42_31
; CHECK-NOV-NEXT: .LBB42_14: # %entry
-; CHECK-NOV-NEXT: bge a7, s2, .LBB42_32
+; CHECK-NOV-NEXT: bge a7, s1, .LBB42_32
; CHECK-NOV-NEXT: .LBB42_15: # %entry
; CHECK-NOV-NEXT: blt a7, a0, .LBB42_17
; CHECK-NOV-NEXT: .LBB42_16: # %entry
; CHECK-NOV-NEXT: lui a0, 1048568
; CHECK-NOV-NEXT: .LBB42_17: # %entry
; CHECK-NOV-NEXT: sh a0, 14(s0)
-; CHECK-NOV-NEXT: sh s2, 12(s0)
+; CHECK-NOV-NEXT: sh s1, 12(s0)
; CHECK-NOV-NEXT: sh a1, 10(s0)
; CHECK-NOV-NEXT: sh a2, 8(s0)
; CHECK-NOV-NEXT: sh a3, 6(s0)
@@ -4597,9 +4612,9 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
; CHECK-NOV-NEXT: .LBB42_18: # %entry
; CHECK-NOV-NEXT: mv a0, a7
; CHECK-NOV-NEXT: fcvt.l.s a1, fs5, rtz
-; CHECK-NOV-NEXT: blt s2, a7, .LBB42_2
+; CHECK-NOV-NEXT: blt s1, a7, .LBB42_2
; CHECK-NOV-NEXT: .LBB42_19: # %entry
-; CHECK-NOV-NEXT: mv s2, a7
+; CHECK-NOV-NEXT: mv s1, a7
; CHECK-NOV-NEXT: fcvt.l.s a2, fs4, rtz
; CHECK-NOV-NEXT: blt a1, a7, .LBB42_3
; CHECK-NOV-NEXT: .LBB42_20: # %entry
@@ -4642,9 +4657,9 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
; CHECK-NOV-NEXT: blt a7, a1, .LBB42_14
; CHECK-NOV-NEXT: .LBB42_31: # %entry
; CHECK-NOV-NEXT: lui a1, 1048568
-; CHECK-NOV-NEXT: blt a7, s2, .LBB42_15
+; CHECK-NOV-NEXT: blt a7, s1, .LBB42_15
; CHECK-NOV-NEXT: .LBB42_32: # %entry
-; CHECK-NOV-NEXT: lui s2, 1048568
+; CHECK-NOV-NEXT: lui s1, 1048568
; CHECK-NOV-NEXT: bge a7, a0, .LBB42_16
; CHECK-NOV-NEXT: j .LBB42_17
;
@@ -4680,13 +4695,14 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
; CHECK-V-NEXT: lhu s5, 16(a0)
; CHECK-V-NEXT: lhu s6, 0(a0)
; CHECK-V-NEXT: lhu a0, 8(a0)
+; CHECK-V-NEXT: fmv.w.x fa0, a0
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; CHECK-V-NEXT: vmv.s.x v8, a0
; CHECK-V-NEXT: addi a0, sp, 16
; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT: mv a0, s6
+; CHECK-V-NEXT: fmv.w.x fa0, s6
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 2, e32, m2, tu, ma
@@ -4695,7 +4711,7 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v8, v10, 1
; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT: mv a0, s5
+; CHECK-V-NEXT: fmv.w.x fa0, s5
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 3, e32, m2, tu, ma
@@ -4704,7 +4720,7 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 2
; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT: mv a0, s4
+; CHECK-V-NEXT: fmv.w.x fa0, s4
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 4, e32, m2, tu, ma
@@ -4713,7 +4729,7 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 3
; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT: mv a0, s3
+; CHECK-V-NEXT: fmv.w.x fa0, s3
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 5, e32, m2, tu, ma
@@ -4722,7 +4738,7 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 4
; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT: mv a0, s2
+; CHECK-V-NEXT: fmv.w.x fa0, s2
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 6, e32, m2, tu, ma
@@ -4731,7 +4747,7 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 5
; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT: mv a0, s1
+; CHECK-V-NEXT: fmv.w.x fa0, s1
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 7, e32, m2, tu, ma
@@ -4740,7 +4756,7 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 6
; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT: mv a0, s0
+; CHECK-V-NEXT: fmv.w.x fa0, s0
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
@@ -4822,31 +4838,31 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
; CHECK-NOV-NEXT: lhu s7, 16(a1)
; CHECK-NOV-NEXT: lhu a1, 8(a1)
; CHECK-NOV-NEXT: mv s0, a0
-; CHECK-NOV-NEXT: mv a0, a1
+; CHECK-NOV-NEXT: fmv.w.x fa0, a1
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fmv.s fs5, fa0
-; CHECK-NOV-NEXT: mv a0, s7
+; CHECK-NOV-NEXT: fmv.w.x fa0, s7
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fmv.s fs6, fa0
-; CHECK-NOV-NEXT: mv a0, s6
+; CHECK-NOV-NEXT: fmv.w.x fa0, s6
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fmv.s fs4, fa0
-; CHECK-NOV-NEXT: mv a0, s5
+; CHECK-NOV-NEXT: fmv.w.x fa0, s5
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fmv.s fs3, fa0
-; CHECK-NOV-NEXT: mv a0, s4
+; CHECK-NOV-NEXT: fmv.w.x fa0, s4
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fmv.s fs2, fa0
-; CHECK-NOV-NEXT: mv a0, s3
+; CHECK-NOV-NEXT: fmv.w.x fa0, s3
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fmv.s fs1, fa0
-; CHECK-NOV-NEXT: mv a0, s2
+; CHECK-NOV-NEXT: fmv.w.x fa0, s2
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fmv.s fs0, fa0
-; CHECK-NOV-NEXT: fcvt.lu.s s3, fs6, rtz
-; CHECK-NOV-NEXT: fcvt.lu.s s2, fs5, rtz
-; CHECK-NOV-NEXT: sext.w s2, s2
-; CHECK-NOV-NEXT: mv a0, s1
+; CHECK-NOV-NEXT: fcvt.lu.s s2, fs6, rtz
+; CHECK-NOV-NEXT: fcvt.lu.s a0, fs5, rtz
+; CHECK-NOV-NEXT: fmv.w.x fa0, s1
+; CHECK-NOV-NEXT: sext.w s1, a0
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fcvt.lu.s a0, fa0, rtz
; CHECK-NOV-NEXT: sext.w a0, a0
@@ -4857,10 +4873,10 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
; CHECK-NOV-NEXT: mv a0, a1
; CHECK-NOV-NEXT: .LBB43_2: # %entry
; CHECK-NOV-NEXT: fcvt.lu.s a3, fs4, rtz
-; CHECK-NOV-NEXT: sext.w a2, s3
-; CHECK-NOV-NEXT: bltu s2, a1, .LBB43_4
+; CHECK-NOV-NEXT: sext.w a2, s2
+; CHECK-NOV-NEXT: bltu s1, a1, .LBB43_4
; CHECK-NOV-NEXT: # %bb.3: # %entry
-; CHECK-NOV-NEXT: mv s2, a1
+; CHECK-NOV-NEXT: mv s1, a1
; CHECK-NOV-NEXT: .LBB43_4: # %entry
; CHECK-NOV-NEXT: fcvt.lu.s a4, fs3, rtz
; CHECK-NOV-NEXT: sext.w a3, a3
@@ -4897,7 +4913,7 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
; CHECK-NOV-NEXT: sh a4, 8(s0)
; CHECK-NOV-NEXT: sh a3, 6(s0)
; CHECK-NOV-NEXT: sh a2, 4(s0)
-; CHECK-NOV-NEXT: sh s2, 2(s0)
+; CHECK-NOV-NEXT: sh s1, 2(s0)
; CHECK-NOV-NEXT: sh a0, 0(s0)
; CHECK-NOV-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s0, 112(sp) # 8-byte Folded Reload
@@ -4958,13 +4974,14 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
; CHECK-V-NEXT: lhu s5, 16(a0)
; CHECK-V-NEXT: lhu s6, 0(a0)
; CHECK-V-NEXT: lhu a0, 8(a0)
+; CHECK-V-NEXT: fmv.w.x fa0, a0
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; CHECK-V-NEXT: vmv.s.x v8, a0
; CHECK-V-NEXT: addi a0, sp, 16
; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT: mv a0, s6
+; CHECK-V-NEXT: fmv.w.x fa0, s6
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 2, e32, m2, tu, ma
@@ -4973,7 +4990,7 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v8, v10, 1
; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT: mv a0, s5
+; CHECK-V-NEXT: fmv.w.x fa0, s5
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 3, e32, m2, tu, ma
@@ -4982,7 +4999,7 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 2
; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT: mv a0, s4
+; CHECK-V-NEXT: fmv.w.x fa0, s4
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 4, e32, m2, tu, ma
@@ -4991,7 +5008,7 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 3
; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT: mv a0, s3
+; CHECK-V-NEXT: fmv.w.x fa0, s3
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 5, e32, m2, tu, ma
@@ -5000,7 +5017,7 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 4
; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT: mv a0, s2
+; CHECK-V-NEXT: fmv.w.x fa0, s2
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 6, e32, m2, tu, ma
@@ -5009,7 +5026,7 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 5
; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT: mv a0, s1
+; CHECK-V-NEXT: fmv.w.x fa0, s1
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 7, e32, m2, tu, ma
@@ -5018,7 +5035,7 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 6
; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT: mv a0, s0
+; CHECK-V-NEXT: fmv.w.x fa0, s0
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
@@ -5097,29 +5114,29 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) {
; CHECK-NOV-NEXT: lhu s7, 40(a1)
; CHECK-NOV-NEXT: lhu a1, 48(a1)
; CHECK-NOV-NEXT: mv s0, a0
-; CHECK-NOV-NEXT: mv a0, a1
+; CHECK-NOV-NEXT: fmv.w.x fa0, a1
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fmv.s fs6, fa0
-; CHECK-NOV-NEXT: mv a0, s7
+; CHECK-NOV-NEXT: fmv.w.x fa0, s7
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fmv.s fs5, fa0
-; CHECK-NOV-NEXT: mv a0, s6
+; CHECK-NOV-NEXT: fmv.w.x fa0, s6
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fmv.s fs4, fa0
-; CHECK-NOV-NEXT: mv a0, s5
+; CHECK-NOV-NEXT: fmv.w.x fa0, s5
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fmv.s fs3, fa0
-; CHECK-NOV-NEXT: mv a0, s4
+; CHECK-NOV-NEXT: fmv.w.x fa0, s4
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fmv.s fs2, fa0
-; CHECK-NOV-NEXT: mv a0, s3
+; CHECK-NOV-NEXT: fmv.w.x fa0, s3
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fmv.s fs1, fa0
-; CHECK-NOV-NEXT: mv a0, s2
+; CHECK-NOV-NEXT: fmv.w.x fa0, s2
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fmv.s fs0, fa0
-; CHECK-NOV-NEXT: fcvt.l.s s2, fs6, rtz
-; CHECK-NOV-NEXT: mv a0, s1
+; CHECK-NOV-NEXT: fmv.w.x fa0, s1
+; CHECK-NOV-NEXT: fcvt.l.s s1, fs6, rtz
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-NOV-NEXT: lui a3, 16
@@ -5127,7 +5144,7 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) {
; CHECK-NOV-NEXT: bge a0, a3, .LBB44_10
; CHECK-NOV-NEXT: # %bb.1: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a1, fs5, rtz
-; CHECK-NOV-NEXT: bge s2, a3, .LBB44_11
+; CHECK-NOV-NEXT: bge s1, a3, .LBB44_11
; CHECK-NOV-NEXT: .LBB44_2: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a2, fs4, rtz
; CHECK-NOV-NEXT: bge a1, a3, .LBB44_12
@@ -5166,9 +5183,9 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) {
; CHECK-NOV-NEXT: sgtz a7, a1
; CHECK-NOV-NEXT: negw a7, a7
; CHECK-NOV-NEXT: and a1, a7, a1
-; CHECK-NOV-NEXT: sgtz a7, s2
+; CHECK-NOV-NEXT: sgtz a7, s1
; CHECK-NOV-NEXT: negw a7, a7
-; CHECK-NOV-NEXT: and a7, a7, s2
+; CHECK-NOV-NEXT: and a7, a7, s1
; CHECK-NOV-NEXT: sgtz t0, a0
; CHECK-NOV-NEXT: negw t0, t0
; CHECK-NOV-NEXT: and a0, t0, a0
@@ -5201,9 +5218,9 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) {
; CHECK-NOV-NEXT: .LBB44_10: # %entry
; CHECK-NOV-NEXT: mv a0, a3
; CHECK-NOV-NEXT: fcvt.l.s a1, fs5, rtz
-; CHECK-NOV-NEXT: blt s2, a3, .LBB44_2
+; CHECK-NOV-NEXT: blt s1, a3, .LBB44_2
; CHECK-NOV-NEXT: .LBB44_11: # %entry
-; CHECK-NOV-NEXT: mv s2, a3
+; CHECK-NOV-NEXT: mv s1, a3
; CHECK-NOV-NEXT: fcvt.l.s a2, fs4, rtz
; CHECK-NOV-NEXT: blt a1, a3, .LBB44_3
; CHECK-NOV-NEXT: .LBB44_12: # %entry
@@ -5259,13 +5276,14 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) {
; CHECK-V-NEXT: lhu s5, 16(a0)
; CHECK-V-NEXT: lhu s6, 0(a0)
; CHECK-V-NEXT: lhu a0, 8(a0)
+; CHECK-V-NEXT: fmv.w.x fa0, a0
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; CHECK-V-NEXT: vmv.s.x v8, a0
; CHECK-V-NEXT: addi a0, sp, 16
; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT: mv a0, s6
+; CHECK-V-NEXT: fmv.w.x fa0, s6
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 2, e32, m2, tu, ma
@@ -5274,7 +5292,7 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v8, v10, 1
; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT: mv a0, s5
+; CHECK-V-NEXT: fmv.w.x fa0, s5
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 3, e32, m2, tu, ma
@@ -5283,7 +5301,7 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 2
; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT: mv a0, s4
+; CHECK-V-NEXT: fmv.w.x fa0, s4
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 4, e32, m2, tu, ma
@@ -5292,7 +5310,7 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 3
; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT: mv a0, s3
+; CHECK-V-NEXT: fmv.w.x fa0, s3
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 5, e32, m2, tu, ma
@@ -5301,7 +5319,7 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 4
; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT: mv a0, s2
+; CHECK-V-NEXT: fmv.w.x fa0, s2
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 6, e32, m2, tu, ma
@@ -5310,7 +5328,7 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 5
; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT: mv a0, s1
+; CHECK-V-NEXT: fmv.w.x fa0, s1
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 7, e32, m2, tu, ma
@@ -5319,7 +5337,7 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) {
; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
; CHECK-V-NEXT: vslideup.vi v10, v8, 6
; CHECK-V-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
-; CHECK-V-NEXT: mv a0, s0
+; CHECK-V-NEXT: fmv.w.x fa0, s0
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz
; CHECK-V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
@@ -6142,11 +6160,12 @@ define <2 x i64> @stest_f16i64_mm(<2 x half> %x) {
; CHECK-NOV-NEXT: .cfi_offset s1, -24
; CHECK-NOV-NEXT: .cfi_offset s2, -32
; CHECK-NOV-NEXT: mv s2, a1
+; CHECK-NOV-NEXT: fmv.w.x fa0, a0
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: call __fixsfti@plt
; CHECK-NOV-NEXT: mv s0, a0
; CHECK-NOV-NEXT: mv s1, a1
-; CHECK-NOV-NEXT: mv a0, s2
+; CHECK-NOV-NEXT: fmv.w.x fa0, s2
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: call __fixsfti@plt
; CHECK-NOV-NEXT: mv a2, a0
@@ -6226,11 +6245,12 @@ define <2 x i64> @stest_f16i64_mm(<2 x half> %x) {
; CHECK-V-NEXT: .cfi_offset s1, -24
; CHECK-V-NEXT: .cfi_offset s2, -32
; CHECK-V-NEXT: mv s2, a1
+; CHECK-V-NEXT: fmv.w.x fa0, a0
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: call __fixsfti@plt
; CHECK-V-NEXT: mv s0, a0
; CHECK-V-NEXT: mv s1, a1
-; CHECK-V-NEXT: mv a0, s2
+; CHECK-V-NEXT: fmv.w.x fa0, s2
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: call __fixsfti@plt
; CHECK-V-NEXT: li a2, -1
@@ -6319,12 +6339,12 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
; CHECK-NOV-NEXT: .cfi_offset s1, -24
; CHECK-NOV-NEXT: .cfi_offset s2, -32
; CHECK-NOV-NEXT: mv s0, a0
-; CHECK-NOV-NEXT: mv a0, a1
+; CHECK-NOV-NEXT: fmv.w.x fa0, a1
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: call __fixunssfti@plt
; CHECK-NOV-NEXT: mv s1, a0
; CHECK-NOV-NEXT: mv s2, a1
-; CHECK-NOV-NEXT: mv a0, s0
+; CHECK-NOV-NEXT: fmv.w.x fa0, s0
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: call __fixunssfti@plt
; CHECK-NOV-NEXT: snez a1, a1
@@ -6353,12 +6373,12 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
; CHECK-V-NEXT: .cfi_offset s1, -24
; CHECK-V-NEXT: .cfi_offset s2, -32
; CHECK-V-NEXT: mv s0, a0
-; CHECK-V-NEXT: mv a0, a1
+; CHECK-V-NEXT: fmv.w.x fa0, a1
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: call __fixunssfti@plt
; CHECK-V-NEXT: mv s1, a0
; CHECK-V-NEXT: mv s2, a1
-; CHECK-V-NEXT: mv a0, s0
+; CHECK-V-NEXT: fmv.w.x fa0, s0
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: call __fixunssfti@plt
; CHECK-V-NEXT: snez a1, a1
@@ -6398,11 +6418,12 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) {
; CHECK-NOV-NEXT: .cfi_offset s1, -24
; CHECK-NOV-NEXT: .cfi_offset s2, -32
; CHECK-NOV-NEXT: mv s2, a1
+; CHECK-NOV-NEXT: fmv.w.x fa0, a0
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: call __fixsfti@plt
; CHECK-NOV-NEXT: mv s0, a0
; CHECK-NOV-NEXT: mv s1, a1
-; CHECK-NOV-NEXT: mv a0, s2
+; CHECK-NOV-NEXT: fmv.w.x fa0, s2
; CHECK-NOV-NEXT: call __extendhfsf2@plt
; CHECK-NOV-NEXT: call __fixsfti@plt
; CHECK-NOV-NEXT: mv a2, a1
@@ -6447,11 +6468,12 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) {
; CHECK-V-NEXT: .cfi_offset s1, -24
; CHECK-V-NEXT: .cfi_offset s2, -32
; CHECK-V-NEXT: mv s2, a1
+; CHECK-V-NEXT: fmv.w.x fa0, a0
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: call __fixsfti@plt
; CHECK-V-NEXT: mv s0, a0
; CHECK-V-NEXT: mv s1, a1
-; CHECK-V-NEXT: mv a0, s2
+; CHECK-V-NEXT: fmv.w.x fa0, s2
; CHECK-V-NEXT: call __extendhfsf2@plt
; CHECK-V-NEXT: call __fixsfti@plt
; CHECK-V-NEXT: mv a2, a1
diff --git a/llvm/test/CodeGen/RISCV/rvv/pr63596.ll b/llvm/test/CodeGen/RISCV/rvv/pr63596.ll
index 3f5de02c1e4295..65dca0daed8c77 100644
--- a/llvm/test/CodeGen/RISCV/rvv/pr63596.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/pr63596.ll
@@ -13,15 +13,16 @@ define <4 x float> @foo(ptr %0) nounwind {
; CHECK-NEXT: lhu s1, 4(a0)
; CHECK-NEXT: lhu s2, 0(a0)
; CHECK-NEXT: lhu a0, 2(a0)
+; CHECK-NEXT: fmv.w.x fa0, a0
; CHECK-NEXT: call __extendhfsf2@plt
; CHECK-NEXT: fsw fa0, 8(sp)
-; CHECK-NEXT: mv a0, s2
+; CHECK-NEXT: fmv.w.x fa0, s2
; CHECK-NEXT: call __extendhfsf2@plt
; CHECK-NEXT: fsw fa0, 0(sp)
-; CHECK-NEXT: mv a0, s1
+; CHECK-NEXT: fmv.w.x fa0, s1
; CHECK-NEXT: call __extendhfsf2@plt
; CHECK-NEXT: fsw fa0, 12(sp)
-; CHECK-NEXT: mv a0, s0
+; CHECK-NEXT: fmv.w.x fa0, s0
; CHECK-NEXT: call __extendhfsf2@plt
; CHECK-NEXT: fsw fa0, 4(sp)
; CHECK-NEXT: addi a0, sp, 8