[llvm] f69078b - [RISCV] Update some tests to use floating point ABI where it makes sense.

Craig Topper via llvm-commits llvm-commits@lists.llvm.org
Thu Feb 24 09:30:15 PST 2022


Author: Craig Topper
Date: 2022-02-24T09:27:57-08:00
New Revision: f69078b77f6354e51715499f71d8923938399fad

URL: https://github.com/llvm/llvm-project/commit/f69078b77f6354e51715499f71d8923938399fad
DIFF: https://github.com/llvm/llvm-project/commit/f69078b77f6354e51715499f71d8923938399fad.diff

LOG: [RISCV] Update some tests to use floating point ABI where it makes sense.

This reduces the diffs from D118333 for cases where it makes
more sense to use an FP ABI.

Reviewed By: asb, kito-cheng

Differential Revision: https://reviews.llvm.org/D120447
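
As an illustration of the RUN-line change being made, here is a minimal
sketch of a hard-float test (the function name and hand-written check lines
are illustrative, not taken from this commit):

; RUN: llc -mtriple=riscv64 -mattr=+d -verify-machineinstrs < %s \
; RUN:   -target-abi=lp64d | FileCheck -check-prefix=RV64IFD %s

; With -target-abi=lp64d the double arguments arrive in fa0/fa1 and the
; result is returned in fa0, so no fmv.d.x/fmv.x.d moves through integer
; registers are needed.
define double @add_example(double %a, double %b) nounwind {
; RV64IFD-LABEL: add_example:
; RV64IFD:       fadd.d fa0, fa0, fa1
; RV64IFD-NEXT:  ret
  %1 = fadd double %a, %b
  ret double %1
}

Removing those integer-register moves is what most of the assembly diffs
below reflect.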

Added: 
    

Modified: 
    llvm/test/CodeGen/RISCV/double-imm.ll
    llvm/test/CodeGen/RISCV/double-mem.ll
    llvm/test/CodeGen/RISCV/float-imm.ll
    llvm/test/CodeGen/RISCV/float-mem.ll
    llvm/test/CodeGen/RISCV/fpclamptosat.ll
    llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll
    llvm/test/CodeGen/RISCV/frm-dependency.ll
    llvm/test/CodeGen/RISCV/rv64d-double-convert-strict.ll
    llvm/test/CodeGen/RISCV/rv64d-double-convert.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-bitcast.ll
    llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
    llvm/test/CodeGen/RISCV/rvv/vmfeq-rv32.ll
    llvm/test/CodeGen/RISCV/rvv/vmfeq-rv64.ll
    llvm/test/CodeGen/RISCV/rvv/vmfge-rv32.ll
    llvm/test/CodeGen/RISCV/rvv/vmfge-rv64.ll
    llvm/test/CodeGen/RISCV/rvv/vmfgt-rv32.ll
    llvm/test/CodeGen/RISCV/rvv/vmfgt-rv64.ll
    llvm/test/CodeGen/RISCV/rvv/vmfle-rv32.ll
    llvm/test/CodeGen/RISCV/rvv/vmfle-rv64.ll
    llvm/test/CodeGen/RISCV/rvv/vmflt-rv32.ll
    llvm/test/CodeGen/RISCV/rvv/vmflt-rv64.ll
    llvm/test/CodeGen/RISCV/rvv/vmfne-rv32.ll
    llvm/test/CodeGen/RISCV/rvv/vmfne-rv64.ll
    llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
    llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll

Removed: 
    


################################################################################
diff --git a/llvm/test/CodeGen/RISCV/double-imm.ll b/llvm/test/CodeGen/RISCV/double-imm.ll
index 7df02a84b9bd4..5c764d9dece2a 100644
--- a/llvm/test/CodeGen/RISCV/double-imm.ll
+++ b/llvm/test/CodeGen/RISCV/double-imm.ll
@@ -1,22 +1,20 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+d -verify-machineinstrs < %s \
-; RUN:   | FileCheck -check-prefix=RV32IFD %s
+; RUN:   -target-abi=ilp32d | FileCheck -check-prefix=RV32IFD %s
 ; RUN: llc -mtriple=riscv64 -mattr=+d -verify-machineinstrs < %s \
-; RUN:   | FileCheck -check-prefix=RV64IFD %s
+; RUN:   -target-abi=lp64d | FileCheck -check-prefix=RV64IFD %s
 
 define double @double_imm() nounwind {
 ; RV32IFD-LABEL: double_imm:
 ; RV32IFD:       # %bb.0:
-; RV32IFD-NEXT:    lui a0, 345155
-; RV32IFD-NEXT:    addi a0, a0, -744
-; RV32IFD-NEXT:    lui a1, 262290
-; RV32IFD-NEXT:    addi a1, a1, 507
+; RV32IFD-NEXT:    lui a0, %hi(.LCPI0_0)
+; RV32IFD-NEXT:    fld fa0, %lo(.LCPI0_0)(a0)
 ; RV32IFD-NEXT:    ret
 ;
 ; RV64IFD-LABEL: double_imm:
 ; RV64IFD:       # %bb.0:
 ; RV64IFD-NEXT:    lui a0, %hi(.LCPI0_0)
-; RV64IFD-NEXT:    ld a0, %lo(.LCPI0_0)(a0)
+; RV64IFD-NEXT:    fld fa0, %lo(.LCPI0_0)(a0)
 ; RV64IFD-NEXT:    ret
   ret double 3.1415926535897931159979634685441851615905761718750
 }
@@ -24,26 +22,16 @@ define double @double_imm() nounwind {
 define double @double_imm_op(double %a) nounwind {
 ; RV32IFD-LABEL: double_imm_op:
 ; RV32IFD:       # %bb.0:
-; RV32IFD-NEXT:    addi sp, sp, -16
-; RV32IFD-NEXT:    sw a0, 8(sp)
-; RV32IFD-NEXT:    sw a1, 12(sp)
-; RV32IFD-NEXT:    fld ft0, 8(sp)
 ; RV32IFD-NEXT:    lui a0, %hi(.LCPI1_0)
-; RV32IFD-NEXT:    fld ft1, %lo(.LCPI1_0)(a0)
-; RV32IFD-NEXT:    fadd.d ft0, ft0, ft1
-; RV32IFD-NEXT:    fsd ft0, 8(sp)
-; RV32IFD-NEXT:    lw a0, 8(sp)
-; RV32IFD-NEXT:    lw a1, 12(sp)
-; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    fld ft0, %lo(.LCPI1_0)(a0)
+; RV32IFD-NEXT:    fadd.d fa0, fa0, ft0
 ; RV32IFD-NEXT:    ret
 ;
 ; RV64IFD-LABEL: double_imm_op:
 ; RV64IFD:       # %bb.0:
-; RV64IFD-NEXT:    lui a1, %hi(.LCPI1_0)
-; RV64IFD-NEXT:    fld ft0, %lo(.LCPI1_0)(a1)
-; RV64IFD-NEXT:    fmv.d.x ft1, a0
-; RV64IFD-NEXT:    fadd.d ft0, ft1, ft0
-; RV64IFD-NEXT:    fmv.x.d a0, ft0
+; RV64IFD-NEXT:    lui a0, %hi(.LCPI1_0)
+; RV64IFD-NEXT:    fld ft0, %lo(.LCPI1_0)(a0)
+; RV64IFD-NEXT:    fadd.d fa0, fa0, ft0
 ; RV64IFD-NEXT:    ret
   %1 = fadd double %a, 1.0
   ret double %1

diff --git a/llvm/test/CodeGen/RISCV/double-mem.ll b/llvm/test/CodeGen/RISCV/double-mem.ll
index c9c445f8125bd..ba0b652ddffc7 100644
--- a/llvm/test/CodeGen/RISCV/double-mem.ll
+++ b/llvm/test/CodeGen/RISCV/double-mem.ll
@@ -1,28 +1,22 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+d -verify-machineinstrs < %s \
-; RUN:   | FileCheck -check-prefix=RV32IFD %s
+; RUN:   -target-abi=ilp32d | FileCheck -check-prefix=RV32IFD %s
 ; RUN: llc -mtriple=riscv64 -mattr=+d -verify-machineinstrs < %s \
-; RUN:   | FileCheck -check-prefix=RV64IFD %s
+; RUN:   -target-abi=lp64d | FileCheck -check-prefix=RV64IFD %s
 
 define dso_local double @fld(double *%a) nounwind {
 ; RV32IFD-LABEL: fld:
 ; RV32IFD:       # %bb.0:
-; RV32IFD-NEXT:    addi sp, sp, -16
 ; RV32IFD-NEXT:    fld ft0, 0(a0)
 ; RV32IFD-NEXT:    fld ft1, 24(a0)
-; RV32IFD-NEXT:    fadd.d ft0, ft0, ft1
-; RV32IFD-NEXT:    fsd ft0, 8(sp)
-; RV32IFD-NEXT:    lw a0, 8(sp)
-; RV32IFD-NEXT:    lw a1, 12(sp)
-; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    fadd.d fa0, ft0, ft1
 ; RV32IFD-NEXT:    ret
 ;
 ; RV64IFD-LABEL: fld:
 ; RV64IFD:       # %bb.0:
 ; RV64IFD-NEXT:    fld ft0, 0(a0)
 ; RV64IFD-NEXT:    fld ft1, 24(a0)
-; RV64IFD-NEXT:    fadd.d ft0, ft0, ft1
-; RV64IFD-NEXT:    fmv.x.d a0, ft0
+; RV64IFD-NEXT:    fadd.d fa0, ft0, ft1
 ; RV64IFD-NEXT:    ret
   %1 = load double, double* %a
   %2 = getelementptr double, double* %a, i32 3
@@ -36,24 +30,14 @@ define dso_local double @fld(double *%a) nounwind {
 define dso_local void @fsd(double *%a, double %b, double %c) nounwind {
 ; RV32IFD-LABEL: fsd:
 ; RV32IFD:       # %bb.0:
-; RV32IFD-NEXT:    addi sp, sp, -16
-; RV32IFD-NEXT:    sw a3, 8(sp)
-; RV32IFD-NEXT:    sw a4, 12(sp)
-; RV32IFD-NEXT:    fld ft0, 8(sp)
-; RV32IFD-NEXT:    sw a1, 8(sp)
-; RV32IFD-NEXT:    sw a2, 12(sp)
-; RV32IFD-NEXT:    fld ft1, 8(sp)
-; RV32IFD-NEXT:    fadd.d ft0, ft1, ft0
+; RV32IFD-NEXT:    fadd.d ft0, fa0, fa1
 ; RV32IFD-NEXT:    fsd ft0, 0(a0)
 ; RV32IFD-NEXT:    fsd ft0, 64(a0)
-; RV32IFD-NEXT:    addi sp, sp, 16
 ; RV32IFD-NEXT:    ret
 ;
 ; RV64IFD-LABEL: fsd:
 ; RV64IFD:       # %bb.0:
-; RV64IFD-NEXT:    fmv.d.x ft0, a2
-; RV64IFD-NEXT:    fmv.d.x ft1, a1
-; RV64IFD-NEXT:    fadd.d ft0, ft1, ft0
+; RV64IFD-NEXT:    fadd.d ft0, fa0, fa1
 ; RV64IFD-NEXT:    fsd ft0, 0(a0)
 ; RV64IFD-NEXT:    fsd ft0, 64(a0)
 ; RV64IFD-NEXT:    ret
@@ -72,38 +56,24 @@ define dso_local void @fsd(double *%a, double %b, double %c) nounwind {
 define dso_local double @fld_fsd_global(double %a, double %b) nounwind {
 ; RV32IFD-LABEL: fld_fsd_global:
 ; RV32IFD:       # %bb.0:
-; RV32IFD-NEXT:    addi sp, sp, -16
-; RV32IFD-NEXT:    sw a2, 8(sp)
-; RV32IFD-NEXT:    sw a3, 12(sp)
-; RV32IFD-NEXT:    fld ft0, 8(sp)
-; RV32IFD-NEXT:    sw a0, 8(sp)
-; RV32IFD-NEXT:    sw a1, 12(sp)
-; RV32IFD-NEXT:    fld ft1, 8(sp)
-; RV32IFD-NEXT:    fadd.d ft0, ft1, ft0
+; RV32IFD-NEXT:    fadd.d fa0, fa0, fa1
 ; RV32IFD-NEXT:    lui a0, %hi(G)
-; RV32IFD-NEXT:    fld ft1, %lo(G)(a0)
-; RV32IFD-NEXT:    fsd ft0, %lo(G)(a0)
+; RV32IFD-NEXT:    fld ft0, %lo(G)(a0)
+; RV32IFD-NEXT:    fsd fa0, %lo(G)(a0)
 ; RV32IFD-NEXT:    addi a0, a0, %lo(G)
-; RV32IFD-NEXT:    fld ft1, 72(a0)
-; RV32IFD-NEXT:    fsd ft0, 72(a0)
-; RV32IFD-NEXT:    fsd ft0, 8(sp)
-; RV32IFD-NEXT:    lw a0, 8(sp)
-; RV32IFD-NEXT:    lw a1, 12(sp)
-; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    fld ft0, 72(a0)
+; RV32IFD-NEXT:    fsd fa0, 72(a0)
 ; RV32IFD-NEXT:    ret
 ;
 ; RV64IFD-LABEL: fld_fsd_global:
 ; RV64IFD:       # %bb.0:
-; RV64IFD-NEXT:    fmv.d.x ft0, a1
-; RV64IFD-NEXT:    fmv.d.x ft1, a0
-; RV64IFD-NEXT:    fadd.d ft0, ft1, ft0
+; RV64IFD-NEXT:    fadd.d fa0, fa0, fa1
 ; RV64IFD-NEXT:    lui a0, %hi(G)
-; RV64IFD-NEXT:    fld ft1, %lo(G)(a0)
-; RV64IFD-NEXT:    fsd ft0, %lo(G)(a0)
-; RV64IFD-NEXT:    addi a1, a0, %lo(G)
-; RV64IFD-NEXT:    fld ft1, 72(a1)
-; RV64IFD-NEXT:    fmv.x.d a0, ft0
-; RV64IFD-NEXT:    fsd ft0, 72(a1)
+; RV64IFD-NEXT:    fld ft0, %lo(G)(a0)
+; RV64IFD-NEXT:    fsd fa0, %lo(G)(a0)
+; RV64IFD-NEXT:    addi a0, a0, %lo(G)
+; RV64IFD-NEXT:    fld ft0, 72(a0)
+; RV64IFD-NEXT:    fsd fa0, 72(a0)
 ; RV64IFD-NEXT:    ret
 ; Use %a and %b in an FP op to ensure floating point registers are used, even
 ; for the soft float ABI
@@ -120,29 +90,19 @@ define dso_local double @fld_fsd_global(double %a, double %b) nounwind {
 define dso_local double @fld_fsd_constant(double %a) nounwind {
 ; RV32IFD-LABEL: fld_fsd_constant:
 ; RV32IFD:       # %bb.0:
-; RV32IFD-NEXT:    addi sp, sp, -16
-; RV32IFD-NEXT:    sw a0, 8(sp)
-; RV32IFD-NEXT:    sw a1, 12(sp)
-; RV32IFD-NEXT:    fld ft0, 8(sp)
 ; RV32IFD-NEXT:    lui a0, 912092
-; RV32IFD-NEXT:    fld ft1, -273(a0)
-; RV32IFD-NEXT:    fadd.d ft0, ft0, ft1
-; RV32IFD-NEXT:    fsd ft0, -273(a0)
-; RV32IFD-NEXT:    fsd ft0, 8(sp)
-; RV32IFD-NEXT:    lw a0, 8(sp)
-; RV32IFD-NEXT:    lw a1, 12(sp)
-; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    fld ft0, -273(a0)
+; RV32IFD-NEXT:    fadd.d fa0, fa0, ft0
+; RV32IFD-NEXT:    fsd fa0, -273(a0)
 ; RV32IFD-NEXT:    ret
 ;
 ; RV64IFD-LABEL: fld_fsd_constant:
 ; RV64IFD:       # %bb.0:
-; RV64IFD-NEXT:    lui a1, 228023
-; RV64IFD-NEXT:    slli a1, a1, 2
-; RV64IFD-NEXT:    fld ft0, -273(a1)
-; RV64IFD-NEXT:    fmv.d.x ft1, a0
-; RV64IFD-NEXT:    fadd.d ft0, ft1, ft0
-; RV64IFD-NEXT:    fmv.x.d a0, ft0
-; RV64IFD-NEXT:    fsd ft0, -273(a1)
+; RV64IFD-NEXT:    lui a0, 228023
+; RV64IFD-NEXT:    slli a0, a0, 2
+; RV64IFD-NEXT:    fld ft0, -273(a0)
+; RV64IFD-NEXT:    fadd.d fa0, fa0, ft0
+; RV64IFD-NEXT:    fsd fa0, -273(a0)
 ; RV64IFD-NEXT:    ret
   %1 = inttoptr i32 3735928559 to double*
   %2 = load volatile double, double* %1
@@ -158,19 +118,14 @@ define dso_local double @fld_stack(double %a) nounwind {
 ; RV32IFD:       # %bb.0:
 ; RV32IFD-NEXT:    addi sp, sp, -32
 ; RV32IFD-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
-; RV32IFD-NEXT:    sw a0, 8(sp)
-; RV32IFD-NEXT:    sw a1, 12(sp)
-; RV32IFD-NEXT:    fld ft0, 8(sp)
-; RV32IFD-NEXT:    fsd ft0, 0(sp) # 8-byte Folded Spill
-; RV32IFD-NEXT:    addi a0, sp, 16
+; RV32IFD-NEXT:    fsd fs0, 16(sp) # 8-byte Folded Spill
+; RV32IFD-NEXT:    fmv.d fs0, fa0
+; RV32IFD-NEXT:    addi a0, sp, 8
 ; RV32IFD-NEXT:    call notdead@plt
-; RV32IFD-NEXT:    fld ft0, 16(sp)
-; RV32IFD-NEXT:    fld ft1, 0(sp) # 8-byte Folded Reload
-; RV32IFD-NEXT:    fadd.d ft0, ft0, ft1
-; RV32IFD-NEXT:    fsd ft0, 8(sp)
-; RV32IFD-NEXT:    lw a0, 8(sp)
-; RV32IFD-NEXT:    lw a1, 12(sp)
+; RV32IFD-NEXT:    fld ft0, 8(sp)
+; RV32IFD-NEXT:    fadd.d fa0, ft0, fs0
 ; RV32IFD-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    fld fs0, 16(sp) # 8-byte Folded Reload
 ; RV32IFD-NEXT:    addi sp, sp, 32
 ; RV32IFD-NEXT:    ret
 ;
@@ -178,15 +133,14 @@ define dso_local double @fld_stack(double %a) nounwind {
 ; RV64IFD:       # %bb.0:
 ; RV64IFD-NEXT:    addi sp, sp, -32
 ; RV64IFD-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
-; RV64IFD-NEXT:    fmv.d.x ft0, a0
-; RV64IFD-NEXT:    fsd ft0, 8(sp) # 8-byte Folded Spill
-; RV64IFD-NEXT:    addi a0, sp, 16
+; RV64IFD-NEXT:    fsd fs0, 16(sp) # 8-byte Folded Spill
+; RV64IFD-NEXT:    fmv.d fs0, fa0
+; RV64IFD-NEXT:    addi a0, sp, 8
 ; RV64IFD-NEXT:    call notdead@plt
-; RV64IFD-NEXT:    fld ft0, 16(sp)
-; RV64IFD-NEXT:    fld ft1, 8(sp) # 8-byte Folded Reload
-; RV64IFD-NEXT:    fadd.d ft0, ft0, ft1
-; RV64IFD-NEXT:    fmv.x.d a0, ft0
+; RV64IFD-NEXT:    fld ft0, 8(sp)
+; RV64IFD-NEXT:    fadd.d fa0, ft0, fs0
 ; RV64IFD-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64IFD-NEXT:    fld fs0, 16(sp) # 8-byte Folded Reload
 ; RV64IFD-NEXT:    addi sp, sp, 32
 ; RV64IFD-NEXT:    ret
   %1 = alloca double, align 8
@@ -200,29 +154,21 @@ define dso_local double @fld_stack(double %a) nounwind {
 define dso_local void @fsd_stack(double %a, double %b) nounwind {
 ; RV32IFD-LABEL: fsd_stack:
 ; RV32IFD:       # %bb.0:
-; RV32IFD-NEXT:    addi sp, sp, -32
-; RV32IFD-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
-; RV32IFD-NEXT:    sw a2, 8(sp)
-; RV32IFD-NEXT:    sw a3, 12(sp)
-; RV32IFD-NEXT:    fld ft0, 8(sp)
-; RV32IFD-NEXT:    sw a0, 8(sp)
-; RV32IFD-NEXT:    sw a1, 12(sp)
-; RV32IFD-NEXT:    fld ft1, 8(sp)
-; RV32IFD-NEXT:    fadd.d ft0, ft1, ft0
-; RV32IFD-NEXT:    fsd ft0, 16(sp)
-; RV32IFD-NEXT:    addi a0, sp, 16
+; RV32IFD-NEXT:    addi sp, sp, -16
+; RV32IFD-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    fadd.d ft0, fa0, fa1
+; RV32IFD-NEXT:    fsd ft0, 0(sp)
+; RV32IFD-NEXT:    mv a0, sp
 ; RV32IFD-NEXT:    call notdead@plt
-; RV32IFD-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
-; RV32IFD-NEXT:    addi sp, sp, 32
+; RV32IFD-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    addi sp, sp, 16
 ; RV32IFD-NEXT:    ret
 ;
 ; RV64IFD-LABEL: fsd_stack:
 ; RV64IFD:       # %bb.0:
 ; RV64IFD-NEXT:    addi sp, sp, -16
 ; RV64IFD-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
-; RV64IFD-NEXT:    fmv.d.x ft0, a1
-; RV64IFD-NEXT:    fmv.d.x ft1, a0
-; RV64IFD-NEXT:    fadd.d ft0, ft1, ft0
+; RV64IFD-NEXT:    fadd.d ft0, fa0, fa1
 ; RV64IFD-NEXT:    fsd ft0, 0(sp)
 ; RV64IFD-NEXT:    mv a0, sp
 ; RV64IFD-NEXT:    call notdead@plt
@@ -241,19 +187,13 @@ define dso_local void @fsd_stack(double %a, double %b) nounwind {
 define dso_local void @fsd_trunc(float* %a, double %b) nounwind noinline optnone {
 ; RV32IFD-LABEL: fsd_trunc:
 ; RV32IFD:       # %bb.0:
-; RV32IFD-NEXT:    addi sp, sp, -16
-; RV32IFD-NEXT:    sw a1, 8(sp)
-; RV32IFD-NEXT:    sw a2, 12(sp)
-; RV32IFD-NEXT:    fld ft0, 8(sp)
-; RV32IFD-NEXT:    fcvt.s.d ft0, ft0
+; RV32IFD-NEXT:    fcvt.s.d ft0, fa0
 ; RV32IFD-NEXT:    fsw ft0, 0(a0)
-; RV32IFD-NEXT:    addi sp, sp, 16
 ; RV32IFD-NEXT:    ret
 ;
 ; RV64IFD-LABEL: fsd_trunc:
 ; RV64IFD:       # %bb.0:
-; RV64IFD-NEXT:    fmv.d.x ft0, a1
-; RV64IFD-NEXT:    fcvt.s.d ft0, ft0
+; RV64IFD-NEXT:    fcvt.s.d ft0, fa0
 ; RV64IFD-NEXT:    fsw ft0, 0(a0)
 ; RV64IFD-NEXT:    ret
   %1 = fptrunc double %b to float

diff --git a/llvm/test/CodeGen/RISCV/float-imm.ll b/llvm/test/CodeGen/RISCV/float-imm.ll
index 47e9bb7695d15..912ec57483938 100644
--- a/llvm/test/CodeGen/RISCV/float-imm.ll
+++ b/llvm/test/CodeGen/RISCV/float-imm.ll
@@ -1,22 +1,21 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+f -verify-machineinstrs < %s \
-; RUN:   | FileCheck -check-prefix=RV32IF %s
+; RUN:   -target-abi=ilp32f | FileCheck -check-prefix=RV32IF %s
 ; RUN: llc -mtriple=riscv64 -mattr=+f -verify-machineinstrs < %s \
-; RUN:   | FileCheck -check-prefix=RV64IF %s
+; RUN:   -target-abi=lp64f | FileCheck -check-prefix=RV64IF %s
 
 ; TODO: constant pool shouldn't be necessary for RV64IF.
 define float @float_imm() nounwind {
 ; RV32IF-LABEL: float_imm:
 ; RV32IF:       # %bb.0:
-; RV32IF-NEXT:    lui a0, 263313
-; RV32IF-NEXT:    addi a0, a0, -37
+; RV32IF-NEXT:    lui a0, %hi(.LCPI0_0)
+; RV32IF-NEXT:    flw fa0, %lo(.LCPI0_0)(a0)
 ; RV32IF-NEXT:    ret
 ;
 ; RV64IF-LABEL: float_imm:
 ; RV64IF:       # %bb.0:
 ; RV64IF-NEXT:    lui a0, %hi(.LCPI0_0)
-; RV64IF-NEXT:    flw ft0, %lo(.LCPI0_0)(a0)
-; RV64IF-NEXT:    fmv.x.w a0, ft0
+; RV64IF-NEXT:    flw fa0, %lo(.LCPI0_0)(a0)
 ; RV64IF-NEXT:    ret
   ret float 3.14159274101257324218750
 }
@@ -24,20 +23,16 @@ define float @float_imm() nounwind {
 define float @float_imm_op(float %a) nounwind {
 ; RV32IF-LABEL: float_imm_op:
 ; RV32IF:       # %bb.0:
-; RV32IF-NEXT:    lui a1, %hi(.LCPI1_0)
-; RV32IF-NEXT:    flw ft0, %lo(.LCPI1_0)(a1)
-; RV32IF-NEXT:    fmv.w.x ft1, a0
-; RV32IF-NEXT:    fadd.s ft0, ft1, ft0
-; RV32IF-NEXT:    fmv.x.w a0, ft0
+; RV32IF-NEXT:    lui a0, %hi(.LCPI1_0)
+; RV32IF-NEXT:    flw ft0, %lo(.LCPI1_0)(a0)
+; RV32IF-NEXT:    fadd.s fa0, fa0, ft0
 ; RV32IF-NEXT:    ret
 ;
 ; RV64IF-LABEL: float_imm_op:
 ; RV64IF:       # %bb.0:
-; RV64IF-NEXT:    lui a1, %hi(.LCPI1_0)
-; RV64IF-NEXT:    flw ft0, %lo(.LCPI1_0)(a1)
-; RV64IF-NEXT:    fmv.w.x ft1, a0
-; RV64IF-NEXT:    fadd.s ft0, ft1, ft0
-; RV64IF-NEXT:    fmv.x.w a0, ft0
+; RV64IF-NEXT:    lui a0, %hi(.LCPI1_0)
+; RV64IF-NEXT:    flw ft0, %lo(.LCPI1_0)(a0)
+; RV64IF-NEXT:    fadd.s fa0, fa0, ft0
 ; RV64IF-NEXT:    ret
   %1 = fadd float %a, 1.0
   ret float %1

diff --git a/llvm/test/CodeGen/RISCV/float-mem.ll b/llvm/test/CodeGen/RISCV/float-mem.ll
index de9f49ab7d455..fe1e9bcf431b8 100644
--- a/llvm/test/CodeGen/RISCV/float-mem.ll
+++ b/llvm/test/CodeGen/RISCV/float-mem.ll
@@ -1,24 +1,22 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+f -verify-machineinstrs < %s \
-; RUN:   | FileCheck -check-prefix=RV32IF %s
+; RUN:   -target-abi=ilp32f | FileCheck -check-prefix=RV32IF %s
 ; RUN: llc -mtriple=riscv64 -mattr=+f -verify-machineinstrs < %s \
-; RUN:   | FileCheck -check-prefix=RV64IF %s
+; RUN:   -target-abi=lp64f | FileCheck -check-prefix=RV64IF %s
 
 define dso_local float @flw(float *%a) nounwind {
 ; RV32IF-LABEL: flw:
 ; RV32IF:       # %bb.0:
 ; RV32IF-NEXT:    flw ft0, 0(a0)
 ; RV32IF-NEXT:    flw ft1, 12(a0)
-; RV32IF-NEXT:    fadd.s ft0, ft0, ft1
-; RV32IF-NEXT:    fmv.x.w a0, ft0
+; RV32IF-NEXT:    fadd.s fa0, ft0, ft1
 ; RV32IF-NEXT:    ret
 ;
 ; RV64IF-LABEL: flw:
 ; RV64IF:       # %bb.0:
 ; RV64IF-NEXT:    flw ft0, 0(a0)
 ; RV64IF-NEXT:    flw ft1, 12(a0)
-; RV64IF-NEXT:    fadd.s ft0, ft0, ft1
-; RV64IF-NEXT:    fmv.x.w a0, ft0
+; RV64IF-NEXT:    fadd.s fa0, ft0, ft1
 ; RV64IF-NEXT:    ret
   %1 = load float, float* %a
   %2 = getelementptr float, float* %a, i32 3
@@ -34,18 +32,14 @@ define dso_local void @fsw(float *%a, float %b, float %c) nounwind {
 ; for the soft float ABI
 ; RV32IF-LABEL: fsw:
 ; RV32IF:       # %bb.0:
-; RV32IF-NEXT:    fmv.w.x ft0, a2
-; RV32IF-NEXT:    fmv.w.x ft1, a1
-; RV32IF-NEXT:    fadd.s ft0, ft1, ft0
+; RV32IF-NEXT:    fadd.s ft0, fa0, fa1
 ; RV32IF-NEXT:    fsw ft0, 0(a0)
 ; RV32IF-NEXT:    fsw ft0, 32(a0)
 ; RV32IF-NEXT:    ret
 ;
 ; RV64IF-LABEL: fsw:
 ; RV64IF:       # %bb.0:
-; RV64IF-NEXT:    fmv.w.x ft0, a2
-; RV64IF-NEXT:    fmv.w.x ft1, a1
-; RV64IF-NEXT:    fadd.s ft0, ft1, ft0
+; RV64IF-NEXT:    fadd.s ft0, fa0, fa1
 ; RV64IF-NEXT:    fsw ft0, 0(a0)
 ; RV64IF-NEXT:    fsw ft0, 32(a0)
 ; RV64IF-NEXT:    ret
@@ -64,30 +58,24 @@ define dso_local float @flw_fsw_global(float %a, float %b) nounwind {
 ; for the soft float ABI
 ; RV32IF-LABEL: flw_fsw_global:
 ; RV32IF:       # %bb.0:
-; RV32IF-NEXT:    fmv.w.x ft0, a1
-; RV32IF-NEXT:    fmv.w.x ft1, a0
-; RV32IF-NEXT:    fadd.s ft0, ft1, ft0
+; RV32IF-NEXT:    fadd.s fa0, fa0, fa1
 ; RV32IF-NEXT:    lui a0, %hi(G)
-; RV32IF-NEXT:    flw ft1, %lo(G)(a0)
-; RV32IF-NEXT:    fsw ft0, %lo(G)(a0)
-; RV32IF-NEXT:    addi a1, a0, %lo(G)
-; RV32IF-NEXT:    flw ft1, 36(a1)
-; RV32IF-NEXT:    fmv.x.w a0, ft0
-; RV32IF-NEXT:    fsw ft0, 36(a1)
+; RV32IF-NEXT:    flw ft0, %lo(G)(a0)
+; RV32IF-NEXT:    fsw fa0, %lo(G)(a0)
+; RV32IF-NEXT:    addi a0, a0, %lo(G)
+; RV32IF-NEXT:    flw ft0, 36(a0)
+; RV32IF-NEXT:    fsw fa0, 36(a0)
 ; RV32IF-NEXT:    ret
 ;
 ; RV64IF-LABEL: flw_fsw_global:
 ; RV64IF:       # %bb.0:
-; RV64IF-NEXT:    fmv.w.x ft0, a1
-; RV64IF-NEXT:    fmv.w.x ft1, a0
-; RV64IF-NEXT:    fadd.s ft0, ft1, ft0
+; RV64IF-NEXT:    fadd.s fa0, fa0, fa1
 ; RV64IF-NEXT:    lui a0, %hi(G)
-; RV64IF-NEXT:    flw ft1, %lo(G)(a0)
-; RV64IF-NEXT:    fsw ft0, %lo(G)(a0)
-; RV64IF-NEXT:    addi a1, a0, %lo(G)
-; RV64IF-NEXT:    flw ft1, 36(a1)
-; RV64IF-NEXT:    fmv.x.w a0, ft0
-; RV64IF-NEXT:    fsw ft0, 36(a1)
+; RV64IF-NEXT:    flw ft0, %lo(G)(a0)
+; RV64IF-NEXT:    fsw fa0, %lo(G)(a0)
+; RV64IF-NEXT:    addi a0, a0, %lo(G)
+; RV64IF-NEXT:    flw ft0, 36(a0)
+; RV64IF-NEXT:    fsw fa0, 36(a0)
 ; RV64IF-NEXT:    ret
   %1 = fadd float %a, %b
   %2 = load volatile float, float* @G
@@ -102,23 +90,19 @@ define dso_local float @flw_fsw_global(float %a, float %b) nounwind {
 define dso_local float @flw_fsw_constant(float %a) nounwind {
 ; RV32IF-LABEL: flw_fsw_constant:
 ; RV32IF:       # %bb.0:
-; RV32IF-NEXT:    lui a1, 912092
-; RV32IF-NEXT:    flw ft0, -273(a1)
-; RV32IF-NEXT:    fmv.w.x ft1, a0
-; RV32IF-NEXT:    fadd.s ft0, ft1, ft0
-; RV32IF-NEXT:    fmv.x.w a0, ft0
-; RV32IF-NEXT:    fsw ft0, -273(a1)
+; RV32IF-NEXT:    lui a0, 912092
+; RV32IF-NEXT:    flw ft0, -273(a0)
+; RV32IF-NEXT:    fadd.s fa0, fa0, ft0
+; RV32IF-NEXT:    fsw fa0, -273(a0)
 ; RV32IF-NEXT:    ret
 ;
 ; RV64IF-LABEL: flw_fsw_constant:
 ; RV64IF:       # %bb.0:
-; RV64IF-NEXT:    lui a1, 228023
-; RV64IF-NEXT:    slli a1, a1, 2
-; RV64IF-NEXT:    flw ft0, -273(a1)
-; RV64IF-NEXT:    fmv.w.x ft1, a0
-; RV64IF-NEXT:    fadd.s ft0, ft1, ft0
-; RV64IF-NEXT:    fmv.x.w a0, ft0
-; RV64IF-NEXT:    fsw ft0, -273(a1)
+; RV64IF-NEXT:    lui a0, 228023
+; RV64IF-NEXT:    slli a0, a0, 2
+; RV64IF-NEXT:    flw ft0, -273(a0)
+; RV64IF-NEXT:    fadd.s fa0, fa0, ft0
+; RV64IF-NEXT:    fsw fa0, -273(a0)
 ; RV64IF-NEXT:    ret
   %1 = inttoptr i32 3735928559 to float*
   %2 = load volatile float, float* %1
@@ -134,15 +118,14 @@ define dso_local float @flw_stack(float %a) nounwind {
 ; RV32IF:       # %bb.0:
 ; RV32IF-NEXT:    addi sp, sp, -16
 ; RV32IF-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IF-NEXT:    fmv.w.x ft0, a0
-; RV32IF-NEXT:    fsw ft0, 4(sp) # 4-byte Folded Spill
-; RV32IF-NEXT:    addi a0, sp, 8
+; RV32IF-NEXT:    fsw fs0, 8(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    fmv.s fs0, fa0
+; RV32IF-NEXT:    addi a0, sp, 4
 ; RV32IF-NEXT:    call notdead@plt
-; RV32IF-NEXT:    flw ft0, 8(sp)
-; RV32IF-NEXT:    flw ft1, 4(sp) # 4-byte Folded Reload
-; RV32IF-NEXT:    fadd.s ft0, ft0, ft1
-; RV32IF-NEXT:    fmv.x.w a0, ft0
+; RV32IF-NEXT:    flw ft0, 4(sp)
+; RV32IF-NEXT:    fadd.s fa0, ft0, fs0
 ; RV32IF-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    flw fs0, 8(sp) # 4-byte Folded Reload
 ; RV32IF-NEXT:    addi sp, sp, 16
 ; RV32IF-NEXT:    ret
 ;
@@ -150,15 +133,14 @@ define dso_local float @flw_stack(float %a) nounwind {
 ; RV64IF:       # %bb.0:
 ; RV64IF-NEXT:    addi sp, sp, -16
 ; RV64IF-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
-; RV64IF-NEXT:    fmv.w.x ft0, a0
-; RV64IF-NEXT:    fsw ft0, 0(sp) # 4-byte Folded Spill
-; RV64IF-NEXT:    addi a0, sp, 4
+; RV64IF-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV64IF-NEXT:    fmv.s fs0, fa0
+; RV64IF-NEXT:    mv a0, sp
 ; RV64IF-NEXT:    call notdead@plt
-; RV64IF-NEXT:    flw ft0, 4(sp)
-; RV64IF-NEXT:    flw ft1, 0(sp) # 4-byte Folded Reload
-; RV64IF-NEXT:    fadd.s ft0, ft0, ft1
-; RV64IF-NEXT:    fmv.x.w a0, ft0
+; RV64IF-NEXT:    flw ft0, 0(sp)
+; RV64IF-NEXT:    fadd.s fa0, ft0, fs0
 ; RV64IF-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IF-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
 ; RV64IF-NEXT:    addi sp, sp, 16
 ; RV64IF-NEXT:    ret
   %1 = alloca float, align 4
@@ -174,9 +156,7 @@ define dso_local void @fsw_stack(float %a, float %b) nounwind {
 ; RV32IF:       # %bb.0:
 ; RV32IF-NEXT:    addi sp, sp, -16
 ; RV32IF-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IF-NEXT:    fmv.w.x ft0, a1
-; RV32IF-NEXT:    fmv.w.x ft1, a0
-; RV32IF-NEXT:    fadd.s ft0, ft1, ft0
+; RV32IF-NEXT:    fadd.s ft0, fa0, fa1
 ; RV32IF-NEXT:    fsw ft0, 8(sp)
 ; RV32IF-NEXT:    addi a0, sp, 8
 ; RV32IF-NEXT:    call notdead@plt
@@ -188,9 +168,7 @@ define dso_local void @fsw_stack(float %a, float %b) nounwind {
 ; RV64IF:       # %bb.0:
 ; RV64IF-NEXT:    addi sp, sp, -16
 ; RV64IF-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
-; RV64IF-NEXT:    fmv.w.x ft0, a1
-; RV64IF-NEXT:    fmv.w.x ft1, a0
-; RV64IF-NEXT:    fadd.s ft0, ft1, ft0
+; RV64IF-NEXT:    fadd.s ft0, fa0, fa1
 ; RV64IF-NEXT:    fsw ft0, 4(sp)
 ; RV64IF-NEXT:    addi a0, sp, 4
 ; RV64IF-NEXT:    call notdead@plt

diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat.ll b/llvm/test/CodeGen/RISCV/fpclamptosat.ll
index abf4101badd51..b508e7c46df5c 100644
--- a/llvm/test/CodeGen/RISCV/fpclamptosat.ll
+++ b/llvm/test/CodeGen/RISCV/fpclamptosat.ll
@@ -1,12 +1,12 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+f -verify-machineinstrs < %s \
-; RUN:   | FileCheck -check-prefixes=RV32,RV32IF %s
+; RUN:   -target-abi=ilp32f | FileCheck -check-prefixes=RV32,RV32IF %s
 ; RUN: llc -mtriple=riscv64 -mattr=+f -verify-machineinstrs < %s \
-; RUN:   | FileCheck -check-prefixes=RV64,RV64IF %s
+; RUN:   -target-abi=lp64f | FileCheck -check-prefixes=RV64,RV64IF %s
 ; RUN: llc -mtriple=riscv32 -mattr=+f,+d -verify-machineinstrs < %s \
-; RUN:   | FileCheck -check-prefixes=RV32,RV32IFD %s
+; RUN:   -target-abi=ilp32d | FileCheck -check-prefixes=RV32,RV32IFD %s
 ; RUN: llc -mtriple=riscv64 -mattr=+f,+d -verify-machineinstrs < %s \
-; RUN:   | FileCheck -check-prefixes=RV64,RV64IFD %s
+; RUN:   -target-abi=lp64d | FileCheck -check-prefixes=RV64,RV64IFD %s
 
 ; i32 saturate
 
@@ -71,23 +71,16 @@ define i32 @stest_f64i32(double %x) {
 ;
 ; RV32IFD-LABEL: stest_f64i32:
 ; RV32IFD:       # %bb.0: # %entry
-; RV32IFD-NEXT:    addi sp, sp, -16
-; RV32IFD-NEXT:    .cfi_def_cfa_offset 16
-; RV32IFD-NEXT:    sw a0, 8(sp)
-; RV32IFD-NEXT:    sw a1, 12(sp)
-; RV32IFD-NEXT:    fld ft0, 8(sp)
-; RV32IFD-NEXT:    feq.d a0, ft0, ft0
+; RV32IFD-NEXT:    feq.d a0, fa0, fa0
 ; RV32IFD-NEXT:    beqz a0, .LBB0_2
 ; RV32IFD-NEXT:  # %bb.1:
-; RV32IFD-NEXT:    fcvt.w.d a0, ft0, rtz
+; RV32IFD-NEXT:    fcvt.w.d a0, fa0, rtz
 ; RV32IFD-NEXT:  .LBB0_2: # %entry
-; RV32IFD-NEXT:    addi sp, sp, 16
 ; RV32IFD-NEXT:    ret
 ;
 ; RV64IFD-LABEL: stest_f64i32:
 ; RV64IFD:       # %bb.0: # %entry
-; RV64IFD-NEXT:    fmv.d.x ft0, a0
-; RV64IFD-NEXT:    fcvt.l.d a0, ft0, rtz
+; RV64IFD-NEXT:    fcvt.l.d a0, fa0, rtz
 ; RV64IFD-NEXT:    lui a1, 524288
 ; RV64IFD-NEXT:    addiw a2, a1, -1
 ; RV64IFD-NEXT:    bge a0, a2, .LBB0_3
@@ -154,23 +147,16 @@ define i32 @utest_f64i32(double %x) {
 ;
 ; RV32IFD-LABEL: utest_f64i32:
 ; RV32IFD:       # %bb.0: # %entry
-; RV32IFD-NEXT:    addi sp, sp, -16
-; RV32IFD-NEXT:    .cfi_def_cfa_offset 16
-; RV32IFD-NEXT:    sw a0, 8(sp)
-; RV32IFD-NEXT:    sw a1, 12(sp)
-; RV32IFD-NEXT:    fld ft0, 8(sp)
-; RV32IFD-NEXT:    feq.d a0, ft0, ft0
+; RV32IFD-NEXT:    feq.d a0, fa0, fa0
 ; RV32IFD-NEXT:    beqz a0, .LBB1_2
 ; RV32IFD-NEXT:  # %bb.1:
-; RV32IFD-NEXT:    fcvt.wu.d a0, ft0, rtz
+; RV32IFD-NEXT:    fcvt.wu.d a0, fa0, rtz
 ; RV32IFD-NEXT:  .LBB1_2: # %entry
-; RV32IFD-NEXT:    addi sp, sp, 16
 ; RV32IFD-NEXT:    ret
 ;
 ; RV64IFD-LABEL: utest_f64i32:
 ; RV64IFD:       # %bb.0: # %entry
-; RV64IFD-NEXT:    fmv.d.x ft0, a0
-; RV64IFD-NEXT:    fcvt.lu.d a0, ft0, rtz
+; RV64IFD-NEXT:    fcvt.lu.d a0, fa0, rtz
 ; RV64IFD-NEXT:    li a1, -1
 ; RV64IFD-NEXT:    srli a1, a1, 32
 ; RV64IFD-NEXT:    bltu a0, a1, .LBB1_2
@@ -245,23 +231,16 @@ define i32 @ustest_f64i32(double %x) {
 ;
 ; RV32IFD-LABEL: ustest_f64i32:
 ; RV32IFD:       # %bb.0: # %entry
-; RV32IFD-NEXT:    addi sp, sp, -16
-; RV32IFD-NEXT:    .cfi_def_cfa_offset 16
-; RV32IFD-NEXT:    sw a0, 8(sp)
-; RV32IFD-NEXT:    sw a1, 12(sp)
-; RV32IFD-NEXT:    fld ft0, 8(sp)
-; RV32IFD-NEXT:    feq.d a0, ft0, ft0
+; RV32IFD-NEXT:    feq.d a0, fa0, fa0
 ; RV32IFD-NEXT:    beqz a0, .LBB2_2
 ; RV32IFD-NEXT:  # %bb.1:
-; RV32IFD-NEXT:    fcvt.wu.d a0, ft0, rtz
+; RV32IFD-NEXT:    fcvt.wu.d a0, fa0, rtz
 ; RV32IFD-NEXT:  .LBB2_2: # %entry
-; RV32IFD-NEXT:    addi sp, sp, 16
 ; RV32IFD-NEXT:    ret
 ;
 ; RV64IFD-LABEL: ustest_f64i32:
 ; RV64IFD:       # %bb.0: # %entry
-; RV64IFD-NEXT:    fmv.d.x ft0, a0
-; RV64IFD-NEXT:    fcvt.l.d a0, ft0, rtz
+; RV64IFD-NEXT:    fcvt.l.d a0, fa0, rtz
 ; RV64IFD-NEXT:    li a1, -1
 ; RV64IFD-NEXT:    srli a1, a1, 32
 ; RV64IFD-NEXT:    bge a0, a1, .LBB2_3
@@ -288,18 +267,16 @@ entry:
 define i32 @stest_f32i32(float %x) {
 ; RV32-LABEL: stest_f32i32:
 ; RV32:       # %bb.0: # %entry
-; RV32-NEXT:    fmv.w.x ft0, a0
-; RV32-NEXT:    feq.s a0, ft0, ft0
+; RV32-NEXT:    feq.s a0, fa0, fa0
 ; RV32-NEXT:    beqz a0, .LBB3_2
 ; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    fcvt.w.s a0, ft0, rtz
+; RV32-NEXT:    fcvt.w.s a0, fa0, rtz
 ; RV32-NEXT:  .LBB3_2: # %entry
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: stest_f32i32:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    fmv.w.x ft0, a0
-; RV64-NEXT:    fcvt.l.s a0, ft0, rtz
+; RV64-NEXT:    fcvt.l.s a0, fa0, rtz
 ; RV64-NEXT:    lui a1, 524288
 ; RV64-NEXT:    addiw a2, a1, -1
 ; RV64-NEXT:    bge a0, a2, .LBB3_3
@@ -326,18 +303,16 @@ entry:
 define i32 @utest_f32i32(float %x) {
 ; RV32-LABEL: utest_f32i32:
 ; RV32:       # %bb.0: # %entry
-; RV32-NEXT:    fmv.w.x ft0, a0
-; RV32-NEXT:    feq.s a0, ft0, ft0
+; RV32-NEXT:    feq.s a0, fa0, fa0
 ; RV32-NEXT:    beqz a0, .LBB4_2
 ; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    fcvt.wu.s a0, ft0, rtz
+; RV32-NEXT:    fcvt.wu.s a0, fa0, rtz
 ; RV32-NEXT:  .LBB4_2: # %entry
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: utest_f32i32:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    fmv.w.x ft0, a0
-; RV64-NEXT:    fcvt.lu.s a0, ft0, rtz
+; RV64-NEXT:    fcvt.lu.s a0, fa0, rtz
 ; RV64-NEXT:    li a1, -1
 ; RV64-NEXT:    srli a1, a1, 32
 ; RV64-NEXT:    bltu a0, a1, .LBB4_2
@@ -356,18 +331,16 @@ entry:
 define i32 @ustest_f32i32(float %x) {
 ; RV32-LABEL: ustest_f32i32:
 ; RV32:       # %bb.0: # %entry
-; RV32-NEXT:    fmv.w.x ft0, a0
-; RV32-NEXT:    feq.s a0, ft0, ft0
+; RV32-NEXT:    feq.s a0, fa0, fa0
 ; RV32-NEXT:    beqz a0, .LBB5_2
 ; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    fcvt.wu.s a0, ft0, rtz
+; RV32-NEXT:    fcvt.wu.s a0, fa0, rtz
 ; RV32-NEXT:  .LBB5_2: # %entry
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: ustest_f32i32:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    fmv.w.x ft0, a0
-; RV64-NEXT:    fcvt.l.s a0, ft0, rtz
+; RV64-NEXT:    fcvt.l.s a0, fa0, rtz
 ; RV64-NEXT:    li a1, -1
 ; RV64-NEXT:    srli a1, a1, 32
 ; RV64-NEXT:    bge a0, a1, .LBB5_3
@@ -398,6 +371,7 @@ define i32 @stest_f16i32(half %x) {
 ; RV32-NEXT:    .cfi_def_cfa_offset 16
 ; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    fmv.x.w a0, fa0
 ; RV32-NEXT:    call __extendhfsf2@plt
 ; RV32-NEXT:    call __fixsfdi@plt
 ; RV32-NEXT:    lui a2, 524288
@@ -436,9 +410,9 @@ define i32 @stest_f16i32(half %x) {
 ; RV64-NEXT:    .cfi_def_cfa_offset 16
 ; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64-NEXT:    .cfi_offset ra, -8
+; RV64-NEXT:    fmv.x.w a0, fa0
 ; RV64-NEXT:    call __extendhfsf2@plt
-; RV64-NEXT:    fmv.w.x ft0, a0
-; RV64-NEXT:    fcvt.l.s a0, ft0, rtz
+; RV64-NEXT:    fcvt.l.s a0, fa0, rtz
 ; RV64-NEXT:    lui a1, 524288
 ; RV64-NEXT:    addiw a2, a1, -1
 ; RV64-NEXT:    blt a0, a2, .LBB6_2
@@ -469,6 +443,7 @@ define i32 @utesth_f16i32(half %x) {
 ; RV32-NEXT:    .cfi_def_cfa_offset 16
 ; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    fmv.x.w a0, fa0
 ; RV32-NEXT:    call __extendhfsf2@plt
 ; RV32-NEXT:    call __fixunssfdi@plt
 ; RV32-NEXT:    beqz a1, .LBB7_2
@@ -493,9 +468,9 @@ define i32 @utesth_f16i32(half %x) {
 ; RV64-NEXT:    .cfi_def_cfa_offset 16
 ; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64-NEXT:    .cfi_offset ra, -8
+; RV64-NEXT:    fmv.x.w a0, fa0
 ; RV64-NEXT:    call __extendhfsf2@plt
-; RV64-NEXT:    fmv.w.x ft0, a0
-; RV64-NEXT:    fcvt.lu.s a0, ft0, rtz
+; RV64-NEXT:    fcvt.lu.s a0, fa0, rtz
 ; RV64-NEXT:    li a1, -1
 ; RV64-NEXT:    srli a1, a1, 32
 ; RV64-NEXT:    bltu a0, a1, .LBB7_2
@@ -520,6 +495,7 @@ define i32 @ustest_f16i32(half %x) {
 ; RV32-NEXT:    .cfi_def_cfa_offset 16
 ; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    fmv.x.w a0, fa0
 ; RV32-NEXT:    call __extendhfsf2@plt
 ; RV32-NEXT:    call __fixsfdi@plt
 ; RV32-NEXT:    beqz a1, .LBB8_2
@@ -556,9 +532,9 @@ define i32 @ustest_f16i32(half %x) {
 ; RV64-NEXT:    .cfi_def_cfa_offset 16
 ; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64-NEXT:    .cfi_offset ra, -8
+; RV64-NEXT:    fmv.x.w a0, fa0
 ; RV64-NEXT:    call __extendhfsf2@plt
-; RV64-NEXT:    fmv.w.x ft0, a0
-; RV64-NEXT:    fcvt.l.s a0, ft0, rtz
+; RV64-NEXT:    fcvt.l.s a0, fa0, rtz
 ; RV64-NEXT:    li a1, -1
 ; RV64-NEXT:    srli a1, a1, 32
 ; RV64-NEXT:    blt a0, a1, .LBB8_2
@@ -631,12 +607,7 @@ define i16 @stest_f64i16(double %x) {
 ;
 ; RV32IFD-LABEL: stest_f64i16:
 ; RV32IFD:       # %bb.0: # %entry
-; RV32IFD-NEXT:    addi sp, sp, -16
-; RV32IFD-NEXT:    .cfi_def_cfa_offset 16
-; RV32IFD-NEXT:    sw a0, 8(sp)
-; RV32IFD-NEXT:    sw a1, 12(sp)
-; RV32IFD-NEXT:    fld ft0, 8(sp)
-; RV32IFD-NEXT:    fcvt.w.d a0, ft0, rtz
+; RV32IFD-NEXT:    fcvt.w.d a0, fa0, rtz
 ; RV32IFD-NEXT:    lui a1, 8
 ; RV32IFD-NEXT:    addi a1, a1, -1
 ; RV32IFD-NEXT:    bge a0, a1, .LBB9_3
@@ -644,7 +615,6 @@ define i16 @stest_f64i16(double %x) {
 ; RV32IFD-NEXT:    lui a1, 1048568
 ; RV32IFD-NEXT:    bge a1, a0, .LBB9_4
 ; RV32IFD-NEXT:  .LBB9_2: # %entry
-; RV32IFD-NEXT:    addi sp, sp, 16
 ; RV32IFD-NEXT:    ret
 ; RV32IFD-NEXT:  .LBB9_3: # %entry
 ; RV32IFD-NEXT:    mv a0, a1
@@ -652,13 +622,11 @@ define i16 @stest_f64i16(double %x) {
 ; RV32IFD-NEXT:    blt a1, a0, .LBB9_2
 ; RV32IFD-NEXT:  .LBB9_4: # %entry
 ; RV32IFD-NEXT:    lui a0, 1048568
-; RV32IFD-NEXT:    addi sp, sp, 16
 ; RV32IFD-NEXT:    ret
 ;
 ; RV64IFD-LABEL: stest_f64i16:
 ; RV64IFD:       # %bb.0: # %entry
-; RV64IFD-NEXT:    fmv.d.x ft0, a0
-; RV64IFD-NEXT:    fcvt.w.d a0, ft0, rtz
+; RV64IFD-NEXT:    fcvt.w.d a0, fa0, rtz
 ; RV64IFD-NEXT:    lui a1, 8
 ; RV64IFD-NEXT:    addiw a1, a1, -1
 ; RV64IFD-NEXT:    bge a0, a1, .LBB9_3
@@ -721,25 +689,18 @@ define i16 @utest_f64i16(double %x) {
 ;
 ; RV32IFD-LABEL: utest_f64i16:
 ; RV32IFD:       # %bb.0: # %entry
-; RV32IFD-NEXT:    addi sp, sp, -16
-; RV32IFD-NEXT:    .cfi_def_cfa_offset 16
-; RV32IFD-NEXT:    sw a0, 8(sp)
-; RV32IFD-NEXT:    sw a1, 12(sp)
-; RV32IFD-NEXT:    fld ft0, 8(sp)
-; RV32IFD-NEXT:    fcvt.wu.d a0, ft0, rtz
+; RV32IFD-NEXT:    fcvt.wu.d a0, fa0, rtz
 ; RV32IFD-NEXT:    lui a1, 16
 ; RV32IFD-NEXT:    addi a1, a1, -1
 ; RV32IFD-NEXT:    bltu a0, a1, .LBB10_2
 ; RV32IFD-NEXT:  # %bb.1: # %entry
 ; RV32IFD-NEXT:    mv a0, a1
 ; RV32IFD-NEXT:  .LBB10_2: # %entry
-; RV32IFD-NEXT:    addi sp, sp, 16
 ; RV32IFD-NEXT:    ret
 ;
 ; RV64IFD-LABEL: utest_f64i16:
 ; RV64IFD:       # %bb.0: # %entry
-; RV64IFD-NEXT:    fmv.d.x ft0, a0
-; RV64IFD-NEXT:    fcvt.wu.d a0, ft0, rtz
+; RV64IFD-NEXT:    fcvt.wu.d a0, fa0, rtz
 ; RV64IFD-NEXT:    lui a1, 16
 ; RV64IFD-NEXT:    addiw a1, a1, -1
 ; RV64IFD-NEXT:    bltu a0, a1, .LBB10_2
@@ -800,32 +761,24 @@ define i16 @ustest_f64i16(double %x) {
 ;
 ; RV32IFD-LABEL: ustest_f64i16:
 ; RV32IFD:       # %bb.0: # %entry
-; RV32IFD-NEXT:    addi sp, sp, -16
-; RV32IFD-NEXT:    .cfi_def_cfa_offset 16
-; RV32IFD-NEXT:    sw a0, 8(sp)
-; RV32IFD-NEXT:    sw a1, 12(sp)
-; RV32IFD-NEXT:    fld ft0, 8(sp)
-; RV32IFD-NEXT:    fcvt.w.d a0, ft0, rtz
+; RV32IFD-NEXT:    fcvt.w.d a0, fa0, rtz
 ; RV32IFD-NEXT:    lui a1, 16
 ; RV32IFD-NEXT:    addi a1, a1, -1
 ; RV32IFD-NEXT:    bge a0, a1, .LBB11_3
 ; RV32IFD-NEXT:  # %bb.1: # %entry
 ; RV32IFD-NEXT:    blez a0, .LBB11_4
 ; RV32IFD-NEXT:  .LBB11_2: # %entry
-; RV32IFD-NEXT:    addi sp, sp, 16
 ; RV32IFD-NEXT:    ret
 ; RV32IFD-NEXT:  .LBB11_3: # %entry
 ; RV32IFD-NEXT:    mv a0, a1
 ; RV32IFD-NEXT:    bgtz a0, .LBB11_2
 ; RV32IFD-NEXT:  .LBB11_4: # %entry
 ; RV32IFD-NEXT:    li a0, 0
-; RV32IFD-NEXT:    addi sp, sp, 16
 ; RV32IFD-NEXT:    ret
 ;
 ; RV64IFD-LABEL: ustest_f64i16:
 ; RV64IFD:       # %bb.0: # %entry
-; RV64IFD-NEXT:    fmv.d.x ft0, a0
-; RV64IFD-NEXT:    fcvt.w.d a0, ft0, rtz
+; RV64IFD-NEXT:    fcvt.w.d a0, fa0, rtz
 ; RV64IFD-NEXT:    lui a1, 16
 ; RV64IFD-NEXT:    addiw a1, a1, -1
 ; RV64IFD-NEXT:    bge a0, a1, .LBB11_3
@@ -852,8 +805,7 @@ entry:
 define i16 @stest_f32i16(float %x) {
 ; RV32-LABEL: stest_f32i16:
 ; RV32:       # %bb.0: # %entry
-; RV32-NEXT:    fmv.w.x ft0, a0
-; RV32-NEXT:    fcvt.w.s a0, ft0, rtz
+; RV32-NEXT:    fcvt.w.s a0, fa0, rtz
 ; RV32-NEXT:    lui a1, 8
 ; RV32-NEXT:    addi a1, a1, -1
 ; RV32-NEXT:    bge a0, a1, .LBB12_3
@@ -872,8 +824,7 @@ define i16 @stest_f32i16(float %x) {
 ;
 ; RV64-LABEL: stest_f32i16:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    fmv.w.x ft0, a0
-; RV64-NEXT:    fcvt.w.s a0, ft0, rtz
+; RV64-NEXT:    fcvt.w.s a0, fa0, rtz
 ; RV64-NEXT:    lui a1, 8
 ; RV64-NEXT:    addiw a1, a1, -1
 ; RV64-NEXT:    bge a0, a1, .LBB12_3
@@ -902,8 +853,7 @@ entry:
 define i16 @utest_f32i16(float %x) {
 ; RV32-LABEL: utest_f32i16:
 ; RV32:       # %bb.0: # %entry
-; RV32-NEXT:    fmv.w.x ft0, a0
-; RV32-NEXT:    fcvt.wu.s a0, ft0, rtz
+; RV32-NEXT:    fcvt.wu.s a0, fa0, rtz
 ; RV32-NEXT:    lui a1, 16
 ; RV32-NEXT:    addi a1, a1, -1
 ; RV32-NEXT:    bltu a0, a1, .LBB13_2
@@ -914,8 +864,7 @@ define i16 @utest_f32i16(float %x) {
 ;
 ; RV64-LABEL: utest_f32i16:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    fmv.w.x ft0, a0
-; RV64-NEXT:    fcvt.wu.s a0, ft0, rtz
+; RV64-NEXT:    fcvt.wu.s a0, fa0, rtz
 ; RV64-NEXT:    lui a1, 16
 ; RV64-NEXT:    addiw a1, a1, -1
 ; RV64-NEXT:    bltu a0, a1, .LBB13_2
@@ -934,8 +883,7 @@ entry:
 define i16 @ustest_f32i16(float %x) {
 ; RV32-LABEL: ustest_f32i16:
 ; RV32:       # %bb.0: # %entry
-; RV32-NEXT:    fmv.w.x ft0, a0
-; RV32-NEXT:    fcvt.w.s a0, ft0, rtz
+; RV32-NEXT:    fcvt.w.s a0, fa0, rtz
 ; RV32-NEXT:    lui a1, 16
 ; RV32-NEXT:    addi a1, a1, -1
 ; RV32-NEXT:    bge a0, a1, .LBB14_3
@@ -952,8 +900,7 @@ define i16 @ustest_f32i16(float %x) {
 ;
 ; RV64-LABEL: ustest_f32i16:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    fmv.w.x ft0, a0
-; RV64-NEXT:    fcvt.w.s a0, ft0, rtz
+; RV64-NEXT:    fcvt.w.s a0, fa0, rtz
 ; RV64-NEXT:    lui a1, 16
 ; RV64-NEXT:    addiw a1, a1, -1
 ; RV64-NEXT:    bge a0, a1, .LBB14_3
@@ -984,9 +931,9 @@ define i16 @stest_f16i16(half %x) {
 ; RV32-NEXT:    .cfi_def_cfa_offset 16
 ; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    fmv.x.w a0, fa0
 ; RV32-NEXT:    call __extendhfsf2@plt
-; RV32-NEXT:    fmv.w.x ft0, a0
-; RV32-NEXT:    fcvt.w.s a0, ft0, rtz
+; RV32-NEXT:    fcvt.w.s a0, fa0, rtz
 ; RV32-NEXT:    lui a1, 8
 ; RV32-NEXT:    addi a1, a1, -1
 ; RV32-NEXT:    blt a0, a1, .LBB15_2
@@ -1008,9 +955,9 @@ define i16 @stest_f16i16(half %x) {
 ; RV64-NEXT:    .cfi_def_cfa_offset 16
 ; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64-NEXT:    .cfi_offset ra, -8
+; RV64-NEXT:    fmv.x.w a0, fa0
 ; RV64-NEXT:    call __extendhfsf2@plt
-; RV64-NEXT:    fmv.w.x ft0, a0
-; RV64-NEXT:    fcvt.l.s a0, ft0, rtz
+; RV64-NEXT:    fcvt.l.s a0, fa0, rtz
 ; RV64-NEXT:    lui a1, 8
 ; RV64-NEXT:    addiw a1, a1, -1
 ; RV64-NEXT:    blt a0, a1, .LBB15_2
@@ -1042,9 +989,9 @@ define i16 @utesth_f16i16(half %x) {
 ; RV32-NEXT:    .cfi_def_cfa_offset 16
 ; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    fmv.x.w a0, fa0
 ; RV32-NEXT:    call __extendhfsf2@plt
-; RV32-NEXT:    fmv.w.x ft0, a0
-; RV32-NEXT:    fcvt.wu.s a0, ft0, rtz
+; RV32-NEXT:    fcvt.wu.s a0, fa0, rtz
 ; RV32-NEXT:    lui a1, 16
 ; RV32-NEXT:    addi a1, a1, -1
 ; RV32-NEXT:    bltu a0, a1, .LBB16_2
@@ -1061,9 +1008,9 @@ define i16 @utesth_f16i16(half %x) {
 ; RV64-NEXT:    .cfi_def_cfa_offset 16
 ; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64-NEXT:    .cfi_offset ra, -8
+; RV64-NEXT:    fmv.x.w a0, fa0
 ; RV64-NEXT:    call __extendhfsf2@plt
-; RV64-NEXT:    fmv.w.x ft0, a0
-; RV64-NEXT:    fcvt.lu.s a0, ft0, rtz
+; RV64-NEXT:    fcvt.lu.s a0, fa0, rtz
 ; RV64-NEXT:    lui a1, 16
 ; RV64-NEXT:    addiw a1, a1, -1
 ; RV64-NEXT:    bltu a0, a1, .LBB16_2
@@ -1088,9 +1035,9 @@ define i16 @ustest_f16i16(half %x) {
 ; RV32-NEXT:    .cfi_def_cfa_offset 16
 ; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    fmv.x.w a0, fa0
 ; RV32-NEXT:    call __extendhfsf2@plt
-; RV32-NEXT:    fmv.w.x ft0, a0
-; RV32-NEXT:    fcvt.w.s a0, ft0, rtz
+; RV32-NEXT:    fcvt.w.s a0, fa0, rtz
 ; RV32-NEXT:    lui a1, 16
 ; RV32-NEXT:    addi a1, a1, -1
 ; RV32-NEXT:    blt a0, a1, .LBB17_2
@@ -1111,9 +1058,9 @@ define i16 @ustest_f16i16(half %x) {
 ; RV64-NEXT:    .cfi_def_cfa_offset 16
 ; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64-NEXT:    .cfi_offset ra, -8
+; RV64-NEXT:    fmv.x.w a0, fa0
 ; RV64-NEXT:    call __extendhfsf2@plt
-; RV64-NEXT:    fmv.w.x ft0, a0
-; RV64-NEXT:    fcvt.l.s a0, ft0, rtz
+; RV64-NEXT:    fcvt.l.s a0, fa0, rtz
 ; RV64-NEXT:    lui a1, 16
 ; RV64-NEXT:    addiw a1, a1, -1
 ; RV64-NEXT:    blt a0, a1, .LBB17_2
@@ -1140,66 +1087,66 @@ entry:
 ; i64 saturate
 
 define i64 @stest_f64i64(double %x) {
-; RV32-LABEL: stest_f64i64:
-; RV32:       # %bb.0: # %entry
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
-; RV32-NEXT:    .cfi_offset ra, -4
-; RV32-NEXT:    mv a2, a1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    call __fixdfti@plt
-; RV32-NEXT:    lw a2, 20(sp)
-; RV32-NEXT:    lw a3, 16(sp)
-; RV32-NEXT:    lw a1, 12(sp)
-; RV32-NEXT:    lw a0, 8(sp)
-; RV32-NEXT:    lui a4, 524288
-; RV32-NEXT:    addi a5, a4, -1
-; RV32-NEXT:    beq a1, a5, .LBB18_2
-; RV32-NEXT:  # %bb.1: # %entry
-; RV32-NEXT:    sltu a7, a1, a5
-; RV32-NEXT:    or a6, a3, a2
-; RV32-NEXT:    bnez a6, .LBB18_3
-; RV32-NEXT:    j .LBB18_4
-; RV32-NEXT:  .LBB18_2:
-; RV32-NEXT:    addi a6, a0, 1
-; RV32-NEXT:    snez a7, a6
-; RV32-NEXT:    or a6, a3, a2
-; RV32-NEXT:    beqz a6, .LBB18_4
-; RV32-NEXT:  .LBB18_3: # %entry
-; RV32-NEXT:    slti a7, a2, 0
-; RV32-NEXT:  .LBB18_4: # %entry
-; RV32-NEXT:    li a6, -1
-; RV32-NEXT:    beqz a7, .LBB18_7
-; RV32-NEXT:  # %bb.5: # %entry
-; RV32-NEXT:    beq a1, a4, .LBB18_8
-; RV32-NEXT:  .LBB18_6: # %entry
-; RV32-NEXT:    sltu a4, a4, a1
-; RV32-NEXT:    and a3, a3, a2
-; RV32-NEXT:    bne a3, a6, .LBB18_9
-; RV32-NEXT:    j .LBB18_10
-; RV32-NEXT:  .LBB18_7: # %entry
-; RV32-NEXT:    li a2, 0
-; RV32-NEXT:    li a3, 0
-; RV32-NEXT:    li a0, -1
-; RV32-NEXT:    mv a1, a5
-; RV32-NEXT:    bne a1, a4, .LBB18_6
-; RV32-NEXT:  .LBB18_8:
-; RV32-NEXT:    snez a4, a0
-; RV32-NEXT:    and a3, a3, a2
-; RV32-NEXT:    beq a3, a6, .LBB18_10
-; RV32-NEXT:  .LBB18_9: # %entry
-; RV32-NEXT:    slt a4, a6, a2
-; RV32-NEXT:  .LBB18_10: # %entry
-; RV32-NEXT:    bnez a4, .LBB18_12
-; RV32-NEXT:  # %bb.11: # %entry
-; RV32-NEXT:    li a0, 0
-; RV32-NEXT:    lui a1, 524288
-; RV32-NEXT:  .LBB18_12: # %entry
-; RV32-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
-; RV32-NEXT:    addi sp, sp, 32
-; RV32-NEXT:    ret
+; RV32IF-LABEL: stest_f64i64:
+; RV32IF:       # %bb.0: # %entry
+; RV32IF-NEXT:    addi sp, sp, -32
+; RV32IF-NEXT:    .cfi_def_cfa_offset 32
+; RV32IF-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    .cfi_offset ra, -4
+; RV32IF-NEXT:    mv a2, a1
+; RV32IF-NEXT:    mv a1, a0
+; RV32IF-NEXT:    addi a0, sp, 8
+; RV32IF-NEXT:    call __fixdfti@plt
+; RV32IF-NEXT:    lw a2, 20(sp)
+; RV32IF-NEXT:    lw a3, 16(sp)
+; RV32IF-NEXT:    lw a1, 12(sp)
+; RV32IF-NEXT:    lw a0, 8(sp)
+; RV32IF-NEXT:    lui a4, 524288
+; RV32IF-NEXT:    addi a5, a4, -1
+; RV32IF-NEXT:    beq a1, a5, .LBB18_2
+; RV32IF-NEXT:  # %bb.1: # %entry
+; RV32IF-NEXT:    sltu a7, a1, a5
+; RV32IF-NEXT:    or a6, a3, a2
+; RV32IF-NEXT:    bnez a6, .LBB18_3
+; RV32IF-NEXT:    j .LBB18_4
+; RV32IF-NEXT:  .LBB18_2:
+; RV32IF-NEXT:    addi a6, a0, 1
+; RV32IF-NEXT:    snez a7, a6
+; RV32IF-NEXT:    or a6, a3, a2
+; RV32IF-NEXT:    beqz a6, .LBB18_4
+; RV32IF-NEXT:  .LBB18_3: # %entry
+; RV32IF-NEXT:    slti a7, a2, 0
+; RV32IF-NEXT:  .LBB18_4: # %entry
+; RV32IF-NEXT:    li a6, -1
+; RV32IF-NEXT:    beqz a7, .LBB18_7
+; RV32IF-NEXT:  # %bb.5: # %entry
+; RV32IF-NEXT:    beq a1, a4, .LBB18_8
+; RV32IF-NEXT:  .LBB18_6: # %entry
+; RV32IF-NEXT:    sltu a4, a4, a1
+; RV32IF-NEXT:    and a3, a3, a2
+; RV32IF-NEXT:    bne a3, a6, .LBB18_9
+; RV32IF-NEXT:    j .LBB18_10
+; RV32IF-NEXT:  .LBB18_7: # %entry
+; RV32IF-NEXT:    li a2, 0
+; RV32IF-NEXT:    li a3, 0
+; RV32IF-NEXT:    li a0, -1
+; RV32IF-NEXT:    mv a1, a5
+; RV32IF-NEXT:    bne a1, a4, .LBB18_6
+; RV32IF-NEXT:  .LBB18_8:
+; RV32IF-NEXT:    snez a4, a0
+; RV32IF-NEXT:    and a3, a3, a2
+; RV32IF-NEXT:    beq a3, a6, .LBB18_10
+; RV32IF-NEXT:  .LBB18_9: # %entry
+; RV32IF-NEXT:    slt a4, a6, a2
+; RV32IF-NEXT:  .LBB18_10: # %entry
+; RV32IF-NEXT:    bnez a4, .LBB18_12
+; RV32IF-NEXT:  # %bb.11: # %entry
+; RV32IF-NEXT:    li a0, 0
+; RV32IF-NEXT:    lui a1, 524288
+; RV32IF-NEXT:  .LBB18_12: # %entry
+; RV32IF-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    addi sp, sp, 32
+; RV32IF-NEXT:    ret
 ;
 ; RV64IF-LABEL: stest_f64i64:
 ; RV64IF:       # %bb.0: # %entry
@@ -1238,13 +1185,71 @@ define i64 @stest_f64i64(double %x) {
 ; RV64IF-NEXT:    addi sp, sp, 16
 ; RV64IF-NEXT:    ret
 ;
+; RV32IFD-LABEL: stest_f64i64:
+; RV32IFD:       # %bb.0: # %entry
+; RV32IFD-NEXT:    addi sp, sp, -32
+; RV32IFD-NEXT:    .cfi_def_cfa_offset 32
+; RV32IFD-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    .cfi_offset ra, -4
+; RV32IFD-NEXT:    addi a0, sp, 8
+; RV32IFD-NEXT:    call __fixdfti@plt
+; RV32IFD-NEXT:    lw a2, 20(sp)
+; RV32IFD-NEXT:    lw a3, 16(sp)
+; RV32IFD-NEXT:    lw a1, 12(sp)
+; RV32IFD-NEXT:    lw a0, 8(sp)
+; RV32IFD-NEXT:    lui a4, 524288
+; RV32IFD-NEXT:    addi a5, a4, -1
+; RV32IFD-NEXT:    beq a1, a5, .LBB18_2
+; RV32IFD-NEXT:  # %bb.1: # %entry
+; RV32IFD-NEXT:    sltu a7, a1, a5
+; RV32IFD-NEXT:    or a6, a3, a2
+; RV32IFD-NEXT:    bnez a6, .LBB18_3
+; RV32IFD-NEXT:    j .LBB18_4
+; RV32IFD-NEXT:  .LBB18_2:
+; RV32IFD-NEXT:    addi a6, a0, 1
+; RV32IFD-NEXT:    snez a7, a6
+; RV32IFD-NEXT:    or a6, a3, a2
+; RV32IFD-NEXT:    beqz a6, .LBB18_4
+; RV32IFD-NEXT:  .LBB18_3: # %entry
+; RV32IFD-NEXT:    slti a7, a2, 0
+; RV32IFD-NEXT:  .LBB18_4: # %entry
+; RV32IFD-NEXT:    li a6, -1
+; RV32IFD-NEXT:    beqz a7, .LBB18_7
+; RV32IFD-NEXT:  # %bb.5: # %entry
+; RV32IFD-NEXT:    beq a1, a4, .LBB18_8
+; RV32IFD-NEXT:  .LBB18_6: # %entry
+; RV32IFD-NEXT:    sltu a4, a4, a1
+; RV32IFD-NEXT:    and a3, a3, a2
+; RV32IFD-NEXT:    bne a3, a6, .LBB18_9
+; RV32IFD-NEXT:    j .LBB18_10
+; RV32IFD-NEXT:  .LBB18_7: # %entry
+; RV32IFD-NEXT:    li a2, 0
+; RV32IFD-NEXT:    li a3, 0
+; RV32IFD-NEXT:    li a0, -1
+; RV32IFD-NEXT:    mv a1, a5
+; RV32IFD-NEXT:    bne a1, a4, .LBB18_6
+; RV32IFD-NEXT:  .LBB18_8:
+; RV32IFD-NEXT:    snez a4, a0
+; RV32IFD-NEXT:    and a3, a3, a2
+; RV32IFD-NEXT:    beq a3, a6, .LBB18_10
+; RV32IFD-NEXT:  .LBB18_9: # %entry
+; RV32IFD-NEXT:    slt a4, a6, a2
+; RV32IFD-NEXT:  .LBB18_10: # %entry
+; RV32IFD-NEXT:    bnez a4, .LBB18_12
+; RV32IFD-NEXT:  # %bb.11: # %entry
+; RV32IFD-NEXT:    li a0, 0
+; RV32IFD-NEXT:    lui a1, 524288
+; RV32IFD-NEXT:  .LBB18_12: # %entry
+; RV32IFD-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    addi sp, sp, 32
+; RV32IFD-NEXT:    ret
+;
 ; RV64IFD-LABEL: stest_f64i64:
 ; RV64IFD:       # %bb.0: # %entry
-; RV64IFD-NEXT:    fmv.d.x ft0, a0
-; RV64IFD-NEXT:    feq.d a0, ft0, ft0
+; RV64IFD-NEXT:    feq.d a0, fa0, fa0
 ; RV64IFD-NEXT:    beqz a0, .LBB18_2
 ; RV64IFD-NEXT:  # %bb.1:
-; RV64IFD-NEXT:    fcvt.l.d a0, ft0, rtz
+; RV64IFD-NEXT:    fcvt.l.d a0, fa0, rtz
 ; RV64IFD-NEXT:  .LBB18_2: # %entry
 ; RV64IFD-NEXT:    ret
 entry:
@@ -1258,43 +1263,43 @@ entry:
 }
 
 define i64 @utest_f64i64(double %x) {
-; RV32-LABEL: utest_f64i64:
-; RV32:       # %bb.0: # %entry
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
-; RV32-NEXT:    .cfi_offset ra, -4
-; RV32-NEXT:    mv a2, a1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    call __fixunsdfti@plt
-; RV32-NEXT:    lw a0, 20(sp)
-; RV32-NEXT:    lw a1, 16(sp)
-; RV32-NEXT:    beqz a0, .LBB19_2
-; RV32-NEXT:  # %bb.1: # %entry
-; RV32-NEXT:    li a2, 0
-; RV32-NEXT:    j .LBB19_3
-; RV32-NEXT:  .LBB19_2:
-; RV32-NEXT:    seqz a2, a1
-; RV32-NEXT:  .LBB19_3: # %entry
-; RV32-NEXT:    xori a1, a1, 1
-; RV32-NEXT:    or a1, a1, a0
-; RV32-NEXT:    li a0, 0
-; RV32-NEXT:    beqz a1, .LBB19_5
-; RV32-NEXT:  # %bb.4: # %entry
-; RV32-NEXT:    mv a0, a2
-; RV32-NEXT:  .LBB19_5: # %entry
-; RV32-NEXT:    bnez a0, .LBB19_7
-; RV32-NEXT:  # %bb.6: # %entry
-; RV32-NEXT:    li a1, 0
-; RV32-NEXT:    j .LBB19_8
-; RV32-NEXT:  .LBB19_7:
-; RV32-NEXT:    lw a1, 12(sp)
-; RV32-NEXT:    lw a0, 8(sp)
-; RV32-NEXT:  .LBB19_8: # %entry
-; RV32-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
-; RV32-NEXT:    addi sp, sp, 32
-; RV32-NEXT:    ret
+; RV32IF-LABEL: utest_f64i64:
+; RV32IF:       # %bb.0: # %entry
+; RV32IF-NEXT:    addi sp, sp, -32
+; RV32IF-NEXT:    .cfi_def_cfa_offset 32
+; RV32IF-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    .cfi_offset ra, -4
+; RV32IF-NEXT:    mv a2, a1
+; RV32IF-NEXT:    mv a1, a0
+; RV32IF-NEXT:    addi a0, sp, 8
+; RV32IF-NEXT:    call __fixunsdfti@plt
+; RV32IF-NEXT:    lw a0, 20(sp)
+; RV32IF-NEXT:    lw a1, 16(sp)
+; RV32IF-NEXT:    beqz a0, .LBB19_2
+; RV32IF-NEXT:  # %bb.1: # %entry
+; RV32IF-NEXT:    li a2, 0
+; RV32IF-NEXT:    j .LBB19_3
+; RV32IF-NEXT:  .LBB19_2:
+; RV32IF-NEXT:    seqz a2, a1
+; RV32IF-NEXT:  .LBB19_3: # %entry
+; RV32IF-NEXT:    xori a1, a1, 1
+; RV32IF-NEXT:    or a1, a1, a0
+; RV32IF-NEXT:    li a0, 0
+; RV32IF-NEXT:    beqz a1, .LBB19_5
+; RV32IF-NEXT:  # %bb.4: # %entry
+; RV32IF-NEXT:    mv a0, a2
+; RV32IF-NEXT:  .LBB19_5: # %entry
+; RV32IF-NEXT:    bnez a0, .LBB19_7
+; RV32IF-NEXT:  # %bb.6: # %entry
+; RV32IF-NEXT:    li a1, 0
+; RV32IF-NEXT:    j .LBB19_8
+; RV32IF-NEXT:  .LBB19_7:
+; RV32IF-NEXT:    lw a1, 12(sp)
+; RV32IF-NEXT:    lw a0, 8(sp)
+; RV32IF-NEXT:  .LBB19_8: # %entry
+; RV32IF-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    addi sp, sp, 32
+; RV32IF-NEXT:    ret
 ;
 ; RV64-LABEL: utest_f64i64:
 ; RV64:       # %bb.0: # %entry
@@ -1310,6 +1315,42 @@ define i64 @utest_f64i64(double %x) {
 ; RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
+;
+; RV32IFD-LABEL: utest_f64i64:
+; RV32IFD:       # %bb.0: # %entry
+; RV32IFD-NEXT:    addi sp, sp, -32
+; RV32IFD-NEXT:    .cfi_def_cfa_offset 32
+; RV32IFD-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    .cfi_offset ra, -4
+; RV32IFD-NEXT:    addi a0, sp, 8
+; RV32IFD-NEXT:    call __fixunsdfti@plt
+; RV32IFD-NEXT:    lw a0, 20(sp)
+; RV32IFD-NEXT:    lw a1, 16(sp)
+; RV32IFD-NEXT:    beqz a0, .LBB19_2
+; RV32IFD-NEXT:  # %bb.1: # %entry
+; RV32IFD-NEXT:    li a2, 0
+; RV32IFD-NEXT:    j .LBB19_3
+; RV32IFD-NEXT:  .LBB19_2:
+; RV32IFD-NEXT:    seqz a2, a1
+; RV32IFD-NEXT:  .LBB19_3: # %entry
+; RV32IFD-NEXT:    xori a1, a1, 1
+; RV32IFD-NEXT:    or a1, a1, a0
+; RV32IFD-NEXT:    li a0, 0
+; RV32IFD-NEXT:    beqz a1, .LBB19_5
+; RV32IFD-NEXT:  # %bb.4: # %entry
+; RV32IFD-NEXT:    mv a0, a2
+; RV32IFD-NEXT:  .LBB19_5: # %entry
+; RV32IFD-NEXT:    bnez a0, .LBB19_7
+; RV32IFD-NEXT:  # %bb.6: # %entry
+; RV32IFD-NEXT:    li a1, 0
+; RV32IFD-NEXT:    j .LBB19_8
+; RV32IFD-NEXT:  .LBB19_7:
+; RV32IFD-NEXT:    lw a1, 12(sp)
+; RV32IFD-NEXT:    lw a0, 8(sp)
+; RV32IFD-NEXT:  .LBB19_8: # %entry
+; RV32IFD-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    addi sp, sp, 32
+; RV32IFD-NEXT:    ret
 entry:
   %conv = fptoui double %x to i128
   %0 = icmp ult i128 %conv, 18446744073709551616
@@ -1319,68 +1360,68 @@ entry:
 }
 
 define i64 @ustest_f64i64(double %x) {
-; RV32-LABEL: ustest_f64i64:
-; RV32:       # %bb.0: # %entry
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
-; RV32-NEXT:    .cfi_offset ra, -4
-; RV32-NEXT:    mv a2, a1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    call __fixdfti@plt
-; RV32-NEXT:    lw a2, 20(sp)
-; RV32-NEXT:    lw a3, 16(sp)
-; RV32-NEXT:    beqz a2, .LBB20_2
-; RV32-NEXT:  # %bb.1: # %entry
-; RV32-NEXT:    slti a0, a2, 0
-; RV32-NEXT:    j .LBB20_3
-; RV32-NEXT:  .LBB20_2:
-; RV32-NEXT:    seqz a0, a3
-; RV32-NEXT:  .LBB20_3: # %entry
-; RV32-NEXT:    xori a1, a3, 1
-; RV32-NEXT:    or a4, a1, a2
-; RV32-NEXT:    li a1, 0
-; RV32-NEXT:    beqz a4, .LBB20_5
-; RV32-NEXT:  # %bb.4: # %entry
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:  .LBB20_5: # %entry
-; RV32-NEXT:    bnez a1, .LBB20_9
-; RV32-NEXT:  # %bb.6: # %entry
-; RV32-NEXT:    li a0, 0
-; RV32-NEXT:    li a2, 0
-; RV32-NEXT:    li a3, 1
-; RV32-NEXT:    bnez a2, .LBB20_10
-; RV32-NEXT:  .LBB20_7:
-; RV32-NEXT:    snez a4, a3
-; RV32-NEXT:    bnez a1, .LBB20_11
-; RV32-NEXT:  .LBB20_8:
-; RV32-NEXT:    snez a5, a0
-; RV32-NEXT:    or a2, a3, a2
-; RV32-NEXT:    bnez a2, .LBB20_12
-; RV32-NEXT:    j .LBB20_13
-; RV32-NEXT:  .LBB20_9:
-; RV32-NEXT:    lw a1, 12(sp)
-; RV32-NEXT:    lw a0, 8(sp)
-; RV32-NEXT:    beqz a2, .LBB20_7
-; RV32-NEXT:  .LBB20_10: # %entry
-; RV32-NEXT:    sgtz a4, a2
-; RV32-NEXT:    beqz a1, .LBB20_8
-; RV32-NEXT:  .LBB20_11: # %entry
-; RV32-NEXT:    snez a5, a1
-; RV32-NEXT:    or a2, a3, a2
-; RV32-NEXT:    beqz a2, .LBB20_13
-; RV32-NEXT:  .LBB20_12: # %entry
-; RV32-NEXT:    mv a5, a4
-; RV32-NEXT:  .LBB20_13: # %entry
-; RV32-NEXT:    bnez a5, .LBB20_15
-; RV32-NEXT:  # %bb.14: # %entry
-; RV32-NEXT:    li a0, 0
-; RV32-NEXT:    li a1, 0
-; RV32-NEXT:  .LBB20_15: # %entry
-; RV32-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
-; RV32-NEXT:    addi sp, sp, 32
-; RV32-NEXT:    ret
+; RV32IF-LABEL: ustest_f64i64:
+; RV32IF:       # %bb.0: # %entry
+; RV32IF-NEXT:    addi sp, sp, -32
+; RV32IF-NEXT:    .cfi_def_cfa_offset 32
+; RV32IF-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    .cfi_offset ra, -4
+; RV32IF-NEXT:    mv a2, a1
+; RV32IF-NEXT:    mv a1, a0
+; RV32IF-NEXT:    addi a0, sp, 8
+; RV32IF-NEXT:    call __fixdfti@plt
+; RV32IF-NEXT:    lw a2, 20(sp)
+; RV32IF-NEXT:    lw a3, 16(sp)
+; RV32IF-NEXT:    beqz a2, .LBB20_2
+; RV32IF-NEXT:  # %bb.1: # %entry
+; RV32IF-NEXT:    slti a0, a2, 0
+; RV32IF-NEXT:    j .LBB20_3
+; RV32IF-NEXT:  .LBB20_2:
+; RV32IF-NEXT:    seqz a0, a3
+; RV32IF-NEXT:  .LBB20_3: # %entry
+; RV32IF-NEXT:    xori a1, a3, 1
+; RV32IF-NEXT:    or a4, a1, a2
+; RV32IF-NEXT:    li a1, 0
+; RV32IF-NEXT:    beqz a4, .LBB20_5
+; RV32IF-NEXT:  # %bb.4: # %entry
+; RV32IF-NEXT:    mv a1, a0
+; RV32IF-NEXT:  .LBB20_5: # %entry
+; RV32IF-NEXT:    bnez a1, .LBB20_9
+; RV32IF-NEXT:  # %bb.6: # %entry
+; RV32IF-NEXT:    li a0, 0
+; RV32IF-NEXT:    li a2, 0
+; RV32IF-NEXT:    li a3, 1
+; RV32IF-NEXT:    bnez a2, .LBB20_10
+; RV32IF-NEXT:  .LBB20_7:
+; RV32IF-NEXT:    snez a4, a3
+; RV32IF-NEXT:    bnez a1, .LBB20_11
+; RV32IF-NEXT:  .LBB20_8:
+; RV32IF-NEXT:    snez a5, a0
+; RV32IF-NEXT:    or a2, a3, a2
+; RV32IF-NEXT:    bnez a2, .LBB20_12
+; RV32IF-NEXT:    j .LBB20_13
+; RV32IF-NEXT:  .LBB20_9:
+; RV32IF-NEXT:    lw a1, 12(sp)
+; RV32IF-NEXT:    lw a0, 8(sp)
+; RV32IF-NEXT:    beqz a2, .LBB20_7
+; RV32IF-NEXT:  .LBB20_10: # %entry
+; RV32IF-NEXT:    sgtz a4, a2
+; RV32IF-NEXT:    beqz a1, .LBB20_8
+; RV32IF-NEXT:  .LBB20_11: # %entry
+; RV32IF-NEXT:    snez a5, a1
+; RV32IF-NEXT:    or a2, a3, a2
+; RV32IF-NEXT:    beqz a2, .LBB20_13
+; RV32IF-NEXT:  .LBB20_12: # %entry
+; RV32IF-NEXT:    mv a5, a4
+; RV32IF-NEXT:  .LBB20_13: # %entry
+; RV32IF-NEXT:    bnez a5, .LBB20_15
+; RV32IF-NEXT:  # %bb.14: # %entry
+; RV32IF-NEXT:    li a0, 0
+; RV32IF-NEXT:    li a1, 0
+; RV32IF-NEXT:  .LBB20_15: # %entry
+; RV32IF-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    addi sp, sp, 32
+; RV32IF-NEXT:    ret
 ;
 ; RV64-LABEL: ustest_f64i64:
 ; RV64:       # %bb.0: # %entry
@@ -1408,6 +1449,67 @@ define i64 @ustest_f64i64(double %x) {
 ; RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
+;
+; RV32IFD-LABEL: ustest_f64i64:
+; RV32IFD:       # %bb.0: # %entry
+; RV32IFD-NEXT:    addi sp, sp, -32
+; RV32IFD-NEXT:    .cfi_def_cfa_offset 32
+; RV32IFD-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    .cfi_offset ra, -4
+; RV32IFD-NEXT:    addi a0, sp, 8
+; RV32IFD-NEXT:    call __fixdfti@plt
+; RV32IFD-NEXT:    lw a2, 20(sp)
+; RV32IFD-NEXT:    lw a3, 16(sp)
+; RV32IFD-NEXT:    beqz a2, .LBB20_2
+; RV32IFD-NEXT:  # %bb.1: # %entry
+; RV32IFD-NEXT:    slti a0, a2, 0
+; RV32IFD-NEXT:    j .LBB20_3
+; RV32IFD-NEXT:  .LBB20_2:
+; RV32IFD-NEXT:    seqz a0, a3
+; RV32IFD-NEXT:  .LBB20_3: # %entry
+; RV32IFD-NEXT:    xori a1, a3, 1
+; RV32IFD-NEXT:    or a4, a1, a2
+; RV32IFD-NEXT:    li a1, 0
+; RV32IFD-NEXT:    beqz a4, .LBB20_5
+; RV32IFD-NEXT:  # %bb.4: # %entry
+; RV32IFD-NEXT:    mv a1, a0
+; RV32IFD-NEXT:  .LBB20_5: # %entry
+; RV32IFD-NEXT:    bnez a1, .LBB20_9
+; RV32IFD-NEXT:  # %bb.6: # %entry
+; RV32IFD-NEXT:    li a0, 0
+; RV32IFD-NEXT:    li a2, 0
+; RV32IFD-NEXT:    li a3, 1
+; RV32IFD-NEXT:    bnez a2, .LBB20_10
+; RV32IFD-NEXT:  .LBB20_7:
+; RV32IFD-NEXT:    snez a4, a3
+; RV32IFD-NEXT:    bnez a1, .LBB20_11
+; RV32IFD-NEXT:  .LBB20_8:
+; RV32IFD-NEXT:    snez a5, a0
+; RV32IFD-NEXT:    or a2, a3, a2
+; RV32IFD-NEXT:    bnez a2, .LBB20_12
+; RV32IFD-NEXT:    j .LBB20_13
+; RV32IFD-NEXT:  .LBB20_9:
+; RV32IFD-NEXT:    lw a1, 12(sp)
+; RV32IFD-NEXT:    lw a0, 8(sp)
+; RV32IFD-NEXT:    beqz a2, .LBB20_7
+; RV32IFD-NEXT:  .LBB20_10: # %entry
+; RV32IFD-NEXT:    sgtz a4, a2
+; RV32IFD-NEXT:    beqz a1, .LBB20_8
+; RV32IFD-NEXT:  .LBB20_11: # %entry
+; RV32IFD-NEXT:    snez a5, a1
+; RV32IFD-NEXT:    or a2, a3, a2
+; RV32IFD-NEXT:    beqz a2, .LBB20_13
+; RV32IFD-NEXT:  .LBB20_12: # %entry
+; RV32IFD-NEXT:    mv a5, a4
+; RV32IFD-NEXT:  .LBB20_13: # %entry
+; RV32IFD-NEXT:    bnez a5, .LBB20_15
+; RV32IFD-NEXT:  # %bb.14: # %entry
+; RV32IFD-NEXT:    li a0, 0
+; RV32IFD-NEXT:    li a1, 0
+; RV32IFD-NEXT:  .LBB20_15: # %entry
+; RV32IFD-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    addi sp, sp, 32
+; RV32IFD-NEXT:    ret
 entry:
   %conv = fptosi double %x to i128
   %0 = icmp slt i128 %conv, 18446744073709551616
@@ -1425,7 +1527,6 @@ define i64 @stest_f32i64(float %x) {
 ; RV32-NEXT:    .cfi_def_cfa_offset 32
 ; RV32-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
 ; RV32-NEXT:    .cfi_offset ra, -4
-; RV32-NEXT:    mv a1, a0
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    call __fixsfti@plt
 ; RV32-NEXT:    lw a2, 20(sp)
@@ -1481,11 +1582,10 @@ define i64 @stest_f32i64(float %x) {
 ;
 ; RV64-LABEL: stest_f32i64:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    fmv.w.x ft0, a0
-; RV64-NEXT:    feq.s a0, ft0, ft0
+; RV64-NEXT:    feq.s a0, fa0, fa0
 ; RV64-NEXT:    beqz a0, .LBB21_2
 ; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    fcvt.l.s a0, ft0, rtz
+; RV64-NEXT:    fcvt.l.s a0, fa0, rtz
 ; RV64-NEXT:  .LBB21_2: # %entry
 ; RV64-NEXT:    ret
 entry:
@@ -1505,7 +1605,6 @@ define i64 @utest_f32i64(float %x) {
 ; RV32-NEXT:    .cfi_def_cfa_offset 32
 ; RV32-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
 ; RV32-NEXT:    .cfi_offset ra, -4
-; RV32-NEXT:    mv a1, a0
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    call __fixunssfti@plt
 ; RV32-NEXT:    lw a0, 20(sp)
@@ -1565,7 +1664,6 @@ define i64 @ustest_f32i64(float %x) {
 ; RV32-NEXT:    .cfi_def_cfa_offset 32
 ; RV32-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
 ; RV32-NEXT:    .cfi_offset ra, -4
-; RV32-NEXT:    mv a1, a0
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    call __fixsfti@plt
 ; RV32-NEXT:    lw a2, 20(sp)
@@ -1664,8 +1762,8 @@ define i64 @stest_f16i64(half %x) {
 ; RV32-NEXT:    .cfi_def_cfa_offset 32
 ; RV32-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
 ; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    fmv.x.w a0, fa0
 ; RV32-NEXT:    call __extendhfsf2@plt
-; RV32-NEXT:    mv a1, a0
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    call __fixsfti@plt
 ; RV32-NEXT:    lw a2, 20(sp)
@@ -1725,6 +1823,7 @@ define i64 @stest_f16i64(half %x) {
 ; RV64-NEXT:    .cfi_def_cfa_offset 16
 ; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64-NEXT:    .cfi_offset ra, -8
+; RV64-NEXT:    fmv.x.w a0, fa0
 ; RV64-NEXT:    call __extendhfsf2@plt
 ; RV64-NEXT:    call __fixsfti@plt
 ; RV64-NEXT:    li a2, -1
@@ -1773,8 +1872,8 @@ define i64 @utesth_f16i64(half %x) {
 ; RV32-NEXT:    .cfi_def_cfa_offset 32
 ; RV32-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
 ; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    fmv.x.w a0, fa0
 ; RV32-NEXT:    call __extendhfsf2@plt
-; RV32-NEXT:    mv a1, a0
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    call __fixunssfti@plt
 ; RV32-NEXT:    lw a0, 20(sp)
@@ -1811,6 +1910,7 @@ define i64 @utesth_f16i64(half %x) {
 ; RV64-NEXT:    .cfi_def_cfa_offset 16
 ; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64-NEXT:    .cfi_offset ra, -8
+; RV64-NEXT:    fmv.x.w a0, fa0
 ; RV64-NEXT:    call __extendhfsf2@plt
 ; RV64-NEXT:    call __fixunssfti@plt
 ; RV64-NEXT:    beqz a1, .LBB25_2
@@ -1835,8 +1935,8 @@ define i64 @ustest_f16i64(half %x) {
 ; RV32-NEXT:    .cfi_def_cfa_offset 32
 ; RV32-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
 ; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    fmv.x.w a0, fa0
 ; RV32-NEXT:    call __extendhfsf2@plt
-; RV32-NEXT:    mv a1, a0
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    call __fixsfti@plt
 ; RV32-NEXT:    lw a2, 20(sp)
@@ -1898,6 +1998,7 @@ define i64 @ustest_f16i64(half %x) {
 ; RV64-NEXT:    .cfi_def_cfa_offset 16
 ; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64-NEXT:    .cfi_offset ra, -8
+; RV64-NEXT:    fmv.x.w a0, fa0
 ; RV64-NEXT:    call __extendhfsf2@plt
 ; RV64-NEXT:    call __fixsfti@plt
 ; RV64-NEXT:    blez a1, .LBB26_2
@@ -2011,23 +2112,16 @@ define i32 @stest_f64i32_mm(double %x) {
 ;
 ; RV32IFD-LABEL: stest_f64i32_mm:
 ; RV32IFD:       # %bb.0: # %entry
-; RV32IFD-NEXT:    addi sp, sp, -16
-; RV32IFD-NEXT:    .cfi_def_cfa_offset 16
-; RV32IFD-NEXT:    sw a0, 8(sp)
-; RV32IFD-NEXT:    sw a1, 12(sp)
-; RV32IFD-NEXT:    fld ft0, 8(sp)
-; RV32IFD-NEXT:    feq.d a0, ft0, ft0
+; RV32IFD-NEXT:    feq.d a0, fa0, fa0
 ; RV32IFD-NEXT:    beqz a0, .LBB27_2
 ; RV32IFD-NEXT:  # %bb.1:
-; RV32IFD-NEXT:    fcvt.w.d a0, ft0, rtz
+; RV32IFD-NEXT:    fcvt.w.d a0, fa0, rtz
 ; RV32IFD-NEXT:  .LBB27_2: # %entry
-; RV32IFD-NEXT:    addi sp, sp, 16
 ; RV32IFD-NEXT:    ret
 ;
 ; RV64IFD-LABEL: stest_f64i32_mm:
 ; RV64IFD:       # %bb.0: # %entry
-; RV64IFD-NEXT:    fmv.d.x ft0, a0
-; RV64IFD-NEXT:    fcvt.l.d a0, ft0, rtz
+; RV64IFD-NEXT:    fcvt.l.d a0, fa0, rtz
 ; RV64IFD-NEXT:    lui a1, 524288
 ; RV64IFD-NEXT:    addiw a2, a1, -1
 ; RV64IFD-NEXT:    bge a0, a2, .LBB27_3
@@ -2084,23 +2178,16 @@ define i32 @utest_f64i32_mm(double %x) {
 ;
 ; RV32IFD-LABEL: utest_f64i32_mm:
 ; RV32IFD:       # %bb.0: # %entry
-; RV32IFD-NEXT:    addi sp, sp, -16
-; RV32IFD-NEXT:    .cfi_def_cfa_offset 16
-; RV32IFD-NEXT:    sw a0, 8(sp)
-; RV32IFD-NEXT:    sw a1, 12(sp)
-; RV32IFD-NEXT:    fld ft0, 8(sp)
-; RV32IFD-NEXT:    feq.d a0, ft0, ft0
+; RV32IFD-NEXT:    feq.d a0, fa0, fa0
 ; RV32IFD-NEXT:    beqz a0, .LBB28_2
 ; RV32IFD-NEXT:  # %bb.1:
-; RV32IFD-NEXT:    fcvt.wu.d a0, ft0, rtz
+; RV32IFD-NEXT:    fcvt.wu.d a0, fa0, rtz
 ; RV32IFD-NEXT:  .LBB28_2: # %entry
-; RV32IFD-NEXT:    addi sp, sp, 16
 ; RV32IFD-NEXT:    ret
 ;
 ; RV64IFD-LABEL: utest_f64i32_mm:
 ; RV64IFD:       # %bb.0: # %entry
-; RV64IFD-NEXT:    fmv.d.x ft0, a0
-; RV64IFD-NEXT:    fcvt.lu.d a0, ft0, rtz
+; RV64IFD-NEXT:    fcvt.lu.d a0, fa0, rtz
 ; RV64IFD-NEXT:    li a1, -1
 ; RV64IFD-NEXT:    srli a1, a1, 32
 ; RV64IFD-NEXT:    bltu a0, a1, .LBB28_2
@@ -2178,23 +2265,16 @@ define i32 @ustest_f64i32_mm(double %x) {
 ;
 ; RV32IFD-LABEL: ustest_f64i32_mm:
 ; RV32IFD:       # %bb.0: # %entry
-; RV32IFD-NEXT:    addi sp, sp, -16
-; RV32IFD-NEXT:    .cfi_def_cfa_offset 16
-; RV32IFD-NEXT:    sw a0, 8(sp)
-; RV32IFD-NEXT:    sw a1, 12(sp)
-; RV32IFD-NEXT:    fld ft0, 8(sp)
-; RV32IFD-NEXT:    feq.d a0, ft0, ft0
+; RV32IFD-NEXT:    feq.d a0, fa0, fa0
 ; RV32IFD-NEXT:    beqz a0, .LBB29_2
 ; RV32IFD-NEXT:  # %bb.1:
-; RV32IFD-NEXT:    fcvt.wu.d a0, ft0, rtz
+; RV32IFD-NEXT:    fcvt.wu.d a0, fa0, rtz
 ; RV32IFD-NEXT:  .LBB29_2: # %entry
-; RV32IFD-NEXT:    addi sp, sp, 16
 ; RV32IFD-NEXT:    ret
 ;
 ; RV64IFD-LABEL: ustest_f64i32_mm:
 ; RV64IFD:       # %bb.0: # %entry
-; RV64IFD-NEXT:    fmv.d.x ft0, a0
-; RV64IFD-NEXT:    fcvt.l.d a0, ft0, rtz
+; RV64IFD-NEXT:    fcvt.l.d a0, fa0, rtz
 ; RV64IFD-NEXT:    li a1, -1
 ; RV64IFD-NEXT:    srli a1, a1, 32
 ; RV64IFD-NEXT:    bge a0, a1, .LBB29_3
@@ -2219,18 +2299,16 @@ entry:
 define i32 @stest_f32i32_mm(float %x) {
 ; RV32-LABEL: stest_f32i32_mm:
 ; RV32:       # %bb.0: # %entry
-; RV32-NEXT:    fmv.w.x ft0, a0
-; RV32-NEXT:    feq.s a0, ft0, ft0
+; RV32-NEXT:    feq.s a0, fa0, fa0
 ; RV32-NEXT:    beqz a0, .LBB30_2
 ; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    fcvt.w.s a0, ft0, rtz
+; RV32-NEXT:    fcvt.w.s a0, fa0, rtz
 ; RV32-NEXT:  .LBB30_2: # %entry
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: stest_f32i32_mm:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    fmv.w.x ft0, a0
-; RV64-NEXT:    fcvt.l.s a0, ft0, rtz
+; RV64-NEXT:    fcvt.l.s a0, fa0, rtz
 ; RV64-NEXT:    lui a1, 524288
 ; RV64-NEXT:    addiw a2, a1, -1
 ; RV64-NEXT:    bge a0, a2, .LBB30_3
@@ -2255,18 +2333,16 @@ entry:
 define i32 @utest_f32i32_mm(float %x) {
 ; RV32-LABEL: utest_f32i32_mm:
 ; RV32:       # %bb.0: # %entry
-; RV32-NEXT:    fmv.w.x ft0, a0
-; RV32-NEXT:    feq.s a0, ft0, ft0
+; RV32-NEXT:    feq.s a0, fa0, fa0
 ; RV32-NEXT:    beqz a0, .LBB31_2
 ; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    fcvt.wu.s a0, ft0, rtz
+; RV32-NEXT:    fcvt.wu.s a0, fa0, rtz
 ; RV32-NEXT:  .LBB31_2: # %entry
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: utest_f32i32_mm:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    fmv.w.x ft0, a0
-; RV64-NEXT:    fcvt.lu.s a0, ft0, rtz
+; RV64-NEXT:    fcvt.lu.s a0, fa0, rtz
 ; RV64-NEXT:    li a1, -1
 ; RV64-NEXT:    srli a1, a1, 32
 ; RV64-NEXT:    bltu a0, a1, .LBB31_2
@@ -2284,18 +2360,16 @@ entry:
 define i32 @ustest_f32i32_mm(float %x) {
 ; RV32-LABEL: ustest_f32i32_mm:
 ; RV32:       # %bb.0: # %entry
-; RV32-NEXT:    fmv.w.x ft0, a0
-; RV32-NEXT:    feq.s a0, ft0, ft0
+; RV32-NEXT:    feq.s a0, fa0, fa0
 ; RV32-NEXT:    beqz a0, .LBB32_2
 ; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    fcvt.wu.s a0, ft0, rtz
+; RV32-NEXT:    fcvt.wu.s a0, fa0, rtz
 ; RV32-NEXT:  .LBB32_2: # %entry
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: ustest_f32i32_mm:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    fmv.w.x ft0, a0
-; RV64-NEXT:    fcvt.l.s a0, ft0, rtz
+; RV64-NEXT:    fcvt.l.s a0, fa0, rtz
 ; RV64-NEXT:    li a1, -1
 ; RV64-NEXT:    srli a1, a1, 32
 ; RV64-NEXT:    bge a0, a1, .LBB32_3
@@ -2324,6 +2398,7 @@ define i32 @stest_f16i32_mm(half %x) {
 ; RV32-NEXT:    .cfi_def_cfa_offset 16
 ; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    fmv.x.w a0, fa0
 ; RV32-NEXT:    call __extendhfsf2@plt
 ; RV32-NEXT:    call __fixsfdi@plt
 ; RV32-NEXT:    lui a2, 524288
@@ -2378,9 +2453,9 @@ define i32 @stest_f16i32_mm(half %x) {
 ; RV64-NEXT:    .cfi_def_cfa_offset 16
 ; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64-NEXT:    .cfi_offset ra, -8
+; RV64-NEXT:    fmv.x.w a0, fa0
 ; RV64-NEXT:    call __extendhfsf2@plt
-; RV64-NEXT:    fmv.w.x ft0, a0
-; RV64-NEXT:    fcvt.l.s a0, ft0, rtz
+; RV64-NEXT:    fcvt.l.s a0, fa0, rtz
 ; RV64-NEXT:    lui a1, 524288
 ; RV64-NEXT:    addiw a2, a1, -1
 ; RV64-NEXT:    blt a0, a2, .LBB33_2
@@ -2409,6 +2484,7 @@ define i32 @utesth_f16i32_mm(half %x) {
 ; RV32-NEXT:    .cfi_def_cfa_offset 16
 ; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    fmv.x.w a0, fa0
 ; RV32-NEXT:    call __extendhfsf2@plt
 ; RV32-NEXT:    call __fixunssfdi@plt
 ; RV32-NEXT:    beqz a1, .LBB34_2
@@ -2425,9 +2501,9 @@ define i32 @utesth_f16i32_mm(half %x) {
 ; RV64-NEXT:    .cfi_def_cfa_offset 16
 ; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64-NEXT:    .cfi_offset ra, -8
+; RV64-NEXT:    fmv.x.w a0, fa0
 ; RV64-NEXT:    call __extendhfsf2@plt
-; RV64-NEXT:    fmv.w.x ft0, a0
-; RV64-NEXT:    fcvt.lu.s a0, ft0, rtz
+; RV64-NEXT:    fcvt.lu.s a0, fa0, rtz
 ; RV64-NEXT:    li a1, -1
 ; RV64-NEXT:    srli a1, a1, 32
 ; RV64-NEXT:    bltu a0, a1, .LBB34_2
@@ -2451,6 +2527,7 @@ define i32 @ustest_f16i32_mm(half %x) {
 ; RV32-NEXT:    .cfi_def_cfa_offset 16
 ; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    fmv.x.w a0, fa0
 ; RV32-NEXT:    call __extendhfsf2@plt
 ; RV32-NEXT:    call __fixsfdi@plt
 ; RV32-NEXT:    mv a2, a0
@@ -2491,9 +2568,9 @@ define i32 @ustest_f16i32_mm(half %x) {
 ; RV64-NEXT:    .cfi_def_cfa_offset 16
 ; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64-NEXT:    .cfi_offset ra, -8
+; RV64-NEXT:    fmv.x.w a0, fa0
 ; RV64-NEXT:    call __extendhfsf2@plt
-; RV64-NEXT:    fmv.w.x ft0, a0
-; RV64-NEXT:    fcvt.l.s a0, ft0, rtz
+; RV64-NEXT:    fcvt.l.s a0, fa0, rtz
 ; RV64-NEXT:    li a1, -1
 ; RV64-NEXT:    srli a1, a1, 32
 ; RV64-NEXT:    blt a0, a1, .LBB35_2
@@ -2564,12 +2641,7 @@ define i16 @stest_f64i16_mm(double %x) {
 ;
 ; RV32IFD-LABEL: stest_f64i16_mm:
 ; RV32IFD:       # %bb.0: # %entry
-; RV32IFD-NEXT:    addi sp, sp, -16
-; RV32IFD-NEXT:    .cfi_def_cfa_offset 16
-; RV32IFD-NEXT:    sw a0, 8(sp)
-; RV32IFD-NEXT:    sw a1, 12(sp)
-; RV32IFD-NEXT:    fld ft0, 8(sp)
-; RV32IFD-NEXT:    fcvt.w.d a0, ft0, rtz
+; RV32IFD-NEXT:    fcvt.w.d a0, fa0, rtz
 ; RV32IFD-NEXT:    lui a1, 8
 ; RV32IFD-NEXT:    addi a1, a1, -1
 ; RV32IFD-NEXT:    bge a0, a1, .LBB36_3
@@ -2577,7 +2649,6 @@ define i16 @stest_f64i16_mm(double %x) {
 ; RV32IFD-NEXT:    lui a1, 1048568
 ; RV32IFD-NEXT:    bge a1, a0, .LBB36_4
 ; RV32IFD-NEXT:  .LBB36_2: # %entry
-; RV32IFD-NEXT:    addi sp, sp, 16
 ; RV32IFD-NEXT:    ret
 ; RV32IFD-NEXT:  .LBB36_3: # %entry
 ; RV32IFD-NEXT:    mv a0, a1
@@ -2585,13 +2656,11 @@ define i16 @stest_f64i16_mm(double %x) {
 ; RV32IFD-NEXT:    blt a1, a0, .LBB36_2
 ; RV32IFD-NEXT:  .LBB36_4: # %entry
 ; RV32IFD-NEXT:    lui a0, 1048568
-; RV32IFD-NEXT:    addi sp, sp, 16
 ; RV32IFD-NEXT:    ret
 ;
 ; RV64IFD-LABEL: stest_f64i16_mm:
 ; RV64IFD:       # %bb.0: # %entry
-; RV64IFD-NEXT:    fmv.d.x ft0, a0
-; RV64IFD-NEXT:    fcvt.w.d a0, ft0, rtz
+; RV64IFD-NEXT:    fcvt.w.d a0, fa0, rtz
 ; RV64IFD-NEXT:    lui a1, 8
 ; RV64IFD-NEXT:    addiw a1, a1, -1
 ; RV64IFD-NEXT:    bge a0, a1, .LBB36_3
@@ -2652,25 +2721,18 @@ define i16 @utest_f64i16_mm(double %x) {
 ;
 ; RV32IFD-LABEL: utest_f64i16_mm:
 ; RV32IFD:       # %bb.0: # %entry
-; RV32IFD-NEXT:    addi sp, sp, -16
-; RV32IFD-NEXT:    .cfi_def_cfa_offset 16
-; RV32IFD-NEXT:    sw a0, 8(sp)
-; RV32IFD-NEXT:    sw a1, 12(sp)
-; RV32IFD-NEXT:    fld ft0, 8(sp)
-; RV32IFD-NEXT:    fcvt.wu.d a0, ft0, rtz
+; RV32IFD-NEXT:    fcvt.wu.d a0, fa0, rtz
 ; RV32IFD-NEXT:    lui a1, 16
 ; RV32IFD-NEXT:    addi a1, a1, -1
 ; RV32IFD-NEXT:    bltu a0, a1, .LBB37_2
 ; RV32IFD-NEXT:  # %bb.1: # %entry
 ; RV32IFD-NEXT:    mv a0, a1
 ; RV32IFD-NEXT:  .LBB37_2: # %entry
-; RV32IFD-NEXT:    addi sp, sp, 16
 ; RV32IFD-NEXT:    ret
 ;
 ; RV64IFD-LABEL: utest_f64i16_mm:
 ; RV64IFD:       # %bb.0: # %entry
-; RV64IFD-NEXT:    fmv.d.x ft0, a0
-; RV64IFD-NEXT:    fcvt.wu.d a0, ft0, rtz
+; RV64IFD-NEXT:    fcvt.wu.d a0, fa0, rtz
 ; RV64IFD-NEXT:    lui a1, 16
 ; RV64IFD-NEXT:    addiw a1, a1, -1
 ; RV64IFD-NEXT:    bltu a0, a1, .LBB37_2
@@ -2730,32 +2792,24 @@ define i16 @ustest_f64i16_mm(double %x) {
 ;
 ; RV32IFD-LABEL: ustest_f64i16_mm:
 ; RV32IFD:       # %bb.0: # %entry
-; RV32IFD-NEXT:    addi sp, sp, -16
-; RV32IFD-NEXT:    .cfi_def_cfa_offset 16
-; RV32IFD-NEXT:    sw a0, 8(sp)
-; RV32IFD-NEXT:    sw a1, 12(sp)
-; RV32IFD-NEXT:    fld ft0, 8(sp)
-; RV32IFD-NEXT:    fcvt.w.d a0, ft0, rtz
+; RV32IFD-NEXT:    fcvt.w.d a0, fa0, rtz
 ; RV32IFD-NEXT:    lui a1, 16
 ; RV32IFD-NEXT:    addi a1, a1, -1
 ; RV32IFD-NEXT:    bge a0, a1, .LBB38_3
 ; RV32IFD-NEXT:  # %bb.1: # %entry
 ; RV32IFD-NEXT:    blez a0, .LBB38_4
 ; RV32IFD-NEXT:  .LBB38_2: # %entry
-; RV32IFD-NEXT:    addi sp, sp, 16
 ; RV32IFD-NEXT:    ret
 ; RV32IFD-NEXT:  .LBB38_3: # %entry
 ; RV32IFD-NEXT:    mv a0, a1
 ; RV32IFD-NEXT:    bgtz a0, .LBB38_2
 ; RV32IFD-NEXT:  .LBB38_4: # %entry
 ; RV32IFD-NEXT:    li a0, 0
-; RV32IFD-NEXT:    addi sp, sp, 16
 ; RV32IFD-NEXT:    ret
 ;
 ; RV64IFD-LABEL: ustest_f64i16_mm:
 ; RV64IFD:       # %bb.0: # %entry
-; RV64IFD-NEXT:    fmv.d.x ft0, a0
-; RV64IFD-NEXT:    fcvt.w.d a0, ft0, rtz
+; RV64IFD-NEXT:    fcvt.w.d a0, fa0, rtz
 ; RV64IFD-NEXT:    lui a1, 16
 ; RV64IFD-NEXT:    addiw a1, a1, -1
 ; RV64IFD-NEXT:    bge a0, a1, .LBB38_3
@@ -2780,8 +2834,7 @@ entry:
 define i16 @stest_f32i16_mm(float %x) {
 ; RV32-LABEL: stest_f32i16_mm:
 ; RV32:       # %bb.0: # %entry
-; RV32-NEXT:    fmv.w.x ft0, a0
-; RV32-NEXT:    fcvt.w.s a0, ft0, rtz
+; RV32-NEXT:    fcvt.w.s a0, fa0, rtz
 ; RV32-NEXT:    lui a1, 8
 ; RV32-NEXT:    addi a1, a1, -1
 ; RV32-NEXT:    bge a0, a1, .LBB39_3
@@ -2800,8 +2853,7 @@ define i16 @stest_f32i16_mm(float %x) {
 ;
 ; RV64-LABEL: stest_f32i16_mm:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    fmv.w.x ft0, a0
-; RV64-NEXT:    fcvt.w.s a0, ft0, rtz
+; RV64-NEXT:    fcvt.w.s a0, fa0, rtz
 ; RV64-NEXT:    lui a1, 8
 ; RV64-NEXT:    addiw a1, a1, -1
 ; RV64-NEXT:    bge a0, a1, .LBB39_3
@@ -2828,8 +2880,7 @@ entry:
 define i16 @utest_f32i16_mm(float %x) {
 ; RV32-LABEL: utest_f32i16_mm:
 ; RV32:       # %bb.0: # %entry
-; RV32-NEXT:    fmv.w.x ft0, a0
-; RV32-NEXT:    fcvt.wu.s a0, ft0, rtz
+; RV32-NEXT:    fcvt.wu.s a0, fa0, rtz
 ; RV32-NEXT:    lui a1, 16
 ; RV32-NEXT:    addi a1, a1, -1
 ; RV32-NEXT:    bltu a0, a1, .LBB40_2
@@ -2840,8 +2891,7 @@ define i16 @utest_f32i16_mm(float %x) {
 ;
 ; RV64-LABEL: utest_f32i16_mm:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    fmv.w.x ft0, a0
-; RV64-NEXT:    fcvt.wu.s a0, ft0, rtz
+; RV64-NEXT:    fcvt.wu.s a0, fa0, rtz
 ; RV64-NEXT:    lui a1, 16
 ; RV64-NEXT:    addiw a1, a1, -1
 ; RV64-NEXT:    bltu a0, a1, .LBB40_2
@@ -2859,8 +2909,7 @@ entry:
 define i16 @ustest_f32i16_mm(float %x) {
 ; RV32-LABEL: ustest_f32i16_mm:
 ; RV32:       # %bb.0: # %entry
-; RV32-NEXT:    fmv.w.x ft0, a0
-; RV32-NEXT:    fcvt.w.s a0, ft0, rtz
+; RV32-NEXT:    fcvt.w.s a0, fa0, rtz
 ; RV32-NEXT:    lui a1, 16
 ; RV32-NEXT:    addi a1, a1, -1
 ; RV32-NEXT:    bge a0, a1, .LBB41_3
@@ -2877,8 +2926,7 @@ define i16 @ustest_f32i16_mm(float %x) {
 ;
 ; RV64-LABEL: ustest_f32i16_mm:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    fmv.w.x ft0, a0
-; RV64-NEXT:    fcvt.w.s a0, ft0, rtz
+; RV64-NEXT:    fcvt.w.s a0, fa0, rtz
 ; RV64-NEXT:    lui a1, 16
 ; RV64-NEXT:    addiw a1, a1, -1
 ; RV64-NEXT:    bge a0, a1, .LBB41_3
@@ -2907,9 +2955,9 @@ define i16 @stest_f16i16_mm(half %x) {
 ; RV32-NEXT:    .cfi_def_cfa_offset 16
 ; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    fmv.x.w a0, fa0
 ; RV32-NEXT:    call __extendhfsf2@plt
-; RV32-NEXT:    fmv.w.x ft0, a0
-; RV32-NEXT:    fcvt.w.s a0, ft0, rtz
+; RV32-NEXT:    fcvt.w.s a0, fa0, rtz
 ; RV32-NEXT:    lui a1, 8
 ; RV32-NEXT:    addi a1, a1, -1
 ; RV32-NEXT:    blt a0, a1, .LBB42_2
@@ -2931,9 +2979,9 @@ define i16 @stest_f16i16_mm(half %x) {
 ; RV64-NEXT:    .cfi_def_cfa_offset 16
 ; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64-NEXT:    .cfi_offset ra, -8
+; RV64-NEXT:    fmv.x.w a0, fa0
 ; RV64-NEXT:    call __extendhfsf2@plt
-; RV64-NEXT:    fmv.w.x ft0, a0
-; RV64-NEXT:    fcvt.l.s a0, ft0, rtz
+; RV64-NEXT:    fcvt.l.s a0, fa0, rtz
 ; RV64-NEXT:    lui a1, 8
 ; RV64-NEXT:    addiw a1, a1, -1
 ; RV64-NEXT:    blt a0, a1, .LBB42_2
@@ -2963,9 +3011,9 @@ define i16 @utesth_f16i16_mm(half %x) {
 ; RV32-NEXT:    .cfi_def_cfa_offset 16
 ; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    fmv.x.w a0, fa0
 ; RV32-NEXT:    call __extendhfsf2@plt
-; RV32-NEXT:    fmv.w.x ft0, a0
-; RV32-NEXT:    fcvt.wu.s a0, ft0, rtz
+; RV32-NEXT:    fcvt.wu.s a0, fa0, rtz
 ; RV32-NEXT:    lui a1, 16
 ; RV32-NEXT:    addi a1, a1, -1
 ; RV32-NEXT:    bltu a0, a1, .LBB43_2
@@ -2982,9 +3030,9 @@ define i16 @utesth_f16i16_mm(half %x) {
 ; RV64-NEXT:    .cfi_def_cfa_offset 16
 ; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64-NEXT:    .cfi_offset ra, -8
+; RV64-NEXT:    fmv.x.w a0, fa0
 ; RV64-NEXT:    call __extendhfsf2@plt
-; RV64-NEXT:    fmv.w.x ft0, a0
-; RV64-NEXT:    fcvt.lu.s a0, ft0, rtz
+; RV64-NEXT:    fcvt.lu.s a0, fa0, rtz
 ; RV64-NEXT:    sext.w a0, a0
 ; RV64-NEXT:    lui a1, 16
 ; RV64-NEXT:    addiw a1, a1, -1
@@ -3009,9 +3057,9 @@ define i16 @ustest_f16i16_mm(half %x) {
 ; RV32-NEXT:    .cfi_def_cfa_offset 16
 ; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    fmv.x.w a0, fa0
 ; RV32-NEXT:    call __extendhfsf2@plt
-; RV32-NEXT:    fmv.w.x ft0, a0
-; RV32-NEXT:    fcvt.w.s a0, ft0, rtz
+; RV32-NEXT:    fcvt.w.s a0, fa0, rtz
 ; RV32-NEXT:    lui a1, 16
 ; RV32-NEXT:    addi a1, a1, -1
 ; RV32-NEXT:    blt a0, a1, .LBB44_2
@@ -3032,9 +3080,9 @@ define i16 @ustest_f16i16_mm(half %x) {
 ; RV64-NEXT:    .cfi_def_cfa_offset 16
 ; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64-NEXT:    .cfi_offset ra, -8
+; RV64-NEXT:    fmv.x.w a0, fa0
 ; RV64-NEXT:    call __extendhfsf2@plt
-; RV64-NEXT:    fmv.w.x ft0, a0
-; RV64-NEXT:    fcvt.l.s a0, ft0, rtz
+; RV64-NEXT:    fcvt.l.s a0, fa0, rtz
 ; RV64-NEXT:    lui a1, 16
 ; RV64-NEXT:    addiw a1, a1, -1
 ; RV64-NEXT:    blt a0, a1, .LBB44_2
@@ -3059,123 +3107,123 @@ entry:
 ; i64 saturate
 
 define i64 @stest_f64i64_mm(double %x) {
-; RV32-LABEL: stest_f64i64_mm:
-; RV32:       # %bb.0: # %entry
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
-; RV32-NEXT:    .cfi_offset ra, -4
-; RV32-NEXT:    mv a2, a1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    call __fixdfti@plt
-; RV32-NEXT:    lw a5, 8(sp)
-; RV32-NEXT:    lw a3, 20(sp)
-; RV32-NEXT:    lw a1, 12(sp)
-; RV32-NEXT:    li a2, -1
-; RV32-NEXT:    mv a7, a5
-; RV32-NEXT:    bltz a3, .LBB45_2
-; RV32-NEXT:  # %bb.1: # %entry
-; RV32-NEXT:    li a7, -1
-; RV32-NEXT:  .LBB45_2: # %entry
-; RV32-NEXT:    lui a4, 524288
-; RV32-NEXT:    addi a6, a4, -1
-; RV32-NEXT:    mv t0, a5
-; RV32-NEXT:    bgeu a1, a6, .LBB45_19
-; RV32-NEXT:  # %bb.3: # %entry
-; RV32-NEXT:    lw a0, 16(sp)
-; RV32-NEXT:    bne a1, a6, .LBB45_20
-; RV32-NEXT:  .LBB45_4: # %entry
-; RV32-NEXT:    or t0, a0, a3
-; RV32-NEXT:    bnez t0, .LBB45_21
-; RV32-NEXT:  .LBB45_5: # %entry
-; RV32-NEXT:    mv a7, a1
-; RV32-NEXT:    bgez a3, .LBB45_22
-; RV32-NEXT:  .LBB45_6: # %entry
-; RV32-NEXT:    bgeu a1, a6, .LBB45_23
-; RV32-NEXT:  .LBB45_7: # %entry
-; RV32-NEXT:    bnez t0, .LBB45_24
-; RV32-NEXT:  .LBB45_8: # %entry
-; RV32-NEXT:    li a6, 0
-; RV32-NEXT:    bnez a3, .LBB45_25
-; RV32-NEXT:  .LBB45_9: # %entry
-; RV32-NEXT:    bgez a3, .LBB45_26
-; RV32-NEXT:  .LBB45_10: # %entry
-; RV32-NEXT:    mv a7, a5
-; RV32-NEXT:    bgeu a4, a1, .LBB45_27
-; RV32-NEXT:  .LBB45_11: # %entry
-; RV32-NEXT:    mv a0, a5
-; RV32-NEXT:    bne a1, a4, .LBB45_28
-; RV32-NEXT:  .LBB45_12: # %entry
-; RV32-NEXT:    bltz a3, .LBB45_29
-; RV32-NEXT:  .LBB45_13: # %entry
-; RV32-NEXT:    and a6, a6, a3
-; RV32-NEXT:    bne a6, a2, .LBB45_30
-; RV32-NEXT:  .LBB45_14: # %entry
-; RV32-NEXT:    mv a5, a1
-; RV32-NEXT:    bltz a3, .LBB45_31
-; RV32-NEXT:  .LBB45_15: # %entry
-; RV32-NEXT:    bgeu a4, a1, .LBB45_32
-; RV32-NEXT:  .LBB45_16: # %entry
-; RV32-NEXT:    beq a6, a2, .LBB45_18
-; RV32-NEXT:  .LBB45_17: # %entry
-; RV32-NEXT:    mv a1, a5
-; RV32-NEXT:  .LBB45_18: # %entry
-; RV32-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
-; RV32-NEXT:    addi sp, sp, 32
-; RV32-NEXT:    ret
-; RV32-NEXT:  .LBB45_19: # %entry
-; RV32-NEXT:    li t0, -1
-; RV32-NEXT:    lw a0, 16(sp)
-; RV32-NEXT:    beq a1, a6, .LBB45_4
-; RV32-NEXT:  .LBB45_20: # %entry
-; RV32-NEXT:    mv a5, t0
-; RV32-NEXT:    or t0, a0, a3
-; RV32-NEXT:    beqz t0, .LBB45_5
-; RV32-NEXT:  .LBB45_21: # %entry
-; RV32-NEXT:    mv a5, a7
-; RV32-NEXT:    mv a7, a1
-; RV32-NEXT:    bltz a3, .LBB45_6
-; RV32-NEXT:  .LBB45_22: # %entry
-; RV32-NEXT:    mv a7, a6
-; RV32-NEXT:    bltu a1, a6, .LBB45_7
-; RV32-NEXT:  .LBB45_23: # %entry
-; RV32-NEXT:    mv a1, a6
-; RV32-NEXT:    beqz t0, .LBB45_8
-; RV32-NEXT:  .LBB45_24: # %entry
-; RV32-NEXT:    mv a1, a7
-; RV32-NEXT:    li a6, 0
-; RV32-NEXT:    beqz a3, .LBB45_9
-; RV32-NEXT:  .LBB45_25: # %entry
-; RV32-NEXT:    srai a6, a3, 31
-; RV32-NEXT:    and a6, a6, a0
-; RV32-NEXT:    bltz a3, .LBB45_10
-; RV32-NEXT:  .LBB45_26: # %entry
-; RV32-NEXT:    li a3, 0
-; RV32-NEXT:    mv a7, a5
-; RV32-NEXT:    bltu a4, a1, .LBB45_11
-; RV32-NEXT:  .LBB45_27: # %entry
-; RV32-NEXT:    li a7, 0
-; RV32-NEXT:    mv a0, a5
-; RV32-NEXT:    beq a1, a4, .LBB45_12
-; RV32-NEXT:  .LBB45_28: # %entry
-; RV32-NEXT:    mv a0, a7
-; RV32-NEXT:    bgez a3, .LBB45_13
-; RV32-NEXT:  .LBB45_29: # %entry
-; RV32-NEXT:    li a5, 0
-; RV32-NEXT:    and a6, a6, a3
-; RV32-NEXT:    beq a6, a2, .LBB45_14
-; RV32-NEXT:  .LBB45_30: # %entry
-; RV32-NEXT:    mv a0, a5
-; RV32-NEXT:    mv a5, a1
-; RV32-NEXT:    bgez a3, .LBB45_15
-; RV32-NEXT:  .LBB45_31: # %entry
-; RV32-NEXT:    lui a5, 524288
-; RV32-NEXT:    bltu a4, a1, .LBB45_16
-; RV32-NEXT:  .LBB45_32: # %entry
-; RV32-NEXT:    lui a1, 524288
-; RV32-NEXT:    bne a6, a2, .LBB45_17
-; RV32-NEXT:    j .LBB45_18
+; RV32IF-LABEL: stest_f64i64_mm:
+; RV32IF:       # %bb.0: # %entry
+; RV32IF-NEXT:    addi sp, sp, -32
+; RV32IF-NEXT:    .cfi_def_cfa_offset 32
+; RV32IF-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    .cfi_offset ra, -4
+; RV32IF-NEXT:    mv a2, a1
+; RV32IF-NEXT:    mv a1, a0
+; RV32IF-NEXT:    addi a0, sp, 8
+; RV32IF-NEXT:    call __fixdfti@plt
+; RV32IF-NEXT:    lw a5, 8(sp)
+; RV32IF-NEXT:    lw a3, 20(sp)
+; RV32IF-NEXT:    lw a1, 12(sp)
+; RV32IF-NEXT:    li a2, -1
+; RV32IF-NEXT:    mv a7, a5
+; RV32IF-NEXT:    bltz a3, .LBB45_2
+; RV32IF-NEXT:  # %bb.1: # %entry
+; RV32IF-NEXT:    li a7, -1
+; RV32IF-NEXT:  .LBB45_2: # %entry
+; RV32IF-NEXT:    lui a4, 524288
+; RV32IF-NEXT:    addi a6, a4, -1
+; RV32IF-NEXT:    mv t0, a5
+; RV32IF-NEXT:    bgeu a1, a6, .LBB45_19
+; RV32IF-NEXT:  # %bb.3: # %entry
+; RV32IF-NEXT:    lw a0, 16(sp)
+; RV32IF-NEXT:    bne a1, a6, .LBB45_20
+; RV32IF-NEXT:  .LBB45_4: # %entry
+; RV32IF-NEXT:    or t0, a0, a3
+; RV32IF-NEXT:    bnez t0, .LBB45_21
+; RV32IF-NEXT:  .LBB45_5: # %entry
+; RV32IF-NEXT:    mv a7, a1
+; RV32IF-NEXT:    bgez a3, .LBB45_22
+; RV32IF-NEXT:  .LBB45_6: # %entry
+; RV32IF-NEXT:    bgeu a1, a6, .LBB45_23
+; RV32IF-NEXT:  .LBB45_7: # %entry
+; RV32IF-NEXT:    bnez t0, .LBB45_24
+; RV32IF-NEXT:  .LBB45_8: # %entry
+; RV32IF-NEXT:    li a6, 0
+; RV32IF-NEXT:    bnez a3, .LBB45_25
+; RV32IF-NEXT:  .LBB45_9: # %entry
+; RV32IF-NEXT:    bgez a3, .LBB45_26
+; RV32IF-NEXT:  .LBB45_10: # %entry
+; RV32IF-NEXT:    mv a7, a5
+; RV32IF-NEXT:    bgeu a4, a1, .LBB45_27
+; RV32IF-NEXT:  .LBB45_11: # %entry
+; RV32IF-NEXT:    mv a0, a5
+; RV32IF-NEXT:    bne a1, a4, .LBB45_28
+; RV32IF-NEXT:  .LBB45_12: # %entry
+; RV32IF-NEXT:    bltz a3, .LBB45_29
+; RV32IF-NEXT:  .LBB45_13: # %entry
+; RV32IF-NEXT:    and a6, a6, a3
+; RV32IF-NEXT:    bne a6, a2, .LBB45_30
+; RV32IF-NEXT:  .LBB45_14: # %entry
+; RV32IF-NEXT:    mv a5, a1
+; RV32IF-NEXT:    bltz a3, .LBB45_31
+; RV32IF-NEXT:  .LBB45_15: # %entry
+; RV32IF-NEXT:    bgeu a4, a1, .LBB45_32
+; RV32IF-NEXT:  .LBB45_16: # %entry
+; RV32IF-NEXT:    beq a6, a2, .LBB45_18
+; RV32IF-NEXT:  .LBB45_17: # %entry
+; RV32IF-NEXT:    mv a1, a5
+; RV32IF-NEXT:  .LBB45_18: # %entry
+; RV32IF-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    addi sp, sp, 32
+; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB45_19: # %entry
+; RV32IF-NEXT:    li t0, -1
+; RV32IF-NEXT:    lw a0, 16(sp)
+; RV32IF-NEXT:    beq a1, a6, .LBB45_4
+; RV32IF-NEXT:  .LBB45_20: # %entry
+; RV32IF-NEXT:    mv a5, t0
+; RV32IF-NEXT:    or t0, a0, a3
+; RV32IF-NEXT:    beqz t0, .LBB45_5
+; RV32IF-NEXT:  .LBB45_21: # %entry
+; RV32IF-NEXT:    mv a5, a7
+; RV32IF-NEXT:    mv a7, a1
+; RV32IF-NEXT:    bltz a3, .LBB45_6
+; RV32IF-NEXT:  .LBB45_22: # %entry
+; RV32IF-NEXT:    mv a7, a6
+; RV32IF-NEXT:    bltu a1, a6, .LBB45_7
+; RV32IF-NEXT:  .LBB45_23: # %entry
+; RV32IF-NEXT:    mv a1, a6
+; RV32IF-NEXT:    beqz t0, .LBB45_8
+; RV32IF-NEXT:  .LBB45_24: # %entry
+; RV32IF-NEXT:    mv a1, a7
+; RV32IF-NEXT:    li a6, 0
+; RV32IF-NEXT:    beqz a3, .LBB45_9
+; RV32IF-NEXT:  .LBB45_25: # %entry
+; RV32IF-NEXT:    srai a6, a3, 31
+; RV32IF-NEXT:    and a6, a6, a0
+; RV32IF-NEXT:    bltz a3, .LBB45_10
+; RV32IF-NEXT:  .LBB45_26: # %entry
+; RV32IF-NEXT:    li a3, 0
+; RV32IF-NEXT:    mv a7, a5
+; RV32IF-NEXT:    bltu a4, a1, .LBB45_11
+; RV32IF-NEXT:  .LBB45_27: # %entry
+; RV32IF-NEXT:    li a7, 0
+; RV32IF-NEXT:    mv a0, a5
+; RV32IF-NEXT:    beq a1, a4, .LBB45_12
+; RV32IF-NEXT:  .LBB45_28: # %entry
+; RV32IF-NEXT:    mv a0, a7
+; RV32IF-NEXT:    bgez a3, .LBB45_13
+; RV32IF-NEXT:  .LBB45_29: # %entry
+; RV32IF-NEXT:    li a5, 0
+; RV32IF-NEXT:    and a6, a6, a3
+; RV32IF-NEXT:    beq a6, a2, .LBB45_14
+; RV32IF-NEXT:  .LBB45_30: # %entry
+; RV32IF-NEXT:    mv a0, a5
+; RV32IF-NEXT:    mv a5, a1
+; RV32IF-NEXT:    bgez a3, .LBB45_15
+; RV32IF-NEXT:  .LBB45_31: # %entry
+; RV32IF-NEXT:    lui a5, 524288
+; RV32IF-NEXT:    bltu a4, a1, .LBB45_16
+; RV32IF-NEXT:  .LBB45_32: # %entry
+; RV32IF-NEXT:    lui a1, 524288
+; RV32IF-NEXT:    bne a6, a2, .LBB45_17
+; RV32IF-NEXT:    j .LBB45_18
 ;
 ; RV64IF-LABEL: stest_f64i64_mm:
 ; RV64IF:       # %bb.0: # %entry
@@ -3228,13 +3276,128 @@ define i64 @stest_f64i64_mm(double %x) {
 ; RV64IF-NEXT:    bne a1, a2, .LBB45_8
 ; RV64IF-NEXT:    j .LBB45_9
 ;
+; RV32IFD-LABEL: stest_f64i64_mm:
+; RV32IFD:       # %bb.0: # %entry
+; RV32IFD-NEXT:    addi sp, sp, -32
+; RV32IFD-NEXT:    .cfi_def_cfa_offset 32
+; RV32IFD-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    .cfi_offset ra, -4
+; RV32IFD-NEXT:    addi a0, sp, 8
+; RV32IFD-NEXT:    call __fixdfti@plt
+; RV32IFD-NEXT:    lw a5, 8(sp)
+; RV32IFD-NEXT:    lw a3, 20(sp)
+; RV32IFD-NEXT:    lw a1, 12(sp)
+; RV32IFD-NEXT:    li a2, -1
+; RV32IFD-NEXT:    mv a7, a5
+; RV32IFD-NEXT:    bltz a3, .LBB45_2
+; RV32IFD-NEXT:  # %bb.1: # %entry
+; RV32IFD-NEXT:    li a7, -1
+; RV32IFD-NEXT:  .LBB45_2: # %entry
+; RV32IFD-NEXT:    lui a4, 524288
+; RV32IFD-NEXT:    addi a6, a4, -1
+; RV32IFD-NEXT:    mv t0, a5
+; RV32IFD-NEXT:    bgeu a1, a6, .LBB45_19
+; RV32IFD-NEXT:  # %bb.3: # %entry
+; RV32IFD-NEXT:    lw a0, 16(sp)
+; RV32IFD-NEXT:    bne a1, a6, .LBB45_20
+; RV32IFD-NEXT:  .LBB45_4: # %entry
+; RV32IFD-NEXT:    or t0, a0, a3
+; RV32IFD-NEXT:    bnez t0, .LBB45_21
+; RV32IFD-NEXT:  .LBB45_5: # %entry
+; RV32IFD-NEXT:    mv a7, a1
+; RV32IFD-NEXT:    bgez a3, .LBB45_22
+; RV32IFD-NEXT:  .LBB45_6: # %entry
+; RV32IFD-NEXT:    bgeu a1, a6, .LBB45_23
+; RV32IFD-NEXT:  .LBB45_7: # %entry
+; RV32IFD-NEXT:    bnez t0, .LBB45_24
+; RV32IFD-NEXT:  .LBB45_8: # %entry
+; RV32IFD-NEXT:    li a6, 0
+; RV32IFD-NEXT:    bnez a3, .LBB45_25
+; RV32IFD-NEXT:  .LBB45_9: # %entry
+; RV32IFD-NEXT:    bgez a3, .LBB45_26
+; RV32IFD-NEXT:  .LBB45_10: # %entry
+; RV32IFD-NEXT:    mv a7, a5
+; RV32IFD-NEXT:    bgeu a4, a1, .LBB45_27
+; RV32IFD-NEXT:  .LBB45_11: # %entry
+; RV32IFD-NEXT:    mv a0, a5
+; RV32IFD-NEXT:    bne a1, a4, .LBB45_28
+; RV32IFD-NEXT:  .LBB45_12: # %entry
+; RV32IFD-NEXT:    bltz a3, .LBB45_29
+; RV32IFD-NEXT:  .LBB45_13: # %entry
+; RV32IFD-NEXT:    and a6, a6, a3
+; RV32IFD-NEXT:    bne a6, a2, .LBB45_30
+; RV32IFD-NEXT:  .LBB45_14: # %entry
+; RV32IFD-NEXT:    mv a5, a1
+; RV32IFD-NEXT:    bltz a3, .LBB45_31
+; RV32IFD-NEXT:  .LBB45_15: # %entry
+; RV32IFD-NEXT:    bgeu a4, a1, .LBB45_32
+; RV32IFD-NEXT:  .LBB45_16: # %entry
+; RV32IFD-NEXT:    beq a6, a2, .LBB45_18
+; RV32IFD-NEXT:  .LBB45_17: # %entry
+; RV32IFD-NEXT:    mv a1, a5
+; RV32IFD-NEXT:  .LBB45_18: # %entry
+; RV32IFD-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    addi sp, sp, 32
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB45_19: # %entry
+; RV32IFD-NEXT:    li t0, -1
+; RV32IFD-NEXT:    lw a0, 16(sp)
+; RV32IFD-NEXT:    beq a1, a6, .LBB45_4
+; RV32IFD-NEXT:  .LBB45_20: # %entry
+; RV32IFD-NEXT:    mv a5, t0
+; RV32IFD-NEXT:    or t0, a0, a3
+; RV32IFD-NEXT:    beqz t0, .LBB45_5
+; RV32IFD-NEXT:  .LBB45_21: # %entry
+; RV32IFD-NEXT:    mv a5, a7
+; RV32IFD-NEXT:    mv a7, a1
+; RV32IFD-NEXT:    bltz a3, .LBB45_6
+; RV32IFD-NEXT:  .LBB45_22: # %entry
+; RV32IFD-NEXT:    mv a7, a6
+; RV32IFD-NEXT:    bltu a1, a6, .LBB45_7
+; RV32IFD-NEXT:  .LBB45_23: # %entry
+; RV32IFD-NEXT:    mv a1, a6
+; RV32IFD-NEXT:    beqz t0, .LBB45_8
+; RV32IFD-NEXT:  .LBB45_24: # %entry
+; RV32IFD-NEXT:    mv a1, a7
+; RV32IFD-NEXT:    li a6, 0
+; RV32IFD-NEXT:    beqz a3, .LBB45_9
+; RV32IFD-NEXT:  .LBB45_25: # %entry
+; RV32IFD-NEXT:    srai a6, a3, 31
+; RV32IFD-NEXT:    and a6, a6, a0
+; RV32IFD-NEXT:    bltz a3, .LBB45_10
+; RV32IFD-NEXT:  .LBB45_26: # %entry
+; RV32IFD-NEXT:    li a3, 0
+; RV32IFD-NEXT:    mv a7, a5
+; RV32IFD-NEXT:    bltu a4, a1, .LBB45_11
+; RV32IFD-NEXT:  .LBB45_27: # %entry
+; RV32IFD-NEXT:    li a7, 0
+; RV32IFD-NEXT:    mv a0, a5
+; RV32IFD-NEXT:    beq a1, a4, .LBB45_12
+; RV32IFD-NEXT:  .LBB45_28: # %entry
+; RV32IFD-NEXT:    mv a0, a7
+; RV32IFD-NEXT:    bgez a3, .LBB45_13
+; RV32IFD-NEXT:  .LBB45_29: # %entry
+; RV32IFD-NEXT:    li a5, 0
+; RV32IFD-NEXT:    and a6, a6, a3
+; RV32IFD-NEXT:    beq a6, a2, .LBB45_14
+; RV32IFD-NEXT:  .LBB45_30: # %entry
+; RV32IFD-NEXT:    mv a0, a5
+; RV32IFD-NEXT:    mv a5, a1
+; RV32IFD-NEXT:    bgez a3, .LBB45_15
+; RV32IFD-NEXT:  .LBB45_31: # %entry
+; RV32IFD-NEXT:    lui a5, 524288
+; RV32IFD-NEXT:    bltu a4, a1, .LBB45_16
+; RV32IFD-NEXT:  .LBB45_32: # %entry
+; RV32IFD-NEXT:    lui a1, 524288
+; RV32IFD-NEXT:    bne a6, a2, .LBB45_17
+; RV32IFD-NEXT:    j .LBB45_18
+;
 ; RV64IFD-LABEL: stest_f64i64_mm:
 ; RV64IFD:       # %bb.0: # %entry
-; RV64IFD-NEXT:    fmv.d.x ft0, a0
-; RV64IFD-NEXT:    feq.d a0, ft0, ft0
+; RV64IFD-NEXT:    feq.d a0, fa0, fa0
 ; RV64IFD-NEXT:    beqz a0, .LBB45_2
 ; RV64IFD-NEXT:  # %bb.1:
-; RV64IFD-NEXT:    fcvt.l.d a0, ft0, rtz
+; RV64IFD-NEXT:    fcvt.l.d a0, fa0, rtz
 ; RV64IFD-NEXT:  .LBB45_2: # %entry
 ; RV64IFD-NEXT:    ret
 entry:
@@ -3246,53 +3409,53 @@ entry:
 }
 
 define i64 @utest_f64i64_mm(double %x) {
-; RV32-LABEL: utest_f64i64_mm:
-; RV32:       # %bb.0: # %entry
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
-; RV32-NEXT:    .cfi_offset ra, -4
-; RV32-NEXT:    mv a2, a1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    call __fixunsdfti@plt
-; RV32-NEXT:    lw a0, 20(sp)
-; RV32-NEXT:    lw a3, 16(sp)
-; RV32-NEXT:    li a1, 0
-; RV32-NEXT:    beqz a0, .LBB46_3
-; RV32-NEXT:  # %bb.1: # %entry
-; RV32-NEXT:    mv a2, a1
-; RV32-NEXT:    beq a2, a1, .LBB46_4
-; RV32-NEXT:  .LBB46_2:
-; RV32-NEXT:    lw a4, 8(sp)
-; RV32-NEXT:    j .LBB46_5
-; RV32-NEXT:  .LBB46_3:
-; RV32-NEXT:    seqz a2, a3
-; RV32-NEXT:    bne a2, a1, .LBB46_2
-; RV32-NEXT:  .LBB46_4: # %entry
-; RV32-NEXT:    mv a4, a1
-; RV32-NEXT:  .LBB46_5: # %entry
-; RV32-NEXT:    xori a3, a3, 1
-; RV32-NEXT:    or a3, a3, a0
-; RV32-NEXT:    mv a0, a1
-; RV32-NEXT:    beq a3, a1, .LBB46_7
-; RV32-NEXT:  # %bb.6: # %entry
-; RV32-NEXT:    mv a0, a4
-; RV32-NEXT:  .LBB46_7: # %entry
-; RV32-NEXT:    bne a2, a1, .LBB46_9
-; RV32-NEXT:  # %bb.8: # %entry
-; RV32-NEXT:    mv a2, a1
-; RV32-NEXT:    bne a3, a1, .LBB46_10
-; RV32-NEXT:    j .LBB46_11
-; RV32-NEXT:  .LBB46_9:
-; RV32-NEXT:    lw a2, 12(sp)
-; RV32-NEXT:    beq a3, a1, .LBB46_11
-; RV32-NEXT:  .LBB46_10: # %entry
-; RV32-NEXT:    mv a1, a2
-; RV32-NEXT:  .LBB46_11: # %entry
-; RV32-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
-; RV32-NEXT:    addi sp, sp, 32
-; RV32-NEXT:    ret
+; RV32IF-LABEL: utest_f64i64_mm:
+; RV32IF:       # %bb.0: # %entry
+; RV32IF-NEXT:    addi sp, sp, -32
+; RV32IF-NEXT:    .cfi_def_cfa_offset 32
+; RV32IF-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    .cfi_offset ra, -4
+; RV32IF-NEXT:    mv a2, a1
+; RV32IF-NEXT:    mv a1, a0
+; RV32IF-NEXT:    addi a0, sp, 8
+; RV32IF-NEXT:    call __fixunsdfti@plt
+; RV32IF-NEXT:    lw a0, 20(sp)
+; RV32IF-NEXT:    lw a3, 16(sp)
+; RV32IF-NEXT:    li a1, 0
+; RV32IF-NEXT:    beqz a0, .LBB46_3
+; RV32IF-NEXT:  # %bb.1: # %entry
+; RV32IF-NEXT:    mv a2, a1
+; RV32IF-NEXT:    beq a2, a1, .LBB46_4
+; RV32IF-NEXT:  .LBB46_2:
+; RV32IF-NEXT:    lw a4, 8(sp)
+; RV32IF-NEXT:    j .LBB46_5
+; RV32IF-NEXT:  .LBB46_3:
+; RV32IF-NEXT:    seqz a2, a3
+; RV32IF-NEXT:    bne a2, a1, .LBB46_2
+; RV32IF-NEXT:  .LBB46_4: # %entry
+; RV32IF-NEXT:    mv a4, a1
+; RV32IF-NEXT:  .LBB46_5: # %entry
+; RV32IF-NEXT:    xori a3, a3, 1
+; RV32IF-NEXT:    or a3, a3, a0
+; RV32IF-NEXT:    mv a0, a1
+; RV32IF-NEXT:    beq a3, a1, .LBB46_7
+; RV32IF-NEXT:  # %bb.6: # %entry
+; RV32IF-NEXT:    mv a0, a4
+; RV32IF-NEXT:  .LBB46_7: # %entry
+; RV32IF-NEXT:    bne a2, a1, .LBB46_9
+; RV32IF-NEXT:  # %bb.8: # %entry
+; RV32IF-NEXT:    mv a2, a1
+; RV32IF-NEXT:    bne a3, a1, .LBB46_10
+; RV32IF-NEXT:    j .LBB46_11
+; RV32IF-NEXT:  .LBB46_9:
+; RV32IF-NEXT:    lw a2, 12(sp)
+; RV32IF-NEXT:    beq a3, a1, .LBB46_11
+; RV32IF-NEXT:  .LBB46_10: # %entry
+; RV32IF-NEXT:    mv a1, a2
+; RV32IF-NEXT:  .LBB46_11: # %entry
+; RV32IF-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    addi sp, sp, 32
+; RV32IF-NEXT:    ret
 ;
 ; RV64-LABEL: utest_f64i64_mm:
 ; RV64:       # %bb.0: # %entry
@@ -3315,6 +3478,52 @@ define i64 @utest_f64i64_mm(double %x) {
 ; RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
+;
+; RV32IFD-LABEL: utest_f64i64_mm:
+; RV32IFD:       # %bb.0: # %entry
+; RV32IFD-NEXT:    addi sp, sp, -32
+; RV32IFD-NEXT:    .cfi_def_cfa_offset 32
+; RV32IFD-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    .cfi_offset ra, -4
+; RV32IFD-NEXT:    addi a0, sp, 8
+; RV32IFD-NEXT:    call __fixunsdfti@plt
+; RV32IFD-NEXT:    lw a0, 20(sp)
+; RV32IFD-NEXT:    lw a3, 16(sp)
+; RV32IFD-NEXT:    li a1, 0
+; RV32IFD-NEXT:    beqz a0, .LBB46_3
+; RV32IFD-NEXT:  # %bb.1: # %entry
+; RV32IFD-NEXT:    mv a2, a1
+; RV32IFD-NEXT:    beq a2, a1, .LBB46_4
+; RV32IFD-NEXT:  .LBB46_2:
+; RV32IFD-NEXT:    lw a4, 8(sp)
+; RV32IFD-NEXT:    j .LBB46_5
+; RV32IFD-NEXT:  .LBB46_3:
+; RV32IFD-NEXT:    seqz a2, a3
+; RV32IFD-NEXT:    bne a2, a1, .LBB46_2
+; RV32IFD-NEXT:  .LBB46_4: # %entry
+; RV32IFD-NEXT:    mv a4, a1
+; RV32IFD-NEXT:  .LBB46_5: # %entry
+; RV32IFD-NEXT:    xori a3, a3, 1
+; RV32IFD-NEXT:    or a3, a3, a0
+; RV32IFD-NEXT:    mv a0, a1
+; RV32IFD-NEXT:    beq a3, a1, .LBB46_7
+; RV32IFD-NEXT:  # %bb.6: # %entry
+; RV32IFD-NEXT:    mv a0, a4
+; RV32IFD-NEXT:  .LBB46_7: # %entry
+; RV32IFD-NEXT:    bne a2, a1, .LBB46_9
+; RV32IFD-NEXT:  # %bb.8: # %entry
+; RV32IFD-NEXT:    mv a2, a1
+; RV32IFD-NEXT:    bne a3, a1, .LBB46_10
+; RV32IFD-NEXT:    j .LBB46_11
+; RV32IFD-NEXT:  .LBB46_9:
+; RV32IFD-NEXT:    lw a2, 12(sp)
+; RV32IFD-NEXT:    beq a3, a1, .LBB46_11
+; RV32IFD-NEXT:  .LBB46_10: # %entry
+; RV32IFD-NEXT:    mv a1, a2
+; RV32IFD-NEXT:  .LBB46_11: # %entry
+; RV32IFD-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    addi sp, sp, 32
+; RV32IFD-NEXT:    ret
 entry:
   %conv = fptoui double %x to i128
   %spec.store.select = call i128 @llvm.umin.i128(i128 %conv, i128 18446744073709551616)
@@ -3323,117 +3532,117 @@ entry:
 }
 
 define i64 @ustest_f64i64_mm(double %x) {
-; RV32-LABEL: ustest_f64i64_mm:
-; RV32:       # %bb.0: # %entry
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
-; RV32-NEXT:    .cfi_offset ra, -4
-; RV32-NEXT:    mv a2, a1
-; RV32-NEXT:    mv a1, a0
-; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    call __fixdfti@plt
-; RV32-NEXT:    lw a0, 16(sp)
-; RV32-NEXT:    lw a2, 20(sp)
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    mv a4, a0
-; RV32-NEXT:    bgez a2, .LBB47_5
-; RV32-NEXT:  # %bb.1: # %entry
-; RV32-NEXT:    mv a3, a0
-; RV32-NEXT:    bgeu a0, a1, .LBB47_6
-; RV32-NEXT:  .LBB47_2: # %entry
-; RV32-NEXT:    beqz a2, .LBB47_7
-; RV32-NEXT:  .LBB47_3: # %entry
-; RV32-NEXT:    slti a1, a2, 0
-; RV32-NEXT:    mv a3, a4
-; RV32-NEXT:    beqz a1, .LBB47_8
-; RV32-NEXT:  .LBB47_4:
-; RV32-NEXT:    lw a5, 8(sp)
-; RV32-NEXT:    j .LBB47_9
-; RV32-NEXT:  .LBB47_5: # %entry
-; RV32-NEXT:    li a4, 1
-; RV32-NEXT:    mv a3, a0
-; RV32-NEXT:    bltu a0, a1, .LBB47_2
-; RV32-NEXT:  .LBB47_6: # %entry
-; RV32-NEXT:    li a3, 1
-; RV32-NEXT:    bnez a2, .LBB47_3
-; RV32-NEXT:  .LBB47_7:
-; RV32-NEXT:    seqz a1, a0
-; RV32-NEXT:    bnez a1, .LBB47_4
-; RV32-NEXT:  .LBB47_8: # %entry
-; RV32-NEXT:    li a5, 0
-; RV32-NEXT:  .LBB47_9: # %entry
-; RV32-NEXT:    xori a0, a0, 1
-; RV32-NEXT:    or a0, a0, a2
-; RV32-NEXT:    li a4, 0
-; RV32-NEXT:    beqz a0, .LBB47_11
-; RV32-NEXT:  # %bb.10: # %entry
-; RV32-NEXT:    mv a4, a5
-; RV32-NEXT:  .LBB47_11: # %entry
-; RV32-NEXT:    bnez a1, .LBB47_13
-; RV32-NEXT:  # %bb.12: # %entry
-; RV32-NEXT:    li a5, 0
-; RV32-NEXT:    li a1, 0
-; RV32-NEXT:    bnez a0, .LBB47_14
-; RV32-NEXT:    j .LBB47_15
-; RV32-NEXT:  .LBB47_13:
-; RV32-NEXT:    lw a5, 12(sp)
-; RV32-NEXT:    li a1, 0
-; RV32-NEXT:    beqz a0, .LBB47_15
-; RV32-NEXT:  .LBB47_14: # %entry
-; RV32-NEXT:    mv a1, a5
-; RV32-NEXT:  .LBB47_15: # %entry
-; RV32-NEXT:    bgez a2, .LBB47_20
-; RV32-NEXT:  # %bb.16: # %entry
-; RV32-NEXT:    mv a5, a4
-; RV32-NEXT:    beqz a1, .LBB47_21
-; RV32-NEXT:  .LBB47_17: # %entry
-; RV32-NEXT:    mv a0, a4
-; RV32-NEXT:    bnez a1, .LBB47_22
-; RV32-NEXT:  .LBB47_18: # %entry
-; RV32-NEXT:    beqz a2, .LBB47_23
-; RV32-NEXT:  .LBB47_19: # %entry
-; RV32-NEXT:    sgtz a5, a2
-; RV32-NEXT:    beqz a5, .LBB47_24
-; RV32-NEXT:    j .LBB47_25
-; RV32-NEXT:  .LBB47_20: # %entry
-; RV32-NEXT:    li a2, 0
-; RV32-NEXT:    mv a5, a4
-; RV32-NEXT:    bnez a1, .LBB47_17
-; RV32-NEXT:  .LBB47_21: # %entry
-; RV32-NEXT:    li a5, 0
-; RV32-NEXT:    mv a0, a4
-; RV32-NEXT:    beqz a1, .LBB47_18
-; RV32-NEXT:  .LBB47_22: # %entry
-; RV32-NEXT:    mv a0, a5
-; RV32-NEXT:    bnez a2, .LBB47_19
-; RV32-NEXT:  .LBB47_23:
-; RV32-NEXT:    snez a5, a3
-; RV32-NEXT:    bnez a5, .LBB47_25
-; RV32-NEXT:  .LBB47_24: # %entry
-; RV32-NEXT:    li a4, 0
-; RV32-NEXT:  .LBB47_25: # %entry
-; RV32-NEXT:    or a2, a3, a2
-; RV32-NEXT:    bnez a2, .LBB47_30
-; RV32-NEXT:  # %bb.26: # %entry
-; RV32-NEXT:    mv a3, a1
-; RV32-NEXT:    beqz a5, .LBB47_31
-; RV32-NEXT:  .LBB47_27: # %entry
-; RV32-NEXT:    beqz a2, .LBB47_29
-; RV32-NEXT:  .LBB47_28: # %entry
-; RV32-NEXT:    mv a1, a3
-; RV32-NEXT:  .LBB47_29: # %entry
-; RV32-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
-; RV32-NEXT:    addi sp, sp, 32
-; RV32-NEXT:    ret
-; RV32-NEXT:  .LBB47_30: # %entry
-; RV32-NEXT:    mv a0, a4
-; RV32-NEXT:    mv a3, a1
-; RV32-NEXT:    bnez a5, .LBB47_27
-; RV32-NEXT:  .LBB47_31: # %entry
-; RV32-NEXT:    li a3, 0
-; RV32-NEXT:    bnez a2, .LBB47_28
-; RV32-NEXT:    j .LBB47_29
+; RV32IF-LABEL: ustest_f64i64_mm:
+; RV32IF:       # %bb.0: # %entry
+; RV32IF-NEXT:    addi sp, sp, -32
+; RV32IF-NEXT:    .cfi_def_cfa_offset 32
+; RV32IF-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    .cfi_offset ra, -4
+; RV32IF-NEXT:    mv a2, a1
+; RV32IF-NEXT:    mv a1, a0
+; RV32IF-NEXT:    addi a0, sp, 8
+; RV32IF-NEXT:    call __fixdfti@plt
+; RV32IF-NEXT:    lw a0, 16(sp)
+; RV32IF-NEXT:    lw a2, 20(sp)
+; RV32IF-NEXT:    li a1, 1
+; RV32IF-NEXT:    mv a4, a0
+; RV32IF-NEXT:    bgez a2, .LBB47_5
+; RV32IF-NEXT:  # %bb.1: # %entry
+; RV32IF-NEXT:    mv a3, a0
+; RV32IF-NEXT:    bgeu a0, a1, .LBB47_6
+; RV32IF-NEXT:  .LBB47_2: # %entry
+; RV32IF-NEXT:    beqz a2, .LBB47_7
+; RV32IF-NEXT:  .LBB47_3: # %entry
+; RV32IF-NEXT:    slti a1, a2, 0
+; RV32IF-NEXT:    mv a3, a4
+; RV32IF-NEXT:    beqz a1, .LBB47_8
+; RV32IF-NEXT:  .LBB47_4:
+; RV32IF-NEXT:    lw a5, 8(sp)
+; RV32IF-NEXT:    j .LBB47_9
+; RV32IF-NEXT:  .LBB47_5: # %entry
+; RV32IF-NEXT:    li a4, 1
+; RV32IF-NEXT:    mv a3, a0
+; RV32IF-NEXT:    bltu a0, a1, .LBB47_2
+; RV32IF-NEXT:  .LBB47_6: # %entry
+; RV32IF-NEXT:    li a3, 1
+; RV32IF-NEXT:    bnez a2, .LBB47_3
+; RV32IF-NEXT:  .LBB47_7:
+; RV32IF-NEXT:    seqz a1, a0
+; RV32IF-NEXT:    bnez a1, .LBB47_4
+; RV32IF-NEXT:  .LBB47_8: # %entry
+; RV32IF-NEXT:    li a5, 0
+; RV32IF-NEXT:  .LBB47_9: # %entry
+; RV32IF-NEXT:    xori a0, a0, 1
+; RV32IF-NEXT:    or a0, a0, a2
+; RV32IF-NEXT:    li a4, 0
+; RV32IF-NEXT:    beqz a0, .LBB47_11
+; RV32IF-NEXT:  # %bb.10: # %entry
+; RV32IF-NEXT:    mv a4, a5
+; RV32IF-NEXT:  .LBB47_11: # %entry
+; RV32IF-NEXT:    bnez a1, .LBB47_13
+; RV32IF-NEXT:  # %bb.12: # %entry
+; RV32IF-NEXT:    li a5, 0
+; RV32IF-NEXT:    li a1, 0
+; RV32IF-NEXT:    bnez a0, .LBB47_14
+; RV32IF-NEXT:    j .LBB47_15
+; RV32IF-NEXT:  .LBB47_13:
+; RV32IF-NEXT:    lw a5, 12(sp)
+; RV32IF-NEXT:    li a1, 0
+; RV32IF-NEXT:    beqz a0, .LBB47_15
+; RV32IF-NEXT:  .LBB47_14: # %entry
+; RV32IF-NEXT:    mv a1, a5
+; RV32IF-NEXT:  .LBB47_15: # %entry
+; RV32IF-NEXT:    bgez a2, .LBB47_20
+; RV32IF-NEXT:  # %bb.16: # %entry
+; RV32IF-NEXT:    mv a5, a4
+; RV32IF-NEXT:    beqz a1, .LBB47_21
+; RV32IF-NEXT:  .LBB47_17: # %entry
+; RV32IF-NEXT:    mv a0, a4
+; RV32IF-NEXT:    bnez a1, .LBB47_22
+; RV32IF-NEXT:  .LBB47_18: # %entry
+; RV32IF-NEXT:    beqz a2, .LBB47_23
+; RV32IF-NEXT:  .LBB47_19: # %entry
+; RV32IF-NEXT:    sgtz a5, a2
+; RV32IF-NEXT:    beqz a5, .LBB47_24
+; RV32IF-NEXT:    j .LBB47_25
+; RV32IF-NEXT:  .LBB47_20: # %entry
+; RV32IF-NEXT:    li a2, 0
+; RV32IF-NEXT:    mv a5, a4
+; RV32IF-NEXT:    bnez a1, .LBB47_17
+; RV32IF-NEXT:  .LBB47_21: # %entry
+; RV32IF-NEXT:    li a5, 0
+; RV32IF-NEXT:    mv a0, a4
+; RV32IF-NEXT:    beqz a1, .LBB47_18
+; RV32IF-NEXT:  .LBB47_22: # %entry
+; RV32IF-NEXT:    mv a0, a5
+; RV32IF-NEXT:    bnez a2, .LBB47_19
+; RV32IF-NEXT:  .LBB47_23:
+; RV32IF-NEXT:    snez a5, a3
+; RV32IF-NEXT:    bnez a5, .LBB47_25
+; RV32IF-NEXT:  .LBB47_24: # %entry
+; RV32IF-NEXT:    li a4, 0
+; RV32IF-NEXT:  .LBB47_25: # %entry
+; RV32IF-NEXT:    or a2, a3, a2
+; RV32IF-NEXT:    bnez a2, .LBB47_30
+; RV32IF-NEXT:  # %bb.26: # %entry
+; RV32IF-NEXT:    mv a3, a1
+; RV32IF-NEXT:    beqz a5, .LBB47_31
+; RV32IF-NEXT:  .LBB47_27: # %entry
+; RV32IF-NEXT:    beqz a2, .LBB47_29
+; RV32IF-NEXT:  .LBB47_28: # %entry
+; RV32IF-NEXT:    mv a1, a3
+; RV32IF-NEXT:  .LBB47_29: # %entry
+; RV32IF-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    addi sp, sp, 32
+; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB47_30: # %entry
+; RV32IF-NEXT:    mv a0, a4
+; RV32IF-NEXT:    mv a3, a1
+; RV32IF-NEXT:    bnez a5, .LBB47_27
+; RV32IF-NEXT:  .LBB47_31: # %entry
+; RV32IF-NEXT:    li a3, 0
+; RV32IF-NEXT:    bnez a2, .LBB47_28
+; RV32IF-NEXT:    j .LBB47_29
 ;
 ; RV64-LABEL: ustest_f64i64_mm:
 ; RV64:       # %bb.0: # %entry
@@ -3473,6 +3682,116 @@ define i64 @ustest_f64i64_mm(double %x) {
 ; RV64-NEXT:    li a1, 0
 ; RV64-NEXT:    bnez a3, .LBB47_4
 ; RV64-NEXT:    j .LBB47_5
+;
+; RV32IFD-LABEL: ustest_f64i64_mm:
+; RV32IFD:       # %bb.0: # %entry
+; RV32IFD-NEXT:    addi sp, sp, -32
+; RV32IFD-NEXT:    .cfi_def_cfa_offset 32
+; RV32IFD-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    .cfi_offset ra, -4
+; RV32IFD-NEXT:    addi a0, sp, 8
+; RV32IFD-NEXT:    call __fixdfti@plt
+; RV32IFD-NEXT:    lw a0, 16(sp)
+; RV32IFD-NEXT:    lw a2, 20(sp)
+; RV32IFD-NEXT:    li a1, 1
+; RV32IFD-NEXT:    mv a4, a0
+; RV32IFD-NEXT:    bgez a2, .LBB47_5
+; RV32IFD-NEXT:  # %bb.1: # %entry
+; RV32IFD-NEXT:    mv a3, a0
+; RV32IFD-NEXT:    bgeu a0, a1, .LBB47_6
+; RV32IFD-NEXT:  .LBB47_2: # %entry
+; RV32IFD-NEXT:    beqz a2, .LBB47_7
+; RV32IFD-NEXT:  .LBB47_3: # %entry
+; RV32IFD-NEXT:    slti a1, a2, 0
+; RV32IFD-NEXT:    mv a3, a4
+; RV32IFD-NEXT:    beqz a1, .LBB47_8
+; RV32IFD-NEXT:  .LBB47_4:
+; RV32IFD-NEXT:    lw a5, 8(sp)
+; RV32IFD-NEXT:    j .LBB47_9
+; RV32IFD-NEXT:  .LBB47_5: # %entry
+; RV32IFD-NEXT:    li a4, 1
+; RV32IFD-NEXT:    mv a3, a0
+; RV32IFD-NEXT:    bltu a0, a1, .LBB47_2
+; RV32IFD-NEXT:  .LBB47_6: # %entry
+; RV32IFD-NEXT:    li a3, 1
+; RV32IFD-NEXT:    bnez a2, .LBB47_3
+; RV32IFD-NEXT:  .LBB47_7:
+; RV32IFD-NEXT:    seqz a1, a0
+; RV32IFD-NEXT:    bnez a1, .LBB47_4
+; RV32IFD-NEXT:  .LBB47_8: # %entry
+; RV32IFD-NEXT:    li a5, 0
+; RV32IFD-NEXT:  .LBB47_9: # %entry
+; RV32IFD-NEXT:    xori a0, a0, 1
+; RV32IFD-NEXT:    or a0, a0, a2
+; RV32IFD-NEXT:    li a4, 0
+; RV32IFD-NEXT:    beqz a0, .LBB47_11
+; RV32IFD-NEXT:  # %bb.10: # %entry
+; RV32IFD-NEXT:    mv a4, a5
+; RV32IFD-NEXT:  .LBB47_11: # %entry
+; RV32IFD-NEXT:    bnez a1, .LBB47_13
+; RV32IFD-NEXT:  # %bb.12: # %entry
+; RV32IFD-NEXT:    li a5, 0
+; RV32IFD-NEXT:    li a1, 0
+; RV32IFD-NEXT:    bnez a0, .LBB47_14
+; RV32IFD-NEXT:    j .LBB47_15
+; RV32IFD-NEXT:  .LBB47_13:
+; RV32IFD-NEXT:    lw a5, 12(sp)
+; RV32IFD-NEXT:    li a1, 0
+; RV32IFD-NEXT:    beqz a0, .LBB47_15
+; RV32IFD-NEXT:  .LBB47_14: # %entry
+; RV32IFD-NEXT:    mv a1, a5
+; RV32IFD-NEXT:  .LBB47_15: # %entry
+; RV32IFD-NEXT:    bgez a2, .LBB47_20
+; RV32IFD-NEXT:  # %bb.16: # %entry
+; RV32IFD-NEXT:    mv a5, a4
+; RV32IFD-NEXT:    beqz a1, .LBB47_21
+; RV32IFD-NEXT:  .LBB47_17: # %entry
+; RV32IFD-NEXT:    mv a0, a4
+; RV32IFD-NEXT:    bnez a1, .LBB47_22
+; RV32IFD-NEXT:  .LBB47_18: # %entry
+; RV32IFD-NEXT:    beqz a2, .LBB47_23
+; RV32IFD-NEXT:  .LBB47_19: # %entry
+; RV32IFD-NEXT:    sgtz a5, a2
+; RV32IFD-NEXT:    beqz a5, .LBB47_24
+; RV32IFD-NEXT:    j .LBB47_25
+; RV32IFD-NEXT:  .LBB47_20: # %entry
+; RV32IFD-NEXT:    li a2, 0
+; RV32IFD-NEXT:    mv a5, a4
+; RV32IFD-NEXT:    bnez a1, .LBB47_17
+; RV32IFD-NEXT:  .LBB47_21: # %entry
+; RV32IFD-NEXT:    li a5, 0
+; RV32IFD-NEXT:    mv a0, a4
+; RV32IFD-NEXT:    beqz a1, .LBB47_18
+; RV32IFD-NEXT:  .LBB47_22: # %entry
+; RV32IFD-NEXT:    mv a0, a5
+; RV32IFD-NEXT:    bnez a2, .LBB47_19
+; RV32IFD-NEXT:  .LBB47_23:
+; RV32IFD-NEXT:    snez a5, a3
+; RV32IFD-NEXT:    bnez a5, .LBB47_25
+; RV32IFD-NEXT:  .LBB47_24: # %entry
+; RV32IFD-NEXT:    li a4, 0
+; RV32IFD-NEXT:  .LBB47_25: # %entry
+; RV32IFD-NEXT:    or a2, a3, a2
+; RV32IFD-NEXT:    bnez a2, .LBB47_30
+; RV32IFD-NEXT:  # %bb.26: # %entry
+; RV32IFD-NEXT:    mv a3, a1
+; RV32IFD-NEXT:    beqz a5, .LBB47_31
+; RV32IFD-NEXT:  .LBB47_27: # %entry
+; RV32IFD-NEXT:    beqz a2, .LBB47_29
+; RV32IFD-NEXT:  .LBB47_28: # %entry
+; RV32IFD-NEXT:    mv a1, a3
+; RV32IFD-NEXT:  .LBB47_29: # %entry
+; RV32IFD-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    addi sp, sp, 32
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB47_30: # %entry
+; RV32IFD-NEXT:    mv a0, a4
+; RV32IFD-NEXT:    mv a3, a1
+; RV32IFD-NEXT:    bnez a5, .LBB47_27
+; RV32IFD-NEXT:  .LBB47_31: # %entry
+; RV32IFD-NEXT:    li a3, 0
+; RV32IFD-NEXT:    bnez a2, .LBB47_28
+; RV32IFD-NEXT:    j .LBB47_29
 entry:
   %conv = fptosi double %x to i128
   %spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 18446744073709551616)
@@ -3488,7 +3807,6 @@ define i64 @stest_f32i64_mm(float %x) {
 ; RV32-NEXT:    .cfi_def_cfa_offset 32
 ; RV32-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
 ; RV32-NEXT:    .cfi_offset ra, -4
-; RV32-NEXT:    mv a1, a0
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    call __fixsfti at plt
 ; RV32-NEXT:    lw a5, 8(sp)
@@ -3601,11 +3919,10 @@ define i64 @stest_f32i64_mm(float %x) {
 ;
 ; RV64-LABEL: stest_f32i64_mm:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    fmv.w.x ft0, a0
-; RV64-NEXT:    feq.s a0, ft0, ft0
+; RV64-NEXT:    feq.s a0, fa0, fa0
 ; RV64-NEXT:    beqz a0, .LBB48_2
 ; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    fcvt.l.s a0, ft0, rtz
+; RV64-NEXT:    fcvt.l.s a0, fa0, rtz
 ; RV64-NEXT:  .LBB48_2: # %entry
 ; RV64-NEXT:    ret
 entry:
@@ -3623,7 +3940,6 @@ define i64 @utest_f32i64_mm(float %x) {
 ; RV32-NEXT:    .cfi_def_cfa_offset 32
 ; RV32-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
 ; RV32-NEXT:    .cfi_offset ra, -4
-; RV32-NEXT:    mv a1, a0
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    call __fixunssfti at plt
 ; RV32-NEXT:    lw a0, 20(sp)
@@ -3699,7 +4015,6 @@ define i64 @ustest_f32i64_mm(float %x) {
 ; RV32-NEXT:    .cfi_def_cfa_offset 32
 ; RV32-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
 ; RV32-NEXT:    .cfi_offset ra, -4
-; RV32-NEXT:    mv a1, a0
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    call __fixsfti at plt
 ; RV32-NEXT:    lw a0, 16(sp)
@@ -3857,8 +4172,8 @@ define i64 @stest_f16i64_mm(half %x) {
 ; RV32-NEXT:    .cfi_def_cfa_offset 32
 ; RV32-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
 ; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    fmv.x.w a0, fa0
 ; RV32-NEXT:    call __extendhfsf2@plt
-; RV32-NEXT:    mv a1, a0
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    call __fixsfti@plt
 ; RV32-NEXT:    lw a5, 8(sp)
@@ -3975,6 +4290,7 @@ define i64 @stest_f16i64_mm(half %x) {
 ; RV64-NEXT:    .cfi_def_cfa_offset 16
 ; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64-NEXT:    .cfi_offset ra, -8
+; RV64-NEXT:    fmv.x.w a0, fa0
 ; RV64-NEXT:    call __extendhfsf2@plt
 ; RV64-NEXT:    call __fixsfti@plt
 ; RV64-NEXT:    li a2, -1
@@ -4035,8 +4351,8 @@ define i64 @utesth_f16i64_mm(half %x) {
 ; RV32-NEXT:    .cfi_def_cfa_offset 32
 ; RV32-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
 ; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    fmv.x.w a0, fa0
 ; RV32-NEXT:    call __extendhfsf2@plt
-; RV32-NEXT:    mv a1, a0
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    call __fixunssfti@plt
 ; RV32-NEXT:    lw a0, 20(sp)
@@ -4083,6 +4399,7 @@ define i64 @utesth_f16i64_mm(half %x) {
 ; RV64-NEXT:    .cfi_def_cfa_offset 16
 ; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64-NEXT:    .cfi_offset ra, -8
+; RV64-NEXT:    fmv.x.w a0, fa0
 ; RV64-NEXT:    call __extendhfsf2@plt
 ; RV64-NEXT:    call __fixunssfti@plt
 ; RV64-NEXT:    mv a2, a0
@@ -4113,8 +4430,8 @@ define i64 @ustest_f16i64_mm(half %x) {
 ; RV32-NEXT:    .cfi_def_cfa_offset 32
 ; RV32-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
 ; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    fmv.x.w a0, fa0
 ; RV32-NEXT:    call __extendhfsf2@plt
-; RV32-NEXT:    mv a1, a0
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    call __fixsfti@plt
 ; RV32-NEXT:    lw a0, 16(sp)
@@ -4225,6 +4542,7 @@ define i64 @ustest_f16i64_mm(half %x) {
 ; RV64-NEXT:    .cfi_def_cfa_offset 16
 ; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64-NEXT:    .cfi_offset ra, -8
+; RV64-NEXT:    fmv.x.w a0, fa0
 ; RV64-NEXT:    call __extendhfsf2@plt
 ; RV64-NEXT:    call __fixsfti@plt
 ; RV64-NEXT:    mv a2, a0

diff  --git a/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll
index 229e9e3106bbc..f47e77b821386 100644
--- a/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll
@@ -1,17 +1,16 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+f,+d,+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+f,+d,+v -verify-machineinstrs < %s \
+; RUN:   -target-abi=lp64d | FileCheck %s
 
 ; i32 saturate
 
 define <2 x i32> @stest_f64i32(<2 x double> %x) {
 ; CHECK-LABEL: stest_f64i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a1
-; CHECK-NEXT:    fmv.d.x ft1, a0
-; CHECK-NEXT:    fcvt.l.d a1, ft0, rtz
+; CHECK-NEXT:    fcvt.l.d a1, fa1, rtz
 ; CHECK-NEXT:    lui a2, 524288
 ; CHECK-NEXT:    addiw a3, a2, -1
-; CHECK-NEXT:    fcvt.l.d a0, ft1, rtz
+; CHECK-NEXT:    fcvt.l.d a0, fa0, rtz
 ; CHECK-NEXT:    bge a1, a3, .LBB0_5
 ; CHECK-NEXT:  # %bb.1: # %entry
 ; CHECK-NEXT:    bge a0, a3, .LBB0_6
@@ -46,12 +45,10 @@ entry:
 define <2 x i32> @utest_f64i32(<2 x double> %x) {
 ; CHECK-LABEL: utest_f64i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    fmv.d.x ft1, a1
-; CHECK-NEXT:    fcvt.lu.d a0, ft0, rtz
+; CHECK-NEXT:    fcvt.lu.d a0, fa0, rtz
 ; CHECK-NEXT:    li a1, -1
 ; CHECK-NEXT:    srli a2, a1, 32
-; CHECK-NEXT:    fcvt.lu.d a1, ft1, rtz
+; CHECK-NEXT:    fcvt.lu.d a1, fa1, rtz
 ; CHECK-NEXT:    bgeu a0, a2, .LBB1_3
 ; CHECK-NEXT:  # %bb.1: # %entry
 ; CHECK-NEXT:    bgeu a1, a2, .LBB1_4
@@ -74,12 +71,10 @@ entry:
 define <2 x i32> @ustest_f64i32(<2 x double> %x) {
 ; CHECK-LABEL: ustest_f64i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a1
-; CHECK-NEXT:    fmv.d.x ft1, a0
-; CHECK-NEXT:    fcvt.l.d a1, ft0, rtz
+; CHECK-NEXT:    fcvt.l.d a1, fa1, rtz
 ; CHECK-NEXT:    li a0, -1
 ; CHECK-NEXT:    srli a2, a0, 32
-; CHECK-NEXT:    fcvt.l.d a0, ft1, rtz
+; CHECK-NEXT:    fcvt.l.d a0, fa0, rtz
 ; CHECK-NEXT:    bge a1, a2, .LBB2_5
 ; CHECK-NEXT:  # %bb.1: # %entry
 ; CHECK-NEXT:    bge a0, a2, .LBB2_6
@@ -114,61 +109,59 @@ entry:
 define <4 x i32> @stest_f32i32(<4 x float> %x) {
 ; CHECK-LABEL: stest_f32i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft1, a4
-; CHECK-NEXT:    fmv.w.x ft2, a3
-; CHECK-NEXT:    fmv.w.x ft0, a2
-; CHECK-NEXT:    fcvt.l.s a2, ft1, rtz
-; CHECK-NEXT:    lui a4, 524288
-; CHECK-NEXT:    addiw a6, a4, -1
-; CHECK-NEXT:    fcvt.l.s a3, ft2, rtz
-; CHECK-NEXT:    blt a2, a6, .LBB3_2
+; CHECK-NEXT:    fcvt.l.s a1, fa3, rtz
+; CHECK-NEXT:    lui a3, 524288
+; CHECK-NEXT:    addiw a6, a3, -1
+; CHECK-NEXT:    fcvt.l.s a2, fa2, rtz
+; CHECK-NEXT:    bge a1, a6, .LBB3_10
 ; CHECK-NEXT:  # %bb.1: # %entry
-; CHECK-NEXT:    mv a2, a6
+; CHECK-NEXT:    fcvt.l.s a4, fa1, rtz
+; CHECK-NEXT:    bge a2, a6, .LBB3_11
 ; CHECK-NEXT:  .LBB3_2: # %entry
-; CHECK-NEXT:    fmv.w.x ft1, a1
-; CHECK-NEXT:    fcvt.l.s a1, ft0, rtz
-; CHECK-NEXT:    bge a3, a6, .LBB3_11
-; CHECK-NEXT:  # %bb.3: # %entry
-; CHECK-NEXT:    fcvt.l.s a5, ft1, rtz
-; CHECK-NEXT:    bge a1, a6, .LBB3_12
-; CHECK-NEXT:  .LBB3_4: # %entry
+; CHECK-NEXT:    fcvt.l.s a5, fa0, rtz
+; CHECK-NEXT:    bge a4, a6, .LBB3_12
+; CHECK-NEXT:  .LBB3_3: # %entry
 ; CHECK-NEXT:    bge a5, a6, .LBB3_13
+; CHECK-NEXT:  .LBB3_4: # %entry
+; CHECK-NEXT:    bge a3, a5, .LBB3_14
 ; CHECK-NEXT:  .LBB3_5: # %entry
-; CHECK-NEXT:    bge a4, a5, .LBB3_14
+; CHECK-NEXT:    bge a3, a4, .LBB3_15
 ; CHECK-NEXT:  .LBB3_6: # %entry
-; CHECK-NEXT:    bge a4, a1, .LBB3_15
+; CHECK-NEXT:    bge a3, a2, .LBB3_16
 ; CHECK-NEXT:  .LBB3_7: # %entry
-; CHECK-NEXT:    bge a4, a3, .LBB3_16
+; CHECK-NEXT:    blt a3, a1, .LBB3_9
 ; CHECK-NEXT:  .LBB3_8: # %entry
-; CHECK-NEXT:    blt a4, a2, .LBB3_10
+; CHECK-NEXT:    lui a1, 524288
 ; CHECK-NEXT:  .LBB3_9: # %entry
-; CHECK-NEXT:    lui a2, 524288
-; CHECK-NEXT:  .LBB3_10: # %entry
-; CHECK-NEXT:    sw a2, 12(a0)
-; CHECK-NEXT:    sw a3, 8(a0)
-; CHECK-NEXT:    sw a1, 4(a0)
+; CHECK-NEXT:    sw a1, 12(a0)
+; CHECK-NEXT:    sw a2, 8(a0)
+; CHECK-NEXT:    sw a4, 4(a0)
 ; CHECK-NEXT:    sw a5, 0(a0)
 ; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB3_10: # %entry
+; CHECK-NEXT:    mv a1, a6
+; CHECK-NEXT:    fcvt.l.s a4, fa1, rtz
+; CHECK-NEXT:    blt a2, a6, .LBB3_2
 ; CHECK-NEXT:  .LBB3_11: # %entry
-; CHECK-NEXT:    mv a3, a6
-; CHECK-NEXT:    fcvt.l.s a5, ft1, rtz
-; CHECK-NEXT:    blt a1, a6, .LBB3_4
+; CHECK-NEXT:    mv a2, a6
+; CHECK-NEXT:    fcvt.l.s a5, fa0, rtz
+; CHECK-NEXT:    blt a4, a6, .LBB3_3
 ; CHECK-NEXT:  .LBB3_12: # %entry
-; CHECK-NEXT:    mv a1, a6
-; CHECK-NEXT:    blt a5, a6, .LBB3_5
+; CHECK-NEXT:    mv a4, a6
+; CHECK-NEXT:    blt a5, a6, .LBB3_4
 ; CHECK-NEXT:  .LBB3_13: # %entry
 ; CHECK-NEXT:    mv a5, a6
-; CHECK-NEXT:    blt a4, a5, .LBB3_6
+; CHECK-NEXT:    blt a3, a5, .LBB3_5
 ; CHECK-NEXT:  .LBB3_14: # %entry
 ; CHECK-NEXT:    lui a5, 524288
-; CHECK-NEXT:    blt a4, a1, .LBB3_7
+; CHECK-NEXT:    blt a3, a4, .LBB3_6
 ; CHECK-NEXT:  .LBB3_15: # %entry
-; CHECK-NEXT:    lui a1, 524288
-; CHECK-NEXT:    blt a4, a3, .LBB3_8
+; CHECK-NEXT:    lui a4, 524288
+; CHECK-NEXT:    blt a3, a2, .LBB3_7
 ; CHECK-NEXT:  .LBB3_16: # %entry
-; CHECK-NEXT:    lui a3, 524288
-; CHECK-NEXT:    bge a4, a2, .LBB3_9
-; CHECK-NEXT:    j .LBB3_10
+; CHECK-NEXT:    lui a2, 524288
+; CHECK-NEXT:    bge a3, a1, .LBB3_8
+; CHECK-NEXT:    j .LBB3_9
 entry:
   %conv = fptosi <4 x float> %x to <4 x i64>
   %0 = icmp slt <4 x i64> %conv, <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>
@@ -182,41 +175,39 @@ entry:
 define <4 x i32> @utest_f32i32(<4 x float> %x) {
 ; CHECK-LABEL: utest_f32i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft1, a1
-; CHECK-NEXT:    fmv.w.x ft2, a2
-; CHECK-NEXT:    fmv.w.x ft0, a3
-; CHECK-NEXT:    fcvt.lu.s a1, ft1, rtz
+; CHECK-NEXT:    fcvt.lu.s a1, fa0, rtz
 ; CHECK-NEXT:    li a2, -1
 ; CHECK-NEXT:    srli a3, a2, 32
-; CHECK-NEXT:    fcvt.lu.s a2, ft2, rtz
-; CHECK-NEXT:    bltu a1, a3, .LBB4_2
+; CHECK-NEXT:    fcvt.lu.s a2, fa1, rtz
+; CHECK-NEXT:    bgeu a1, a3, .LBB4_6
 ; CHECK-NEXT:  # %bb.1: # %entry
-; CHECK-NEXT:    mv a1, a3
-; CHECK-NEXT:  .LBB4_2: # %entry
-; CHECK-NEXT:    fmv.w.x ft1, a4
-; CHECK-NEXT:    fcvt.lu.s a4, ft0, rtz
+; CHECK-NEXT:    fcvt.lu.s a4, fa2, rtz
 ; CHECK-NEXT:    bgeu a2, a3, .LBB4_7
-; CHECK-NEXT:  # %bb.3: # %entry
-; CHECK-NEXT:    fcvt.lu.s a5, ft1, rtz
+; CHECK-NEXT:  .LBB4_2: # %entry
+; CHECK-NEXT:    fcvt.lu.s a5, fa3, rtz
 ; CHECK-NEXT:    bgeu a4, a3, .LBB4_8
+; CHECK-NEXT:  .LBB4_3: # %entry
+; CHECK-NEXT:    bltu a5, a3, .LBB4_5
 ; CHECK-NEXT:  .LBB4_4: # %entry
-; CHECK-NEXT:    bltu a5, a3, .LBB4_6
-; CHECK-NEXT:  .LBB4_5: # %entry
 ; CHECK-NEXT:    mv a5, a3
-; CHECK-NEXT:  .LBB4_6: # %entry
+; CHECK-NEXT:  .LBB4_5: # %entry
 ; CHECK-NEXT:    sw a5, 12(a0)
 ; CHECK-NEXT:    sw a4, 8(a0)
 ; CHECK-NEXT:    sw a2, 4(a0)
 ; CHECK-NEXT:    sw a1, 0(a0)
 ; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB4_6: # %entry
+; CHECK-NEXT:    mv a1, a3
+; CHECK-NEXT:    fcvt.lu.s a4, fa2, rtz
+; CHECK-NEXT:    bltu a2, a3, .LBB4_2
 ; CHECK-NEXT:  .LBB4_7: # %entry
 ; CHECK-NEXT:    mv a2, a3
-; CHECK-NEXT:    fcvt.lu.s a5, ft1, rtz
-; CHECK-NEXT:    bltu a4, a3, .LBB4_4
+; CHECK-NEXT:    fcvt.lu.s a5, fa3, rtz
+; CHECK-NEXT:    bltu a4, a3, .LBB4_3
 ; CHECK-NEXT:  .LBB4_8: # %entry
 ; CHECK-NEXT:    mv a4, a3
-; CHECK-NEXT:    bgeu a5, a3, .LBB4_5
-; CHECK-NEXT:    j .LBB4_6
+; CHECK-NEXT:    bgeu a5, a3, .LBB4_4
+; CHECK-NEXT:    j .LBB4_5
 entry:
   %conv = fptoui <4 x float> %x to <4 x i64>
   %0 = icmp ult <4 x i64> %conv, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
@@ -228,61 +219,59 @@ entry:
 define <4 x i32> @ustest_f32i32(<4 x float> %x) {
 ; CHECK-LABEL: ustest_f32i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft1, a4
-; CHECK-NEXT:    fmv.w.x ft2, a3
-; CHECK-NEXT:    fmv.w.x ft0, a2
-; CHECK-NEXT:    fcvt.l.s a2, ft1, rtz
-; CHECK-NEXT:    li a3, -1
-; CHECK-NEXT:    srli a5, a3, 32
-; CHECK-NEXT:    fcvt.l.s a3, ft2, rtz
-; CHECK-NEXT:    blt a2, a5, .LBB5_2
+; CHECK-NEXT:    fcvt.l.s a1, fa3, rtz
+; CHECK-NEXT:    li a2, -1
+; CHECK-NEXT:    srli a5, a2, 32
+; CHECK-NEXT:    fcvt.l.s a2, fa2, rtz
+; CHECK-NEXT:    bge a1, a5, .LBB5_10
 ; CHECK-NEXT:  # %bb.1: # %entry
-; CHECK-NEXT:    mv a2, a5
+; CHECK-NEXT:    fcvt.l.s a3, fa1, rtz
+; CHECK-NEXT:    bge a2, a5, .LBB5_11
 ; CHECK-NEXT:  .LBB5_2: # %entry
-; CHECK-NEXT:    fmv.w.x ft1, a1
-; CHECK-NEXT:    fcvt.l.s a1, ft0, rtz
-; CHECK-NEXT:    bge a3, a5, .LBB5_11
-; CHECK-NEXT:  # %bb.3: # %entry
-; CHECK-NEXT:    fcvt.l.s a4, ft1, rtz
-; CHECK-NEXT:    bge a1, a5, .LBB5_12
-; CHECK-NEXT:  .LBB5_4: # %entry
+; CHECK-NEXT:    fcvt.l.s a4, fa0, rtz
+; CHECK-NEXT:    bge a3, a5, .LBB5_12
+; CHECK-NEXT:  .LBB5_3: # %entry
 ; CHECK-NEXT:    bge a4, a5, .LBB5_13
-; CHECK-NEXT:  .LBB5_5: # %entry
+; CHECK-NEXT:  .LBB5_4: # %entry
 ; CHECK-NEXT:    blez a4, .LBB5_14
+; CHECK-NEXT:  .LBB5_5: # %entry
+; CHECK-NEXT:    blez a3, .LBB5_15
 ; CHECK-NEXT:  .LBB5_6: # %entry
-; CHECK-NEXT:    blez a1, .LBB5_15
+; CHECK-NEXT:    blez a2, .LBB5_16
 ; CHECK-NEXT:  .LBB5_7: # %entry
-; CHECK-NEXT:    blez a3, .LBB5_16
+; CHECK-NEXT:    bgtz a1, .LBB5_9
 ; CHECK-NEXT:  .LBB5_8: # %entry
-; CHECK-NEXT:    bgtz a2, .LBB5_10
+; CHECK-NEXT:    li a1, 0
 ; CHECK-NEXT:  .LBB5_9: # %entry
-; CHECK-NEXT:    li a2, 0
-; CHECK-NEXT:  .LBB5_10: # %entry
-; CHECK-NEXT:    sw a2, 12(a0)
-; CHECK-NEXT:    sw a3, 8(a0)
-; CHECK-NEXT:    sw a1, 4(a0)
+; CHECK-NEXT:    sw a1, 12(a0)
+; CHECK-NEXT:    sw a2, 8(a0)
+; CHECK-NEXT:    sw a3, 4(a0)
 ; CHECK-NEXT:    sw a4, 0(a0)
 ; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB5_10: # %entry
+; CHECK-NEXT:    mv a1, a5
+; CHECK-NEXT:    fcvt.l.s a3, fa1, rtz
+; CHECK-NEXT:    blt a2, a5, .LBB5_2
 ; CHECK-NEXT:  .LBB5_11: # %entry
-; CHECK-NEXT:    mv a3, a5
-; CHECK-NEXT:    fcvt.l.s a4, ft1, rtz
-; CHECK-NEXT:    blt a1, a5, .LBB5_4
+; CHECK-NEXT:    mv a2, a5
+; CHECK-NEXT:    fcvt.l.s a4, fa0, rtz
+; CHECK-NEXT:    blt a3, a5, .LBB5_3
 ; CHECK-NEXT:  .LBB5_12: # %entry
-; CHECK-NEXT:    mv a1, a5
-; CHECK-NEXT:    blt a4, a5, .LBB5_5
+; CHECK-NEXT:    mv a3, a5
+; CHECK-NEXT:    blt a4, a5, .LBB5_4
 ; CHECK-NEXT:  .LBB5_13: # %entry
 ; CHECK-NEXT:    mv a4, a5
-; CHECK-NEXT:    bgtz a4, .LBB5_6
+; CHECK-NEXT:    bgtz a4, .LBB5_5
 ; CHECK-NEXT:  .LBB5_14: # %entry
 ; CHECK-NEXT:    li a4, 0
-; CHECK-NEXT:    bgtz a1, .LBB5_7
+; CHECK-NEXT:    bgtz a3, .LBB5_6
 ; CHECK-NEXT:  .LBB5_15: # %entry
-; CHECK-NEXT:    li a1, 0
-; CHECK-NEXT:    bgtz a3, .LBB5_8
-; CHECK-NEXT:  .LBB5_16: # %entry
 ; CHECK-NEXT:    li a3, 0
-; CHECK-NEXT:    blez a2, .LBB5_9
-; CHECK-NEXT:    j .LBB5_10
+; CHECK-NEXT:    bgtz a2, .LBB5_7
+; CHECK-NEXT:  .LBB5_16: # %entry
+; CHECK-NEXT:    li a2, 0
+; CHECK-NEXT:    blez a1, .LBB5_8
+; CHECK-NEXT:    j .LBB5_9
 entry:
   %conv = fptosi <4 x float> %x to <4 x i64>
   %0 = icmp slt <4 x i64> %conv, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
@@ -303,63 +292,59 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) {
 ; CHECK-NEXT:    sd s1, 40(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    sd s2, 32(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    sd s3, 24(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s4, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs0, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs1, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs2, 0(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_offset ra, -8
 ; CHECK-NEXT:    .cfi_offset s0, -16
 ; CHECK-NEXT:    .cfi_offset s1, -24
 ; CHECK-NEXT:    .cfi_offset s2, -32
 ; CHECK-NEXT:    .cfi_offset s3, -40
-; CHECK-NEXT:    .cfi_offset s4, -48
-; CHECK-NEXT:    lhu s2, 24(a1)
-; CHECK-NEXT:    lhu s1, 0(a1)
+; CHECK-NEXT:    .cfi_offset fs0, -48
+; CHECK-NEXT:    .cfi_offset fs1, -56
+; CHECK-NEXT:    .cfi_offset fs2, -64
+; CHECK-NEXT:    lhu s1, 24(a1)
+; CHECK-NEXT:    lhu s2, 0(a1)
 ; CHECK-NEXT:    lhu s3, 8(a1)
 ; CHECK-NEXT:    lhu a1, 16(a1)
 ; CHECK-NEXT:    mv s0, a0
 ; CHECK-NEXT:    mv a0, a1
 ; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    mv s4, a0
+; CHECK-NEXT:    fmv.s fs2, fa0
 ; CHECK-NEXT:    mv a0, s3
 ; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    mv s3, a0
-; CHECK-NEXT:    mv a0, s1
-; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    mv s1, a0
-; CHECK-NEXT:    fmv.w.x ft0, s3
-; CHECK-NEXT:    fsw ft0, 12(sp) # 4-byte Folded Spill
-; CHECK-NEXT:    fmv.w.x ft0, s4
-; CHECK-NEXT:    fcvt.l.s s3, ft0, rtz
+; CHECK-NEXT:    fmv.s fs1, fa0
 ; CHECK-NEXT:    mv a0, s2
 ; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    fcvt.l.s a0, ft0, rtz
+; CHECK-NEXT:    fmv.s fs0, fa0
+; CHECK-NEXT:    fcvt.l.s s2, fs2, rtz
+; CHECK-NEXT:    mv a0, s1
+; CHECK-NEXT:    call __extendhfsf2@plt
+; CHECK-NEXT:    fcvt.l.s a0, fa0, rtz
 ; CHECK-NEXT:    lui a1, 524288
 ; CHECK-NEXT:    addiw a4, a1, -1
-; CHECK-NEXT:    blt a0, a4, .LBB6_2
+; CHECK-NEXT:    bge a0, a4, .LBB6_10
 ; CHECK-NEXT:  # %bb.1: # %entry
-; CHECK-NEXT:    mv a0, a4
+; CHECK-NEXT:    fcvt.l.s a2, fs1, rtz
+; CHECK-NEXT:    bge s2, a4, .LBB6_11
 ; CHECK-NEXT:  .LBB6_2: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, s1
-; CHECK-NEXT:    flw ft1, 12(sp) # 4-byte Folded Reload
-; CHECK-NEXT:    fcvt.l.s a2, ft1, rtz
-; CHECK-NEXT:    bge s3, a4, .LBB6_11
-; CHECK-NEXT:  # %bb.3: # %entry
-; CHECK-NEXT:    fcvt.l.s a3, ft0, rtz
+; CHECK-NEXT:    fcvt.l.s a3, fs0, rtz
 ; CHECK-NEXT:    bge a2, a4, .LBB6_12
-; CHECK-NEXT:  .LBB6_4: # %entry
+; CHECK-NEXT:  .LBB6_3: # %entry
 ; CHECK-NEXT:    bge a3, a4, .LBB6_13
-; CHECK-NEXT:  .LBB6_5: # %entry
+; CHECK-NEXT:  .LBB6_4: # %entry
 ; CHECK-NEXT:    bge a1, a3, .LBB6_14
-; CHECK-NEXT:  .LBB6_6: # %entry
+; CHECK-NEXT:  .LBB6_5: # %entry
 ; CHECK-NEXT:    bge a1, a2, .LBB6_15
+; CHECK-NEXT:  .LBB6_6: # %entry
+; CHECK-NEXT:    bge a1, s2, .LBB6_16
 ; CHECK-NEXT:  .LBB6_7: # %entry
-; CHECK-NEXT:    bge a1, s3, .LBB6_16
+; CHECK-NEXT:    blt a1, a0, .LBB6_9
 ; CHECK-NEXT:  .LBB6_8: # %entry
-; CHECK-NEXT:    blt a1, a0, .LBB6_10
-; CHECK-NEXT:  .LBB6_9: # %entry
 ; CHECK-NEXT:    lui a0, 524288
-; CHECK-NEXT:  .LBB6_10: # %entry
+; CHECK-NEXT:  .LBB6_9: # %entry
 ; CHECK-NEXT:    sw a0, 12(s0)
-; CHECK-NEXT:    sw s3, 8(s0)
+; CHECK-NEXT:    sw s2, 8(s0)
 ; CHECK-NEXT:    sw a2, 4(s0)
 ; CHECK-NEXT:    sw a3, 0(s0)
 ; CHECK-NEXT:    ld ra, 56(sp) # 8-byte Folded Reload
@@ -367,29 +352,35 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) {
 ; CHECK-NEXT:    ld s1, 40(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s2, 32(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s3, 24(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s4, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs0, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs1, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs2, 0(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    addi sp, sp, 64
 ; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB6_10: # %entry
+; CHECK-NEXT:    mv a0, a4
+; CHECK-NEXT:    fcvt.l.s a2, fs1, rtz
+; CHECK-NEXT:    blt s2, a4, .LBB6_2
 ; CHECK-NEXT:  .LBB6_11: # %entry
-; CHECK-NEXT:    mv s3, a4
-; CHECK-NEXT:    fcvt.l.s a3, ft0, rtz
-; CHECK-NEXT:    blt a2, a4, .LBB6_4
+; CHECK-NEXT:    mv s2, a4
+; CHECK-NEXT:    fcvt.l.s a3, fs0, rtz
+; CHECK-NEXT:    blt a2, a4, .LBB6_3
 ; CHECK-NEXT:  .LBB6_12: # %entry
 ; CHECK-NEXT:    mv a2, a4
-; CHECK-NEXT:    blt a3, a4, .LBB6_5
+; CHECK-NEXT:    blt a3, a4, .LBB6_4
 ; CHECK-NEXT:  .LBB6_13: # %entry
 ; CHECK-NEXT:    mv a3, a4
-; CHECK-NEXT:    blt a1, a3, .LBB6_6
+; CHECK-NEXT:    blt a1, a3, .LBB6_5
 ; CHECK-NEXT:  .LBB6_14: # %entry
 ; CHECK-NEXT:    lui a3, 524288
-; CHECK-NEXT:    blt a1, a2, .LBB6_7
+; CHECK-NEXT:    blt a1, a2, .LBB6_6
 ; CHECK-NEXT:  .LBB6_15: # %entry
 ; CHECK-NEXT:    lui a2, 524288
-; CHECK-NEXT:    blt a1, s3, .LBB6_8
+; CHECK-NEXT:    blt a1, s2, .LBB6_7
 ; CHECK-NEXT:  .LBB6_16: # %entry
-; CHECK-NEXT:    lui s3, 524288
-; CHECK-NEXT:    bge a1, a0, .LBB6_9
-; CHECK-NEXT:    j .LBB6_10
+; CHECK-NEXT:    lui s2, 524288
+; CHECK-NEXT:    bge a1, a0, .LBB6_8
+; CHECK-NEXT:    j .LBB6_9
 entry:
   %conv = fptosi <4 x half> %x to <4 x i64>
   %0 = icmp slt <4 x i64> %conv, <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>
@@ -410,73 +401,75 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) {
 ; CHECK-NEXT:    sd s1, 40(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    sd s2, 32(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    sd s3, 24(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s4, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs0, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs1, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs2, 0(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_offset ra, -8
 ; CHECK-NEXT:    .cfi_offset s0, -16
 ; CHECK-NEXT:    .cfi_offset s1, -24
 ; CHECK-NEXT:    .cfi_offset s2, -32
 ; CHECK-NEXT:    .cfi_offset s3, -40
-; CHECK-NEXT:    .cfi_offset s4, -48
-; CHECK-NEXT:    lhu s2, 0(a1)
-; CHECK-NEXT:    lhu s1, 24(a1)
+; CHECK-NEXT:    .cfi_offset fs0, -48
+; CHECK-NEXT:    .cfi_offset fs1, -56
+; CHECK-NEXT:    .cfi_offset fs2, -64
+; CHECK-NEXT:    lhu s1, 0(a1)
+; CHECK-NEXT:    lhu s2, 24(a1)
 ; CHECK-NEXT:    lhu s3, 16(a1)
 ; CHECK-NEXT:    lhu a1, 8(a1)
 ; CHECK-NEXT:    mv s0, a0
 ; CHECK-NEXT:    mv a0, a1
 ; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    mv s4, a0
+; CHECK-NEXT:    fmv.s fs2, fa0
 ; CHECK-NEXT:    mv a0, s3
 ; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    mv s3, a0
-; CHECK-NEXT:    mv a0, s1
-; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    mv s1, a0
-; CHECK-NEXT:    fmv.w.x ft0, s3
-; CHECK-NEXT:    fsw ft0, 12(sp) # 4-byte Folded Spill
-; CHECK-NEXT:    fmv.w.x ft0, s4
-; CHECK-NEXT:    fcvt.lu.s s3, ft0, rtz
+; CHECK-NEXT:    fmv.s fs1, fa0
 ; CHECK-NEXT:    mv a0, s2
 ; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    fcvt.lu.s a0, ft0, rtz
+; CHECK-NEXT:    fmv.s fs0, fa0
+; CHECK-NEXT:    fcvt.lu.s s2, fs2, rtz
+; CHECK-NEXT:    mv a0, s1
+; CHECK-NEXT:    call __extendhfsf2@plt
+; CHECK-NEXT:    fcvt.lu.s a0, fa0, rtz
 ; CHECK-NEXT:    li a1, -1
 ; CHECK-NEXT:    srli a1, a1, 32
-; CHECK-NEXT:    bltu a0, a1, .LBB7_2
+; CHECK-NEXT:    bgeu a0, a1, .LBB7_6
 ; CHECK-NEXT:  # %bb.1: # %entry
-; CHECK-NEXT:    mv a0, a1
+; CHECK-NEXT:    fcvt.lu.s a2, fs1, rtz
+; CHECK-NEXT:    bgeu s2, a1, .LBB7_7
 ; CHECK-NEXT:  .LBB7_2: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, s1
-; CHECK-NEXT:    flw ft1, 12(sp) # 4-byte Folded Reload
-; CHECK-NEXT:    fcvt.lu.s a2, ft1, rtz
-; CHECK-NEXT:    bgeu s3, a1, .LBB7_7
-; CHECK-NEXT:  # %bb.3: # %entry
-; CHECK-NEXT:    fcvt.lu.s a3, ft0, rtz
+; CHECK-NEXT:    fcvt.lu.s a3, fs0, rtz
 ; CHECK-NEXT:    bgeu a2, a1, .LBB7_8
+; CHECK-NEXT:  .LBB7_3: # %entry
+; CHECK-NEXT:    bltu a3, a1, .LBB7_5
 ; CHECK-NEXT:  .LBB7_4: # %entry
-; CHECK-NEXT:    bltu a3, a1, .LBB7_6
-; CHECK-NEXT:  .LBB7_5: # %entry
 ; CHECK-NEXT:    mv a3, a1
-; CHECK-NEXT:  .LBB7_6: # %entry
+; CHECK-NEXT:  .LBB7_5: # %entry
 ; CHECK-NEXT:    sw a3, 12(s0)
 ; CHECK-NEXT:    sw a2, 8(s0)
-; CHECK-NEXT:    sw s3, 4(s0)
+; CHECK-NEXT:    sw s2, 4(s0)
 ; CHECK-NEXT:    sw a0, 0(s0)
 ; CHECK-NEXT:    ld ra, 56(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s0, 48(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s1, 40(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s2, 32(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s3, 24(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s4, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs0, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs1, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs2, 0(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    addi sp, sp, 64
 ; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB7_6: # %entry
+; CHECK-NEXT:    mv a0, a1
+; CHECK-NEXT:    fcvt.lu.s a2, fs1, rtz
+; CHECK-NEXT:    bltu s2, a1, .LBB7_2
 ; CHECK-NEXT:  .LBB7_7: # %entry
-; CHECK-NEXT:    mv s3, a1
-; CHECK-NEXT:    fcvt.lu.s a3, ft0, rtz
-; CHECK-NEXT:    bltu a2, a1, .LBB7_4
+; CHECK-NEXT:    mv s2, a1
+; CHECK-NEXT:    fcvt.lu.s a3, fs0, rtz
+; CHECK-NEXT:    bltu a2, a1, .LBB7_3
 ; CHECK-NEXT:  .LBB7_8: # %entry
 ; CHECK-NEXT:    mv a2, a1
-; CHECK-NEXT:    bgeu a3, a1, .LBB7_5
-; CHECK-NEXT:    j .LBB7_6
+; CHECK-NEXT:    bgeu a3, a1, .LBB7_4
+; CHECK-NEXT:    j .LBB7_5
 entry:
   %conv = fptoui <4 x half> %x to <4 x i64>
   %0 = icmp ult <4 x i64> %conv, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
@@ -495,63 +488,59 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) {
 ; CHECK-NEXT:    sd s1, 40(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    sd s2, 32(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    sd s3, 24(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s4, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs0, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs1, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs2, 0(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_offset ra, -8
 ; CHECK-NEXT:    .cfi_offset s0, -16
 ; CHECK-NEXT:    .cfi_offset s1, -24
 ; CHECK-NEXT:    .cfi_offset s2, -32
 ; CHECK-NEXT:    .cfi_offset s3, -40
-; CHECK-NEXT:    .cfi_offset s4, -48
-; CHECK-NEXT:    lhu s2, 24(a1)
-; CHECK-NEXT:    lhu s1, 0(a1)
+; CHECK-NEXT:    .cfi_offset fs0, -48
+; CHECK-NEXT:    .cfi_offset fs1, -56
+; CHECK-NEXT:    .cfi_offset fs2, -64
+; CHECK-NEXT:    lhu s1, 24(a1)
+; CHECK-NEXT:    lhu s2, 0(a1)
 ; CHECK-NEXT:    lhu s3, 8(a1)
 ; CHECK-NEXT:    lhu a1, 16(a1)
 ; CHECK-NEXT:    mv s0, a0
 ; CHECK-NEXT:    mv a0, a1
 ; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    mv s4, a0
+; CHECK-NEXT:    fmv.s fs2, fa0
 ; CHECK-NEXT:    mv a0, s3
 ; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    mv s3, a0
-; CHECK-NEXT:    mv a0, s1
-; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    mv s1, a0
-; CHECK-NEXT:    fmv.w.x ft0, s3
-; CHECK-NEXT:    fsw ft0, 12(sp) # 4-byte Folded Spill
-; CHECK-NEXT:    fmv.w.x ft0, s4
-; CHECK-NEXT:    fcvt.l.s s3, ft0, rtz
+; CHECK-NEXT:    fmv.s fs1, fa0
 ; CHECK-NEXT:    mv a0, s2
 ; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    fcvt.l.s a0, ft0, rtz
+; CHECK-NEXT:    fmv.s fs0, fa0
+; CHECK-NEXT:    fcvt.l.s s2, fs2, rtz
+; CHECK-NEXT:    mv a0, s1
+; CHECK-NEXT:    call __extendhfsf2@plt
+; CHECK-NEXT:    fcvt.l.s a0, fa0, rtz
 ; CHECK-NEXT:    li a1, -1
 ; CHECK-NEXT:    srli a3, a1, 32
-; CHECK-NEXT:    blt a0, a3, .LBB8_2
+; CHECK-NEXT:    bge a0, a3, .LBB8_10
 ; CHECK-NEXT:  # %bb.1: # %entry
-; CHECK-NEXT:    mv a0, a3
+; CHECK-NEXT:    fcvt.l.s a1, fs1, rtz
+; CHECK-NEXT:    bge s2, a3, .LBB8_11
 ; CHECK-NEXT:  .LBB8_2: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, s1
-; CHECK-NEXT:    flw ft1, 12(sp) # 4-byte Folded Reload
-; CHECK-NEXT:    fcvt.l.s a1, ft1, rtz
-; CHECK-NEXT:    bge s3, a3, .LBB8_11
-; CHECK-NEXT:  # %bb.3: # %entry
-; CHECK-NEXT:    fcvt.l.s a2, ft0, rtz
+; CHECK-NEXT:    fcvt.l.s a2, fs0, rtz
 ; CHECK-NEXT:    bge a1, a3, .LBB8_12
-; CHECK-NEXT:  .LBB8_4: # %entry
+; CHECK-NEXT:  .LBB8_3: # %entry
 ; CHECK-NEXT:    bge a2, a3, .LBB8_13
-; CHECK-NEXT:  .LBB8_5: # %entry
+; CHECK-NEXT:  .LBB8_4: # %entry
 ; CHECK-NEXT:    blez a2, .LBB8_14
-; CHECK-NEXT:  .LBB8_6: # %entry
+; CHECK-NEXT:  .LBB8_5: # %entry
 ; CHECK-NEXT:    blez a1, .LBB8_15
+; CHECK-NEXT:  .LBB8_6: # %entry
+; CHECK-NEXT:    blez s2, .LBB8_16
 ; CHECK-NEXT:  .LBB8_7: # %entry
-; CHECK-NEXT:    blez s3, .LBB8_16
+; CHECK-NEXT:    bgtz a0, .LBB8_9
 ; CHECK-NEXT:  .LBB8_8: # %entry
-; CHECK-NEXT:    bgtz a0, .LBB8_10
-; CHECK-NEXT:  .LBB8_9: # %entry
 ; CHECK-NEXT:    li a0, 0
-; CHECK-NEXT:  .LBB8_10: # %entry
+; CHECK-NEXT:  .LBB8_9: # %entry
 ; CHECK-NEXT:    sw a0, 12(s0)
-; CHECK-NEXT:    sw s3, 8(s0)
+; CHECK-NEXT:    sw s2, 8(s0)
 ; CHECK-NEXT:    sw a1, 4(s0)
 ; CHECK-NEXT:    sw a2, 0(s0)
 ; CHECK-NEXT:    ld ra, 56(sp) # 8-byte Folded Reload
@@ -559,29 +548,35 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) {
 ; CHECK-NEXT:    ld s1, 40(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s2, 32(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s3, 24(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s4, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs0, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs1, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs2, 0(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    addi sp, sp, 64
 ; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB8_10: # %entry
+; CHECK-NEXT:    mv a0, a3
+; CHECK-NEXT:    fcvt.l.s a1, fs1, rtz
+; CHECK-NEXT:    blt s2, a3, .LBB8_2
 ; CHECK-NEXT:  .LBB8_11: # %entry
-; CHECK-NEXT:    mv s3, a3
-; CHECK-NEXT:    fcvt.l.s a2, ft0, rtz
-; CHECK-NEXT:    blt a1, a3, .LBB8_4
+; CHECK-NEXT:    mv s2, a3
+; CHECK-NEXT:    fcvt.l.s a2, fs0, rtz
+; CHECK-NEXT:    blt a1, a3, .LBB8_3
 ; CHECK-NEXT:  .LBB8_12: # %entry
 ; CHECK-NEXT:    mv a1, a3
-; CHECK-NEXT:    blt a2, a3, .LBB8_5
+; CHECK-NEXT:    blt a2, a3, .LBB8_4
 ; CHECK-NEXT:  .LBB8_13: # %entry
 ; CHECK-NEXT:    mv a2, a3
-; CHECK-NEXT:    bgtz a2, .LBB8_6
+; CHECK-NEXT:    bgtz a2, .LBB8_5
 ; CHECK-NEXT:  .LBB8_14: # %entry
 ; CHECK-NEXT:    li a2, 0
-; CHECK-NEXT:    bgtz a1, .LBB8_7
+; CHECK-NEXT:    bgtz a1, .LBB8_6
 ; CHECK-NEXT:  .LBB8_15: # %entry
 ; CHECK-NEXT:    li a1, 0
-; CHECK-NEXT:    bgtz s3, .LBB8_8
+; CHECK-NEXT:    bgtz s2, .LBB8_7
 ; CHECK-NEXT:  .LBB8_16: # %entry
-; CHECK-NEXT:    li s3, 0
-; CHECK-NEXT:    blez a0, .LBB8_9
-; CHECK-NEXT:    j .LBB8_10
+; CHECK-NEXT:    li s2, 0
+; CHECK-NEXT:    blez a0, .LBB8_8
+; CHECK-NEXT:    j .LBB8_9
 entry:
   %conv = fptosi <4 x half> %x to <4 x i64>
   %0 = icmp slt <4 x i64> %conv, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
@@ -597,12 +592,10 @@ entry:
 define <2 x i16> @stest_f64i16(<2 x double> %x) {
 ; CHECK-LABEL: stest_f64i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a1
-; CHECK-NEXT:    fmv.d.x ft1, a0
-; CHECK-NEXT:    fcvt.w.d a1, ft0, rtz
+; CHECK-NEXT:    fcvt.w.d a1, fa1, rtz
 ; CHECK-NEXT:    lui a0, 8
 ; CHECK-NEXT:    addiw a2, a0, -1
-; CHECK-NEXT:    fcvt.w.d a0, ft1, rtz
+; CHECK-NEXT:    fcvt.w.d a0, fa0, rtz
 ; CHECK-NEXT:    bge a1, a2, .LBB9_5
 ; CHECK-NEXT:  # %bb.1: # %entry
 ; CHECK-NEXT:    bge a0, a2, .LBB9_6
@@ -639,12 +632,10 @@ entry:
 define <2 x i16> @utest_f64i16(<2 x double> %x) {
 ; CHECK-LABEL: utest_f64i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    fmv.d.x ft1, a1
-; CHECK-NEXT:    fcvt.wu.d a0, ft0, rtz
+; CHECK-NEXT:    fcvt.wu.d a0, fa0, rtz
 ; CHECK-NEXT:    lui a1, 16
 ; CHECK-NEXT:    addiw a2, a1, -1
-; CHECK-NEXT:    fcvt.wu.d a1, ft1, rtz
+; CHECK-NEXT:    fcvt.wu.d a1, fa1, rtz
 ; CHECK-NEXT:    bgeu a0, a2, .LBB10_3
 ; CHECK-NEXT:  # %bb.1: # %entry
 ; CHECK-NEXT:    bgeu a1, a2, .LBB10_4
@@ -667,12 +658,10 @@ entry:
 define <2 x i16> @ustest_f64i16(<2 x double> %x) {
 ; CHECK-LABEL: ustest_f64i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a1
-; CHECK-NEXT:    fmv.d.x ft1, a0
-; CHECK-NEXT:    fcvt.w.d a1, ft0, rtz
+; CHECK-NEXT:    fcvt.w.d a1, fa1, rtz
 ; CHECK-NEXT:    lui a0, 16
 ; CHECK-NEXT:    addiw a2, a0, -1
-; CHECK-NEXT:    fcvt.w.d a0, ft1, rtz
+; CHECK-NEXT:    fcvt.w.d a0, fa0, rtz
 ; CHECK-NEXT:    bge a1, a2, .LBB11_5
 ; CHECK-NEXT:  # %bb.1: # %entry
 ; CHECK-NEXT:    bge a0, a2, .LBB11_6
@@ -707,63 +696,61 @@ entry:
 define <4 x i16> @stest_f32i16(<4 x float> %x) {
 ; CHECK-LABEL: stest_f32i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft1, a4
-; CHECK-NEXT:    fmv.w.x ft2, a3
-; CHECK-NEXT:    fmv.w.x ft0, a2
-; CHECK-NEXT:    fcvt.w.s a2, ft1, rtz
-; CHECK-NEXT:    lui a3, 8
-; CHECK-NEXT:    addiw a5, a3, -1
-; CHECK-NEXT:    fcvt.w.s a3, ft2, rtz
-; CHECK-NEXT:    blt a2, a5, .LBB12_2
+; CHECK-NEXT:    fcvt.w.s a1, fa3, rtz
+; CHECK-NEXT:    lui a2, 8
+; CHECK-NEXT:    addiw a5, a2, -1
+; CHECK-NEXT:    fcvt.w.s a2, fa2, rtz
+; CHECK-NEXT:    bge a1, a5, .LBB12_10
 ; CHECK-NEXT:  # %bb.1: # %entry
-; CHECK-NEXT:    mv a2, a5
+; CHECK-NEXT:    fcvt.w.s a3, fa1, rtz
+; CHECK-NEXT:    bge a2, a5, .LBB12_11
 ; CHECK-NEXT:  .LBB12_2: # %entry
-; CHECK-NEXT:    fmv.w.x ft1, a1
-; CHECK-NEXT:    fcvt.w.s a1, ft0, rtz
-; CHECK-NEXT:    bge a3, a5, .LBB12_11
-; CHECK-NEXT:  # %bb.3: # %entry
-; CHECK-NEXT:    fcvt.w.s a4, ft1, rtz
-; CHECK-NEXT:    bge a1, a5, .LBB12_12
-; CHECK-NEXT:  .LBB12_4: # %entry
+; CHECK-NEXT:    fcvt.w.s a4, fa0, rtz
+; CHECK-NEXT:    bge a3, a5, .LBB12_12
+; CHECK-NEXT:  .LBB12_3: # %entry
 ; CHECK-NEXT:    bge a4, a5, .LBB12_13
-; CHECK-NEXT:  .LBB12_5: # %entry
+; CHECK-NEXT:  .LBB12_4: # %entry
 ; CHECK-NEXT:    lui a5, 1048568
 ; CHECK-NEXT:    bge a5, a4, .LBB12_14
+; CHECK-NEXT:  .LBB12_5: # %entry
+; CHECK-NEXT:    bge a5, a3, .LBB12_15
 ; CHECK-NEXT:  .LBB12_6: # %entry
-; CHECK-NEXT:    bge a5, a1, .LBB12_15
+; CHECK-NEXT:    bge a5, a2, .LBB12_16
 ; CHECK-NEXT:  .LBB12_7: # %entry
-; CHECK-NEXT:    bge a5, a3, .LBB12_16
+; CHECK-NEXT:    blt a5, a1, .LBB12_9
 ; CHECK-NEXT:  .LBB12_8: # %entry
-; CHECK-NEXT:    blt a5, a2, .LBB12_10
+; CHECK-NEXT:    lui a1, 1048568
 ; CHECK-NEXT:  .LBB12_9: # %entry
-; CHECK-NEXT:    lui a2, 1048568
-; CHECK-NEXT:  .LBB12_10: # %entry
-; CHECK-NEXT:    sh a2, 6(a0)
-; CHECK-NEXT:    sh a3, 4(a0)
-; CHECK-NEXT:    sh a1, 2(a0)
+; CHECK-NEXT:    sh a1, 6(a0)
+; CHECK-NEXT:    sh a2, 4(a0)
+; CHECK-NEXT:    sh a3, 2(a0)
 ; CHECK-NEXT:    sh a4, 0(a0)
 ; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB12_10: # %entry
+; CHECK-NEXT:    mv a1, a5
+; CHECK-NEXT:    fcvt.w.s a3, fa1, rtz
+; CHECK-NEXT:    blt a2, a5, .LBB12_2
 ; CHECK-NEXT:  .LBB12_11: # %entry
-; CHECK-NEXT:    mv a3, a5
-; CHECK-NEXT:    fcvt.w.s a4, ft1, rtz
-; CHECK-NEXT:    blt a1, a5, .LBB12_4
+; CHECK-NEXT:    mv a2, a5
+; CHECK-NEXT:    fcvt.w.s a4, fa0, rtz
+; CHECK-NEXT:    blt a3, a5, .LBB12_3
 ; CHECK-NEXT:  .LBB12_12: # %entry
-; CHECK-NEXT:    mv a1, a5
-; CHECK-NEXT:    blt a4, a5, .LBB12_5
+; CHECK-NEXT:    mv a3, a5
+; CHECK-NEXT:    blt a4, a5, .LBB12_4
 ; CHECK-NEXT:  .LBB12_13: # %entry
 ; CHECK-NEXT:    mv a4, a5
 ; CHECK-NEXT:    lui a5, 1048568
-; CHECK-NEXT:    blt a5, a4, .LBB12_6
+; CHECK-NEXT:    blt a5, a4, .LBB12_5
 ; CHECK-NEXT:  .LBB12_14: # %entry
 ; CHECK-NEXT:    lui a4, 1048568
-; CHECK-NEXT:    blt a5, a1, .LBB12_7
+; CHECK-NEXT:    blt a5, a3, .LBB12_6
 ; CHECK-NEXT:  .LBB12_15: # %entry
-; CHECK-NEXT:    lui a1, 1048568
-; CHECK-NEXT:    blt a5, a3, .LBB12_8
-; CHECK-NEXT:  .LBB12_16: # %entry
 ; CHECK-NEXT:    lui a3, 1048568
-; CHECK-NEXT:    bge a5, a2, .LBB12_9
-; CHECK-NEXT:    j .LBB12_10
+; CHECK-NEXT:    blt a5, a2, .LBB12_7
+; CHECK-NEXT:  .LBB12_16: # %entry
+; CHECK-NEXT:    lui a2, 1048568
+; CHECK-NEXT:    bge a5, a1, .LBB12_8
+; CHECK-NEXT:    j .LBB12_9
 entry:
   %conv = fptosi <4 x float> %x to <4 x i32>
   %0 = icmp slt <4 x i32> %conv, <i32 32767, i32 32767, i32 32767, i32 32767>
@@ -777,41 +764,39 @@ entry:
 define <4 x i16> @utest_f32i16(<4 x float> %x) {
 ; CHECK-LABEL: utest_f32i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft1, a1
-; CHECK-NEXT:    fmv.w.x ft2, a2
-; CHECK-NEXT:    fmv.w.x ft0, a3
-; CHECK-NEXT:    fcvt.wu.s a1, ft1, rtz
+; CHECK-NEXT:    fcvt.wu.s a1, fa0, rtz
 ; CHECK-NEXT:    lui a2, 16
 ; CHECK-NEXT:    addiw a3, a2, -1
-; CHECK-NEXT:    fcvt.wu.s a2, ft2, rtz
-; CHECK-NEXT:    bltu a1, a3, .LBB13_2
+; CHECK-NEXT:    fcvt.wu.s a2, fa1, rtz
+; CHECK-NEXT:    bgeu a1, a3, .LBB13_6
 ; CHECK-NEXT:  # %bb.1: # %entry
-; CHECK-NEXT:    mv a1, a3
-; CHECK-NEXT:  .LBB13_2: # %entry
-; CHECK-NEXT:    fmv.w.x ft1, a4
-; CHECK-NEXT:    fcvt.wu.s a4, ft0, rtz
+; CHECK-NEXT:    fcvt.wu.s a4, fa2, rtz
 ; CHECK-NEXT:    bgeu a2, a3, .LBB13_7
-; CHECK-NEXT:  # %bb.3: # %entry
-; CHECK-NEXT:    fcvt.wu.s a5, ft1, rtz
+; CHECK-NEXT:  .LBB13_2: # %entry
+; CHECK-NEXT:    fcvt.wu.s a5, fa3, rtz
 ; CHECK-NEXT:    bgeu a4, a3, .LBB13_8
+; CHECK-NEXT:  .LBB13_3: # %entry
+; CHECK-NEXT:    bltu a5, a3, .LBB13_5
 ; CHECK-NEXT:  .LBB13_4: # %entry
-; CHECK-NEXT:    bltu a5, a3, .LBB13_6
-; CHECK-NEXT:  .LBB13_5: # %entry
 ; CHECK-NEXT:    mv a5, a3
-; CHECK-NEXT:  .LBB13_6: # %entry
+; CHECK-NEXT:  .LBB13_5: # %entry
 ; CHECK-NEXT:    sh a5, 6(a0)
 ; CHECK-NEXT:    sh a4, 4(a0)
 ; CHECK-NEXT:    sh a2, 2(a0)
 ; CHECK-NEXT:    sh a1, 0(a0)
 ; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB13_6: # %entry
+; CHECK-NEXT:    mv a1, a3
+; CHECK-NEXT:    fcvt.wu.s a4, fa2, rtz
+; CHECK-NEXT:    bltu a2, a3, .LBB13_2
 ; CHECK-NEXT:  .LBB13_7: # %entry
 ; CHECK-NEXT:    mv a2, a3
-; CHECK-NEXT:    fcvt.wu.s a5, ft1, rtz
-; CHECK-NEXT:    bltu a4, a3, .LBB13_4
+; CHECK-NEXT:    fcvt.wu.s a5, fa3, rtz
+; CHECK-NEXT:    bltu a4, a3, .LBB13_3
 ; CHECK-NEXT:  .LBB13_8: # %entry
 ; CHECK-NEXT:    mv a4, a3
-; CHECK-NEXT:    bgeu a5, a3, .LBB13_5
-; CHECK-NEXT:    j .LBB13_6
+; CHECK-NEXT:    bgeu a5, a3, .LBB13_4
+; CHECK-NEXT:    j .LBB13_5
 entry:
   %conv = fptoui <4 x float> %x to <4 x i32>
   %0 = icmp ult <4 x i32> %conv, <i32 65535, i32 65535, i32 65535, i32 65535>
@@ -823,61 +808,59 @@ entry:
 define <4 x i16> @ustest_f32i16(<4 x float> %x) {
 ; CHECK-LABEL: ustest_f32i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft1, a4
-; CHECK-NEXT:    fmv.w.x ft2, a3
-; CHECK-NEXT:    fmv.w.x ft0, a2
-; CHECK-NEXT:    fcvt.w.s a2, ft1, rtz
-; CHECK-NEXT:    lui a3, 16
-; CHECK-NEXT:    addiw a5, a3, -1
-; CHECK-NEXT:    fcvt.w.s a3, ft2, rtz
-; CHECK-NEXT:    blt a2, a5, .LBB14_2
+; CHECK-NEXT:    fcvt.w.s a1, fa3, rtz
+; CHECK-NEXT:    lui a2, 16
+; CHECK-NEXT:    addiw a5, a2, -1
+; CHECK-NEXT:    fcvt.w.s a2, fa2, rtz
+; CHECK-NEXT:    bge a1, a5, .LBB14_10
 ; CHECK-NEXT:  # %bb.1: # %entry
-; CHECK-NEXT:    mv a2, a5
+; CHECK-NEXT:    fcvt.w.s a3, fa1, rtz
+; CHECK-NEXT:    bge a2, a5, .LBB14_11
 ; CHECK-NEXT:  .LBB14_2: # %entry
-; CHECK-NEXT:    fmv.w.x ft1, a1
-; CHECK-NEXT:    fcvt.w.s a1, ft0, rtz
-; CHECK-NEXT:    bge a3, a5, .LBB14_11
-; CHECK-NEXT:  # %bb.3: # %entry
-; CHECK-NEXT:    fcvt.w.s a4, ft1, rtz
-; CHECK-NEXT:    bge a1, a5, .LBB14_12
-; CHECK-NEXT:  .LBB14_4: # %entry
+; CHECK-NEXT:    fcvt.w.s a4, fa0, rtz
+; CHECK-NEXT:    bge a3, a5, .LBB14_12
+; CHECK-NEXT:  .LBB14_3: # %entry
 ; CHECK-NEXT:    bge a4, a5, .LBB14_13
-; CHECK-NEXT:  .LBB14_5: # %entry
+; CHECK-NEXT:  .LBB14_4: # %entry
 ; CHECK-NEXT:    blez a4, .LBB14_14
+; CHECK-NEXT:  .LBB14_5: # %entry
+; CHECK-NEXT:    blez a3, .LBB14_15
 ; CHECK-NEXT:  .LBB14_6: # %entry
-; CHECK-NEXT:    blez a1, .LBB14_15
+; CHECK-NEXT:    blez a2, .LBB14_16
 ; CHECK-NEXT:  .LBB14_7: # %entry
-; CHECK-NEXT:    blez a3, .LBB14_16
+; CHECK-NEXT:    bgtz a1, .LBB14_9
 ; CHECK-NEXT:  .LBB14_8: # %entry
-; CHECK-NEXT:    bgtz a2, .LBB14_10
+; CHECK-NEXT:    li a1, 0
 ; CHECK-NEXT:  .LBB14_9: # %entry
-; CHECK-NEXT:    li a2, 0
-; CHECK-NEXT:  .LBB14_10: # %entry
-; CHECK-NEXT:    sh a2, 6(a0)
-; CHECK-NEXT:    sh a3, 4(a0)
-; CHECK-NEXT:    sh a1, 2(a0)
+; CHECK-NEXT:    sh a1, 6(a0)
+; CHECK-NEXT:    sh a2, 4(a0)
+; CHECK-NEXT:    sh a3, 2(a0)
 ; CHECK-NEXT:    sh a4, 0(a0)
 ; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB14_10: # %entry
+; CHECK-NEXT:    mv a1, a5
+; CHECK-NEXT:    fcvt.w.s a3, fa1, rtz
+; CHECK-NEXT:    blt a2, a5, .LBB14_2
 ; CHECK-NEXT:  .LBB14_11: # %entry
-; CHECK-NEXT:    mv a3, a5
-; CHECK-NEXT:    fcvt.w.s a4, ft1, rtz
-; CHECK-NEXT:    blt a1, a5, .LBB14_4
+; CHECK-NEXT:    mv a2, a5
+; CHECK-NEXT:    fcvt.w.s a4, fa0, rtz
+; CHECK-NEXT:    blt a3, a5, .LBB14_3
 ; CHECK-NEXT:  .LBB14_12: # %entry
-; CHECK-NEXT:    mv a1, a5
-; CHECK-NEXT:    blt a4, a5, .LBB14_5
+; CHECK-NEXT:    mv a3, a5
+; CHECK-NEXT:    blt a4, a5, .LBB14_4
 ; CHECK-NEXT:  .LBB14_13: # %entry
 ; CHECK-NEXT:    mv a4, a5
-; CHECK-NEXT:    bgtz a4, .LBB14_6
+; CHECK-NEXT:    bgtz a4, .LBB14_5
 ; CHECK-NEXT:  .LBB14_14: # %entry
 ; CHECK-NEXT:    li a4, 0
-; CHECK-NEXT:    bgtz a1, .LBB14_7
+; CHECK-NEXT:    bgtz a3, .LBB14_6
 ; CHECK-NEXT:  .LBB14_15: # %entry
-; CHECK-NEXT:    li a1, 0
-; CHECK-NEXT:    bgtz a3, .LBB14_8
-; CHECK-NEXT:  .LBB14_16: # %entry
 ; CHECK-NEXT:    li a3, 0
-; CHECK-NEXT:    blez a2, .LBB14_9
-; CHECK-NEXT:    j .LBB14_10
+; CHECK-NEXT:    bgtz a2, .LBB14_7
+; CHECK-NEXT:  .LBB14_16: # %entry
+; CHECK-NEXT:    li a2, 0
+; CHECK-NEXT:    blez a1, .LBB14_8
+; CHECK-NEXT:    j .LBB14_9
 entry:
   %conv = fptosi <4 x float> %x to <4 x i32>
   %0 = icmp slt <4 x i32> %conv, <i32 65535, i32 65535, i32 65535, i32 65535>
@@ -891,18 +874,24 @@ entry:
 define <8 x i16> @stest_f16i16(<8 x half> %x) {
 ; CHECK-LABEL: stest_f16i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -96
-; CHECK-NEXT:    .cfi_def_cfa_offset 96
-; CHECK-NEXT:    sd ra, 88(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s0, 80(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s1, 72(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s2, 64(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s3, 56(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s4, 48(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s5, 40(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s6, 32(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s7, 24(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s8, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    addi sp, sp, -128
+; CHECK-NEXT:    .cfi_def_cfa_offset 128
+; CHECK-NEXT:    sd ra, 120(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s1, 104(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s2, 96(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s3, 88(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s4, 80(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s5, 72(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s6, 64(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s7, 56(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs0, 48(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs1, 40(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs2, 32(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs3, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs4, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs5, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs6, 0(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_offset ra, -8
 ; CHECK-NEXT:    .cfi_offset s0, -16
 ; CHECK-NEXT:    .cfi_offset s1, -24
@@ -912,157 +901,169 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) {
 ; CHECK-NEXT:    .cfi_offset s5, -56
 ; CHECK-NEXT:    .cfi_offset s6, -64
 ; CHECK-NEXT:    .cfi_offset s7, -72
-; CHECK-NEXT:    .cfi_offset s8, -80
-; CHECK-NEXT:    lhu s6, 56(a1)
-; CHECK-NEXT:    lhu s1, 0(a1)
-; CHECK-NEXT:    lhu s2, 8(a1)
-; CHECK-NEXT:    lhu s3, 16(a1)
-; CHECK-NEXT:    lhu s4, 24(a1)
-; CHECK-NEXT:    lhu s5, 32(a1)
+; CHECK-NEXT:    .cfi_offset fs0, -80
+; CHECK-NEXT:    .cfi_offset fs1, -88
+; CHECK-NEXT:    .cfi_offset fs2, -96
+; CHECK-NEXT:    .cfi_offset fs3, -104
+; CHECK-NEXT:    .cfi_offset fs4, -112
+; CHECK-NEXT:    .cfi_offset fs5, -120
+; CHECK-NEXT:    .cfi_offset fs6, -128
+; CHECK-NEXT:    lhu s1, 56(a1)
+; CHECK-NEXT:    lhu s2, 0(a1)
+; CHECK-NEXT:    lhu s3, 8(a1)
+; CHECK-NEXT:    lhu s4, 16(a1)
+; CHECK-NEXT:    lhu s5, 24(a1)
+; CHECK-NEXT:    lhu s6, 32(a1)
 ; CHECK-NEXT:    lhu s7, 40(a1)
 ; CHECK-NEXT:    lhu a1, 48(a1)
 ; CHECK-NEXT:    mv s0, a0
 ; CHECK-NEXT:    mv a0, a1
 ; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    mv s8, a0
+; CHECK-NEXT:    fmv.s fs6, fa0
 ; CHECK-NEXT:    mv a0, s7
 ; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    mv s7, a0
+; CHECK-NEXT:    fmv.s fs5, fa0
+; CHECK-NEXT:    mv a0, s6
+; CHECK-NEXT:    call __extendhfsf2@plt
+; CHECK-NEXT:    fmv.s fs4, fa0
 ; CHECK-NEXT:    mv a0, s5
 ; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    mv s5, a0
+; CHECK-NEXT:    fmv.s fs3, fa0
 ; CHECK-NEXT:    mv a0, s4
 ; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    mv s4, a0
+; CHECK-NEXT:    fmv.s fs2, fa0
 ; CHECK-NEXT:    mv a0, s3
 ; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    mv s3, a0
+; CHECK-NEXT:    fmv.s fs1, fa0
 ; CHECK-NEXT:    mv a0, s2
 ; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    mv s2, a0
+; CHECK-NEXT:    fmv.s fs0, fa0
+; CHECK-NEXT:    fcvt.l.s s2, fs6, rtz
 ; CHECK-NEXT:    mv a0, s1
 ; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    mv s1, a0
-; CHECK-NEXT:    fmv.w.x ft0, s7
-; CHECK-NEXT:    fsw ft0, 12(sp) # 4-byte Folded Spill
-; CHECK-NEXT:    fmv.w.x ft0, s8
-; CHECK-NEXT:    fcvt.l.s s7, ft0, rtz
-; CHECK-NEXT:    mv a0, s6
-; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    fcvt.l.s a0, ft0, rtz
+; CHECK-NEXT:    fcvt.l.s a0, fa0, rtz
 ; CHECK-NEXT:    lui a1, 8
 ; CHECK-NEXT:    addiw a7, a1, -1
-; CHECK-NEXT:    blt a0, a7, .LBB15_2
+; CHECK-NEXT:    bge a0, a7, .LBB15_18
 ; CHECK-NEXT:  # %bb.1: # %entry
-; CHECK-NEXT:    mv a0, a7
+; CHECK-NEXT:    fcvt.l.s a1, fs5, rtz
+; CHECK-NEXT:    bge s2, a7, .LBB15_19
 ; CHECK-NEXT:  .LBB15_2: # %entry
-; CHECK-NEXT:    fmv.w.x ft1, s5
-; CHECK-NEXT:    flw ft0, 12(sp) # 4-byte Folded Reload
-; CHECK-NEXT:    fcvt.l.s a1, ft0, rtz
-; CHECK-NEXT:    blt s7, a7, .LBB15_4
-; CHECK-NEXT:  # %bb.3: # %entry
-; CHECK-NEXT:    mv s7, a7
+; CHECK-NEXT:    fcvt.l.s a2, fs4, rtz
+; CHECK-NEXT:    bge a1, a7, .LBB15_20
+; CHECK-NEXT:  .LBB15_3: # %entry
+; CHECK-NEXT:    fcvt.l.s a3, fs3, rtz
+; CHECK-NEXT:    bge a2, a7, .LBB15_21
 ; CHECK-NEXT:  .LBB15_4: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, s4
-; CHECK-NEXT:    fcvt.l.s a2, ft1, rtz
-; CHECK-NEXT:    blt a1, a7, .LBB15_6
-; CHECK-NEXT:  # %bb.5: # %entry
-; CHECK-NEXT:    mv a1, a7
-; CHECK-NEXT:  .LBB15_6: # %entry
-; CHECK-NEXT:    fmv.w.x ft1, s3
-; CHECK-NEXT:    fcvt.l.s a3, ft0, rtz
-; CHECK-NEXT:    blt a2, a7, .LBB15_8
-; CHECK-NEXT:  # %bb.7: # %entry
-; CHECK-NEXT:    mv a2, a7
-; CHECK-NEXT:  .LBB15_8: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, s2
-; CHECK-NEXT:    fcvt.l.s a4, ft1, rtz
-; CHECK-NEXT:    blt a3, a7, .LBB15_10
-; CHECK-NEXT:  # %bb.9: # %entry
-; CHECK-NEXT:    mv a3, a7
-; CHECK-NEXT:  .LBB15_10: # %entry
-; CHECK-NEXT:    fmv.w.x ft1, s1
-; CHECK-NEXT:    fcvt.l.s a5, ft0, rtz
+; CHECK-NEXT:    fcvt.l.s a4, fs2, rtz
+; CHECK-NEXT:    bge a3, a7, .LBB15_22
+; CHECK-NEXT:  .LBB15_5: # %entry
+; CHECK-NEXT:    fcvt.l.s a5, fs1, rtz
 ; CHECK-NEXT:    bge a4, a7, .LBB15_23
-; CHECK-NEXT:  # %bb.11: # %entry
-; CHECK-NEXT:    fcvt.l.s a6, ft1, rtz
+; CHECK-NEXT:  .LBB15_6: # %entry
+; CHECK-NEXT:    fcvt.l.s a6, fs0, rtz
 ; CHECK-NEXT:    bge a5, a7, .LBB15_24
-; CHECK-NEXT:  .LBB15_12: # %entry
+; CHECK-NEXT:  .LBB15_7: # %entry
 ; CHECK-NEXT:    bge a6, a7, .LBB15_25
-; CHECK-NEXT:  .LBB15_13: # %entry
+; CHECK-NEXT:  .LBB15_8: # %entry
 ; CHECK-NEXT:    lui a7, 1048568
 ; CHECK-NEXT:    bge a7, a6, .LBB15_26
-; CHECK-NEXT:  .LBB15_14: # %entry
+; CHECK-NEXT:  .LBB15_9: # %entry
 ; CHECK-NEXT:    bge a7, a5, .LBB15_27
-; CHECK-NEXT:  .LBB15_15: # %entry
+; CHECK-NEXT:  .LBB15_10: # %entry
 ; CHECK-NEXT:    bge a7, a4, .LBB15_28
-; CHECK-NEXT:  .LBB15_16: # %entry
+; CHECK-NEXT:  .LBB15_11: # %entry
 ; CHECK-NEXT:    bge a7, a3, .LBB15_29
-; CHECK-NEXT:  .LBB15_17: # %entry
+; CHECK-NEXT:  .LBB15_12: # %entry
 ; CHECK-NEXT:    bge a7, a2, .LBB15_30
-; CHECK-NEXT:  .LBB15_18: # %entry
+; CHECK-NEXT:  .LBB15_13: # %entry
 ; CHECK-NEXT:    bge a7, a1, .LBB15_31
-; CHECK-NEXT:  .LBB15_19: # %entry
-; CHECK-NEXT:    bge a7, s7, .LBB15_32
-; CHECK-NEXT:  .LBB15_20: # %entry
-; CHECK-NEXT:    blt a7, a0, .LBB15_22
-; CHECK-NEXT:  .LBB15_21: # %entry
+; CHECK-NEXT:  .LBB15_14: # %entry
+; CHECK-NEXT:    bge a7, s2, .LBB15_32
+; CHECK-NEXT:  .LBB15_15: # %entry
+; CHECK-NEXT:    blt a7, a0, .LBB15_17
+; CHECK-NEXT:  .LBB15_16: # %entry
 ; CHECK-NEXT:    lui a0, 1048568
-; CHECK-NEXT:  .LBB15_22: # %entry
+; CHECK-NEXT:  .LBB15_17: # %entry
 ; CHECK-NEXT:    sh a0, 14(s0)
-; CHECK-NEXT:    sh s7, 12(s0)
+; CHECK-NEXT:    sh s2, 12(s0)
 ; CHECK-NEXT:    sh a1, 10(s0)
 ; CHECK-NEXT:    sh a2, 8(s0)
 ; CHECK-NEXT:    sh a3, 6(s0)
 ; CHECK-NEXT:    sh a4, 4(s0)
 ; CHECK-NEXT:    sh a5, 2(s0)
 ; CHECK-NEXT:    sh a6, 0(s0)
-; CHECK-NEXT:    ld ra, 88(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s0, 80(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s1, 72(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s2, 64(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s3, 56(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s4, 48(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s5, 40(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s6, 32(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s7, 24(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s8, 16(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    addi sp, sp, 96
+; CHECK-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s1, 104(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s2, 96(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s3, 88(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s4, 80(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s5, 72(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s6, 64(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s7, 56(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs0, 48(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs1, 40(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs2, 32(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs3, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs4, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs5, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs6, 0(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    addi sp, sp, 128
 ; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB15_18: # %entry
+; CHECK-NEXT:    mv a0, a7
+; CHECK-NEXT:    fcvt.l.s a1, fs5, rtz
+; CHECK-NEXT:    blt s2, a7, .LBB15_2
+; CHECK-NEXT:  .LBB15_19: # %entry
+; CHECK-NEXT:    mv s2, a7
+; CHECK-NEXT:    fcvt.l.s a2, fs4, rtz
+; CHECK-NEXT:    blt a1, a7, .LBB15_3
+; CHECK-NEXT:  .LBB15_20: # %entry
+; CHECK-NEXT:    mv a1, a7
+; CHECK-NEXT:    fcvt.l.s a3, fs3, rtz
+; CHECK-NEXT:    blt a2, a7, .LBB15_4
+; CHECK-NEXT:  .LBB15_21: # %entry
+; CHECK-NEXT:    mv a2, a7
+; CHECK-NEXT:    fcvt.l.s a4, fs2, rtz
+; CHECK-NEXT:    blt a3, a7, .LBB15_5
+; CHECK-NEXT:  .LBB15_22: # %entry
+; CHECK-NEXT:    mv a3, a7
+; CHECK-NEXT:    fcvt.l.s a5, fs1, rtz
+; CHECK-NEXT:    blt a4, a7, .LBB15_6
 ; CHECK-NEXT:  .LBB15_23: # %entry
 ; CHECK-NEXT:    mv a4, a7
-; CHECK-NEXT:    fcvt.l.s a6, ft1, rtz
-; CHECK-NEXT:    blt a5, a7, .LBB15_12
+; CHECK-NEXT:    fcvt.l.s a6, fs0, rtz
+; CHECK-NEXT:    blt a5, a7, .LBB15_7
 ; CHECK-NEXT:  .LBB15_24: # %entry
 ; CHECK-NEXT:    mv a5, a7
-; CHECK-NEXT:    blt a6, a7, .LBB15_13
+; CHECK-NEXT:    blt a6, a7, .LBB15_8
 ; CHECK-NEXT:  .LBB15_25: # %entry
 ; CHECK-NEXT:    mv a6, a7
 ; CHECK-NEXT:    lui a7, 1048568
-; CHECK-NEXT:    blt a7, a6, .LBB15_14
+; CHECK-NEXT:    blt a7, a6, .LBB15_9
 ; CHECK-NEXT:  .LBB15_26: # %entry
 ; CHECK-NEXT:    lui a6, 1048568
-; CHECK-NEXT:    blt a7, a5, .LBB15_15
+; CHECK-NEXT:    blt a7, a5, .LBB15_10
 ; CHECK-NEXT:  .LBB15_27: # %entry
 ; CHECK-NEXT:    lui a5, 1048568
-; CHECK-NEXT:    blt a7, a4, .LBB15_16
+; CHECK-NEXT:    blt a7, a4, .LBB15_11
 ; CHECK-NEXT:  .LBB15_28: # %entry
 ; CHECK-NEXT:    lui a4, 1048568
-; CHECK-NEXT:    blt a7, a3, .LBB15_17
+; CHECK-NEXT:    blt a7, a3, .LBB15_12
 ; CHECK-NEXT:  .LBB15_29: # %entry
 ; CHECK-NEXT:    lui a3, 1048568
-; CHECK-NEXT:    blt a7, a2, .LBB15_18
+; CHECK-NEXT:    blt a7, a2, .LBB15_13
 ; CHECK-NEXT:  .LBB15_30: # %entry
 ; CHECK-NEXT:    lui a2, 1048568
-; CHECK-NEXT:    blt a7, a1, .LBB15_19
+; CHECK-NEXT:    blt a7, a1, .LBB15_14
 ; CHECK-NEXT:  .LBB15_31: # %entry
 ; CHECK-NEXT:    lui a1, 1048568
-; CHECK-NEXT:    blt a7, s7, .LBB15_20
+; CHECK-NEXT:    blt a7, s2, .LBB15_15
 ; CHECK-NEXT:  .LBB15_32: # %entry
-; CHECK-NEXT:    lui s7, 1048568
-; CHECK-NEXT:    bge a7, a0, .LBB15_21
-; CHECK-NEXT:    j .LBB15_22
+; CHECK-NEXT:    lui s2, 1048568
+; CHECK-NEXT:    bge a7, a0, .LBB15_16
+; CHECK-NEXT:    j .LBB15_17
 entry:
   %conv = fptosi <8 x half> %x to <8 x i32>
   %0 = icmp slt <8 x i32> %conv, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
@@ -1076,18 +1077,24 @@ entry:
 define <8 x i16> @utesth_f16i16(<8 x half> %x) {
 ; CHECK-LABEL: utesth_f16i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -96
-; CHECK-NEXT:    .cfi_def_cfa_offset 96
-; CHECK-NEXT:    sd ra, 88(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s0, 80(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s1, 72(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s2, 64(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s3, 56(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s4, 48(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s5, 40(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s6, 32(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s7, 24(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s8, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    addi sp, sp, -128
+; CHECK-NEXT:    .cfi_def_cfa_offset 128
+; CHECK-NEXT:    sd ra, 120(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s1, 104(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s2, 96(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s3, 88(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s4, 80(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s5, 72(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s6, 64(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s7, 56(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs0, 48(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs1, 40(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs2, 32(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs3, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs4, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs5, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs6, 0(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_offset ra, -8
 ; CHECK-NEXT:    .cfi_offset s0, -16
 ; CHECK-NEXT:    .cfi_offset s1, -24
@@ -1097,115 +1104,127 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) {
 ; CHECK-NEXT:    .cfi_offset s5, -56
 ; CHECK-NEXT:    .cfi_offset s6, -64
 ; CHECK-NEXT:    .cfi_offset s7, -72
-; CHECK-NEXT:    .cfi_offset s8, -80
-; CHECK-NEXT:    lhu s6, 0(a1)
-; CHECK-NEXT:    lhu s1, 56(a1)
-; CHECK-NEXT:    lhu s2, 48(a1)
-; CHECK-NEXT:    lhu s3, 40(a1)
-; CHECK-NEXT:    lhu s4, 32(a1)
-; CHECK-NEXT:    lhu s5, 24(a1)
+; CHECK-NEXT:    .cfi_offset fs0, -80
+; CHECK-NEXT:    .cfi_offset fs1, -88
+; CHECK-NEXT:    .cfi_offset fs2, -96
+; CHECK-NEXT:    .cfi_offset fs3, -104
+; CHECK-NEXT:    .cfi_offset fs4, -112
+; CHECK-NEXT:    .cfi_offset fs5, -120
+; CHECK-NEXT:    .cfi_offset fs6, -128
+; CHECK-NEXT:    lhu s1, 0(a1)
+; CHECK-NEXT:    lhu s2, 56(a1)
+; CHECK-NEXT:    lhu s3, 48(a1)
+; CHECK-NEXT:    lhu s4, 40(a1)
+; CHECK-NEXT:    lhu s5, 32(a1)
+; CHECK-NEXT:    lhu s6, 24(a1)
 ; CHECK-NEXT:    lhu s7, 16(a1)
 ; CHECK-NEXT:    lhu a1, 8(a1)
 ; CHECK-NEXT:    mv s0, a0
 ; CHECK-NEXT:    mv a0, a1
 ; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    mv s8, a0
+; CHECK-NEXT:    fmv.s fs6, fa0
 ; CHECK-NEXT:    mv a0, s7
 ; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    mv s7, a0
+; CHECK-NEXT:    fmv.s fs5, fa0
+; CHECK-NEXT:    mv a0, s6
+; CHECK-NEXT:    call __extendhfsf2@plt
+; CHECK-NEXT:    fmv.s fs4, fa0
 ; CHECK-NEXT:    mv a0, s5
 ; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    mv s5, a0
+; CHECK-NEXT:    fmv.s fs3, fa0
 ; CHECK-NEXT:    mv a0, s4
 ; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    mv s4, a0
+; CHECK-NEXT:    fmv.s fs2, fa0
 ; CHECK-NEXT:    mv a0, s3
 ; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    mv s3, a0
+; CHECK-NEXT:    fmv.s fs1, fa0
 ; CHECK-NEXT:    mv a0, s2
 ; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    mv s2, a0
+; CHECK-NEXT:    fmv.s fs0, fa0
+; CHECK-NEXT:    fcvt.lu.s s2, fs6, rtz
 ; CHECK-NEXT:    mv a0, s1
 ; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    mv s1, a0
-; CHECK-NEXT:    fmv.w.x ft0, s7
-; CHECK-NEXT:    fsw ft0, 12(sp) # 4-byte Folded Spill
-; CHECK-NEXT:    fmv.w.x ft0, s8
-; CHECK-NEXT:    fcvt.lu.s s7, ft0, rtz
-; CHECK-NEXT:    mv a0, s6
-; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    fcvt.lu.s a0, ft0, rtz
+; CHECK-NEXT:    fcvt.lu.s a0, fa0, rtz
 ; CHECK-NEXT:    lui a1, 16
 ; CHECK-NEXT:    addiw a1, a1, -1
-; CHECK-NEXT:    bltu a0, a1, .LBB16_2
+; CHECK-NEXT:    bgeu a0, a1, .LBB16_10
 ; CHECK-NEXT:  # %bb.1: # %entry
-; CHECK-NEXT:    mv a0, a1
+; CHECK-NEXT:    fcvt.lu.s a2, fs5, rtz
+; CHECK-NEXT:    bgeu s2, a1, .LBB16_11
 ; CHECK-NEXT:  .LBB16_2: # %entry
-; CHECK-NEXT:    fmv.w.x ft1, s5
-; CHECK-NEXT:    flw ft0, 12(sp) # 4-byte Folded Reload
-; CHECK-NEXT:    fcvt.lu.s a2, ft0, rtz
-; CHECK-NEXT:    bltu s7, a1, .LBB16_4
-; CHECK-NEXT:  # %bb.3: # %entry
-; CHECK-NEXT:    mv s7, a1
+; CHECK-NEXT:    fcvt.lu.s a3, fs4, rtz
+; CHECK-NEXT:    bgeu a2, a1, .LBB16_12
+; CHECK-NEXT:  .LBB16_3: # %entry
+; CHECK-NEXT:    fcvt.lu.s a4, fs3, rtz
+; CHECK-NEXT:    bgeu a3, a1, .LBB16_13
 ; CHECK-NEXT:  .LBB16_4: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, s4
-; CHECK-NEXT:    fcvt.lu.s a3, ft1, rtz
-; CHECK-NEXT:    bltu a2, a1, .LBB16_6
-; CHECK-NEXT:  # %bb.5: # %entry
-; CHECK-NEXT:    mv a2, a1
-; CHECK-NEXT:  .LBB16_6: # %entry
-; CHECK-NEXT:    fmv.w.x ft1, s3
-; CHECK-NEXT:    fcvt.lu.s a4, ft0, rtz
-; CHECK-NEXT:    bltu a3, a1, .LBB16_8
-; CHECK-NEXT:  # %bb.7: # %entry
-; CHECK-NEXT:    mv a3, a1
-; CHECK-NEXT:  .LBB16_8: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, s2
-; CHECK-NEXT:    fcvt.lu.s a5, ft1, rtz
-; CHECK-NEXT:    bltu a4, a1, .LBB16_10
-; CHECK-NEXT:  # %bb.9: # %entry
-; CHECK-NEXT:    mv a4, a1
-; CHECK-NEXT:  .LBB16_10: # %entry
-; CHECK-NEXT:    fmv.w.x ft1, s1
-; CHECK-NEXT:    fcvt.lu.s a6, ft0, rtz
+; CHECK-NEXT:    fcvt.lu.s a5, fs2, rtz
+; CHECK-NEXT:    bgeu a4, a1, .LBB16_14
+; CHECK-NEXT:  .LBB16_5: # %entry
+; CHECK-NEXT:    fcvt.lu.s a6, fs1, rtz
 ; CHECK-NEXT:    bgeu a5, a1, .LBB16_15
-; CHECK-NEXT:  # %bb.11: # %entry
-; CHECK-NEXT:    fcvt.lu.s a7, ft1, rtz
+; CHECK-NEXT:  .LBB16_6: # %entry
+; CHECK-NEXT:    fcvt.lu.s a7, fs0, rtz
 ; CHECK-NEXT:    bgeu a6, a1, .LBB16_16
-; CHECK-NEXT:  .LBB16_12: # %entry
-; CHECK-NEXT:    bltu a7, a1, .LBB16_14
-; CHECK-NEXT:  .LBB16_13: # %entry
+; CHECK-NEXT:  .LBB16_7: # %entry
+; CHECK-NEXT:    bltu a7, a1, .LBB16_9
+; CHECK-NEXT:  .LBB16_8: # %entry
 ; CHECK-NEXT:    mv a7, a1
-; CHECK-NEXT:  .LBB16_14: # %entry
+; CHECK-NEXT:  .LBB16_9: # %entry
 ; CHECK-NEXT:    sh a7, 14(s0)
 ; CHECK-NEXT:    sh a6, 12(s0)
 ; CHECK-NEXT:    sh a5, 10(s0)
 ; CHECK-NEXT:    sh a4, 8(s0)
 ; CHECK-NEXT:    sh a3, 6(s0)
 ; CHECK-NEXT:    sh a2, 4(s0)
-; CHECK-NEXT:    sh s7, 2(s0)
+; CHECK-NEXT:    sh s2, 2(s0)
 ; CHECK-NEXT:    sh a0, 0(s0)
-; CHECK-NEXT:    ld ra, 88(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s0, 80(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s1, 72(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s2, 64(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s3, 56(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s4, 48(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s5, 40(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s6, 32(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s7, 24(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s8, 16(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    addi sp, sp, 96
+; CHECK-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s1, 104(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s2, 96(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s3, 88(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s4, 80(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s5, 72(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s6, 64(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s7, 56(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs0, 48(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs1, 40(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs2, 32(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs3, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs4, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs5, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs6, 0(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    addi sp, sp, 128
 ; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB16_10: # %entry
+; CHECK-NEXT:    mv a0, a1
+; CHECK-NEXT:    fcvt.lu.s a2, fs5, rtz
+; CHECK-NEXT:    bltu s2, a1, .LBB16_2
+; CHECK-NEXT:  .LBB16_11: # %entry
+; CHECK-NEXT:    mv s2, a1
+; CHECK-NEXT:    fcvt.lu.s a3, fs4, rtz
+; CHECK-NEXT:    bltu a2, a1, .LBB16_3
+; CHECK-NEXT:  .LBB16_12: # %entry
+; CHECK-NEXT:    mv a2, a1
+; CHECK-NEXT:    fcvt.lu.s a4, fs3, rtz
+; CHECK-NEXT:    bltu a3, a1, .LBB16_4
+; CHECK-NEXT:  .LBB16_13: # %entry
+; CHECK-NEXT:    mv a3, a1
+; CHECK-NEXT:    fcvt.lu.s a5, fs2, rtz
+; CHECK-NEXT:    bltu a4, a1, .LBB16_5
+; CHECK-NEXT:  .LBB16_14: # %entry
+; CHECK-NEXT:    mv a4, a1
+; CHECK-NEXT:    fcvt.lu.s a6, fs1, rtz
+; CHECK-NEXT:    bltu a5, a1, .LBB16_6
 ; CHECK-NEXT:  .LBB16_15: # %entry
 ; CHECK-NEXT:    mv a5, a1
-; CHECK-NEXT:    fcvt.lu.s a7, ft1, rtz
-; CHECK-NEXT:    bltu a6, a1, .LBB16_12
+; CHECK-NEXT:    fcvt.lu.s a7, fs0, rtz
+; CHECK-NEXT:    bltu a6, a1, .LBB16_7
 ; CHECK-NEXT:  .LBB16_16: # %entry
 ; CHECK-NEXT:    mv a6, a1
-; CHECK-NEXT:    bgeu a7, a1, .LBB16_13
-; CHECK-NEXT:    j .LBB16_14
+; CHECK-NEXT:    bgeu a7, a1, .LBB16_8
+; CHECK-NEXT:    j .LBB16_9
 entry:
   %conv = fptoui <8 x half> %x to <8 x i32>
   %0 = icmp ult <8 x i32> %conv, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
@@ -1217,18 +1236,24 @@ entry:
 define <8 x i16> @ustest_f16i16(<8 x half> %x) {
 ; CHECK-LABEL: ustest_f16i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -96
-; CHECK-NEXT:    .cfi_def_cfa_offset 96
-; CHECK-NEXT:    sd ra, 88(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s0, 80(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s1, 72(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s2, 64(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s3, 56(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s4, 48(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s5, 40(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s6, 32(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s7, 24(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s8, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    addi sp, sp, -128
+; CHECK-NEXT:    .cfi_def_cfa_offset 128
+; CHECK-NEXT:    sd ra, 120(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s1, 104(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s2, 96(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s3, 88(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s4, 80(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s5, 72(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s6, 64(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s7, 56(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs0, 48(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs1, 40(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs2, 32(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs3, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs4, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs5, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs6, 0(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_offset ra, -8
 ; CHECK-NEXT:    .cfi_offset s0, -16
 ; CHECK-NEXT:    .cfi_offset s1, -24
@@ -1238,155 +1263,167 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) {
 ; CHECK-NEXT:    .cfi_offset s5, -56
 ; CHECK-NEXT:    .cfi_offset s6, -64
 ; CHECK-NEXT:    .cfi_offset s7, -72
-; CHECK-NEXT:    .cfi_offset s8, -80
-; CHECK-NEXT:    lhu s6, 56(a1)
-; CHECK-NEXT:    lhu s1, 0(a1)
-; CHECK-NEXT:    lhu s2, 8(a1)
-; CHECK-NEXT:    lhu s3, 16(a1)
-; CHECK-NEXT:    lhu s4, 24(a1)
-; CHECK-NEXT:    lhu s5, 32(a1)
+; CHECK-NEXT:    .cfi_offset fs0, -80
+; CHECK-NEXT:    .cfi_offset fs1, -88
+; CHECK-NEXT:    .cfi_offset fs2, -96
+; CHECK-NEXT:    .cfi_offset fs3, -104
+; CHECK-NEXT:    .cfi_offset fs4, -112
+; CHECK-NEXT:    .cfi_offset fs5, -120
+; CHECK-NEXT:    .cfi_offset fs6, -128
+; CHECK-NEXT:    lhu s1, 56(a1)
+; CHECK-NEXT:    lhu s2, 0(a1)
+; CHECK-NEXT:    lhu s3, 8(a1)
+; CHECK-NEXT:    lhu s4, 16(a1)
+; CHECK-NEXT:    lhu s5, 24(a1)
+; CHECK-NEXT:    lhu s6, 32(a1)
 ; CHECK-NEXT:    lhu s7, 40(a1)
 ; CHECK-NEXT:    lhu a1, 48(a1)
 ; CHECK-NEXT:    mv s0, a0
 ; CHECK-NEXT:    mv a0, a1
 ; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    mv s8, a0
+; CHECK-NEXT:    fmv.s fs6, fa0
 ; CHECK-NEXT:    mv a0, s7
 ; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    mv s7, a0
+; CHECK-NEXT:    fmv.s fs5, fa0
+; CHECK-NEXT:    mv a0, s6
+; CHECK-NEXT:    call __extendhfsf2@plt
+; CHECK-NEXT:    fmv.s fs4, fa0
 ; CHECK-NEXT:    mv a0, s5
 ; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    mv s5, a0
+; CHECK-NEXT:    fmv.s fs3, fa0
 ; CHECK-NEXT:    mv a0, s4
 ; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    mv s4, a0
+; CHECK-NEXT:    fmv.s fs2, fa0
 ; CHECK-NEXT:    mv a0, s3
 ; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    mv s3, a0
+; CHECK-NEXT:    fmv.s fs1, fa0
 ; CHECK-NEXT:    mv a0, s2
 ; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    mv s2, a0
+; CHECK-NEXT:    fmv.s fs0, fa0
+; CHECK-NEXT:    fcvt.l.s s2, fs6, rtz
 ; CHECK-NEXT:    mv a0, s1
 ; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    mv s1, a0
-; CHECK-NEXT:    fmv.w.x ft0, s7
-; CHECK-NEXT:    fsw ft0, 12(sp) # 4-byte Folded Spill
-; CHECK-NEXT:    fmv.w.x ft0, s8
-; CHECK-NEXT:    fcvt.l.s s7, ft0, rtz
-; CHECK-NEXT:    mv a0, s6
-; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    fcvt.l.s a0, ft0, rtz
+; CHECK-NEXT:    fcvt.l.s a0, fa0, rtz
 ; CHECK-NEXT:    lui a1, 16
 ; CHECK-NEXT:    addiw a7, a1, -1
-; CHECK-NEXT:    blt a0, a7, .LBB17_2
+; CHECK-NEXT:    bge a0, a7, .LBB17_18
 ; CHECK-NEXT:  # %bb.1: # %entry
-; CHECK-NEXT:    mv a0, a7
+; CHECK-NEXT:    fcvt.l.s a1, fs5, rtz
+; CHECK-NEXT:    bge s2, a7, .LBB17_19
 ; CHECK-NEXT:  .LBB17_2: # %entry
-; CHECK-NEXT:    fmv.w.x ft1, s5
-; CHECK-NEXT:    flw ft0, 12(sp) # 4-byte Folded Reload
-; CHECK-NEXT:    fcvt.l.s a1, ft0, rtz
-; CHECK-NEXT:    blt s7, a7, .LBB17_4
-; CHECK-NEXT:  # %bb.3: # %entry
-; CHECK-NEXT:    mv s7, a7
+; CHECK-NEXT:    fcvt.l.s a2, fs4, rtz
+; CHECK-NEXT:    bge a1, a7, .LBB17_20
+; CHECK-NEXT:  .LBB17_3: # %entry
+; CHECK-NEXT:    fcvt.l.s a3, fs3, rtz
+; CHECK-NEXT:    bge a2, a7, .LBB17_21
 ; CHECK-NEXT:  .LBB17_4: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, s4
-; CHECK-NEXT:    fcvt.l.s a2, ft1, rtz
-; CHECK-NEXT:    blt a1, a7, .LBB17_6
-; CHECK-NEXT:  # %bb.5: # %entry
-; CHECK-NEXT:    mv a1, a7
-; CHECK-NEXT:  .LBB17_6: # %entry
-; CHECK-NEXT:    fmv.w.x ft1, s3
-; CHECK-NEXT:    fcvt.l.s a3, ft0, rtz
-; CHECK-NEXT:    blt a2, a7, .LBB17_8
-; CHECK-NEXT:  # %bb.7: # %entry
-; CHECK-NEXT:    mv a2, a7
-; CHECK-NEXT:  .LBB17_8: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, s2
-; CHECK-NEXT:    fcvt.l.s a4, ft1, rtz
-; CHECK-NEXT:    blt a3, a7, .LBB17_10
-; CHECK-NEXT:  # %bb.9: # %entry
-; CHECK-NEXT:    mv a3, a7
-; CHECK-NEXT:  .LBB17_10: # %entry
-; CHECK-NEXT:    fmv.w.x ft1, s1
-; CHECK-NEXT:    fcvt.l.s a5, ft0, rtz
+; CHECK-NEXT:    fcvt.l.s a4, fs2, rtz
+; CHECK-NEXT:    bge a3, a7, .LBB17_22
+; CHECK-NEXT:  .LBB17_5: # %entry
+; CHECK-NEXT:    fcvt.l.s a5, fs1, rtz
 ; CHECK-NEXT:    bge a4, a7, .LBB17_23
-; CHECK-NEXT:  # %bb.11: # %entry
-; CHECK-NEXT:    fcvt.l.s a6, ft1, rtz
+; CHECK-NEXT:  .LBB17_6: # %entry
+; CHECK-NEXT:    fcvt.l.s a6, fs0, rtz
 ; CHECK-NEXT:    bge a5, a7, .LBB17_24
-; CHECK-NEXT:  .LBB17_12: # %entry
+; CHECK-NEXT:  .LBB17_7: # %entry
 ; CHECK-NEXT:    bge a6, a7, .LBB17_25
-; CHECK-NEXT:  .LBB17_13: # %entry
+; CHECK-NEXT:  .LBB17_8: # %entry
 ; CHECK-NEXT:    blez a6, .LBB17_26
-; CHECK-NEXT:  .LBB17_14: # %entry
+; CHECK-NEXT:  .LBB17_9: # %entry
 ; CHECK-NEXT:    blez a5, .LBB17_27
-; CHECK-NEXT:  .LBB17_15: # %entry
+; CHECK-NEXT:  .LBB17_10: # %entry
 ; CHECK-NEXT:    blez a4, .LBB17_28
-; CHECK-NEXT:  .LBB17_16: # %entry
+; CHECK-NEXT:  .LBB17_11: # %entry
 ; CHECK-NEXT:    blez a3, .LBB17_29
-; CHECK-NEXT:  .LBB17_17: # %entry
+; CHECK-NEXT:  .LBB17_12: # %entry
 ; CHECK-NEXT:    blez a2, .LBB17_30
-; CHECK-NEXT:  .LBB17_18: # %entry
+; CHECK-NEXT:  .LBB17_13: # %entry
 ; CHECK-NEXT:    blez a1, .LBB17_31
-; CHECK-NEXT:  .LBB17_19: # %entry
-; CHECK-NEXT:    blez s7, .LBB17_32
-; CHECK-NEXT:  .LBB17_20: # %entry
-; CHECK-NEXT:    bgtz a0, .LBB17_22
-; CHECK-NEXT:  .LBB17_21: # %entry
+; CHECK-NEXT:  .LBB17_14: # %entry
+; CHECK-NEXT:    blez s2, .LBB17_32
+; CHECK-NEXT:  .LBB17_15: # %entry
+; CHECK-NEXT:    bgtz a0, .LBB17_17
+; CHECK-NEXT:  .LBB17_16: # %entry
 ; CHECK-NEXT:    li a0, 0
-; CHECK-NEXT:  .LBB17_22: # %entry
+; CHECK-NEXT:  .LBB17_17: # %entry
 ; CHECK-NEXT:    sh a0, 14(s0)
-; CHECK-NEXT:    sh s7, 12(s0)
+; CHECK-NEXT:    sh s2, 12(s0)
 ; CHECK-NEXT:    sh a1, 10(s0)
 ; CHECK-NEXT:    sh a2, 8(s0)
 ; CHECK-NEXT:    sh a3, 6(s0)
 ; CHECK-NEXT:    sh a4, 4(s0)
 ; CHECK-NEXT:    sh a5, 2(s0)
 ; CHECK-NEXT:    sh a6, 0(s0)
-; CHECK-NEXT:    ld ra, 88(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s0, 80(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s1, 72(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s2, 64(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s3, 56(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s4, 48(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s5, 40(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s6, 32(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s7, 24(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s8, 16(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    addi sp, sp, 96
+; CHECK-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s1, 104(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s2, 96(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s3, 88(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s4, 80(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s5, 72(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s6, 64(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s7, 56(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs0, 48(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs1, 40(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs2, 32(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs3, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs4, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs5, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs6, 0(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    addi sp, sp, 128
 ; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB17_18: # %entry
+; CHECK-NEXT:    mv a0, a7
+; CHECK-NEXT:    fcvt.l.s a1, fs5, rtz
+; CHECK-NEXT:    blt s2, a7, .LBB17_2
+; CHECK-NEXT:  .LBB17_19: # %entry
+; CHECK-NEXT:    mv s2, a7
+; CHECK-NEXT:    fcvt.l.s a2, fs4, rtz
+; CHECK-NEXT:    blt a1, a7, .LBB17_3
+; CHECK-NEXT:  .LBB17_20: # %entry
+; CHECK-NEXT:    mv a1, a7
+; CHECK-NEXT:    fcvt.l.s a3, fs3, rtz
+; CHECK-NEXT:    blt a2, a7, .LBB17_4
+; CHECK-NEXT:  .LBB17_21: # %entry
+; CHECK-NEXT:    mv a2, a7
+; CHECK-NEXT:    fcvt.l.s a4, fs2, rtz
+; CHECK-NEXT:    blt a3, a7, .LBB17_5
+; CHECK-NEXT:  .LBB17_22: # %entry
+; CHECK-NEXT:    mv a3, a7
+; CHECK-NEXT:    fcvt.l.s a5, fs1, rtz
+; CHECK-NEXT:    blt a4, a7, .LBB17_6
 ; CHECK-NEXT:  .LBB17_23: # %entry
 ; CHECK-NEXT:    mv a4, a7
-; CHECK-NEXT:    fcvt.l.s a6, ft1, rtz
-; CHECK-NEXT:    blt a5, a7, .LBB17_12
+; CHECK-NEXT:    fcvt.l.s a6, fs0, rtz
+; CHECK-NEXT:    blt a5, a7, .LBB17_7
 ; CHECK-NEXT:  .LBB17_24: # %entry
 ; CHECK-NEXT:    mv a5, a7
-; CHECK-NEXT:    blt a6, a7, .LBB17_13
+; CHECK-NEXT:    blt a6, a7, .LBB17_8
 ; CHECK-NEXT:  .LBB17_25: # %entry
 ; CHECK-NEXT:    mv a6, a7
-; CHECK-NEXT:    bgtz a6, .LBB17_14
+; CHECK-NEXT:    bgtz a6, .LBB17_9
 ; CHECK-NEXT:  .LBB17_26: # %entry
 ; CHECK-NEXT:    li a6, 0
-; CHECK-NEXT:    bgtz a5, .LBB17_15
+; CHECK-NEXT:    bgtz a5, .LBB17_10
 ; CHECK-NEXT:  .LBB17_27: # %entry
 ; CHECK-NEXT:    li a5, 0
-; CHECK-NEXT:    bgtz a4, .LBB17_16
+; CHECK-NEXT:    bgtz a4, .LBB17_11
 ; CHECK-NEXT:  .LBB17_28: # %entry
 ; CHECK-NEXT:    li a4, 0
-; CHECK-NEXT:    bgtz a3, .LBB17_17
+; CHECK-NEXT:    bgtz a3, .LBB17_12
 ; CHECK-NEXT:  .LBB17_29: # %entry
 ; CHECK-NEXT:    li a3, 0
-; CHECK-NEXT:    bgtz a2, .LBB17_18
+; CHECK-NEXT:    bgtz a2, .LBB17_13
 ; CHECK-NEXT:  .LBB17_30: # %entry
 ; CHECK-NEXT:    li a2, 0
-; CHECK-NEXT:    bgtz a1, .LBB17_19
+; CHECK-NEXT:    bgtz a1, .LBB17_14
 ; CHECK-NEXT:  .LBB17_31: # %entry
 ; CHECK-NEXT:    li a1, 0
-; CHECK-NEXT:    bgtz s7, .LBB17_20
+; CHECK-NEXT:    bgtz s2, .LBB17_15
 ; CHECK-NEXT:  .LBB17_32: # %entry
-; CHECK-NEXT:    li s7, 0
-; CHECK-NEXT:    blez a0, .LBB17_21
-; CHECK-NEXT:    j .LBB17_22
+; CHECK-NEXT:    li s2, 0
+; CHECK-NEXT:    blez a0, .LBB17_16
+; CHECK-NEXT:    j .LBB17_17
 entry:
   %conv = fptosi <8 x half> %x to <8 x i32>
   %0 = icmp slt <8 x i32> %conv, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
@@ -1407,16 +1444,16 @@ define <2 x i64> @stest_f64i64(<2 x double> %x) {
 ; CHECK-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_offset ra, -8
 ; CHECK-NEXT:    .cfi_offset s0, -16
 ; CHECK-NEXT:    .cfi_offset s1, -24
-; CHECK-NEXT:    .cfi_offset s2, -32
-; CHECK-NEXT:    mv s2, a1
+; CHECK-NEXT:    .cfi_offset fs0, -32
+; CHECK-NEXT:    fmv.d fs0, fa1
 ; CHECK-NEXT:    call __fixdfti@plt
 ; CHECK-NEXT:    mv s0, a0
 ; CHECK-NEXT:    mv s1, a1
-; CHECK-NEXT:    mv a0, s2
+; CHECK-NEXT:    fmv.d fa0, fs0
 ; CHECK-NEXT:    call __fixdfti@plt
 ; CHECK-NEXT:    mv a2, a0
 ; CHECK-NEXT:    li a0, -1
@@ -1473,7 +1510,7 @@ define <2 x i64> @stest_f64i64(<2 x double> %x) {
 ; CHECK-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    addi sp, sp, 32
 ; CHECK-NEXT:    ret
 entry:
@@ -1494,17 +1531,17 @@ define <2 x i64> @utest_f64i64(<2 x double> %x) {
 ; CHECK-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_offset ra, -8
 ; CHECK-NEXT:    .cfi_offset s0, -16
 ; CHECK-NEXT:    .cfi_offset s1, -24
-; CHECK-NEXT:    .cfi_offset s2, -32
-; CHECK-NEXT:    mv s2, a0
-; CHECK-NEXT:    mv a0, a1
+; CHECK-NEXT:    .cfi_offset fs0, -32
+; CHECK-NEXT:    fmv.d fs0, fa0
+; CHECK-NEXT:    fmv.d fa0, fa1
 ; CHECK-NEXT:    call __fixunsdfti@plt
 ; CHECK-NEXT:    mv s0, a0
 ; CHECK-NEXT:    mv s1, a1
-; CHECK-NEXT:    mv a0, s2
+; CHECK-NEXT:    fmv.d fa0, fs0
 ; CHECK-NEXT:    call __fixunsdfti@plt
 ; CHECK-NEXT:    beqz a1, .LBB19_2
 ; CHECK-NEXT:  # %bb.1: # %entry
@@ -1518,7 +1555,7 @@ define <2 x i64> @utest_f64i64(<2 x double> %x) {
 ; CHECK-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    addi sp, sp, 32
 ; CHECK-NEXT:    ret
 entry:
@@ -1537,17 +1574,17 @@ define <2 x i64> @ustest_f64i64(<2 x double> %x) {
 ; CHECK-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_offset ra, -8
 ; CHECK-NEXT:    .cfi_offset s0, -16
 ; CHECK-NEXT:    .cfi_offset s1, -24
-; CHECK-NEXT:    .cfi_offset s2, -32
-; CHECK-NEXT:    mv s2, a0
-; CHECK-NEXT:    mv a0, a1
+; CHECK-NEXT:    .cfi_offset fs0, -32
+; CHECK-NEXT:    fmv.d fs0, fa0
+; CHECK-NEXT:    fmv.d fa0, fa1
 ; CHECK-NEXT:    call __fixdfti@plt
 ; CHECK-NEXT:    mv s0, a0
 ; CHECK-NEXT:    mv s1, a1
-; CHECK-NEXT:    mv a0, s2
+; CHECK-NEXT:    fmv.d fa0, fs0
 ; CHECK-NEXT:    call __fixdfti@plt
 ; CHECK-NEXT:    mv a2, a1
 ; CHECK-NEXT:    bgtz a1, .LBB20_7
@@ -1597,7 +1634,7 @@ define <2 x i64> @ustest_f64i64(<2 x double> %x) {
 ; CHECK-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    addi sp, sp, 32
 ; CHECK-NEXT:    ret
 entry:
@@ -1618,16 +1655,16 @@ define <2 x i64> @stest_f32i64(<2 x float> %x) {
 ; CHECK-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_offset ra, -8
 ; CHECK-NEXT:    .cfi_offset s0, -16
 ; CHECK-NEXT:    .cfi_offset s1, -24
-; CHECK-NEXT:    .cfi_offset s2, -32
-; CHECK-NEXT:    mv s2, a1
+; CHECK-NEXT:    .cfi_offset fs0, -32
+; CHECK-NEXT:    fmv.s fs0, fa1
 ; CHECK-NEXT:    call __fixsfti@plt
 ; CHECK-NEXT:    mv s0, a0
 ; CHECK-NEXT:    mv s1, a1
-; CHECK-NEXT:    mv a0, s2
+; CHECK-NEXT:    fmv.s fa0, fs0
 ; CHECK-NEXT:    call __fixsfti@plt
 ; CHECK-NEXT:    mv a2, a0
 ; CHECK-NEXT:    li a0, -1
@@ -1684,7 +1721,7 @@ define <2 x i64> @stest_f32i64(<2 x float> %x) {
 ; CHECK-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    addi sp, sp, 32
 ; CHECK-NEXT:    ret
 entry:
@@ -1705,17 +1742,17 @@ define <2 x i64> @utest_f32i64(<2 x float> %x) {
 ; CHECK-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_offset ra, -8
 ; CHECK-NEXT:    .cfi_offset s0, -16
 ; CHECK-NEXT:    .cfi_offset s1, -24
-; CHECK-NEXT:    .cfi_offset s2, -32
-; CHECK-NEXT:    mv s2, a0
-; CHECK-NEXT:    mv a0, a1
+; CHECK-NEXT:    .cfi_offset fs0, -32
+; CHECK-NEXT:    fmv.s fs0, fa0
+; CHECK-NEXT:    fmv.s fa0, fa1
 ; CHECK-NEXT:    call __fixunssfti@plt
 ; CHECK-NEXT:    mv s0, a0
 ; CHECK-NEXT:    mv s1, a1
-; CHECK-NEXT:    mv a0, s2
+; CHECK-NEXT:    fmv.s fa0, fs0
 ; CHECK-NEXT:    call __fixunssfti@plt
 ; CHECK-NEXT:    beqz a1, .LBB22_2
 ; CHECK-NEXT:  # %bb.1: # %entry
@@ -1729,7 +1766,7 @@ define <2 x i64> @utest_f32i64(<2 x float> %x) {
 ; CHECK-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    addi sp, sp, 32
 ; CHECK-NEXT:    ret
 entry:
@@ -1748,17 +1785,17 @@ define <2 x i64> @ustest_f32i64(<2 x float> %x) {
 ; CHECK-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_offset ra, -8
 ; CHECK-NEXT:    .cfi_offset s0, -16
 ; CHECK-NEXT:    .cfi_offset s1, -24
-; CHECK-NEXT:    .cfi_offset s2, -32
-; CHECK-NEXT:    mv s2, a0
-; CHECK-NEXT:    mv a0, a1
+; CHECK-NEXT:    .cfi_offset fs0, -32
+; CHECK-NEXT:    fmv.s fs0, fa0
+; CHECK-NEXT:    fmv.s fa0, fa1
 ; CHECK-NEXT:    call __fixsfti@plt
 ; CHECK-NEXT:    mv s0, a0
 ; CHECK-NEXT:    mv s1, a1
-; CHECK-NEXT:    mv a0, s2
+; CHECK-NEXT:    fmv.s fa0, fs0
 ; CHECK-NEXT:    call __fixsfti@plt
 ; CHECK-NEXT:    mv a2, a1
 ; CHECK-NEXT:    bgtz a1, .LBB23_7
@@ -1808,7 +1845,7 @@ define <2 x i64> @ustest_f32i64(<2 x float> %x) {
 ; CHECK-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    addi sp, sp, 32
 ; CHECK-NEXT:    ret
 entry:
@@ -2045,12 +2082,10 @@ entry:
 define <2 x i32> @stest_f64i32_mm(<2 x double> %x) {
 ; CHECK-LABEL: stest_f64i32_mm:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a1
-; CHECK-NEXT:    fmv.d.x ft1, a0
-; CHECK-NEXT:    fcvt.l.d a1, ft0, rtz
+; CHECK-NEXT:    fcvt.l.d a1, fa1, rtz
 ; CHECK-NEXT:    lui a2, 524288
 ; CHECK-NEXT:    addiw a3, a2, -1
-; CHECK-NEXT:    fcvt.l.d a0, ft1, rtz
+; CHECK-NEXT:    fcvt.l.d a0, fa0, rtz
 ; CHECK-NEXT:    bge a1, a3, .LBB27_5
 ; CHECK-NEXT:  # %bb.1: # %entry
 ; CHECK-NEXT:    bge a0, a3, .LBB27_6
@@ -2083,12 +2118,10 @@ entry:
 define <2 x i32> @utest_f64i32_mm(<2 x double> %x) {
 ; CHECK-LABEL: utest_f64i32_mm:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    fmv.d.x ft1, a1
-; CHECK-NEXT:    fcvt.lu.d a0, ft0, rtz
+; CHECK-NEXT:    fcvt.lu.d a0, fa0, rtz
 ; CHECK-NEXT:    li a1, -1
 ; CHECK-NEXT:    srli a2, a1, 32
-; CHECK-NEXT:    fcvt.lu.d a1, ft1, rtz
+; CHECK-NEXT:    fcvt.lu.d a1, fa1, rtz
 ; CHECK-NEXT:    bgeu a0, a2, .LBB28_3
 ; CHECK-NEXT:  # %bb.1: # %entry
 ; CHECK-NEXT:    bgeu a1, a2, .LBB28_4
@@ -2110,12 +2143,10 @@ entry:
 define <2 x i32> @ustest_f64i32_mm(<2 x double> %x) {
 ; CHECK-LABEL: ustest_f64i32_mm:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a1
-; CHECK-NEXT:    fmv.d.x ft1, a0
-; CHECK-NEXT:    fcvt.l.d a1, ft0, rtz
+; CHECK-NEXT:    fcvt.l.d a1, fa1, rtz
 ; CHECK-NEXT:    li a0, -1
 ; CHECK-NEXT:    srli a2, a0, 32
-; CHECK-NEXT:    fcvt.l.d a0, ft1, rtz
+; CHECK-NEXT:    fcvt.l.d a0, fa0, rtz
 ; CHECK-NEXT:    bge a1, a2, .LBB29_5
 ; CHECK-NEXT:  # %bb.1: # %entry
 ; CHECK-NEXT:    bge a0, a2, .LBB29_6
@@ -2148,61 +2179,59 @@ entry:
 define <4 x i32> @stest_f32i32_mm(<4 x float> %x) {
 ; CHECK-LABEL: stest_f32i32_mm:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft1, a4
-; CHECK-NEXT:    fmv.w.x ft2, a3
-; CHECK-NEXT:    fmv.w.x ft0, a2
-; CHECK-NEXT:    fcvt.l.s a2, ft1, rtz
-; CHECK-NEXT:    lui a4, 524288
-; CHECK-NEXT:    addiw a6, a4, -1
-; CHECK-NEXT:    fcvt.l.s a3, ft2, rtz
-; CHECK-NEXT:    blt a2, a6, .LBB30_2
+; CHECK-NEXT:    fcvt.l.s a1, fa3, rtz
+; CHECK-NEXT:    lui a3, 524288
+; CHECK-NEXT:    addiw a6, a3, -1
+; CHECK-NEXT:    fcvt.l.s a2, fa2, rtz
+; CHECK-NEXT:    bge a1, a6, .LBB30_10
 ; CHECK-NEXT:  # %bb.1: # %entry
-; CHECK-NEXT:    mv a2, a6
+; CHECK-NEXT:    fcvt.l.s a4, fa1, rtz
+; CHECK-NEXT:    bge a2, a6, .LBB30_11
 ; CHECK-NEXT:  .LBB30_2: # %entry
-; CHECK-NEXT:    fmv.w.x ft1, a1
-; CHECK-NEXT:    fcvt.l.s a1, ft0, rtz
-; CHECK-NEXT:    bge a3, a6, .LBB30_11
-; CHECK-NEXT:  # %bb.3: # %entry
-; CHECK-NEXT:    fcvt.l.s a5, ft1, rtz
-; CHECK-NEXT:    bge a1, a6, .LBB30_12
-; CHECK-NEXT:  .LBB30_4: # %entry
+; CHECK-NEXT:    fcvt.l.s a5, fa0, rtz
+; CHECK-NEXT:    bge a4, a6, .LBB30_12
+; CHECK-NEXT:  .LBB30_3: # %entry
 ; CHECK-NEXT:    bge a5, a6, .LBB30_13
+; CHECK-NEXT:  .LBB30_4: # %entry
+; CHECK-NEXT:    bge a3, a5, .LBB30_14
 ; CHECK-NEXT:  .LBB30_5: # %entry
-; CHECK-NEXT:    bge a4, a5, .LBB30_14
+; CHECK-NEXT:    bge a3, a4, .LBB30_15
 ; CHECK-NEXT:  .LBB30_6: # %entry
-; CHECK-NEXT:    bge a4, a1, .LBB30_15
+; CHECK-NEXT:    bge a3, a2, .LBB30_16
 ; CHECK-NEXT:  .LBB30_7: # %entry
-; CHECK-NEXT:    bge a4, a3, .LBB30_16
+; CHECK-NEXT:    blt a3, a1, .LBB30_9
 ; CHECK-NEXT:  .LBB30_8: # %entry
-; CHECK-NEXT:    blt a4, a2, .LBB30_10
+; CHECK-NEXT:    lui a1, 524288
 ; CHECK-NEXT:  .LBB30_9: # %entry
-; CHECK-NEXT:    lui a2, 524288
-; CHECK-NEXT:  .LBB30_10: # %entry
-; CHECK-NEXT:    sw a2, 12(a0)
-; CHECK-NEXT:    sw a3, 8(a0)
-; CHECK-NEXT:    sw a1, 4(a0)
+; CHECK-NEXT:    sw a1, 12(a0)
+; CHECK-NEXT:    sw a2, 8(a0)
+; CHECK-NEXT:    sw a4, 4(a0)
 ; CHECK-NEXT:    sw a5, 0(a0)
 ; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB30_10: # %entry
+; CHECK-NEXT:    mv a1, a6
+; CHECK-NEXT:    fcvt.l.s a4, fa1, rtz
+; CHECK-NEXT:    blt a2, a6, .LBB30_2
 ; CHECK-NEXT:  .LBB30_11: # %entry
-; CHECK-NEXT:    mv a3, a6
-; CHECK-NEXT:    fcvt.l.s a5, ft1, rtz
-; CHECK-NEXT:    blt a1, a6, .LBB30_4
+; CHECK-NEXT:    mv a2, a6
+; CHECK-NEXT:    fcvt.l.s a5, fa0, rtz
+; CHECK-NEXT:    blt a4, a6, .LBB30_3
 ; CHECK-NEXT:  .LBB30_12: # %entry
-; CHECK-NEXT:    mv a1, a6
-; CHECK-NEXT:    blt a5, a6, .LBB30_5
+; CHECK-NEXT:    mv a4, a6
+; CHECK-NEXT:    blt a5, a6, .LBB30_4
 ; CHECK-NEXT:  .LBB30_13: # %entry
 ; CHECK-NEXT:    mv a5, a6
-; CHECK-NEXT:    blt a4, a5, .LBB30_6
+; CHECK-NEXT:    blt a3, a5, .LBB30_5
 ; CHECK-NEXT:  .LBB30_14: # %entry
 ; CHECK-NEXT:    lui a5, 524288
-; CHECK-NEXT:    blt a4, a1, .LBB30_7
+; CHECK-NEXT:    blt a3, a4, .LBB30_6
 ; CHECK-NEXT:  .LBB30_15: # %entry
-; CHECK-NEXT:    lui a1, 524288
-; CHECK-NEXT:    blt a4, a3, .LBB30_8
+; CHECK-NEXT:    lui a4, 524288
+; CHECK-NEXT:    blt a3, a2, .LBB30_7
 ; CHECK-NEXT:  .LBB30_16: # %entry
-; CHECK-NEXT:    lui a3, 524288
-; CHECK-NEXT:    bge a4, a2, .LBB30_9
-; CHECK-NEXT:    j .LBB30_10
+; CHECK-NEXT:    lui a2, 524288
+; CHECK-NEXT:    bge a3, a1, .LBB30_8
+; CHECK-NEXT:    j .LBB30_9
 entry:
   %conv = fptosi <4 x float> %x to <4 x i64>
   %spec.store.select = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %conv, <4 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>)
@@ -2214,41 +2243,39 @@ entry:
 define <4 x i32> @utest_f32i32_mm(<4 x float> %x) {
 ; CHECK-LABEL: utest_f32i32_mm:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft1, a1
-; CHECK-NEXT:    fmv.w.x ft2, a2
-; CHECK-NEXT:    fmv.w.x ft0, a3
-; CHECK-NEXT:    fcvt.lu.s a1, ft1, rtz
+; CHECK-NEXT:    fcvt.lu.s a1, fa0, rtz
 ; CHECK-NEXT:    li a2, -1
 ; CHECK-NEXT:    srli a3, a2, 32
-; CHECK-NEXT:    fcvt.lu.s a2, ft2, rtz
-; CHECK-NEXT:    bltu a1, a3, .LBB31_2
+; CHECK-NEXT:    fcvt.lu.s a2, fa1, rtz
+; CHECK-NEXT:    bgeu a1, a3, .LBB31_6
 ; CHECK-NEXT:  # %bb.1: # %entry
-; CHECK-NEXT:    mv a1, a3
-; CHECK-NEXT:  .LBB31_2: # %entry
-; CHECK-NEXT:    fmv.w.x ft1, a4
-; CHECK-NEXT:    fcvt.lu.s a4, ft0, rtz
+; CHECK-NEXT:    fcvt.lu.s a4, fa2, rtz
 ; CHECK-NEXT:    bgeu a2, a3, .LBB31_7
-; CHECK-NEXT:  # %bb.3: # %entry
-; CHECK-NEXT:    fcvt.lu.s a5, ft1, rtz
+; CHECK-NEXT:  .LBB31_2: # %entry
+; CHECK-NEXT:    fcvt.lu.s a5, fa3, rtz
 ; CHECK-NEXT:    bgeu a4, a3, .LBB31_8
+; CHECK-NEXT:  .LBB31_3: # %entry
+; CHECK-NEXT:    bltu a5, a3, .LBB31_5
 ; CHECK-NEXT:  .LBB31_4: # %entry
-; CHECK-NEXT:    bltu a5, a3, .LBB31_6
-; CHECK-NEXT:  .LBB31_5: # %entry
 ; CHECK-NEXT:    mv a5, a3
-; CHECK-NEXT:  .LBB31_6: # %entry
+; CHECK-NEXT:  .LBB31_5: # %entry
 ; CHECK-NEXT:    sw a5, 12(a0)
 ; CHECK-NEXT:    sw a4, 8(a0)
 ; CHECK-NEXT:    sw a2, 4(a0)
 ; CHECK-NEXT:    sw a1, 0(a0)
 ; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB31_6: # %entry
+; CHECK-NEXT:    mv a1, a3
+; CHECK-NEXT:    fcvt.lu.s a4, fa2, rtz
+; CHECK-NEXT:    bltu a2, a3, .LBB31_2
 ; CHECK-NEXT:  .LBB31_7: # %entry
 ; CHECK-NEXT:    mv a2, a3
-; CHECK-NEXT:    fcvt.lu.s a5, ft1, rtz
-; CHECK-NEXT:    bltu a4, a3, .LBB31_4
+; CHECK-NEXT:    fcvt.lu.s a5, fa3, rtz
+; CHECK-NEXT:    bltu a4, a3, .LBB31_3
 ; CHECK-NEXT:  .LBB31_8: # %entry
 ; CHECK-NEXT:    mv a4, a3
-; CHECK-NEXT:    bgeu a5, a3, .LBB31_5
-; CHECK-NEXT:    j .LBB31_6
+; CHECK-NEXT:    bgeu a5, a3, .LBB31_4
+; CHECK-NEXT:    j .LBB31_5
 entry:
   %conv = fptoui <4 x float> %x to <4 x i64>
   %spec.store.select = call <4 x i64> @llvm.umin.v4i64(<4 x i64> %conv, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
@@ -2259,61 +2286,59 @@ entry:
 define <4 x i32> @ustest_f32i32_mm(<4 x float> %x) {
 ; CHECK-LABEL: ustest_f32i32_mm:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft1, a4
-; CHECK-NEXT:    fmv.w.x ft2, a3
-; CHECK-NEXT:    fmv.w.x ft0, a2
-; CHECK-NEXT:    fcvt.l.s a2, ft1, rtz
-; CHECK-NEXT:    li a3, -1
-; CHECK-NEXT:    srli a5, a3, 32
-; CHECK-NEXT:    fcvt.l.s a3, ft2, rtz
-; CHECK-NEXT:    blt a2, a5, .LBB32_2
+; CHECK-NEXT:    fcvt.l.s a1, fa3, rtz
+; CHECK-NEXT:    li a2, -1
+; CHECK-NEXT:    srli a5, a2, 32
+; CHECK-NEXT:    fcvt.l.s a2, fa2, rtz
+; CHECK-NEXT:    bge a1, a5, .LBB32_10
 ; CHECK-NEXT:  # %bb.1: # %entry
-; CHECK-NEXT:    mv a2, a5
+; CHECK-NEXT:    fcvt.l.s a3, fa1, rtz
+; CHECK-NEXT:    bge a2, a5, .LBB32_11
 ; CHECK-NEXT:  .LBB32_2: # %entry
-; CHECK-NEXT:    fmv.w.x ft1, a1
-; CHECK-NEXT:    fcvt.l.s a1, ft0, rtz
-; CHECK-NEXT:    bge a3, a5, .LBB32_11
-; CHECK-NEXT:  # %bb.3: # %entry
-; CHECK-NEXT:    fcvt.l.s a4, ft1, rtz
-; CHECK-NEXT:    bge a1, a5, .LBB32_12
-; CHECK-NEXT:  .LBB32_4: # %entry
+; CHECK-NEXT:    fcvt.l.s a4, fa0, rtz
+; CHECK-NEXT:    bge a3, a5, .LBB32_12
+; CHECK-NEXT:  .LBB32_3: # %entry
 ; CHECK-NEXT:    bge a4, a5, .LBB32_13
-; CHECK-NEXT:  .LBB32_5: # %entry
+; CHECK-NEXT:  .LBB32_4: # %entry
 ; CHECK-NEXT:    blez a4, .LBB32_14
+; CHECK-NEXT:  .LBB32_5: # %entry
+; CHECK-NEXT:    blez a3, .LBB32_15
 ; CHECK-NEXT:  .LBB32_6: # %entry
-; CHECK-NEXT:    blez a1, .LBB32_15
+; CHECK-NEXT:    blez a2, .LBB32_16
 ; CHECK-NEXT:  .LBB32_7: # %entry
-; CHECK-NEXT:    blez a3, .LBB32_16
+; CHECK-NEXT:    bgtz a1, .LBB32_9
 ; CHECK-NEXT:  .LBB32_8: # %entry
-; CHECK-NEXT:    bgtz a2, .LBB32_10
+; CHECK-NEXT:    li a1, 0
 ; CHECK-NEXT:  .LBB32_9: # %entry
-; CHECK-NEXT:    li a2, 0
-; CHECK-NEXT:  .LBB32_10: # %entry
-; CHECK-NEXT:    sw a2, 12(a0)
-; CHECK-NEXT:    sw a3, 8(a0)
-; CHECK-NEXT:    sw a1, 4(a0)
+; CHECK-NEXT:    sw a1, 12(a0)
+; CHECK-NEXT:    sw a2, 8(a0)
+; CHECK-NEXT:    sw a3, 4(a0)
 ; CHECK-NEXT:    sw a4, 0(a0)
 ; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB32_10: # %entry
+; CHECK-NEXT:    mv a1, a5
+; CHECK-NEXT:    fcvt.l.s a3, fa1, rtz
+; CHECK-NEXT:    blt a2, a5, .LBB32_2
 ; CHECK-NEXT:  .LBB32_11: # %entry
-; CHECK-NEXT:    mv a3, a5
-; CHECK-NEXT:    fcvt.l.s a4, ft1, rtz
-; CHECK-NEXT:    blt a1, a5, .LBB32_4
+; CHECK-NEXT:    mv a2, a5
+; CHECK-NEXT:    fcvt.l.s a4, fa0, rtz
+; CHECK-NEXT:    blt a3, a5, .LBB32_3
 ; CHECK-NEXT:  .LBB32_12: # %entry
-; CHECK-NEXT:    mv a1, a5
-; CHECK-NEXT:    blt a4, a5, .LBB32_5
+; CHECK-NEXT:    mv a3, a5
+; CHECK-NEXT:    blt a4, a5, .LBB32_4
 ; CHECK-NEXT:  .LBB32_13: # %entry
 ; CHECK-NEXT:    mv a4, a5
-; CHECK-NEXT:    bgtz a4, .LBB32_6
+; CHECK-NEXT:    bgtz a4, .LBB32_5
 ; CHECK-NEXT:  .LBB32_14: # %entry
 ; CHECK-NEXT:    li a4, 0
-; CHECK-NEXT:    bgtz a1, .LBB32_7
+; CHECK-NEXT:    bgtz a3, .LBB32_6
 ; CHECK-NEXT:  .LBB32_15: # %entry
-; CHECK-NEXT:    li a1, 0
-; CHECK-NEXT:    bgtz a3, .LBB32_8
-; CHECK-NEXT:  .LBB32_16: # %entry
 ; CHECK-NEXT:    li a3, 0
-; CHECK-NEXT:    blez a2, .LBB32_9
-; CHECK-NEXT:    j .LBB32_10
+; CHECK-NEXT:    bgtz a2, .LBB32_7
+; CHECK-NEXT:  .LBB32_16: # %entry
+; CHECK-NEXT:    li a2, 0
+; CHECK-NEXT:    blez a1, .LBB32_8
+; CHECK-NEXT:    j .LBB32_9
 entry:
   %conv = fptosi <4 x float> %x to <4 x i64>
   %spec.store.select = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %conv, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
@@ -2332,63 +2357,59 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) {
 ; CHECK-NEXT:    sd s1, 40(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    sd s2, 32(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    sd s3, 24(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s4, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs0, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs1, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs2, 0(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_offset ra, -8
 ; CHECK-NEXT:    .cfi_offset s0, -16
 ; CHECK-NEXT:    .cfi_offset s1, -24
 ; CHECK-NEXT:    .cfi_offset s2, -32
 ; CHECK-NEXT:    .cfi_offset s3, -40
-; CHECK-NEXT:    .cfi_offset s4, -48
-; CHECK-NEXT:    lhu s2, 24(a1)
-; CHECK-NEXT:    lhu s1, 0(a1)
+; CHECK-NEXT:    .cfi_offset fs0, -48
+; CHECK-NEXT:    .cfi_offset fs1, -56
+; CHECK-NEXT:    .cfi_offset fs2, -64
+; CHECK-NEXT:    lhu s1, 24(a1)
+; CHECK-NEXT:    lhu s2, 0(a1)
 ; CHECK-NEXT:    lhu s3, 8(a1)
 ; CHECK-NEXT:    lhu a1, 16(a1)
 ; CHECK-NEXT:    mv s0, a0
 ; CHECK-NEXT:    mv a0, a1
 ; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    mv s4, a0
+; CHECK-NEXT:    fmv.s fs2, fa0
 ; CHECK-NEXT:    mv a0, s3
 ; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    mv s3, a0
-; CHECK-NEXT:    mv a0, s1
-; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    mv s1, a0
-; CHECK-NEXT:    fmv.w.x ft0, s3
-; CHECK-NEXT:    fsw ft0, 12(sp) # 4-byte Folded Spill
-; CHECK-NEXT:    fmv.w.x ft0, s4
-; CHECK-NEXT:    fcvt.l.s s3, ft0, rtz
+; CHECK-NEXT:    fmv.s fs1, fa0
 ; CHECK-NEXT:    mv a0, s2
 ; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    fcvt.l.s a0, ft0, rtz
+; CHECK-NEXT:    fmv.s fs0, fa0
+; CHECK-NEXT:    fcvt.l.s s2, fs2, rtz
+; CHECK-NEXT:    mv a0, s1
+; CHECK-NEXT:    call __extendhfsf2@plt
+; CHECK-NEXT:    fcvt.l.s a0, fa0, rtz
 ; CHECK-NEXT:    lui a1, 524288
 ; CHECK-NEXT:    addiw a4, a1, -1
-; CHECK-NEXT:    blt a0, a4, .LBB33_2
+; CHECK-NEXT:    bge a0, a4, .LBB33_10
 ; CHECK-NEXT:  # %bb.1: # %entry
-; CHECK-NEXT:    mv a0, a4
+; CHECK-NEXT:    fcvt.l.s a2, fs1, rtz
+; CHECK-NEXT:    bge s2, a4, .LBB33_11
 ; CHECK-NEXT:  .LBB33_2: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, s1
-; CHECK-NEXT:    flw ft1, 12(sp) # 4-byte Folded Reload
-; CHECK-NEXT:    fcvt.l.s a2, ft1, rtz
-; CHECK-NEXT:    bge s3, a4, .LBB33_11
-; CHECK-NEXT:  # %bb.3: # %entry
-; CHECK-NEXT:    fcvt.l.s a3, ft0, rtz
+; CHECK-NEXT:    fcvt.l.s a3, fs0, rtz
 ; CHECK-NEXT:    bge a2, a4, .LBB33_12
-; CHECK-NEXT:  .LBB33_4: # %entry
+; CHECK-NEXT:  .LBB33_3: # %entry
 ; CHECK-NEXT:    bge a3, a4, .LBB33_13
-; CHECK-NEXT:  .LBB33_5: # %entry
+; CHECK-NEXT:  .LBB33_4: # %entry
 ; CHECK-NEXT:    bge a1, a3, .LBB33_14
-; CHECK-NEXT:  .LBB33_6: # %entry
+; CHECK-NEXT:  .LBB33_5: # %entry
 ; CHECK-NEXT:    bge a1, a2, .LBB33_15
+; CHECK-NEXT:  .LBB33_6: # %entry
+; CHECK-NEXT:    bge a1, s2, .LBB33_16
 ; CHECK-NEXT:  .LBB33_7: # %entry
-; CHECK-NEXT:    bge a1, s3, .LBB33_16
+; CHECK-NEXT:    blt a1, a0, .LBB33_9
 ; CHECK-NEXT:  .LBB33_8: # %entry
-; CHECK-NEXT:    blt a1, a0, .LBB33_10
-; CHECK-NEXT:  .LBB33_9: # %entry
 ; CHECK-NEXT:    lui a0, 524288
-; CHECK-NEXT:  .LBB33_10: # %entry
+; CHECK-NEXT:  .LBB33_9: # %entry
 ; CHECK-NEXT:    sw a0, 12(s0)
-; CHECK-NEXT:    sw s3, 8(s0)
+; CHECK-NEXT:    sw s2, 8(s0)
 ; CHECK-NEXT:    sw a2, 4(s0)
 ; CHECK-NEXT:    sw a3, 0(s0)
 ; CHECK-NEXT:    ld ra, 56(sp) # 8-byte Folded Reload
@@ -2396,29 +2417,35 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) {
 ; CHECK-NEXT:    ld s1, 40(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s2, 32(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s3, 24(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s4, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs0, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs1, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs2, 0(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    addi sp, sp, 64
 ; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB33_10: # %entry
+; CHECK-NEXT:    mv a0, a4
+; CHECK-NEXT:    fcvt.l.s a2, fs1, rtz
+; CHECK-NEXT:    blt s2, a4, .LBB33_2
 ; CHECK-NEXT:  .LBB33_11: # %entry
-; CHECK-NEXT:    mv s3, a4
-; CHECK-NEXT:    fcvt.l.s a3, ft0, rtz
-; CHECK-NEXT:    blt a2, a4, .LBB33_4
+; CHECK-NEXT:    mv s2, a4
+; CHECK-NEXT:    fcvt.l.s a3, fs0, rtz
+; CHECK-NEXT:    blt a2, a4, .LBB33_3
 ; CHECK-NEXT:  .LBB33_12: # %entry
 ; CHECK-NEXT:    mv a2, a4
-; CHECK-NEXT:    blt a3, a4, .LBB33_5
+; CHECK-NEXT:    blt a3, a4, .LBB33_4
 ; CHECK-NEXT:  .LBB33_13: # %entry
 ; CHECK-NEXT:    mv a3, a4
-; CHECK-NEXT:    blt a1, a3, .LBB33_6
+; CHECK-NEXT:    blt a1, a3, .LBB33_5
 ; CHECK-NEXT:  .LBB33_14: # %entry
 ; CHECK-NEXT:    lui a3, 524288
-; CHECK-NEXT:    blt a1, a2, .LBB33_7
+; CHECK-NEXT:    blt a1, a2, .LBB33_6
 ; CHECK-NEXT:  .LBB33_15: # %entry
 ; CHECK-NEXT:    lui a2, 524288
-; CHECK-NEXT:    blt a1, s3, .LBB33_8
+; CHECK-NEXT:    blt a1, s2, .LBB33_7
 ; CHECK-NEXT:  .LBB33_16: # %entry
-; CHECK-NEXT:    lui s3, 524288
-; CHECK-NEXT:    bge a1, a0, .LBB33_9
-; CHECK-NEXT:    j .LBB33_10
+; CHECK-NEXT:    lui s2, 524288
+; CHECK-NEXT:    bge a1, a0, .LBB33_8
+; CHECK-NEXT:    j .LBB33_9
 entry:
   %conv = fptosi <4 x half> %x to <4 x i64>
   %spec.store.select = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %conv, <4 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>)
@@ -2437,73 +2464,75 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
 ; CHECK-NEXT:    sd s1, 40(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    sd s2, 32(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    sd s3, 24(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s4, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs0, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs1, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs2, 0(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_offset ra, -8
 ; CHECK-NEXT:    .cfi_offset s0, -16
 ; CHECK-NEXT:    .cfi_offset s1, -24
 ; CHECK-NEXT:    .cfi_offset s2, -32
 ; CHECK-NEXT:    .cfi_offset s3, -40
-; CHECK-NEXT:    .cfi_offset s4, -48
-; CHECK-NEXT:    lhu s2, 0(a1)
-; CHECK-NEXT:    lhu s1, 24(a1)
+; CHECK-NEXT:    .cfi_offset fs0, -48
+; CHECK-NEXT:    .cfi_offset fs1, -56
+; CHECK-NEXT:    .cfi_offset fs2, -64
+; CHECK-NEXT:    lhu s1, 0(a1)
+; CHECK-NEXT:    lhu s2, 24(a1)
 ; CHECK-NEXT:    lhu s3, 16(a1)
 ; CHECK-NEXT:    lhu a1, 8(a1)
 ; CHECK-NEXT:    mv s0, a0
 ; CHECK-NEXT:    mv a0, a1
 ; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    mv s4, a0
+; CHECK-NEXT:    fmv.s fs2, fa0
 ; CHECK-NEXT:    mv a0, s3
 ; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    mv s3, a0
-; CHECK-NEXT:    mv a0, s1
-; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    mv s1, a0
-; CHECK-NEXT:    fmv.w.x ft0, s3
-; CHECK-NEXT:    fsw ft0, 12(sp) # 4-byte Folded Spill
-; CHECK-NEXT:    fmv.w.x ft0, s4
-; CHECK-NEXT:    fcvt.lu.s s3, ft0, rtz
+; CHECK-NEXT:    fmv.s fs1, fa0
 ; CHECK-NEXT:    mv a0, s2
 ; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    fcvt.lu.s a0, ft0, rtz
+; CHECK-NEXT:    fmv.s fs0, fa0
+; CHECK-NEXT:    fcvt.lu.s s2, fs2, rtz
+; CHECK-NEXT:    mv a0, s1
+; CHECK-NEXT:    call __extendhfsf2@plt
+; CHECK-NEXT:    fcvt.lu.s a0, fa0, rtz
 ; CHECK-NEXT:    li a1, -1
 ; CHECK-NEXT:    srli a1, a1, 32
-; CHECK-NEXT:    bltu a0, a1, .LBB34_2
+; CHECK-NEXT:    bgeu a0, a1, .LBB34_6
 ; CHECK-NEXT:  # %bb.1: # %entry
-; CHECK-NEXT:    mv a0, a1
+; CHECK-NEXT:    fcvt.lu.s a2, fs1, rtz
+; CHECK-NEXT:    bgeu s2, a1, .LBB34_7
 ; CHECK-NEXT:  .LBB34_2: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, s1
-; CHECK-NEXT:    flw ft1, 12(sp) # 4-byte Folded Reload
-; CHECK-NEXT:    fcvt.lu.s a2, ft1, rtz
-; CHECK-NEXT:    bgeu s3, a1, .LBB34_7
-; CHECK-NEXT:  # %bb.3: # %entry
-; CHECK-NEXT:    fcvt.lu.s a3, ft0, rtz
+; CHECK-NEXT:    fcvt.lu.s a3, fs0, rtz
 ; CHECK-NEXT:    bgeu a2, a1, .LBB34_8
+; CHECK-NEXT:  .LBB34_3: # %entry
+; CHECK-NEXT:    bltu a3, a1, .LBB34_5
 ; CHECK-NEXT:  .LBB34_4: # %entry
-; CHECK-NEXT:    bltu a3, a1, .LBB34_6
-; CHECK-NEXT:  .LBB34_5: # %entry
 ; CHECK-NEXT:    mv a3, a1
-; CHECK-NEXT:  .LBB34_6: # %entry
+; CHECK-NEXT:  .LBB34_5: # %entry
 ; CHECK-NEXT:    sw a3, 12(s0)
 ; CHECK-NEXT:    sw a2, 8(s0)
-; CHECK-NEXT:    sw s3, 4(s0)
+; CHECK-NEXT:    sw s2, 4(s0)
 ; CHECK-NEXT:    sw a0, 0(s0)
 ; CHECK-NEXT:    ld ra, 56(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s0, 48(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s1, 40(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s2, 32(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s3, 24(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s4, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs0, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs1, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs2, 0(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    addi sp, sp, 64
 ; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB34_6: # %entry
+; CHECK-NEXT:    mv a0, a1
+; CHECK-NEXT:    fcvt.lu.s a2, fs1, rtz
+; CHECK-NEXT:    bltu s2, a1, .LBB34_2
 ; CHECK-NEXT:  .LBB34_7: # %entry
-; CHECK-NEXT:    mv s3, a1
-; CHECK-NEXT:    fcvt.lu.s a3, ft0, rtz
-; CHECK-NEXT:    bltu a2, a1, .LBB34_4
+; CHECK-NEXT:    mv s2, a1
+; CHECK-NEXT:    fcvt.lu.s a3, fs0, rtz
+; CHECK-NEXT:    bltu a2, a1, .LBB34_3
 ; CHECK-NEXT:  .LBB34_8: # %entry
 ; CHECK-NEXT:    mv a2, a1
-; CHECK-NEXT:    bgeu a3, a1, .LBB34_5
-; CHECK-NEXT:    j .LBB34_6
+; CHECK-NEXT:    bgeu a3, a1, .LBB34_4
+; CHECK-NEXT:    j .LBB34_5
 entry:
   %conv = fptoui <4 x half> %x to <4 x i64>
   %spec.store.select = call <4 x i64> @llvm.umin.v4i64(<4 x i64> %conv, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
@@ -2521,63 +2550,59 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) {
 ; CHECK-NEXT:    sd s1, 40(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    sd s2, 32(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    sd s3, 24(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s4, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs0, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs1, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs2, 0(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_offset ra, -8
 ; CHECK-NEXT:    .cfi_offset s0, -16
 ; CHECK-NEXT:    .cfi_offset s1, -24
 ; CHECK-NEXT:    .cfi_offset s2, -32
 ; CHECK-NEXT:    .cfi_offset s3, -40
-; CHECK-NEXT:    .cfi_offset s4, -48
-; CHECK-NEXT:    lhu s2, 24(a1)
-; CHECK-NEXT:    lhu s1, 0(a1)
+; CHECK-NEXT:    .cfi_offset fs0, -48
+; CHECK-NEXT:    .cfi_offset fs1, -56
+; CHECK-NEXT:    .cfi_offset fs2, -64
+; CHECK-NEXT:    lhu s1, 24(a1)
+; CHECK-NEXT:    lhu s2, 0(a1)
 ; CHECK-NEXT:    lhu s3, 8(a1)
 ; CHECK-NEXT:    lhu a1, 16(a1)
 ; CHECK-NEXT:    mv s0, a0
 ; CHECK-NEXT:    mv a0, a1
 ; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    mv s4, a0
+; CHECK-NEXT:    fmv.s fs2, fa0
 ; CHECK-NEXT:    mv a0, s3
 ; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    mv s3, a0
-; CHECK-NEXT:    mv a0, s1
-; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    mv s1, a0
-; CHECK-NEXT:    fmv.w.x ft0, s3
-; CHECK-NEXT:    fsw ft0, 12(sp) # 4-byte Folded Spill
-; CHECK-NEXT:    fmv.w.x ft0, s4
-; CHECK-NEXT:    fcvt.l.s s3, ft0, rtz
+; CHECK-NEXT:    fmv.s fs1, fa0
 ; CHECK-NEXT:    mv a0, s2
 ; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    fcvt.l.s a0, ft0, rtz
+; CHECK-NEXT:    fmv.s fs0, fa0
+; CHECK-NEXT:    fcvt.l.s s2, fs2, rtz
+; CHECK-NEXT:    mv a0, s1
+; CHECK-NEXT:    call __extendhfsf2@plt
+; CHECK-NEXT:    fcvt.l.s a0, fa0, rtz
 ; CHECK-NEXT:    li a1, -1
 ; CHECK-NEXT:    srli a3, a1, 32
-; CHECK-NEXT:    blt a0, a3, .LBB35_2
+; CHECK-NEXT:    bge a0, a3, .LBB35_10
 ; CHECK-NEXT:  # %bb.1: # %entry
-; CHECK-NEXT:    mv a0, a3
+; CHECK-NEXT:    fcvt.l.s a1, fs1, rtz
+; CHECK-NEXT:    bge s2, a3, .LBB35_11
 ; CHECK-NEXT:  .LBB35_2: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, s1
-; CHECK-NEXT:    flw ft1, 12(sp) # 4-byte Folded Reload
-; CHECK-NEXT:    fcvt.l.s a1, ft1, rtz
-; CHECK-NEXT:    bge s3, a3, .LBB35_11
-; CHECK-NEXT:  # %bb.3: # %entry
-; CHECK-NEXT:    fcvt.l.s a2, ft0, rtz
+; CHECK-NEXT:    fcvt.l.s a2, fs0, rtz
 ; CHECK-NEXT:    bge a1, a3, .LBB35_12
-; CHECK-NEXT:  .LBB35_4: # %entry
+; CHECK-NEXT:  .LBB35_3: # %entry
 ; CHECK-NEXT:    bge a2, a3, .LBB35_13
-; CHECK-NEXT:  .LBB35_5: # %entry
+; CHECK-NEXT:  .LBB35_4: # %entry
 ; CHECK-NEXT:    blez a2, .LBB35_14
-; CHECK-NEXT:  .LBB35_6: # %entry
+; CHECK-NEXT:  .LBB35_5: # %entry
 ; CHECK-NEXT:    blez a1, .LBB35_15
+; CHECK-NEXT:  .LBB35_6: # %entry
+; CHECK-NEXT:    blez s2, .LBB35_16
 ; CHECK-NEXT:  .LBB35_7: # %entry
-; CHECK-NEXT:    blez s3, .LBB35_16
+; CHECK-NEXT:    bgtz a0, .LBB35_9
 ; CHECK-NEXT:  .LBB35_8: # %entry
-; CHECK-NEXT:    bgtz a0, .LBB35_10
-; CHECK-NEXT:  .LBB35_9: # %entry
 ; CHECK-NEXT:    li a0, 0
-; CHECK-NEXT:  .LBB35_10: # %entry
+; CHECK-NEXT:  .LBB35_9: # %entry
 ; CHECK-NEXT:    sw a0, 12(s0)
-; CHECK-NEXT:    sw s3, 8(s0)
+; CHECK-NEXT:    sw s2, 8(s0)
 ; CHECK-NEXT:    sw a1, 4(s0)
 ; CHECK-NEXT:    sw a2, 0(s0)
 ; CHECK-NEXT:    ld ra, 56(sp) # 8-byte Folded Reload
@@ -2585,29 +2610,35 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) {
 ; CHECK-NEXT:    ld s1, 40(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s2, 32(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s3, 24(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s4, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs0, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs1, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs2, 0(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    addi sp, sp, 64
 ; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB35_10: # %entry
+; CHECK-NEXT:    mv a0, a3
+; CHECK-NEXT:    fcvt.l.s a1, fs1, rtz
+; CHECK-NEXT:    blt s2, a3, .LBB35_2
 ; CHECK-NEXT:  .LBB35_11: # %entry
-; CHECK-NEXT:    mv s3, a3
-; CHECK-NEXT:    fcvt.l.s a2, ft0, rtz
-; CHECK-NEXT:    blt a1, a3, .LBB35_4
+; CHECK-NEXT:    mv s2, a3
+; CHECK-NEXT:    fcvt.l.s a2, fs0, rtz
+; CHECK-NEXT:    blt a1, a3, .LBB35_3
 ; CHECK-NEXT:  .LBB35_12: # %entry
 ; CHECK-NEXT:    mv a1, a3
-; CHECK-NEXT:    blt a2, a3, .LBB35_5
+; CHECK-NEXT:    blt a2, a3, .LBB35_4
 ; CHECK-NEXT:  .LBB35_13: # %entry
 ; CHECK-NEXT:    mv a2, a3
-; CHECK-NEXT:    bgtz a2, .LBB35_6
+; CHECK-NEXT:    bgtz a2, .LBB35_5
 ; CHECK-NEXT:  .LBB35_14: # %entry
 ; CHECK-NEXT:    li a2, 0
-; CHECK-NEXT:    bgtz a1, .LBB35_7
+; CHECK-NEXT:    bgtz a1, .LBB35_6
 ; CHECK-NEXT:  .LBB35_15: # %entry
 ; CHECK-NEXT:    li a1, 0
-; CHECK-NEXT:    bgtz s3, .LBB35_8
+; CHECK-NEXT:    bgtz s2, .LBB35_7
 ; CHECK-NEXT:  .LBB35_16: # %entry
-; CHECK-NEXT:    li s3, 0
-; CHECK-NEXT:    blez a0, .LBB35_9
-; CHECK-NEXT:    j .LBB35_10
+; CHECK-NEXT:    li s2, 0
+; CHECK-NEXT:    blez a0, .LBB35_8
+; CHECK-NEXT:    j .LBB35_9
 entry:
   %conv = fptosi <4 x half> %x to <4 x i64>
   %spec.store.select = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %conv, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
@@ -2621,12 +2652,10 @@ entry:
 define <2 x i16> @stest_f64i16_mm(<2 x double> %x) {
 ; CHECK-LABEL: stest_f64i16_mm:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a1
-; CHECK-NEXT:    fmv.d.x ft1, a0
-; CHECK-NEXT:    fcvt.w.d a1, ft0, rtz
+; CHECK-NEXT:    fcvt.w.d a1, fa1, rtz
 ; CHECK-NEXT:    lui a0, 8
 ; CHECK-NEXT:    addiw a2, a0, -1
-; CHECK-NEXT:    fcvt.w.d a0, ft1, rtz
+; CHECK-NEXT:    fcvt.w.d a0, fa0, rtz
 ; CHECK-NEXT:    bge a1, a2, .LBB36_5
 ; CHECK-NEXT:  # %bb.1: # %entry
 ; CHECK-NEXT:    bge a0, a2, .LBB36_6
@@ -2661,12 +2690,10 @@ entry:
 define <2 x i16> @utest_f64i16_mm(<2 x double> %x) {
 ; CHECK-LABEL: utest_f64i16_mm:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    fmv.d.x ft1, a1
-; CHECK-NEXT:    fcvt.wu.d a0, ft0, rtz
+; CHECK-NEXT:    fcvt.wu.d a0, fa0, rtz
 ; CHECK-NEXT:    lui a1, 16
 ; CHECK-NEXT:    addiw a2, a1, -1
-; CHECK-NEXT:    fcvt.wu.d a1, ft1, rtz
+; CHECK-NEXT:    fcvt.wu.d a1, fa1, rtz
 ; CHECK-NEXT:    bgeu a0, a2, .LBB37_3
 ; CHECK-NEXT:  # %bb.1: # %entry
 ; CHECK-NEXT:    bgeu a1, a2, .LBB37_4
@@ -2688,12 +2715,10 @@ entry:
 define <2 x i16> @ustest_f64i16_mm(<2 x double> %x) {
 ; CHECK-LABEL: ustest_f64i16_mm:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a1
-; CHECK-NEXT:    fmv.d.x ft1, a0
-; CHECK-NEXT:    fcvt.w.d a1, ft0, rtz
+; CHECK-NEXT:    fcvt.w.d a1, fa1, rtz
 ; CHECK-NEXT:    lui a0, 16
 ; CHECK-NEXT:    addiw a2, a0, -1
-; CHECK-NEXT:    fcvt.w.d a0, ft1, rtz
+; CHECK-NEXT:    fcvt.w.d a0, fa0, rtz
 ; CHECK-NEXT:    bge a1, a2, .LBB38_5
 ; CHECK-NEXT:  # %bb.1: # %entry
 ; CHECK-NEXT:    bge a0, a2, .LBB38_6
@@ -2726,63 +2751,61 @@ entry:
 define <4 x i16> @stest_f32i16_mm(<4 x float> %x) {
 ; CHECK-LABEL: stest_f32i16_mm:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft1, a4
-; CHECK-NEXT:    fmv.w.x ft2, a3
-; CHECK-NEXT:    fmv.w.x ft0, a2
-; CHECK-NEXT:    fcvt.w.s a2, ft1, rtz
-; CHECK-NEXT:    lui a3, 8
-; CHECK-NEXT:    addiw a5, a3, -1
-; CHECK-NEXT:    fcvt.w.s a3, ft2, rtz
-; CHECK-NEXT:    blt a2, a5, .LBB39_2
+; CHECK-NEXT:    fcvt.w.s a1, fa3, rtz
+; CHECK-NEXT:    lui a2, 8
+; CHECK-NEXT:    addiw a5, a2, -1
+; CHECK-NEXT:    fcvt.w.s a2, fa2, rtz
+; CHECK-NEXT:    bge a1, a5, .LBB39_10
 ; CHECK-NEXT:  # %bb.1: # %entry
-; CHECK-NEXT:    mv a2, a5
+; CHECK-NEXT:    fcvt.w.s a3, fa1, rtz
+; CHECK-NEXT:    bge a2, a5, .LBB39_11
 ; CHECK-NEXT:  .LBB39_2: # %entry
-; CHECK-NEXT:    fmv.w.x ft1, a1
-; CHECK-NEXT:    fcvt.w.s a1, ft0, rtz
-; CHECK-NEXT:    bge a3, a5, .LBB39_11
-; CHECK-NEXT:  # %bb.3: # %entry
-; CHECK-NEXT:    fcvt.w.s a4, ft1, rtz
-; CHECK-NEXT:    bge a1, a5, .LBB39_12
-; CHECK-NEXT:  .LBB39_4: # %entry
+; CHECK-NEXT:    fcvt.w.s a4, fa0, rtz
+; CHECK-NEXT:    bge a3, a5, .LBB39_12
+; CHECK-NEXT:  .LBB39_3: # %entry
 ; CHECK-NEXT:    bge a4, a5, .LBB39_13
-; CHECK-NEXT:  .LBB39_5: # %entry
+; CHECK-NEXT:  .LBB39_4: # %entry
 ; CHECK-NEXT:    lui a5, 1048568
 ; CHECK-NEXT:    bge a5, a4, .LBB39_14
+; CHECK-NEXT:  .LBB39_5: # %entry
+; CHECK-NEXT:    bge a5, a3, .LBB39_15
 ; CHECK-NEXT:  .LBB39_6: # %entry
-; CHECK-NEXT:    bge a5, a1, .LBB39_15
+; CHECK-NEXT:    bge a5, a2, .LBB39_16
 ; CHECK-NEXT:  .LBB39_7: # %entry
-; CHECK-NEXT:    bge a5, a3, .LBB39_16
+; CHECK-NEXT:    blt a5, a1, .LBB39_9
 ; CHECK-NEXT:  .LBB39_8: # %entry
-; CHECK-NEXT:    blt a5, a2, .LBB39_10
+; CHECK-NEXT:    lui a1, 1048568
 ; CHECK-NEXT:  .LBB39_9: # %entry
-; CHECK-NEXT:    lui a2, 1048568
-; CHECK-NEXT:  .LBB39_10: # %entry
-; CHECK-NEXT:    sh a2, 6(a0)
-; CHECK-NEXT:    sh a3, 4(a0)
-; CHECK-NEXT:    sh a1, 2(a0)
+; CHECK-NEXT:    sh a1, 6(a0)
+; CHECK-NEXT:    sh a2, 4(a0)
+; CHECK-NEXT:    sh a3, 2(a0)
 ; CHECK-NEXT:    sh a4, 0(a0)
 ; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB39_10: # %entry
+; CHECK-NEXT:    mv a1, a5
+; CHECK-NEXT:    fcvt.w.s a3, fa1, rtz
+; CHECK-NEXT:    blt a2, a5, .LBB39_2
 ; CHECK-NEXT:  .LBB39_11: # %entry
-; CHECK-NEXT:    mv a3, a5
-; CHECK-NEXT:    fcvt.w.s a4, ft1, rtz
-; CHECK-NEXT:    blt a1, a5, .LBB39_4
+; CHECK-NEXT:    mv a2, a5
+; CHECK-NEXT:    fcvt.w.s a4, fa0, rtz
+; CHECK-NEXT:    blt a3, a5, .LBB39_3
 ; CHECK-NEXT:  .LBB39_12: # %entry
-; CHECK-NEXT:    mv a1, a5
-; CHECK-NEXT:    blt a4, a5, .LBB39_5
+; CHECK-NEXT:    mv a3, a5
+; CHECK-NEXT:    blt a4, a5, .LBB39_4
 ; CHECK-NEXT:  .LBB39_13: # %entry
 ; CHECK-NEXT:    mv a4, a5
 ; CHECK-NEXT:    lui a5, 1048568
-; CHECK-NEXT:    blt a5, a4, .LBB39_6
+; CHECK-NEXT:    blt a5, a4, .LBB39_5
 ; CHECK-NEXT:  .LBB39_14: # %entry
 ; CHECK-NEXT:    lui a4, 1048568
-; CHECK-NEXT:    blt a5, a1, .LBB39_7
+; CHECK-NEXT:    blt a5, a3, .LBB39_6
 ; CHECK-NEXT:  .LBB39_15: # %entry
-; CHECK-NEXT:    lui a1, 1048568
-; CHECK-NEXT:    blt a5, a3, .LBB39_8
-; CHECK-NEXT:  .LBB39_16: # %entry
 ; CHECK-NEXT:    lui a3, 1048568
-; CHECK-NEXT:    bge a5, a2, .LBB39_9
-; CHECK-NEXT:    j .LBB39_10
+; CHECK-NEXT:    blt a5, a2, .LBB39_7
+; CHECK-NEXT:  .LBB39_16: # %entry
+; CHECK-NEXT:    lui a2, 1048568
+; CHECK-NEXT:    bge a5, a1, .LBB39_8
+; CHECK-NEXT:    j .LBB39_9
 entry:
   %conv = fptosi <4 x float> %x to <4 x i32>
   %spec.store.select = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %conv, <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767>)
@@ -2794,41 +2817,39 @@ entry:
 define <4 x i16> @utest_f32i16_mm(<4 x float> %x) {
 ; CHECK-LABEL: utest_f32i16_mm:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft1, a1
-; CHECK-NEXT:    fmv.w.x ft2, a2
-; CHECK-NEXT:    fmv.w.x ft0, a3
-; CHECK-NEXT:    fcvt.wu.s a1, ft1, rtz
+; CHECK-NEXT:    fcvt.wu.s a1, fa0, rtz
 ; CHECK-NEXT:    lui a2, 16
 ; CHECK-NEXT:    addiw a3, a2, -1
-; CHECK-NEXT:    fcvt.wu.s a2, ft2, rtz
-; CHECK-NEXT:    bltu a1, a3, .LBB40_2
+; CHECK-NEXT:    fcvt.wu.s a2, fa1, rtz
+; CHECK-NEXT:    bgeu a1, a3, .LBB40_6
 ; CHECK-NEXT:  # %bb.1: # %entry
-; CHECK-NEXT:    mv a1, a3
-; CHECK-NEXT:  .LBB40_2: # %entry
-; CHECK-NEXT:    fmv.w.x ft1, a4
-; CHECK-NEXT:    fcvt.wu.s a4, ft0, rtz
+; CHECK-NEXT:    fcvt.wu.s a4, fa2, rtz
 ; CHECK-NEXT:    bgeu a2, a3, .LBB40_7
-; CHECK-NEXT:  # %bb.3: # %entry
-; CHECK-NEXT:    fcvt.wu.s a5, ft1, rtz
+; CHECK-NEXT:  .LBB40_2: # %entry
+; CHECK-NEXT:    fcvt.wu.s a5, fa3, rtz
 ; CHECK-NEXT:    bgeu a4, a3, .LBB40_8
+; CHECK-NEXT:  .LBB40_3: # %entry
+; CHECK-NEXT:    bltu a5, a3, .LBB40_5
 ; CHECK-NEXT:  .LBB40_4: # %entry
-; CHECK-NEXT:    bltu a5, a3, .LBB40_6
-; CHECK-NEXT:  .LBB40_5: # %entry
 ; CHECK-NEXT:    mv a5, a3
-; CHECK-NEXT:  .LBB40_6: # %entry
+; CHECK-NEXT:  .LBB40_5: # %entry
 ; CHECK-NEXT:    sh a5, 6(a0)
 ; CHECK-NEXT:    sh a4, 4(a0)
 ; CHECK-NEXT:    sh a2, 2(a0)
 ; CHECK-NEXT:    sh a1, 0(a0)
 ; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB40_6: # %entry
+; CHECK-NEXT:    mv a1, a3
+; CHECK-NEXT:    fcvt.wu.s a4, fa2, rtz
+; CHECK-NEXT:    bltu a2, a3, .LBB40_2
 ; CHECK-NEXT:  .LBB40_7: # %entry
 ; CHECK-NEXT:    mv a2, a3
-; CHECK-NEXT:    fcvt.wu.s a5, ft1, rtz
-; CHECK-NEXT:    bltu a4, a3, .LBB40_4
+; CHECK-NEXT:    fcvt.wu.s a5, fa3, rtz
+; CHECK-NEXT:    bltu a4, a3, .LBB40_3
 ; CHECK-NEXT:  .LBB40_8: # %entry
 ; CHECK-NEXT:    mv a4, a3
-; CHECK-NEXT:    bgeu a5, a3, .LBB40_5
-; CHECK-NEXT:    j .LBB40_6
+; CHECK-NEXT:    bgeu a5, a3, .LBB40_4
+; CHECK-NEXT:    j .LBB40_5
 entry:
   %conv = fptoui <4 x float> %x to <4 x i32>
   %spec.store.select = call <4 x i32> @llvm.umin.v4i32(<4 x i32> %conv, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>)
@@ -2839,61 +2860,59 @@ entry:
 define <4 x i16> @ustest_f32i16_mm(<4 x float> %x) {
 ; CHECK-LABEL: ustest_f32i16_mm:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft1, a4
-; CHECK-NEXT:    fmv.w.x ft2, a3
-; CHECK-NEXT:    fmv.w.x ft0, a2
-; CHECK-NEXT:    fcvt.w.s a2, ft1, rtz
-; CHECK-NEXT:    lui a3, 16
-; CHECK-NEXT:    addiw a5, a3, -1
-; CHECK-NEXT:    fcvt.w.s a3, ft2, rtz
-; CHECK-NEXT:    blt a2, a5, .LBB41_2
+; CHECK-NEXT:    fcvt.w.s a1, fa3, rtz
+; CHECK-NEXT:    lui a2, 16
+; CHECK-NEXT:    addiw a5, a2, -1
+; CHECK-NEXT:    fcvt.w.s a2, fa2, rtz
+; CHECK-NEXT:    bge a1, a5, .LBB41_10
 ; CHECK-NEXT:  # %bb.1: # %entry
-; CHECK-NEXT:    mv a2, a5
+; CHECK-NEXT:    fcvt.w.s a3, fa1, rtz
+; CHECK-NEXT:    bge a2, a5, .LBB41_11
 ; CHECK-NEXT:  .LBB41_2: # %entry
-; CHECK-NEXT:    fmv.w.x ft1, a1
-; CHECK-NEXT:    fcvt.w.s a1, ft0, rtz
-; CHECK-NEXT:    bge a3, a5, .LBB41_11
-; CHECK-NEXT:  # %bb.3: # %entry
-; CHECK-NEXT:    fcvt.w.s a4, ft1, rtz
-; CHECK-NEXT:    bge a1, a5, .LBB41_12
-; CHECK-NEXT:  .LBB41_4: # %entry
+; CHECK-NEXT:    fcvt.w.s a4, fa0, rtz
+; CHECK-NEXT:    bge a3, a5, .LBB41_12
+; CHECK-NEXT:  .LBB41_3: # %entry
 ; CHECK-NEXT:    bge a4, a5, .LBB41_13
-; CHECK-NEXT:  .LBB41_5: # %entry
+; CHECK-NEXT:  .LBB41_4: # %entry
 ; CHECK-NEXT:    blez a4, .LBB41_14
+; CHECK-NEXT:  .LBB41_5: # %entry
+; CHECK-NEXT:    blez a3, .LBB41_15
 ; CHECK-NEXT:  .LBB41_6: # %entry
-; CHECK-NEXT:    blez a1, .LBB41_15
+; CHECK-NEXT:    blez a2, .LBB41_16
 ; CHECK-NEXT:  .LBB41_7: # %entry
-; CHECK-NEXT:    blez a3, .LBB41_16
+; CHECK-NEXT:    bgtz a1, .LBB41_9
 ; CHECK-NEXT:  .LBB41_8: # %entry
-; CHECK-NEXT:    bgtz a2, .LBB41_10
+; CHECK-NEXT:    li a1, 0
 ; CHECK-NEXT:  .LBB41_9: # %entry
-; CHECK-NEXT:    li a2, 0
-; CHECK-NEXT:  .LBB41_10: # %entry
-; CHECK-NEXT:    sh a2, 6(a0)
-; CHECK-NEXT:    sh a3, 4(a0)
-; CHECK-NEXT:    sh a1, 2(a0)
+; CHECK-NEXT:    sh a1, 6(a0)
+; CHECK-NEXT:    sh a2, 4(a0)
+; CHECK-NEXT:    sh a3, 2(a0)
 ; CHECK-NEXT:    sh a4, 0(a0)
 ; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB41_10: # %entry
+; CHECK-NEXT:    mv a1, a5
+; CHECK-NEXT:    fcvt.w.s a3, fa1, rtz
+; CHECK-NEXT:    blt a2, a5, .LBB41_2
 ; CHECK-NEXT:  .LBB41_11: # %entry
-; CHECK-NEXT:    mv a3, a5
-; CHECK-NEXT:    fcvt.w.s a4, ft1, rtz
-; CHECK-NEXT:    blt a1, a5, .LBB41_4
+; CHECK-NEXT:    mv a2, a5
+; CHECK-NEXT:    fcvt.w.s a4, fa0, rtz
+; CHECK-NEXT:    blt a3, a5, .LBB41_3
 ; CHECK-NEXT:  .LBB41_12: # %entry
-; CHECK-NEXT:    mv a1, a5
-; CHECK-NEXT:    blt a4, a5, .LBB41_5
+; CHECK-NEXT:    mv a3, a5
+; CHECK-NEXT:    blt a4, a5, .LBB41_4
 ; CHECK-NEXT:  .LBB41_13: # %entry
 ; CHECK-NEXT:    mv a4, a5
-; CHECK-NEXT:    bgtz a4, .LBB41_6
+; CHECK-NEXT:    bgtz a4, .LBB41_5
 ; CHECK-NEXT:  .LBB41_14: # %entry
 ; CHECK-NEXT:    li a4, 0
-; CHECK-NEXT:    bgtz a1, .LBB41_7
+; CHECK-NEXT:    bgtz a3, .LBB41_6
 ; CHECK-NEXT:  .LBB41_15: # %entry
-; CHECK-NEXT:    li a1, 0
-; CHECK-NEXT:    bgtz a3, .LBB41_8
-; CHECK-NEXT:  .LBB41_16: # %entry
 ; CHECK-NEXT:    li a3, 0
-; CHECK-NEXT:    blez a2, .LBB41_9
-; CHECK-NEXT:    j .LBB41_10
+; CHECK-NEXT:    bgtz a2, .LBB41_7
+; CHECK-NEXT:  .LBB41_16: # %entry
+; CHECK-NEXT:    li a2, 0
+; CHECK-NEXT:    blez a1, .LBB41_8
+; CHECK-NEXT:    j .LBB41_9
 entry:
   %conv = fptosi <4 x float> %x to <4 x i32>
   %spec.store.select = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %conv, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>)
@@ -2905,18 +2924,24 @@ entry:
 define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
 ; CHECK-LABEL: stest_f16i16_mm:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -96
-; CHECK-NEXT:    .cfi_def_cfa_offset 96
-; CHECK-NEXT:    sd ra, 88(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s0, 80(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s1, 72(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s2, 64(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s3, 56(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s4, 48(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s5, 40(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s6, 32(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s7, 24(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s8, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    addi sp, sp, -128
+; CHECK-NEXT:    .cfi_def_cfa_offset 128
+; CHECK-NEXT:    sd ra, 120(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s1, 104(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s2, 96(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s3, 88(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s4, 80(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s5, 72(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s6, 64(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s7, 56(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs0, 48(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs1, 40(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs2, 32(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs3, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs4, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs5, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs6, 0(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_offset ra, -8
 ; CHECK-NEXT:    .cfi_offset s0, -16
 ; CHECK-NEXT:    .cfi_offset s1, -24
@@ -2926,157 +2951,169 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
 ; CHECK-NEXT:    .cfi_offset s5, -56
 ; CHECK-NEXT:    .cfi_offset s6, -64
 ; CHECK-NEXT:    .cfi_offset s7, -72
-; CHECK-NEXT:    .cfi_offset s8, -80
-; CHECK-NEXT:    lhu s6, 56(a1)
-; CHECK-NEXT:    lhu s1, 0(a1)
-; CHECK-NEXT:    lhu s2, 8(a1)
-; CHECK-NEXT:    lhu s3, 16(a1)
-; CHECK-NEXT:    lhu s4, 24(a1)
-; CHECK-NEXT:    lhu s5, 32(a1)
+; CHECK-NEXT:    .cfi_offset fs0, -80
+; CHECK-NEXT:    .cfi_offset fs1, -88
+; CHECK-NEXT:    .cfi_offset fs2, -96
+; CHECK-NEXT:    .cfi_offset fs3, -104
+; CHECK-NEXT:    .cfi_offset fs4, -112
+; CHECK-NEXT:    .cfi_offset fs5, -120
+; CHECK-NEXT:    .cfi_offset fs6, -128
+; CHECK-NEXT:    lhu s1, 56(a1)
+; CHECK-NEXT:    lhu s2, 0(a1)
+; CHECK-NEXT:    lhu s3, 8(a1)
+; CHECK-NEXT:    lhu s4, 16(a1)
+; CHECK-NEXT:    lhu s5, 24(a1)
+; CHECK-NEXT:    lhu s6, 32(a1)
 ; CHECK-NEXT:    lhu s7, 40(a1)
 ; CHECK-NEXT:    lhu a1, 48(a1)
 ; CHECK-NEXT:    mv s0, a0
 ; CHECK-NEXT:    mv a0, a1
 ; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    mv s8, a0
+; CHECK-NEXT:    fmv.s fs6, fa0
 ; CHECK-NEXT:    mv a0, s7
 ; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    mv s7, a0
+; CHECK-NEXT:    fmv.s fs5, fa0
+; CHECK-NEXT:    mv a0, s6
+; CHECK-NEXT:    call __extendhfsf2@plt
+; CHECK-NEXT:    fmv.s fs4, fa0
 ; CHECK-NEXT:    mv a0, s5
 ; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    mv s5, a0
+; CHECK-NEXT:    fmv.s fs3, fa0
 ; CHECK-NEXT:    mv a0, s4
 ; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    mv s4, a0
+; CHECK-NEXT:    fmv.s fs2, fa0
 ; CHECK-NEXT:    mv a0, s3
 ; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    mv s3, a0
+; CHECK-NEXT:    fmv.s fs1, fa0
 ; CHECK-NEXT:    mv a0, s2
 ; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    mv s2, a0
+; CHECK-NEXT:    fmv.s fs0, fa0
+; CHECK-NEXT:    fcvt.l.s s2, fs6, rtz
 ; CHECK-NEXT:    mv a0, s1
 ; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    mv s1, a0
-; CHECK-NEXT:    fmv.w.x ft0, s7
-; CHECK-NEXT:    fsw ft0, 12(sp) # 4-byte Folded Spill
-; CHECK-NEXT:    fmv.w.x ft0, s8
-; CHECK-NEXT:    fcvt.l.s s7, ft0, rtz
-; CHECK-NEXT:    mv a0, s6
-; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    fcvt.l.s a0, ft0, rtz
+; CHECK-NEXT:    fcvt.l.s a0, fa0, rtz
 ; CHECK-NEXT:    lui a1, 8
 ; CHECK-NEXT:    addiw a7, a1, -1
-; CHECK-NEXT:    blt a0, a7, .LBB42_2
+; CHECK-NEXT:    bge a0, a7, .LBB42_18
 ; CHECK-NEXT:  # %bb.1: # %entry
-; CHECK-NEXT:    mv a0, a7
+; CHECK-NEXT:    fcvt.l.s a1, fs5, rtz
+; CHECK-NEXT:    bge s2, a7, .LBB42_19
 ; CHECK-NEXT:  .LBB42_2: # %entry
-; CHECK-NEXT:    fmv.w.x ft1, s5
-; CHECK-NEXT:    flw ft0, 12(sp) # 4-byte Folded Reload
-; CHECK-NEXT:    fcvt.l.s a1, ft0, rtz
-; CHECK-NEXT:    blt s7, a7, .LBB42_4
-; CHECK-NEXT:  # %bb.3: # %entry
-; CHECK-NEXT:    mv s7, a7
+; CHECK-NEXT:    fcvt.l.s a2, fs4, rtz
+; CHECK-NEXT:    bge a1, a7, .LBB42_20
+; CHECK-NEXT:  .LBB42_3: # %entry
+; CHECK-NEXT:    fcvt.l.s a3, fs3, rtz
+; CHECK-NEXT:    bge a2, a7, .LBB42_21
 ; CHECK-NEXT:  .LBB42_4: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, s4
-; CHECK-NEXT:    fcvt.l.s a2, ft1, rtz
-; CHECK-NEXT:    blt a1, a7, .LBB42_6
-; CHECK-NEXT:  # %bb.5: # %entry
-; CHECK-NEXT:    mv a1, a7
-; CHECK-NEXT:  .LBB42_6: # %entry
-; CHECK-NEXT:    fmv.w.x ft1, s3
-; CHECK-NEXT:    fcvt.l.s a3, ft0, rtz
-; CHECK-NEXT:    blt a2, a7, .LBB42_8
-; CHECK-NEXT:  # %bb.7: # %entry
-; CHECK-NEXT:    mv a2, a7
-; CHECK-NEXT:  .LBB42_8: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, s2
-; CHECK-NEXT:    fcvt.l.s a4, ft1, rtz
-; CHECK-NEXT:    blt a3, a7, .LBB42_10
-; CHECK-NEXT:  # %bb.9: # %entry
-; CHECK-NEXT:    mv a3, a7
-; CHECK-NEXT:  .LBB42_10: # %entry
-; CHECK-NEXT:    fmv.w.x ft1, s1
-; CHECK-NEXT:    fcvt.l.s a5, ft0, rtz
+; CHECK-NEXT:    fcvt.l.s a4, fs2, rtz
+; CHECK-NEXT:    bge a3, a7, .LBB42_22
+; CHECK-NEXT:  .LBB42_5: # %entry
+; CHECK-NEXT:    fcvt.l.s a5, fs1, rtz
 ; CHECK-NEXT:    bge a4, a7, .LBB42_23
-; CHECK-NEXT:  # %bb.11: # %entry
-; CHECK-NEXT:    fcvt.l.s a6, ft1, rtz
+; CHECK-NEXT:  .LBB42_6: # %entry
+; CHECK-NEXT:    fcvt.l.s a6, fs0, rtz
 ; CHECK-NEXT:    bge a5, a7, .LBB42_24
-; CHECK-NEXT:  .LBB42_12: # %entry
+; CHECK-NEXT:  .LBB42_7: # %entry
 ; CHECK-NEXT:    bge a6, a7, .LBB42_25
-; CHECK-NEXT:  .LBB42_13: # %entry
+; CHECK-NEXT:  .LBB42_8: # %entry
 ; CHECK-NEXT:    lui a7, 1048568
 ; CHECK-NEXT:    bge a7, a6, .LBB42_26
-; CHECK-NEXT:  .LBB42_14: # %entry
+; CHECK-NEXT:  .LBB42_9: # %entry
 ; CHECK-NEXT:    bge a7, a5, .LBB42_27
-; CHECK-NEXT:  .LBB42_15: # %entry
+; CHECK-NEXT:  .LBB42_10: # %entry
 ; CHECK-NEXT:    bge a7, a4, .LBB42_28
-; CHECK-NEXT:  .LBB42_16: # %entry
+; CHECK-NEXT:  .LBB42_11: # %entry
 ; CHECK-NEXT:    bge a7, a3, .LBB42_29
-; CHECK-NEXT:  .LBB42_17: # %entry
+; CHECK-NEXT:  .LBB42_12: # %entry
 ; CHECK-NEXT:    bge a7, a2, .LBB42_30
-; CHECK-NEXT:  .LBB42_18: # %entry
+; CHECK-NEXT:  .LBB42_13: # %entry
 ; CHECK-NEXT:    bge a7, a1, .LBB42_31
-; CHECK-NEXT:  .LBB42_19: # %entry
-; CHECK-NEXT:    bge a7, s7, .LBB42_32
-; CHECK-NEXT:  .LBB42_20: # %entry
-; CHECK-NEXT:    blt a7, a0, .LBB42_22
-; CHECK-NEXT:  .LBB42_21: # %entry
+; CHECK-NEXT:  .LBB42_14: # %entry
+; CHECK-NEXT:    bge a7, s2, .LBB42_32
+; CHECK-NEXT:  .LBB42_15: # %entry
+; CHECK-NEXT:    blt a7, a0, .LBB42_17
+; CHECK-NEXT:  .LBB42_16: # %entry
 ; CHECK-NEXT:    lui a0, 1048568
-; CHECK-NEXT:  .LBB42_22: # %entry
+; CHECK-NEXT:  .LBB42_17: # %entry
 ; CHECK-NEXT:    sh a0, 14(s0)
-; CHECK-NEXT:    sh s7, 12(s0)
+; CHECK-NEXT:    sh s2, 12(s0)
 ; CHECK-NEXT:    sh a1, 10(s0)
 ; CHECK-NEXT:    sh a2, 8(s0)
 ; CHECK-NEXT:    sh a3, 6(s0)
 ; CHECK-NEXT:    sh a4, 4(s0)
 ; CHECK-NEXT:    sh a5, 2(s0)
 ; CHECK-NEXT:    sh a6, 0(s0)
-; CHECK-NEXT:    ld ra, 88(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s0, 80(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s1, 72(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s2, 64(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s3, 56(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s4, 48(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s5, 40(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s6, 32(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s7, 24(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s8, 16(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    addi sp, sp, 96
+; CHECK-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s1, 104(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s2, 96(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s3, 88(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s4, 80(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s5, 72(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s6, 64(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s7, 56(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs0, 48(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs1, 40(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs2, 32(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs3, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs4, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs5, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs6, 0(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    addi sp, sp, 128
 ; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB42_18: # %entry
+; CHECK-NEXT:    mv a0, a7
+; CHECK-NEXT:    fcvt.l.s a1, fs5, rtz
+; CHECK-NEXT:    blt s2, a7, .LBB42_2
+; CHECK-NEXT:  .LBB42_19: # %entry
+; CHECK-NEXT:    mv s2, a7
+; CHECK-NEXT:    fcvt.l.s a2, fs4, rtz
+; CHECK-NEXT:    blt a1, a7, .LBB42_3
+; CHECK-NEXT:  .LBB42_20: # %entry
+; CHECK-NEXT:    mv a1, a7
+; CHECK-NEXT:    fcvt.l.s a3, fs3, rtz
+; CHECK-NEXT:    blt a2, a7, .LBB42_4
+; CHECK-NEXT:  .LBB42_21: # %entry
+; CHECK-NEXT:    mv a2, a7
+; CHECK-NEXT:    fcvt.l.s a4, fs2, rtz
+; CHECK-NEXT:    blt a3, a7, .LBB42_5
+; CHECK-NEXT:  .LBB42_22: # %entry
+; CHECK-NEXT:    mv a3, a7
+; CHECK-NEXT:    fcvt.l.s a5, fs1, rtz
+; CHECK-NEXT:    blt a4, a7, .LBB42_6
 ; CHECK-NEXT:  .LBB42_23: # %entry
 ; CHECK-NEXT:    mv a4, a7
-; CHECK-NEXT:    fcvt.l.s a6, ft1, rtz
-; CHECK-NEXT:    blt a5, a7, .LBB42_12
+; CHECK-NEXT:    fcvt.l.s a6, fs0, rtz
+; CHECK-NEXT:    blt a5, a7, .LBB42_7
 ; CHECK-NEXT:  .LBB42_24: # %entry
 ; CHECK-NEXT:    mv a5, a7
-; CHECK-NEXT:    blt a6, a7, .LBB42_13
+; CHECK-NEXT:    blt a6, a7, .LBB42_8
 ; CHECK-NEXT:  .LBB42_25: # %entry
 ; CHECK-NEXT:    mv a6, a7
 ; CHECK-NEXT:    lui a7, 1048568
-; CHECK-NEXT:    blt a7, a6, .LBB42_14
+; CHECK-NEXT:    blt a7, a6, .LBB42_9
 ; CHECK-NEXT:  .LBB42_26: # %entry
 ; CHECK-NEXT:    lui a6, 1048568
-; CHECK-NEXT:    blt a7, a5, .LBB42_15
+; CHECK-NEXT:    blt a7, a5, .LBB42_10
 ; CHECK-NEXT:  .LBB42_27: # %entry
 ; CHECK-NEXT:    lui a5, 1048568
-; CHECK-NEXT:    blt a7, a4, .LBB42_16
+; CHECK-NEXT:    blt a7, a4, .LBB42_11
 ; CHECK-NEXT:  .LBB42_28: # %entry
 ; CHECK-NEXT:    lui a4, 1048568
-; CHECK-NEXT:    blt a7, a3, .LBB42_17
+; CHECK-NEXT:    blt a7, a3, .LBB42_12
 ; CHECK-NEXT:  .LBB42_29: # %entry
 ; CHECK-NEXT:    lui a3, 1048568
-; CHECK-NEXT:    blt a7, a2, .LBB42_18
+; CHECK-NEXT:    blt a7, a2, .LBB42_13
 ; CHECK-NEXT:  .LBB42_30: # %entry
 ; CHECK-NEXT:    lui a2, 1048568
-; CHECK-NEXT:    blt a7, a1, .LBB42_19
+; CHECK-NEXT:    blt a7, a1, .LBB42_14
 ; CHECK-NEXT:  .LBB42_31: # %entry
 ; CHECK-NEXT:    lui a1, 1048568
-; CHECK-NEXT:    blt a7, s7, .LBB42_20
+; CHECK-NEXT:    blt a7, s2, .LBB42_15
 ; CHECK-NEXT:  .LBB42_32: # %entry
-; CHECK-NEXT:    lui s7, 1048568
-; CHECK-NEXT:    bge a7, a0, .LBB42_21
-; CHECK-NEXT:    j .LBB42_22
+; CHECK-NEXT:    lui s2, 1048568
+; CHECK-NEXT:    bge a7, a0, .LBB42_16
+; CHECK-NEXT:    j .LBB42_17
 entry:
   %conv = fptosi <8 x half> %x to <8 x i32>
   %spec.store.select = call <8 x i32> @llvm.smin.v8i32(<8 x i32> %conv, <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>)
@@ -3088,18 +3125,24 @@ entry:
 define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
 ; CHECK-LABEL: utesth_f16i16_mm:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -96
-; CHECK-NEXT:    .cfi_def_cfa_offset 96
-; CHECK-NEXT:    sd ra, 88(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s0, 80(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s1, 72(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s2, 64(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s3, 56(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s4, 48(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s5, 40(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s6, 32(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s7, 24(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s8, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    addi sp, sp, -128
+; CHECK-NEXT:    .cfi_def_cfa_offset 128
+; CHECK-NEXT:    sd ra, 120(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s1, 104(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s2, 96(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s3, 88(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s4, 80(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s5, 72(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s6, 64(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s7, 56(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs0, 48(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs1, 40(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs2, 32(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs3, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs4, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs5, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs6, 0(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_offset ra, -8
 ; CHECK-NEXT:    .cfi_offset s0, -16
 ; CHECK-NEXT:    .cfi_offset s1, -24
@@ -3109,48 +3152,49 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
 ; CHECK-NEXT:    .cfi_offset s5, -56
 ; CHECK-NEXT:    .cfi_offset s6, -64
 ; CHECK-NEXT:    .cfi_offset s7, -72
-; CHECK-NEXT:    .cfi_offset s8, -80
-; CHECK-NEXT:    lhu s5, 0(a1)
-; CHECK-NEXT:    lhu s1, 56(a1)
-; CHECK-NEXT:    lhu s2, 48(a1)
-; CHECK-NEXT:    lhu s3, 40(a1)
-; CHECK-NEXT:    lhu s4, 32(a1)
+; CHECK-NEXT:    .cfi_offset fs0, -80
+; CHECK-NEXT:    .cfi_offset fs1, -88
+; CHECK-NEXT:    .cfi_offset fs2, -96
+; CHECK-NEXT:    .cfi_offset fs3, -104
+; CHECK-NEXT:    .cfi_offset fs4, -112
+; CHECK-NEXT:    .cfi_offset fs5, -120
+; CHECK-NEXT:    .cfi_offset fs6, -128
+; CHECK-NEXT:    lhu s1, 0(a1)
+; CHECK-NEXT:    lhu s2, 56(a1)
+; CHECK-NEXT:    lhu s3, 48(a1)
+; CHECK-NEXT:    lhu s4, 40(a1)
+; CHECK-NEXT:    lhu s5, 32(a1)
 ; CHECK-NEXT:    lhu s6, 24(a1)
 ; CHECK-NEXT:    lhu s7, 16(a1)
 ; CHECK-NEXT:    lhu a1, 8(a1)
 ; CHECK-NEXT:    mv s0, a0
 ; CHECK-NEXT:    mv a0, a1
 ; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    mv s8, a0
+; CHECK-NEXT:    fmv.s fs5, fa0
 ; CHECK-NEXT:    mv a0, s7
 ; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    mv s7, a0
+; CHECK-NEXT:    fmv.s fs6, fa0
 ; CHECK-NEXT:    mv a0, s6
 ; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    mv s6, a0
+; CHECK-NEXT:    fmv.s fs4, fa0
+; CHECK-NEXT:    mv a0, s5
+; CHECK-NEXT:    call __extendhfsf2@plt
+; CHECK-NEXT:    fmv.s fs3, fa0
 ; CHECK-NEXT:    mv a0, s4
 ; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    mv s4, a0
+; CHECK-NEXT:    fmv.s fs2, fa0
 ; CHECK-NEXT:    mv a0, s3
 ; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    mv s3, a0
+; CHECK-NEXT:    fmv.s fs1, fa0
 ; CHECK-NEXT:    mv a0, s2
 ; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    mv s2, a0
+; CHECK-NEXT:    fmv.s fs0, fa0
+; CHECK-NEXT:    fcvt.lu.s s3, fs6, rtz
+; CHECK-NEXT:    fcvt.lu.s a0, fs5, rtz
+; CHECK-NEXT:    sext.w s2, a0
 ; CHECK-NEXT:    mv a0, s1
 ; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    mv s1, a0
-; CHECK-NEXT:    fmv.w.x ft0, s6
-; CHECK-NEXT:    fsw ft0, 12(sp) # 4-byte Folded Spill
-; CHECK-NEXT:    fmv.w.x ft0, s7
-; CHECK-NEXT:    fcvt.lu.s s7, ft0, rtz
-; CHECK-NEXT:    fmv.w.x ft0, s8
-; CHECK-NEXT:    fcvt.lu.s a0, ft0, rtz
-; CHECK-NEXT:    sext.w s6, a0
-; CHECK-NEXT:    mv a0, s5
-; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    fcvt.lu.s a0, ft0, rtz
+; CHECK-NEXT:    fcvt.lu.s a0, fa0, rtz
 ; CHECK-NEXT:    sext.w a0, a0
 ; CHECK-NEXT:    lui a1, 16
 ; CHECK-NEXT:    addiw a1, a1, -1
@@ -3158,36 +3202,31 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
 ; CHECK-NEXT:  # %bb.1: # %entry
 ; CHECK-NEXT:    mv a0, a1
 ; CHECK-NEXT:  .LBB43_2: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, s4
-; CHECK-NEXT:    flw ft1, 12(sp) # 4-byte Folded Reload
-; CHECK-NEXT:    fcvt.lu.s a3, ft1, rtz
-; CHECK-NEXT:    sext.w a2, s7
-; CHECK-NEXT:    bltu s6, a1, .LBB43_4
+; CHECK-NEXT:    fcvt.lu.s a3, fs4, rtz
+; CHECK-NEXT:    sext.w a2, s3
+; CHECK-NEXT:    bltu s2, a1, .LBB43_4
 ; CHECK-NEXT:  # %bb.3: # %entry
-; CHECK-NEXT:    mv s6, a1
+; CHECK-NEXT:    mv s2, a1
 ; CHECK-NEXT:  .LBB43_4: # %entry
-; CHECK-NEXT:    fmv.w.x ft1, s3
-; CHECK-NEXT:    fcvt.lu.s a4, ft0, rtz
+; CHECK-NEXT:    fcvt.lu.s a4, fs3, rtz
 ; CHECK-NEXT:    sext.w a3, a3
 ; CHECK-NEXT:    bltu a2, a1, .LBB43_6
 ; CHECK-NEXT:  # %bb.5: # %entry
 ; CHECK-NEXT:    mv a2, a1
 ; CHECK-NEXT:  .LBB43_6: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, s2
-; CHECK-NEXT:    fcvt.lu.s a5, ft1, rtz
+; CHECK-NEXT:    fcvt.lu.s a5, fs2, rtz
 ; CHECK-NEXT:    sext.w a4, a4
 ; CHECK-NEXT:    bltu a3, a1, .LBB43_8
 ; CHECK-NEXT:  # %bb.7: # %entry
 ; CHECK-NEXT:    mv a3, a1
 ; CHECK-NEXT:  .LBB43_8: # %entry
-; CHECK-NEXT:    fmv.w.x ft1, s1
-; CHECK-NEXT:    fcvt.lu.s a6, ft0, rtz
+; CHECK-NEXT:    fcvt.lu.s a6, fs1, rtz
 ; CHECK-NEXT:    sext.w a5, a5
 ; CHECK-NEXT:    bltu a4, a1, .LBB43_10
 ; CHECK-NEXT:  # %bb.9: # %entry
 ; CHECK-NEXT:    mv a4, a1
 ; CHECK-NEXT:  .LBB43_10: # %entry
-; CHECK-NEXT:    fcvt.lu.s a7, ft1, rtz
+; CHECK-NEXT:    fcvt.lu.s a7, fs0, rtz
 ; CHECK-NEXT:    sext.w a6, a6
 ; CHECK-NEXT:    bgeu a5, a1, .LBB43_15
 ; CHECK-NEXT:  # %bb.11: # %entry
@@ -3204,19 +3243,25 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
 ; CHECK-NEXT:    sh a4, 8(s0)
 ; CHECK-NEXT:    sh a3, 6(s0)
 ; CHECK-NEXT:    sh a2, 4(s0)
-; CHECK-NEXT:    sh s6, 2(s0)
+; CHECK-NEXT:    sh s2, 2(s0)
 ; CHECK-NEXT:    sh a0, 0(s0)
-; CHECK-NEXT:    ld ra, 88(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s0, 80(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s1, 72(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s2, 64(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s3, 56(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s4, 48(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s5, 40(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s6, 32(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s7, 24(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s8, 16(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    addi sp, sp, 96
+; CHECK-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s1, 104(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s2, 96(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s3, 88(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s4, 80(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s5, 72(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s6, 64(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s7, 56(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs0, 48(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs1, 40(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs2, 32(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs3, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs4, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs5, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs6, 0(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    addi sp, sp, 128
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB43_15: # %entry
 ; CHECK-NEXT:    mv a5, a1
@@ -3236,18 +3281,24 @@ entry:
 define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) {
 ; CHECK-LABEL: ustest_f16i16_mm:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -96
-; CHECK-NEXT:    .cfi_def_cfa_offset 96
-; CHECK-NEXT:    sd ra, 88(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s0, 80(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s1, 72(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s2, 64(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s3, 56(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s4, 48(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s5, 40(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s6, 32(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s7, 24(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s8, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    addi sp, sp, -128
+; CHECK-NEXT:    .cfi_def_cfa_offset 128
+; CHECK-NEXT:    sd ra, 120(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s1, 104(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s2, 96(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s3, 88(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s4, 80(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s5, 72(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s6, 64(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s7, 56(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs0, 48(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs1, 40(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs2, 32(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs3, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs4, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs5, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs6, 0(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_offset ra, -8
 ; CHECK-NEXT:    .cfi_offset s0, -16
 ; CHECK-NEXT:    .cfi_offset s1, -24
@@ -3257,155 +3308,167 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) {
 ; CHECK-NEXT:    .cfi_offset s5, -56
 ; CHECK-NEXT:    .cfi_offset s6, -64
 ; CHECK-NEXT:    .cfi_offset s7, -72
-; CHECK-NEXT:    .cfi_offset s8, -80
-; CHECK-NEXT:    lhu s6, 56(a1)
-; CHECK-NEXT:    lhu s1, 0(a1)
-; CHECK-NEXT:    lhu s2, 8(a1)
-; CHECK-NEXT:    lhu s3, 16(a1)
-; CHECK-NEXT:    lhu s4, 24(a1)
-; CHECK-NEXT:    lhu s5, 32(a1)
+; CHECK-NEXT:    .cfi_offset fs0, -80
+; CHECK-NEXT:    .cfi_offset fs1, -88
+; CHECK-NEXT:    .cfi_offset fs2, -96
+; CHECK-NEXT:    .cfi_offset fs3, -104
+; CHECK-NEXT:    .cfi_offset fs4, -112
+; CHECK-NEXT:    .cfi_offset fs5, -120
+; CHECK-NEXT:    .cfi_offset fs6, -128
+; CHECK-NEXT:    lhu s1, 56(a1)
+; CHECK-NEXT:    lhu s2, 0(a1)
+; CHECK-NEXT:    lhu s3, 8(a1)
+; CHECK-NEXT:    lhu s4, 16(a1)
+; CHECK-NEXT:    lhu s5, 24(a1)
+; CHECK-NEXT:    lhu s6, 32(a1)
 ; CHECK-NEXT:    lhu s7, 40(a1)
 ; CHECK-NEXT:    lhu a1, 48(a1)
 ; CHECK-NEXT:    mv s0, a0
 ; CHECK-NEXT:    mv a0, a1
 ; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    mv s8, a0
+; CHECK-NEXT:    fmv.s fs6, fa0
 ; CHECK-NEXT:    mv a0, s7
 ; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    mv s7, a0
+; CHECK-NEXT:    fmv.s fs5, fa0
+; CHECK-NEXT:    mv a0, s6
+; CHECK-NEXT:    call __extendhfsf2@plt
+; CHECK-NEXT:    fmv.s fs4, fa0
 ; CHECK-NEXT:    mv a0, s5
 ; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    mv s5, a0
+; CHECK-NEXT:    fmv.s fs3, fa0
 ; CHECK-NEXT:    mv a0, s4
 ; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    mv s4, a0
+; CHECK-NEXT:    fmv.s fs2, fa0
 ; CHECK-NEXT:    mv a0, s3
 ; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    mv s3, a0
+; CHECK-NEXT:    fmv.s fs1, fa0
 ; CHECK-NEXT:    mv a0, s2
 ; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    mv s2, a0
+; CHECK-NEXT:    fmv.s fs0, fa0
+; CHECK-NEXT:    fcvt.l.s s2, fs6, rtz
 ; CHECK-NEXT:    mv a0, s1
 ; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    mv s1, a0
-; CHECK-NEXT:    fmv.w.x ft0, s7
-; CHECK-NEXT:    fsw ft0, 12(sp) # 4-byte Folded Spill
-; CHECK-NEXT:    fmv.w.x ft0, s8
-; CHECK-NEXT:    fcvt.l.s s7, ft0, rtz
-; CHECK-NEXT:    mv a0, s6
-; CHECK-NEXT:    call __extendhfsf2@plt
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    fcvt.l.s a0, ft0, rtz
+; CHECK-NEXT:    fcvt.l.s a0, fa0, rtz
 ; CHECK-NEXT:    lui a1, 16
 ; CHECK-NEXT:    addiw a7, a1, -1
-; CHECK-NEXT:    blt a0, a7, .LBB44_2
+; CHECK-NEXT:    bge a0, a7, .LBB44_18
 ; CHECK-NEXT:  # %bb.1: # %entry
-; CHECK-NEXT:    mv a0, a7
+; CHECK-NEXT:    fcvt.l.s a1, fs5, rtz
+; CHECK-NEXT:    bge s2, a7, .LBB44_19
 ; CHECK-NEXT:  .LBB44_2: # %entry
-; CHECK-NEXT:    fmv.w.x ft1, s5
-; CHECK-NEXT:    flw ft0, 12(sp) # 4-byte Folded Reload
-; CHECK-NEXT:    fcvt.l.s a1, ft0, rtz
-; CHECK-NEXT:    blt s7, a7, .LBB44_4
-; CHECK-NEXT:  # %bb.3: # %entry
-; CHECK-NEXT:    mv s7, a7
+; CHECK-NEXT:    fcvt.l.s a2, fs4, rtz
+; CHECK-NEXT:    bge a1, a7, .LBB44_20
+; CHECK-NEXT:  .LBB44_3: # %entry
+; CHECK-NEXT:    fcvt.l.s a3, fs3, rtz
+; CHECK-NEXT:    bge a2, a7, .LBB44_21
 ; CHECK-NEXT:  .LBB44_4: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, s4
-; CHECK-NEXT:    fcvt.l.s a2, ft1, rtz
-; CHECK-NEXT:    blt a1, a7, .LBB44_6
-; CHECK-NEXT:  # %bb.5: # %entry
-; CHECK-NEXT:    mv a1, a7
-; CHECK-NEXT:  .LBB44_6: # %entry
-; CHECK-NEXT:    fmv.w.x ft1, s3
-; CHECK-NEXT:    fcvt.l.s a3, ft0, rtz
-; CHECK-NEXT:    blt a2, a7, .LBB44_8
-; CHECK-NEXT:  # %bb.7: # %entry
-; CHECK-NEXT:    mv a2, a7
-; CHECK-NEXT:  .LBB44_8: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, s2
-; CHECK-NEXT:    fcvt.l.s a4, ft1, rtz
-; CHECK-NEXT:    blt a3, a7, .LBB44_10
-; CHECK-NEXT:  # %bb.9: # %entry
-; CHECK-NEXT:    mv a3, a7
-; CHECK-NEXT:  .LBB44_10: # %entry
-; CHECK-NEXT:    fmv.w.x ft1, s1
-; CHECK-NEXT:    fcvt.l.s a5, ft0, rtz
+; CHECK-NEXT:    fcvt.l.s a4, fs2, rtz
+; CHECK-NEXT:    bge a3, a7, .LBB44_22
+; CHECK-NEXT:  .LBB44_5: # %entry
+; CHECK-NEXT:    fcvt.l.s a5, fs1, rtz
 ; CHECK-NEXT:    bge a4, a7, .LBB44_23
-; CHECK-NEXT:  # %bb.11: # %entry
-; CHECK-NEXT:    fcvt.l.s a6, ft1, rtz
+; CHECK-NEXT:  .LBB44_6: # %entry
+; CHECK-NEXT:    fcvt.l.s a6, fs0, rtz
 ; CHECK-NEXT:    bge a5, a7, .LBB44_24
-; CHECK-NEXT:  .LBB44_12: # %entry
+; CHECK-NEXT:  .LBB44_7: # %entry
 ; CHECK-NEXT:    bge a6, a7, .LBB44_25
-; CHECK-NEXT:  .LBB44_13: # %entry
+; CHECK-NEXT:  .LBB44_8: # %entry
 ; CHECK-NEXT:    blez a6, .LBB44_26
-; CHECK-NEXT:  .LBB44_14: # %entry
+; CHECK-NEXT:  .LBB44_9: # %entry
 ; CHECK-NEXT:    blez a5, .LBB44_27
-; CHECK-NEXT:  .LBB44_15: # %entry
+; CHECK-NEXT:  .LBB44_10: # %entry
 ; CHECK-NEXT:    blez a4, .LBB44_28
-; CHECK-NEXT:  .LBB44_16: # %entry
+; CHECK-NEXT:  .LBB44_11: # %entry
 ; CHECK-NEXT:    blez a3, .LBB44_29
-; CHECK-NEXT:  .LBB44_17: # %entry
+; CHECK-NEXT:  .LBB44_12: # %entry
 ; CHECK-NEXT:    blez a2, .LBB44_30
-; CHECK-NEXT:  .LBB44_18: # %entry
+; CHECK-NEXT:  .LBB44_13: # %entry
 ; CHECK-NEXT:    blez a1, .LBB44_31
-; CHECK-NEXT:  .LBB44_19: # %entry
-; CHECK-NEXT:    blez s7, .LBB44_32
-; CHECK-NEXT:  .LBB44_20: # %entry
-; CHECK-NEXT:    bgtz a0, .LBB44_22
-; CHECK-NEXT:  .LBB44_21: # %entry
+; CHECK-NEXT:  .LBB44_14: # %entry
+; CHECK-NEXT:    blez s2, .LBB44_32
+; CHECK-NEXT:  .LBB44_15: # %entry
+; CHECK-NEXT:    bgtz a0, .LBB44_17
+; CHECK-NEXT:  .LBB44_16: # %entry
 ; CHECK-NEXT:    li a0, 0
-; CHECK-NEXT:  .LBB44_22: # %entry
+; CHECK-NEXT:  .LBB44_17: # %entry
 ; CHECK-NEXT:    sh a0, 14(s0)
-; CHECK-NEXT:    sh s7, 12(s0)
+; CHECK-NEXT:    sh s2, 12(s0)
 ; CHECK-NEXT:    sh a1, 10(s0)
 ; CHECK-NEXT:    sh a2, 8(s0)
 ; CHECK-NEXT:    sh a3, 6(s0)
 ; CHECK-NEXT:    sh a4, 4(s0)
 ; CHECK-NEXT:    sh a5, 2(s0)
 ; CHECK-NEXT:    sh a6, 0(s0)
-; CHECK-NEXT:    ld ra, 88(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s0, 80(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s1, 72(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s2, 64(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s3, 56(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s4, 48(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s5, 40(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s6, 32(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s7, 24(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s8, 16(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    addi sp, sp, 96
+; CHECK-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s1, 104(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s2, 96(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s3, 88(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s4, 80(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s5, 72(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s6, 64(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s7, 56(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs0, 48(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs1, 40(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs2, 32(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs3, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs4, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs5, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs6, 0(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    addi sp, sp, 128
 ; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB44_18: # %entry
+; CHECK-NEXT:    mv a0, a7
+; CHECK-NEXT:    fcvt.l.s a1, fs5, rtz
+; CHECK-NEXT:    blt s2, a7, .LBB44_2
+; CHECK-NEXT:  .LBB44_19: # %entry
+; CHECK-NEXT:    mv s2, a7
+; CHECK-NEXT:    fcvt.l.s a2, fs4, rtz
+; CHECK-NEXT:    blt a1, a7, .LBB44_3
+; CHECK-NEXT:  .LBB44_20: # %entry
+; CHECK-NEXT:    mv a1, a7
+; CHECK-NEXT:    fcvt.l.s a3, fs3, rtz
+; CHECK-NEXT:    blt a2, a7, .LBB44_4
+; CHECK-NEXT:  .LBB44_21: # %entry
+; CHECK-NEXT:    mv a2, a7
+; CHECK-NEXT:    fcvt.l.s a4, fs2, rtz
+; CHECK-NEXT:    blt a3, a7, .LBB44_5
+; CHECK-NEXT:  .LBB44_22: # %entry
+; CHECK-NEXT:    mv a3, a7
+; CHECK-NEXT:    fcvt.l.s a5, fs1, rtz
+; CHECK-NEXT:    blt a4, a7, .LBB44_6
 ; CHECK-NEXT:  .LBB44_23: # %entry
 ; CHECK-NEXT:    mv a4, a7
-; CHECK-NEXT:    fcvt.l.s a6, ft1, rtz
-; CHECK-NEXT:    blt a5, a7, .LBB44_12
+; CHECK-NEXT:    fcvt.l.s a6, fs0, rtz
+; CHECK-NEXT:    blt a5, a7, .LBB44_7
 ; CHECK-NEXT:  .LBB44_24: # %entry
 ; CHECK-NEXT:    mv a5, a7
-; CHECK-NEXT:    blt a6, a7, .LBB44_13
+; CHECK-NEXT:    blt a6, a7, .LBB44_8
 ; CHECK-NEXT:  .LBB44_25: # %entry
 ; CHECK-NEXT:    mv a6, a7
-; CHECK-NEXT:    bgtz a6, .LBB44_14
+; CHECK-NEXT:    bgtz a6, .LBB44_9
 ; CHECK-NEXT:  .LBB44_26: # %entry
 ; CHECK-NEXT:    li a6, 0
-; CHECK-NEXT:    bgtz a5, .LBB44_15
+; CHECK-NEXT:    bgtz a5, .LBB44_10
 ; CHECK-NEXT:  .LBB44_27: # %entry
 ; CHECK-NEXT:    li a5, 0
-; CHECK-NEXT:    bgtz a4, .LBB44_16
+; CHECK-NEXT:    bgtz a4, .LBB44_11
 ; CHECK-NEXT:  .LBB44_28: # %entry
 ; CHECK-NEXT:    li a4, 0
-; CHECK-NEXT:    bgtz a3, .LBB44_17
+; CHECK-NEXT:    bgtz a3, .LBB44_12
 ; CHECK-NEXT:  .LBB44_29: # %entry
 ; CHECK-NEXT:    li a3, 0
-; CHECK-NEXT:    bgtz a2, .LBB44_18
+; CHECK-NEXT:    bgtz a2, .LBB44_13
 ; CHECK-NEXT:  .LBB44_30: # %entry
 ; CHECK-NEXT:    li a2, 0
-; CHECK-NEXT:    bgtz a1, .LBB44_19
+; CHECK-NEXT:    bgtz a1, .LBB44_14
 ; CHECK-NEXT:  .LBB44_31: # %entry
 ; CHECK-NEXT:    li a1, 0
-; CHECK-NEXT:    bgtz s7, .LBB44_20
+; CHECK-NEXT:    bgtz s2, .LBB44_15
 ; CHECK-NEXT:  .LBB44_32: # %entry
-; CHECK-NEXT:    li s7, 0
-; CHECK-NEXT:    blez a0, .LBB44_21
-; CHECK-NEXT:    j .LBB44_22
+; CHECK-NEXT:    li s2, 0
+; CHECK-NEXT:    blez a0, .LBB44_16
+; CHECK-NEXT:    j .LBB44_17
 entry:
   %conv = fptosi <8 x half> %x to <8 x i32>
   %spec.store.select = call <8 x i32> @llvm.smin.v8i32(<8 x i32> %conv, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>)
@@ -3424,16 +3487,16 @@ define <2 x i64> @stest_f64i64_mm(<2 x double> %x) {
 ; CHECK-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_offset ra, -8
 ; CHECK-NEXT:    .cfi_offset s0, -16
 ; CHECK-NEXT:    .cfi_offset s1, -24
-; CHECK-NEXT:    .cfi_offset s2, -32
-; CHECK-NEXT:    mv s2, a1
+; CHECK-NEXT:    .cfi_offset fs0, -32
+; CHECK-NEXT:    fmv.d fs0, fa1
 ; CHECK-NEXT:    call __fixdfti@plt
 ; CHECK-NEXT:    mv s0, a0
 ; CHECK-NEXT:    mv s1, a1
-; CHECK-NEXT:    mv a0, s2
+; CHECK-NEXT:    fmv.d fa0, fs0
 ; CHECK-NEXT:    call __fixdfti@plt
 ; CHECK-NEXT:    mv a2, a0
 ; CHECK-NEXT:    li a0, -1
@@ -3480,7 +3543,7 @@ define <2 x i64> @stest_f64i64_mm(<2 x double> %x) {
 ; CHECK-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    addi sp, sp, 32
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB45_17: # %entry
@@ -3539,17 +3602,17 @@ define <2 x i64> @utest_f64i64_mm(<2 x double> %x) {
 ; CHECK-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_offset ra, -8
 ; CHECK-NEXT:    .cfi_offset s0, -16
 ; CHECK-NEXT:    .cfi_offset s1, -24
-; CHECK-NEXT:    .cfi_offset s2, -32
-; CHECK-NEXT:    mv s2, a0
-; CHECK-NEXT:    mv a0, a1
+; CHECK-NEXT:    .cfi_offset fs0, -32
+; CHECK-NEXT:    fmv.d fs0, fa0
+; CHECK-NEXT:    fmv.d fa0, fa1
 ; CHECK-NEXT:    call __fixunsdfti@plt
 ; CHECK-NEXT:    mv s0, a0
 ; CHECK-NEXT:    mv s1, a1
-; CHECK-NEXT:    mv a0, s2
+; CHECK-NEXT:    fmv.d fa0, fs0
 ; CHECK-NEXT:    call __fixunsdfti@plt
 ; CHECK-NEXT:    mv a2, a0
 ; CHECK-NEXT:    mv a3, a1
@@ -3571,7 +3634,7 @@ define <2 x i64> @utest_f64i64_mm(<2 x double> %x) {
 ; CHECK-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    addi sp, sp, 32
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB46_7: # %entry
@@ -3596,16 +3659,16 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) {
 ; CHECK-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_offset ra, -8
 ; CHECK-NEXT:    .cfi_offset s0, -16
 ; CHECK-NEXT:    .cfi_offset s1, -24
-; CHECK-NEXT:    .cfi_offset s2, -32
-; CHECK-NEXT:    mv s2, a1
+; CHECK-NEXT:    .cfi_offset fs0, -32
+; CHECK-NEXT:    fmv.d fs0, fa1
 ; CHECK-NEXT:    call __fixdfti@plt
 ; CHECK-NEXT:    mv s0, a0
 ; CHECK-NEXT:    mv s1, a1
-; CHECK-NEXT:    mv a0, s2
+; CHECK-NEXT:    fmv.d fa0, fs0
 ; CHECK-NEXT:    call __fixdfti@plt
 ; CHECK-NEXT:    mv a2, a1
 ; CHECK-NEXT:    li a5, 1
@@ -3640,7 +3703,7 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) {
 ; CHECK-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    addi sp, sp, 32
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB47_12: # %entry
@@ -3692,16 +3755,16 @@ define <2 x i64> @stest_f32i64_mm(<2 x float> %x) {
 ; CHECK-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_offset ra, -8
 ; CHECK-NEXT:    .cfi_offset s0, -16
 ; CHECK-NEXT:    .cfi_offset s1, -24
-; CHECK-NEXT:    .cfi_offset s2, -32
-; CHECK-NEXT:    mv s2, a1
+; CHECK-NEXT:    .cfi_offset fs0, -32
+; CHECK-NEXT:    fmv.s fs0, fa1
 ; CHECK-NEXT:    call __fixsfti@plt
 ; CHECK-NEXT:    mv s0, a0
 ; CHECK-NEXT:    mv s1, a1
-; CHECK-NEXT:    mv a0, s2
+; CHECK-NEXT:    fmv.s fa0, fs0
 ; CHECK-NEXT:    call __fixsfti@plt
 ; CHECK-NEXT:    mv a2, a0
 ; CHECK-NEXT:    li a0, -1
@@ -3748,7 +3811,7 @@ define <2 x i64> @stest_f32i64_mm(<2 x float> %x) {
 ; CHECK-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    addi sp, sp, 32
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB48_17: # %entry
@@ -3807,17 +3870,17 @@ define <2 x i64> @utest_f32i64_mm(<2 x float> %x) {
 ; CHECK-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_offset ra, -8
 ; CHECK-NEXT:    .cfi_offset s0, -16
 ; CHECK-NEXT:    .cfi_offset s1, -24
-; CHECK-NEXT:    .cfi_offset s2, -32
-; CHECK-NEXT:    mv s2, a0
-; CHECK-NEXT:    mv a0, a1
+; CHECK-NEXT:    .cfi_offset fs0, -32
+; CHECK-NEXT:    fmv.s fs0, fa0
+; CHECK-NEXT:    fmv.s fa0, fa1
 ; CHECK-NEXT:    call __fixunssfti@plt
 ; CHECK-NEXT:    mv s0, a0
 ; CHECK-NEXT:    mv s1, a1
-; CHECK-NEXT:    mv a0, s2
+; CHECK-NEXT:    fmv.s fa0, fs0
 ; CHECK-NEXT:    call __fixunssfti@plt
 ; CHECK-NEXT:    mv a2, a0
 ; CHECK-NEXT:    mv a3, a1
@@ -3839,7 +3902,7 @@ define <2 x i64> @utest_f32i64_mm(<2 x float> %x) {
 ; CHECK-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    addi sp, sp, 32
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB49_7: # %entry
@@ -3864,16 +3927,16 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) {
 ; CHECK-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_offset ra, -8
 ; CHECK-NEXT:    .cfi_offset s0, -16
 ; CHECK-NEXT:    .cfi_offset s1, -24
-; CHECK-NEXT:    .cfi_offset s2, -32
-; CHECK-NEXT:    mv s2, a1
+; CHECK-NEXT:    .cfi_offset fs0, -32
+; CHECK-NEXT:    fmv.s fs0, fa1
 ; CHECK-NEXT:    call __fixsfti@plt
 ; CHECK-NEXT:    mv s0, a0
 ; CHECK-NEXT:    mv s1, a1
-; CHECK-NEXT:    mv a0, s2
+; CHECK-NEXT:    fmv.s fa0, fs0
 ; CHECK-NEXT:    call __fixsfti@plt
 ; CHECK-NEXT:    mv a2, a1
 ; CHECK-NEXT:    li a5, 1
@@ -3908,7 +3971,7 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) {
 ; CHECK-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    addi sp, sp, 32
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB50_12: # %entry

diff  --git a/llvm/test/CodeGen/RISCV/frm-dependency.ll b/llvm/test/CodeGen/RISCV/frm-dependency.ll
index 0b439cb65f198..b0cc1946bc983 100644
--- a/llvm/test/CodeGen/RISCV/frm-dependency.ll
+++ b/llvm/test/CodeGen/RISCV/frm-dependency.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+f,+d -stop-after=finalize-isel < %s \
-; RUN:   | FileCheck -check-prefixes=RV32IF %s
+; RUN:   -target-abi=ilp32d | FileCheck -check-prefixes=RV32IF %s
 ; RUN: llc -mtriple=riscv64 -mattr=+f,+d -stop-after=finalize-isel < %s \
-; RUN:   | FileCheck -check-prefixes=RV64IF %s
+; RUN:   -target-abi=lp64d | FileCheck -check-prefixes=RV64IF %s
 
 ; Make sure an implicit FRM dependency is added to instructions with dynamic
 ; rounding.
@@ -10,28 +10,22 @@
 define float @fadd_s(float %a, float %b) nounwind {
   ; RV32IF-LABEL: name: fadd_s
   ; RV32IF: bb.0 (%ir-block.0):
-  ; RV32IF-NEXT:   liveins: $x10, $x11
+  ; RV32IF-NEXT:   liveins: $f10_f, $f11_f
   ; RV32IF-NEXT: {{  $}}
-  ; RV32IF-NEXT:   [[COPY:%[0-9]+]]:gpr = COPY $x11
-  ; RV32IF-NEXT:   [[COPY1:%[0-9]+]]:gpr = COPY $x10
-  ; RV32IF-NEXT:   [[FMV_W_X:%[0-9]+]]:fpr32 = FMV_W_X [[COPY]]
-  ; RV32IF-NEXT:   [[FMV_W_X1:%[0-9]+]]:fpr32 = FMV_W_X [[COPY1]]
-  ; RV32IF-NEXT:   %4:fpr32 = nofpexcept FADD_S killed [[FMV_W_X1]], killed [[FMV_W_X]], 7, implicit $frm
-  ; RV32IF-NEXT:   [[FMV_X_W:%[0-9]+]]:gpr = FMV_X_W killed %4
-  ; RV32IF-NEXT:   $x10 = COPY [[FMV_X_W]]
-  ; RV32IF-NEXT:   PseudoRET implicit $x10
+  ; RV32IF-NEXT:   [[COPY:%[0-9]+]]:fpr32 = COPY $f11_f
+  ; RV32IF-NEXT:   [[COPY1:%[0-9]+]]:fpr32 = COPY $f10_f
+  ; RV32IF-NEXT:   %2:fpr32 = nofpexcept FADD_S [[COPY1]], [[COPY]], 7, implicit $frm
+  ; RV32IF-NEXT:   $f10_f = COPY %2
+  ; RV32IF-NEXT:   PseudoRET implicit $f10_f
   ; RV64IF-LABEL: name: fadd_s
   ; RV64IF: bb.0 (%ir-block.0):
-  ; RV64IF-NEXT:   liveins: $x10, $x11
+  ; RV64IF-NEXT:   liveins: $f10_f, $f11_f
   ; RV64IF-NEXT: {{  $}}
-  ; RV64IF-NEXT:   [[COPY:%[0-9]+]]:gpr = COPY $x11
-  ; RV64IF-NEXT:   [[COPY1:%[0-9]+]]:gpr = COPY $x10
-  ; RV64IF-NEXT:   [[FMV_W_X:%[0-9]+]]:fpr32 = FMV_W_X [[COPY]]
-  ; RV64IF-NEXT:   [[FMV_W_X1:%[0-9]+]]:fpr32 = FMV_W_X [[COPY1]]
-  ; RV64IF-NEXT:   %4:fpr32 = nofpexcept FADD_S killed [[FMV_W_X1]], killed [[FMV_W_X]], 7, implicit $frm
-  ; RV64IF-NEXT:   [[FMV_X_W:%[0-9]+]]:gpr = FMV_X_W killed %4
-  ; RV64IF-NEXT:   $x10 = COPY [[FMV_X_W]]
-  ; RV64IF-NEXT:   PseudoRET implicit $x10
+  ; RV64IF-NEXT:   [[COPY:%[0-9]+]]:fpr32 = COPY $f11_f
+  ; RV64IF-NEXT:   [[COPY1:%[0-9]+]]:fpr32 = COPY $f10_f
+  ; RV64IF-NEXT:   %2:fpr32 = nofpexcept FADD_S [[COPY1]], [[COPY]], 7, implicit $frm
+  ; RV64IF-NEXT:   $f10_f = COPY %2
+  ; RV64IF-NEXT:   PseudoRET implicit $f10_f
   %1 = fadd float %a, %b
   ret float %1
 }
@@ -41,32 +35,24 @@ declare float @llvm.fma.f32(float, float, float)
 define float @fmadd_s(float %a, float %b, float %c) nounwind {
   ; RV32IF-LABEL: name: fmadd_s
   ; RV32IF: bb.0 (%ir-block.0):
-  ; RV32IF-NEXT:   liveins: $x10, $x11, $x12
+  ; RV32IF-NEXT:   liveins: $f10_f, $f11_f, $f12_f
   ; RV32IF-NEXT: {{  $}}
-  ; RV32IF-NEXT:   [[COPY:%[0-9]+]]:gpr = COPY $x12
-  ; RV32IF-NEXT:   [[COPY1:%[0-9]+]]:gpr = COPY $x11
-  ; RV32IF-NEXT:   [[COPY2:%[0-9]+]]:gpr = COPY $x10
-  ; RV32IF-NEXT:   [[FMV_W_X:%[0-9]+]]:fpr32 = FMV_W_X [[COPY]]
-  ; RV32IF-NEXT:   [[FMV_W_X1:%[0-9]+]]:fpr32 = FMV_W_X [[COPY1]]
-  ; RV32IF-NEXT:   [[FMV_W_X2:%[0-9]+]]:fpr32 = FMV_W_X [[COPY2]]
-  ; RV32IF-NEXT:   %6:fpr32 = nofpexcept FMADD_S killed [[FMV_W_X2]], killed [[FMV_W_X1]], killed [[FMV_W_X]], 7, implicit $frm
-  ; RV32IF-NEXT:   [[FMV_X_W:%[0-9]+]]:gpr = FMV_X_W killed %6
-  ; RV32IF-NEXT:   $x10 = COPY [[FMV_X_W]]
-  ; RV32IF-NEXT:   PseudoRET implicit $x10
+  ; RV32IF-NEXT:   [[COPY:%[0-9]+]]:fpr32 = COPY $f12_f
+  ; RV32IF-NEXT:   [[COPY1:%[0-9]+]]:fpr32 = COPY $f11_f
+  ; RV32IF-NEXT:   [[COPY2:%[0-9]+]]:fpr32 = COPY $f10_f
+  ; RV32IF-NEXT:   %3:fpr32 = nofpexcept FMADD_S [[COPY2]], [[COPY1]], [[COPY]], 7, implicit $frm
+  ; RV32IF-NEXT:   $f10_f = COPY %3
+  ; RV32IF-NEXT:   PseudoRET implicit $f10_f
   ; RV64IF-LABEL: name: fmadd_s
   ; RV64IF: bb.0 (%ir-block.0):
-  ; RV64IF-NEXT:   liveins: $x10, $x11, $x12
+  ; RV64IF-NEXT:   liveins: $f10_f, $f11_f, $f12_f
   ; RV64IF-NEXT: {{  $}}
-  ; RV64IF-NEXT:   [[COPY:%[0-9]+]]:gpr = COPY $x12
-  ; RV64IF-NEXT:   [[COPY1:%[0-9]+]]:gpr = COPY $x11
-  ; RV64IF-NEXT:   [[COPY2:%[0-9]+]]:gpr = COPY $x10
-  ; RV64IF-NEXT:   [[FMV_W_X:%[0-9]+]]:fpr32 = FMV_W_X [[COPY]]
-  ; RV64IF-NEXT:   [[FMV_W_X1:%[0-9]+]]:fpr32 = FMV_W_X [[COPY1]]
-  ; RV64IF-NEXT:   [[FMV_W_X2:%[0-9]+]]:fpr32 = FMV_W_X [[COPY2]]
-  ; RV64IF-NEXT:   %6:fpr32 = nofpexcept FMADD_S killed [[FMV_W_X2]], killed [[FMV_W_X1]], killed [[FMV_W_X]], 7, implicit $frm
-  ; RV64IF-NEXT:   [[FMV_X_W:%[0-9]+]]:gpr = FMV_X_W killed %6
-  ; RV64IF-NEXT:   $x10 = COPY [[FMV_X_W]]
-  ; RV64IF-NEXT:   PseudoRET implicit $x10
+  ; RV64IF-NEXT:   [[COPY:%[0-9]+]]:fpr32 = COPY $f12_f
+  ; RV64IF-NEXT:   [[COPY1:%[0-9]+]]:fpr32 = COPY $f11_f
+  ; RV64IF-NEXT:   [[COPY2:%[0-9]+]]:fpr32 = COPY $f10_f
+  ; RV64IF-NEXT:   %3:fpr32 = nofpexcept FMADD_S [[COPY2]], [[COPY1]], [[COPY]], 7, implicit $frm
+  ; RV64IF-NEXT:   $f10_f = COPY %3
+  ; RV64IF-NEXT:   PseudoRET implicit $f10_f
   %1 = call float @llvm.fma.f32(float %a, float %b, float %c)
   ret float %1
 }
@@ -75,21 +61,19 @@ define float @fmadd_s(float %a, float %b, float %c) nounwind {
 define i32 @fcvt_w_s(float %a) nounwind {
   ; RV32IF-LABEL: name: fcvt_w_s
   ; RV32IF: bb.0 (%ir-block.0):
-  ; RV32IF-NEXT:   liveins: $x10
+  ; RV32IF-NEXT:   liveins: $f10_f
   ; RV32IF-NEXT: {{  $}}
-  ; RV32IF-NEXT:   [[COPY:%[0-9]+]]:gpr = COPY $x10
-  ; RV32IF-NEXT:   [[FMV_W_X:%[0-9]+]]:fpr32 = FMV_W_X [[COPY]]
-  ; RV32IF-NEXT:   %2:gpr = nofpexcept FCVT_W_S killed [[FMV_W_X]], 1
-  ; RV32IF-NEXT:   $x10 = COPY %2
+  ; RV32IF-NEXT:   [[COPY:%[0-9]+]]:fpr32 = COPY $f10_f
+  ; RV32IF-NEXT:   %1:gpr = nofpexcept FCVT_W_S [[COPY]], 1
+  ; RV32IF-NEXT:   $x10 = COPY %1
   ; RV32IF-NEXT:   PseudoRET implicit $x10
   ; RV64IF-LABEL: name: fcvt_w_s
   ; RV64IF: bb.0 (%ir-block.0):
-  ; RV64IF-NEXT:   liveins: $x10
+  ; RV64IF-NEXT:   liveins: $f10_f
   ; RV64IF-NEXT: {{  $}}
-  ; RV64IF-NEXT:   [[COPY:%[0-9]+]]:gpr = COPY $x10
-  ; RV64IF-NEXT:   [[FMV_W_X:%[0-9]+]]:fpr32 = FMV_W_X [[COPY]]
-  ; RV64IF-NEXT:   %2:gpr = nofpexcept FCVT_W_S killed [[FMV_W_X]], 1
-  ; RV64IF-NEXT:   $x10 = COPY %2
+  ; RV64IF-NEXT:   [[COPY:%[0-9]+]]:fpr32 = COPY $f10_f
+  ; RV64IF-NEXT:   %1:gpr = nofpexcept FCVT_W_S [[COPY]], 1
+  ; RV64IF-NEXT:   $x10 = COPY %1
   ; RV64IF-NEXT:   PseudoRET implicit $x10
   %1 = fptosi float %a to i32
   ret i32 %1
@@ -104,21 +88,16 @@ define double @fcvt_d_w(i32 %a) nounwind {
   ; RV32IF-NEXT: {{  $}}
   ; RV32IF-NEXT:   [[COPY:%[0-9]+]]:gpr = COPY $x10
   ; RV32IF-NEXT:   %1:fpr64 = nofpexcept FCVT_D_W [[COPY]]
-  ; RV32IF-NEXT:   FSD killed %1, %stack.0, 0 :: (store (s64) into %stack.0)
-  ; RV32IF-NEXT:   [[LW:%[0-9]+]]:gpr = LW %stack.0, 0 :: (load (s32) from %stack.0, align 8)
-  ; RV32IF-NEXT:   [[LW1:%[0-9]+]]:gpr = LW %stack.0, 4 :: (load (s32) from %stack.0 + 4, basealign 8)
-  ; RV32IF-NEXT:   $x10 = COPY [[LW]]
-  ; RV32IF-NEXT:   $x11 = COPY [[LW1]]
-  ; RV32IF-NEXT:   PseudoRET implicit $x10, implicit $x11
+  ; RV32IF-NEXT:   $f10_d = COPY %1
+  ; RV32IF-NEXT:   PseudoRET implicit $f10_d
   ; RV64IF-LABEL: name: fcvt_d_w
   ; RV64IF: bb.0 (%ir-block.0):
   ; RV64IF-NEXT:   liveins: $x10
   ; RV64IF-NEXT: {{  $}}
   ; RV64IF-NEXT:   [[COPY:%[0-9]+]]:gpr = COPY $x10
   ; RV64IF-NEXT:   %1:fpr64 = nofpexcept FCVT_D_W [[COPY]]
-  ; RV64IF-NEXT:   [[FMV_X_D:%[0-9]+]]:gpr = FMV_X_D killed %1
-  ; RV64IF-NEXT:   $x10 = COPY [[FMV_X_D]]
-  ; RV64IF-NEXT:   PseudoRET implicit $x10
+  ; RV64IF-NEXT:   $f10_d = COPY %1
+  ; RV64IF-NEXT:   PseudoRET implicit $f10_d
   %1 = sitofp i32 %a to double
   ret double %1
 }

diff  --git a/llvm/test/CodeGen/RISCV/rv64d-double-convert-strict.ll b/llvm/test/CodeGen/RISCV/rv64d-double-convert-strict.ll
index cc8c1cfdceb33..80746863a41c7 100644
--- a/llvm/test/CodeGen/RISCV/rv64d-double-convert-strict.ll
+++ b/llvm/test/CodeGen/RISCV/rv64d-double-convert-strict.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+d -verify-machineinstrs < %s \
-; RUN:   -disable-strictnode-mutation | FileCheck %s -check-prefix=RV64ID
+; RUN: llc -mtriple=riscv64 -mattr=+d -verify-machineinstrs -target-abi=lp64d \
+; RUN:   -disable-strictnode-mutation < %s | FileCheck %s -check-prefix=RV64ID
 
 ; This file exhaustively checks double<->i32 conversions. In general,
 ; fcvt.l[u].d can be selected instead of fcvt.w[u].d because poison is
@@ -10,8 +10,7 @@
 define i32 @aext_fptosi(double %a) nounwind strictfp {
 ; RV64ID-LABEL: aext_fptosi:
 ; RV64ID:       # %bb.0:
-; RV64ID-NEXT:    fmv.d.x ft0, a0
-; RV64ID-NEXT:    fcvt.w.d a0, ft0, rtz
+; RV64ID-NEXT:    fcvt.w.d a0, fa0, rtz
 ; RV64ID-NEXT:    ret
   %1 = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %a, metadata !"fpexcept.strict") strictfp
   ret i32 %1
@@ -21,8 +20,7 @@ declare i32 @llvm.experimental.constrained.fptosi.i32.f64(double, metadata)
 define signext i32 @sext_fptosi(double %a) nounwind strictfp {
 ; RV64ID-LABEL: sext_fptosi:
 ; RV64ID:       # %bb.0:
-; RV64ID-NEXT:    fmv.d.x ft0, a0
-; RV64ID-NEXT:    fcvt.w.d a0, ft0, rtz
+; RV64ID-NEXT:    fcvt.w.d a0, fa0, rtz
 ; RV64ID-NEXT:    ret
   %1 = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %a, metadata !"fpexcept.strict") strictfp
   ret i32 %1
@@ -31,8 +29,7 @@ define signext i32 @sext_fptosi(double %a) nounwind strictfp {
 define zeroext i32 @zext_fptosi(double %a) nounwind strictfp {
 ; RV64ID-LABEL: zext_fptosi:
 ; RV64ID:       # %bb.0:
-; RV64ID-NEXT:    fmv.d.x ft0, a0
-; RV64ID-NEXT:    fcvt.w.d a0, ft0, rtz
+; RV64ID-NEXT:    fcvt.w.d a0, fa0, rtz
 ; RV64ID-NEXT:    slli a0, a0, 32
 ; RV64ID-NEXT:    srli a0, a0, 32
 ; RV64ID-NEXT:    ret
@@ -43,8 +40,7 @@ define zeroext i32 @zext_fptosi(double %a) nounwind strictfp {
 define i32 @aext_fptoui(double %a) nounwind strictfp {
 ; RV64ID-LABEL: aext_fptoui:
 ; RV64ID:       # %bb.0:
-; RV64ID-NEXT:    fmv.d.x ft0, a0
-; RV64ID-NEXT:    fcvt.wu.d a0, ft0, rtz
+; RV64ID-NEXT:    fcvt.wu.d a0, fa0, rtz
 ; RV64ID-NEXT:    ret
   %1 = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %a, metadata !"fpexcept.strict") strictfp
   ret i32 %1
@@ -54,8 +50,7 @@ declare i32 @llvm.experimental.constrained.fptoui.i32.f64(double, metadata)
 define signext i32 @sext_fptoui(double %a) nounwind strictfp {
 ; RV64ID-LABEL: sext_fptoui:
 ; RV64ID:       # %bb.0:
-; RV64ID-NEXT:    fmv.d.x ft0, a0
-; RV64ID-NEXT:    fcvt.wu.d a0, ft0, rtz
+; RV64ID-NEXT:    fcvt.wu.d a0, fa0, rtz
 ; RV64ID-NEXT:    ret
   %1 = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %a, metadata !"fpexcept.strict") strictfp
   ret i32 %1
@@ -64,8 +59,7 @@ define signext i32 @sext_fptoui(double %a) nounwind strictfp {
 define zeroext i32 @zext_fptoui(double %a) nounwind strictfp {
 ; RV64ID-LABEL: zext_fptoui:
 ; RV64ID:       # %bb.0:
-; RV64ID-NEXT:    fmv.d.x ft0, a0
-; RV64ID-NEXT:    fcvt.lu.d a0, ft0, rtz
+; RV64ID-NEXT:    fcvt.lu.d a0, fa0, rtz
 ; RV64ID-NEXT:    ret
   %1 = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %a, metadata !"fpexcept.strict") strictfp
   ret i32 %1
@@ -74,8 +68,7 @@ define zeroext i32 @zext_fptoui(double %a) nounwind strictfp {
 define double @uitofp_aext_i32_to_f64(i32 %a) nounwind strictfp {
 ; RV64ID-LABEL: uitofp_aext_i32_to_f64:
 ; RV64ID:       # %bb.0:
-; RV64ID-NEXT:    fcvt.d.wu ft0, a0
-; RV64ID-NEXT:    fmv.x.d a0, ft0
+; RV64ID-NEXT:    fcvt.d.wu fa0, a0
 ; RV64ID-NEXT:    ret
   %1 = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp
   ret double %1
@@ -85,8 +78,7 @@ declare double @llvm.experimental.constrained.uitofp.f64.i32(i32 %a, metadata, m
 define double @uitofp_sext_i32_to_f64(i32 signext %a) nounwind strictfp {
 ; RV64ID-LABEL: uitofp_sext_i32_to_f64:
 ; RV64ID:       # %bb.0:
-; RV64ID-NEXT:    fcvt.d.wu ft0, a0
-; RV64ID-NEXT:    fmv.x.d a0, ft0
+; RV64ID-NEXT:    fcvt.d.wu fa0, a0
 ; RV64ID-NEXT:    ret
   %1 = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp
   ret double %1
@@ -95,8 +87,7 @@ define double @uitofp_sext_i32_to_f64(i32 signext %a) nounwind strictfp {
 define double @uitofp_zext_i32_to_f64(i32 zeroext %a) nounwind strictfp {
 ; RV64ID-LABEL: uitofp_zext_i32_to_f64:
 ; RV64ID:       # %bb.0:
-; RV64ID-NEXT:    fcvt.d.wu ft0, a0
-; RV64ID-NEXT:    fmv.x.d a0, ft0
+; RV64ID-NEXT:    fcvt.d.wu fa0, a0
 ; RV64ID-NEXT:    ret
   %1 = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp
   ret double %1
@@ -105,8 +96,7 @@ define double @uitofp_zext_i32_to_f64(i32 zeroext %a) nounwind strictfp {
 define double @sitofp_aext_i32_to_f64(i32 %a) nounwind strictfp {
 ; RV64ID-LABEL: sitofp_aext_i32_to_f64:
 ; RV64ID:       # %bb.0:
-; RV64ID-NEXT:    fcvt.d.w ft0, a0
-; RV64ID-NEXT:    fmv.x.d a0, ft0
+; RV64ID-NEXT:    fcvt.d.w fa0, a0
 ; RV64ID-NEXT:    ret
   %1 = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp
   ret double %1
@@ -116,8 +106,7 @@ declare double @llvm.experimental.constrained.sitofp.f64.i32(i32 %a, metadata, m
 define double @sitofp_sext_i32_to_f64(i32 signext %a) nounwind strictfp {
 ; RV64ID-LABEL: sitofp_sext_i32_to_f64:
 ; RV64ID:       # %bb.0:
-; RV64ID-NEXT:    fcvt.d.w ft0, a0
-; RV64ID-NEXT:    fmv.x.d a0, ft0
+; RV64ID-NEXT:    fcvt.d.w fa0, a0
 ; RV64ID-NEXT:    ret
   %1 = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp
   ret double %1
@@ -126,8 +115,7 @@ define double @sitofp_sext_i32_to_f64(i32 signext %a) nounwind strictfp {
 define double @sitofp_zext_i32_to_f64(i32 zeroext %a) nounwind strictfp {
 ; RV64ID-LABEL: sitofp_zext_i32_to_f64:
 ; RV64ID:       # %bb.0:
-; RV64ID-NEXT:    fcvt.d.w ft0, a0
-; RV64ID-NEXT:    fmv.x.d a0, ft0
+; RV64ID-NEXT:    fcvt.d.w fa0, a0
 ; RV64ID-NEXT:    ret
   %1 = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp
   ret double %1

diff  --git a/llvm/test/CodeGen/RISCV/rv64d-double-convert.ll b/llvm/test/CodeGen/RISCV/rv64d-double-convert.ll
index b961e049bbc10..ed764fd6f641a 100644
--- a/llvm/test/CodeGen/RISCV/rv64d-double-convert.ll
+++ b/llvm/test/CodeGen/RISCV/rv64d-double-convert.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv64 -mattr=+d -verify-machineinstrs < %s \
-; RUN:   | FileCheck %s -check-prefix=RV64ID
+; RUN:   -target-abi=lp64d | FileCheck %s -check-prefix=RV64ID
 
 ; This file exhaustively checks double<->i32 conversions. In general,
 ; fcvt.l[u].d can be selected instead of fcvt.w[u].d because poison is
@@ -10,8 +10,7 @@
 define i32 @aext_fptosi(double %a) nounwind {
 ; RV64ID-LABEL: aext_fptosi:
 ; RV64ID:       # %bb.0:
-; RV64ID-NEXT:    fmv.d.x ft0, a0
-; RV64ID-NEXT:    fcvt.w.d a0, ft0, rtz
+; RV64ID-NEXT:    fcvt.w.d a0, fa0, rtz
 ; RV64ID-NEXT:    ret
   %1 = fptosi double %a to i32
   ret i32 %1
@@ -20,8 +19,7 @@ define i32 @aext_fptosi(double %a) nounwind {
 define signext i32 @sext_fptosi(double %a) nounwind {
 ; RV64ID-LABEL: sext_fptosi:
 ; RV64ID:       # %bb.0:
-; RV64ID-NEXT:    fmv.d.x ft0, a0
-; RV64ID-NEXT:    fcvt.w.d a0, ft0, rtz
+; RV64ID-NEXT:    fcvt.w.d a0, fa0, rtz
 ; RV64ID-NEXT:    ret
   %1 = fptosi double %a to i32
   ret i32 %1
@@ -30,8 +28,7 @@ define signext i32 @sext_fptosi(double %a) nounwind {
 define zeroext i32 @zext_fptosi(double %a) nounwind {
 ; RV64ID-LABEL: zext_fptosi:
 ; RV64ID:       # %bb.0:
-; RV64ID-NEXT:    fmv.d.x ft0, a0
-; RV64ID-NEXT:    fcvt.w.d a0, ft0, rtz
+; RV64ID-NEXT:    fcvt.w.d a0, fa0, rtz
 ; RV64ID-NEXT:    slli a0, a0, 32
 ; RV64ID-NEXT:    srli a0, a0, 32
 ; RV64ID-NEXT:    ret
@@ -42,8 +39,7 @@ define zeroext i32 @zext_fptosi(double %a) nounwind {
 define i32 @aext_fptoui(double %a) nounwind {
 ; RV64ID-LABEL: aext_fptoui:
 ; RV64ID:       # %bb.0:
-; RV64ID-NEXT:    fmv.d.x ft0, a0
-; RV64ID-NEXT:    fcvt.wu.d a0, ft0, rtz
+; RV64ID-NEXT:    fcvt.wu.d a0, fa0, rtz
 ; RV64ID-NEXT:    ret
   %1 = fptoui double %a to i32
   ret i32 %1
@@ -52,8 +48,7 @@ define i32 @aext_fptoui(double %a) nounwind {
 define signext i32 @sext_fptoui(double %a) nounwind {
 ; RV64ID-LABEL: sext_fptoui:
 ; RV64ID:       # %bb.0:
-; RV64ID-NEXT:    fmv.d.x ft0, a0
-; RV64ID-NEXT:    fcvt.wu.d a0, ft0, rtz
+; RV64ID-NEXT:    fcvt.wu.d a0, fa0, rtz
 ; RV64ID-NEXT:    ret
   %1 = fptoui double %a to i32
   ret i32 %1
@@ -62,8 +57,7 @@ define signext i32 @sext_fptoui(double %a) nounwind {
 define zeroext i32 @zext_fptoui(double %a) nounwind {
 ; RV64ID-LABEL: zext_fptoui:
 ; RV64ID:       # %bb.0:
-; RV64ID-NEXT:    fmv.d.x ft0, a0
-; RV64ID-NEXT:    fcvt.lu.d a0, ft0, rtz
+; RV64ID-NEXT:    fcvt.lu.d a0, fa0, rtz
 ; RV64ID-NEXT:    ret
   %1 = fptoui double %a to i32
   ret i32 %1
@@ -72,8 +66,7 @@ define zeroext i32 @zext_fptoui(double %a) nounwind {
 define double @uitofp_aext_i32_to_f64(i32 %a) nounwind {
 ; RV64ID-LABEL: uitofp_aext_i32_to_f64:
 ; RV64ID:       # %bb.0:
-; RV64ID-NEXT:    fcvt.d.wu ft0, a0
-; RV64ID-NEXT:    fmv.x.d a0, ft0
+; RV64ID-NEXT:    fcvt.d.wu fa0, a0
 ; RV64ID-NEXT:    ret
   %1 = uitofp i32 %a to double
   ret double %1
@@ -82,8 +75,7 @@ define double @uitofp_aext_i32_to_f64(i32 %a) nounwind {
 define double @uitofp_sext_i32_to_f64(i32 signext %a) nounwind {
 ; RV64ID-LABEL: uitofp_sext_i32_to_f64:
 ; RV64ID:       # %bb.0:
-; RV64ID-NEXT:    fcvt.d.wu ft0, a0
-; RV64ID-NEXT:    fmv.x.d a0, ft0
+; RV64ID-NEXT:    fcvt.d.wu fa0, a0
 ; RV64ID-NEXT:    ret
   %1 = uitofp i32 %a to double
   ret double %1
@@ -92,8 +84,7 @@ define double @uitofp_sext_i32_to_f64(i32 signext %a) nounwind {
 define double @uitofp_zext_i32_to_f64(i32 zeroext %a) nounwind {
 ; RV64ID-LABEL: uitofp_zext_i32_to_f64:
 ; RV64ID:       # %bb.0:
-; RV64ID-NEXT:    fcvt.d.wu ft0, a0
-; RV64ID-NEXT:    fmv.x.d a0, ft0
+; RV64ID-NEXT:    fcvt.d.wu fa0, a0
 ; RV64ID-NEXT:    ret
   %1 = uitofp i32 %a to double
   ret double %1
@@ -102,8 +93,7 @@ define double @uitofp_zext_i32_to_f64(i32 zeroext %a) nounwind {
 define double @sitofp_aext_i32_to_f64(i32 %a) nounwind {
 ; RV64ID-LABEL: sitofp_aext_i32_to_f64:
 ; RV64ID:       # %bb.0:
-; RV64ID-NEXT:    fcvt.d.w ft0, a0
-; RV64ID-NEXT:    fmv.x.d a0, ft0
+; RV64ID-NEXT:    fcvt.d.w fa0, a0
 ; RV64ID-NEXT:    ret
   %1 = sitofp i32 %a to double
   ret double %1
@@ -112,8 +102,7 @@ define double @sitofp_aext_i32_to_f64(i32 %a) nounwind {
 define double @sitofp_sext_i32_to_f64(i32 signext %a) nounwind {
 ; RV64ID-LABEL: sitofp_sext_i32_to_f64:
 ; RV64ID:       # %bb.0:
-; RV64ID-NEXT:    fcvt.d.w ft0, a0
-; RV64ID-NEXT:    fmv.x.d a0, ft0
+; RV64ID-NEXT:    fcvt.d.w fa0, a0
 ; RV64ID-NEXT:    ret
   %1 = sitofp i32 %a to double
   ret double %1
@@ -122,8 +111,7 @@ define double @sitofp_sext_i32_to_f64(i32 signext %a) nounwind {
 define double @sitofp_zext_i32_to_f64(i32 zeroext %a) nounwind {
 ; RV64ID-LABEL: sitofp_zext_i32_to_f64:
 ; RV64ID:       # %bb.0:
-; RV64ID-NEXT:    fcvt.d.w ft0, a0
-; RV64ID-NEXT:    fmv.x.d a0, ft0
+; RV64ID-NEXT:    fcvt.d.w fa0, a0
 ; RV64ID-NEXT:    ret
   %1 = sitofp i32 %a to double
   ret double %1

diff  --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-bitcast.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-bitcast.ll
index bc829dd463404..1097c6db699af 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-bitcast.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-bitcast.ll
@@ -1,6 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -verify-machineinstrs -riscv-v-vector-bits-min=128 < %s | FileCheck %s --check-prefixes=CHECK,RV32-FP
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -verify-machineinstrs -riscv-v-vector-bits-min=128 < %s | FileCheck %s --check-prefixes=CHECK,RV64-FP
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \
+; RUN:   -verify-machineinstrs -riscv-v-vector-bits-min=128 < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,RV32-FP
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \
+; RUN:   -verify-machineinstrs -riscv-v-vector-bits-min=128 < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64-FP
 
 define i16 @bitcast_v1f16_i16(<1 x half> %a) {
 ; CHECK-LABEL: bitcast_v1f16_i16:
@@ -16,8 +20,7 @@ define half @bitcast_v1f16_f16(<1 x half> %a) {
 ; CHECK-LABEL: bitcast_v1f16_f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfmv.f.s ft0, v8
-; CHECK-NEXT:    fmv.x.h a0, ft0
+; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
   %b = bitcast <1 x half> %a to half
   ret half %b
@@ -44,35 +47,21 @@ define i32 @bitcast_v1f32_i32(<1 x float> %a) {
 }
 
 define float @bitcast_v2f16_f32(<2 x half> %a) {
-; RV32-FP-LABEL: bitcast_v2f16_f32:
-; RV32-FP:       # %bb.0:
-; RV32-FP-NEXT:    vsetivli zero, 0, e32, mf2, ta, mu
-; RV32-FP-NEXT:    vmv.x.s a0, v8
-; RV32-FP-NEXT:    ret
-;
-; RV64-FP-LABEL: bitcast_v2f16_f32:
-; RV64-FP:       # %bb.0:
-; RV64-FP-NEXT:    vsetivli zero, 0, e32, mf2, ta, mu
-; RV64-FP-NEXT:    vfmv.f.s ft0, v8
-; RV64-FP-NEXT:    fmv.x.w a0, ft0
-; RV64-FP-NEXT:    ret
+; CHECK-LABEL: bitcast_v2f16_f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfmv.f.s fa0, v8
+; CHECK-NEXT:    ret
   %b = bitcast <2 x half> %a to float
   ret float %b
 }
 
 define float @bitcast_v1f32_f32(<1 x float> %a) {
-; RV32-FP-LABEL: bitcast_v1f32_f32:
-; RV32-FP:       # %bb.0:
-; RV32-FP-NEXT:    vsetivli zero, 0, e32, mf2, ta, mu
-; RV32-FP-NEXT:    vmv.x.s a0, v8
-; RV32-FP-NEXT:    ret
-;
-; RV64-FP-LABEL: bitcast_v1f32_f32:
-; RV64-FP:       # %bb.0:
-; RV64-FP-NEXT:    vsetivli zero, 0, e32, mf2, ta, mu
-; RV64-FP-NEXT:    vfmv.f.s ft0, v8
-; RV64-FP-NEXT:    fmv.x.w a0, ft0
-; RV64-FP-NEXT:    ret
+; CHECK-LABEL: bitcast_v1f32_f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfmv.f.s fa0, v8
+; CHECK-NEXT:    ret
   %b = bitcast <1 x float> %a to float
   ret float %b
 }
@@ -135,67 +124,31 @@ define i64 @bitcast_v1f64_i64(<1 x double> %a) {
 }
 
 define double @bitcast_v4f16_f64(<4 x half> %a) {
-; RV32-FP-LABEL: bitcast_v4f16_f64:
-; RV32-FP:       # %bb.0:
-; RV32-FP-NEXT:    addi sp, sp, -16
-; RV32-FP-NEXT:    .cfi_def_cfa_offset 16
-; RV32-FP-NEXT:    vsetivli zero, 0, e64, m1, ta, mu
-; RV32-FP-NEXT:    vfmv.f.s ft0, v8
-; RV32-FP-NEXT:    fsd ft0, 8(sp)
-; RV32-FP-NEXT:    lw a0, 8(sp)
-; RV32-FP-NEXT:    lw a1, 12(sp)
-; RV32-FP-NEXT:    addi sp, sp, 16
-; RV32-FP-NEXT:    ret
-;
-; RV64-FP-LABEL: bitcast_v4f16_f64:
-; RV64-FP:       # %bb.0:
-; RV64-FP-NEXT:    vsetivli zero, 0, e64, m1, ta, mu
-; RV64-FP-NEXT:    vmv.x.s a0, v8
-; RV64-FP-NEXT:    ret
+; CHECK-LABEL: bitcast_v4f16_f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 0, e64, m1, ta, mu
+; CHECK-NEXT:    vfmv.f.s fa0, v8
+; CHECK-NEXT:    ret
   %b = bitcast <4 x half> %a to double
   ret double %b
 }
 
 define double @bitcast_v2f32_f64(<2 x float> %a) {
-; RV32-FP-LABEL: bitcast_v2f32_f64:
-; RV32-FP:       # %bb.0:
-; RV32-FP-NEXT:    addi sp, sp, -16
-; RV32-FP-NEXT:    .cfi_def_cfa_offset 16
-; RV32-FP-NEXT:    vsetivli zero, 0, e64, m1, ta, mu
-; RV32-FP-NEXT:    vfmv.f.s ft0, v8
-; RV32-FP-NEXT:    fsd ft0, 8(sp)
-; RV32-FP-NEXT:    lw a0, 8(sp)
-; RV32-FP-NEXT:    lw a1, 12(sp)
-; RV32-FP-NEXT:    addi sp, sp, 16
-; RV32-FP-NEXT:    ret
-;
-; RV64-FP-LABEL: bitcast_v2f32_f64:
-; RV64-FP:       # %bb.0:
-; RV64-FP-NEXT:    vsetivli zero, 0, e64, m1, ta, mu
-; RV64-FP-NEXT:    vmv.x.s a0, v8
-; RV64-FP-NEXT:    ret
+; CHECK-LABEL: bitcast_v2f32_f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 0, e64, m1, ta, mu
+; CHECK-NEXT:    vfmv.f.s fa0, v8
+; CHECK-NEXT:    ret
   %b = bitcast <2 x float> %a to double
   ret double %b
 }
 
 define double @bitcast_v1f64_f64(<1 x double> %a) {
-; RV32-FP-LABEL: bitcast_v1f64_f64:
-; RV32-FP:       # %bb.0:
-; RV32-FP-NEXT:    addi sp, sp, -16
-; RV32-FP-NEXT:    .cfi_def_cfa_offset 16
-; RV32-FP-NEXT:    vsetivli zero, 0, e64, m1, ta, mu
-; RV32-FP-NEXT:    vfmv.f.s ft0, v8
-; RV32-FP-NEXT:    fsd ft0, 8(sp)
-; RV32-FP-NEXT:    lw a0, 8(sp)
-; RV32-FP-NEXT:    lw a1, 12(sp)
-; RV32-FP-NEXT:    addi sp, sp, 16
-; RV32-FP-NEXT:    ret
-;
-; RV64-FP-LABEL: bitcast_v1f64_f64:
-; RV64-FP:       # %bb.0:
-; RV64-FP-NEXT:    vsetivli zero, 0, e64, m1, ta, mu
-; RV64-FP-NEXT:    vmv.x.s a0, v8
-; RV64-FP-NEXT:    ret
+; CHECK-LABEL: bitcast_v1f64_f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 0, e64, m1, ta, mu
+; CHECK-NEXT:    vfmv.f.s fa0, v8
+; CHECK-NEXT:    ret
   %b = bitcast <1 x double> %a to double
   ret double %b
 }
@@ -305,9 +258,8 @@ define <1 x double> @bitcast_i64_v1f64(i64 %a) {
 define <1 x i16> @bitcast_f16_v1i16(half %a) {
 ; CHECK-LABEL: bitcast_f16_v1i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.h.x ft0, a0
 ; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfmv.s.f v8, ft0
+; CHECK-NEXT:    vfmv.s.f v8, fa0
 ; CHECK-NEXT:    ret
   %b = bitcast half %a to <1 x i16>
   ret <1 x i16> %b
@@ -316,210 +268,109 @@ define <1 x i16> @bitcast_f16_v1i16(half %a) {
 define <1 x half> @bitcast_f16_v1f16(half %a) {
 ; CHECK-LABEL: bitcast_f16_v1f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmv.h.x ft0, a0
 ; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfmv.s.f v8, ft0
+; CHECK-NEXT:    vfmv.s.f v8, fa0
 ; CHECK-NEXT:    ret
   %b = bitcast half %a to <1 x half>
   ret <1 x half> %b
 }
 
 define <2 x i16> @bitcast_f32_v2i16(float %a) {
-; RV32-FP-LABEL: bitcast_f32_v2i16:
-; RV32-FP:       # %bb.0:
-; RV32-FP-NEXT:    vsetivli zero, 1, e32, mf2, ta, mu
-; RV32-FP-NEXT:    vmv.s.x v8, a0
-; RV32-FP-NEXT:    ret
-;
-; RV64-FP-LABEL: bitcast_f32_v2i16:
-; RV64-FP:       # %bb.0:
-; RV64-FP-NEXT:    fmv.w.x ft0, a0
-; RV64-FP-NEXT:    vsetivli zero, 1, e32, mf2, ta, mu
-; RV64-FP-NEXT:    vfmv.s.f v8, ft0
-; RV64-FP-NEXT:    ret
+; CHECK-LABEL: bitcast_f32_v2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, mu
+; CHECK-NEXT:    vfmv.s.f v8, fa0
+; CHECK-NEXT:    ret
   %b = bitcast float %a to <2 x i16>
   ret <2 x i16> %b
 }
 
 define <2 x half> @bitcast_f32_v2f16(float %a) {
-; RV32-FP-LABEL: bitcast_f32_v2f16:
-; RV32-FP:       # %bb.0:
-; RV32-FP-NEXT:    vsetivli zero, 1, e32, mf2, ta, mu
-; RV32-FP-NEXT:    vmv.s.x v8, a0
-; RV32-FP-NEXT:    ret
-;
-; RV64-FP-LABEL: bitcast_f32_v2f16:
-; RV64-FP:       # %bb.0:
-; RV64-FP-NEXT:    fmv.w.x ft0, a0
-; RV64-FP-NEXT:    vsetivli zero, 1, e32, mf2, ta, mu
-; RV64-FP-NEXT:    vfmv.s.f v8, ft0
-; RV64-FP-NEXT:    ret
+; CHECK-LABEL: bitcast_f32_v2f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, mu
+; CHECK-NEXT:    vfmv.s.f v8, fa0
+; CHECK-NEXT:    ret
   %b = bitcast float %a to <2 x half>
   ret <2 x half> %b
 }
 
 define <1 x i32> @bitcast_f32_v1i32(float %a) {
-; RV32-FP-LABEL: bitcast_f32_v1i32:
-; RV32-FP:       # %bb.0:
-; RV32-FP-NEXT:    vsetivli zero, 1, e32, mf2, ta, mu
-; RV32-FP-NEXT:    vmv.s.x v8, a0
-; RV32-FP-NEXT:    ret
-;
-; RV64-FP-LABEL: bitcast_f32_v1i32:
-; RV64-FP:       # %bb.0:
-; RV64-FP-NEXT:    fmv.w.x ft0, a0
-; RV64-FP-NEXT:    vsetivli zero, 1, e32, mf2, ta, mu
-; RV64-FP-NEXT:    vfmv.s.f v8, ft0
-; RV64-FP-NEXT:    ret
+; CHECK-LABEL: bitcast_f32_v1i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, mu
+; CHECK-NEXT:    vfmv.s.f v8, fa0
+; CHECK-NEXT:    ret
   %b = bitcast float %a to <1 x i32>
   ret <1 x i32> %b
 }
 
 define <1 x float> @bitcast_f32_v1f32(float %a) {
-; RV32-FP-LABEL: bitcast_f32_v1f32:
-; RV32-FP:       # %bb.0:
-; RV32-FP-NEXT:    vsetivli zero, 1, e32, mf2, ta, mu
-; RV32-FP-NEXT:    vmv.s.x v8, a0
-; RV32-FP-NEXT:    ret
-;
-; RV64-FP-LABEL: bitcast_f32_v1f32:
-; RV64-FP:       # %bb.0:
-; RV64-FP-NEXT:    fmv.w.x ft0, a0
-; RV64-FP-NEXT:    vsetivli zero, 1, e32, mf2, ta, mu
-; RV64-FP-NEXT:    vfmv.s.f v8, ft0
-; RV64-FP-NEXT:    ret
+; CHECK-LABEL: bitcast_f32_v1f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, mu
+; CHECK-NEXT:    vfmv.s.f v8, fa0
+; CHECK-NEXT:    ret
   %b = bitcast float %a to <1 x float>
   ret <1 x float> %b
 }
 
 define <4 x i16> @bitcast_f64_v4i16(double %a) {
-; RV32-FP-LABEL: bitcast_f64_v4i16:
-; RV32-FP:       # %bb.0:
-; RV32-FP-NEXT:    addi sp, sp, -16
-; RV32-FP-NEXT:    .cfi_def_cfa_offset 16
-; RV32-FP-NEXT:    sw a0, 8(sp)
-; RV32-FP-NEXT:    sw a1, 12(sp)
-; RV32-FP-NEXT:    fld ft0, 8(sp)
-; RV32-FP-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
-; RV32-FP-NEXT:    vfmv.s.f v8, ft0
-; RV32-FP-NEXT:    addi sp, sp, 16
-; RV32-FP-NEXT:    ret
-;
-; RV64-FP-LABEL: bitcast_f64_v4i16:
-; RV64-FP:       # %bb.0:
-; RV64-FP-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
-; RV64-FP-NEXT:    vmv.s.x v8, a0
-; RV64-FP-NEXT:    ret
+; CHECK-LABEL: bitcast_f64_v4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
+; CHECK-NEXT:    vfmv.s.f v8, fa0
+; CHECK-NEXT:    ret
   %b = bitcast double %a to <4 x i16>
   ret <4 x i16> %b
 }
 
 define <4 x half> @bitcast_f64_v4f16(double %a) {
-; RV32-FP-LABEL: bitcast_f64_v4f16:
-; RV32-FP:       # %bb.0:
-; RV32-FP-NEXT:    addi sp, sp, -16
-; RV32-FP-NEXT:    .cfi_def_cfa_offset 16
-; RV32-FP-NEXT:    sw a0, 8(sp)
-; RV32-FP-NEXT:    sw a1, 12(sp)
-; RV32-FP-NEXT:    fld ft0, 8(sp)
-; RV32-FP-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
-; RV32-FP-NEXT:    vfmv.s.f v8, ft0
-; RV32-FP-NEXT:    addi sp, sp, 16
-; RV32-FP-NEXT:    ret
-;
-; RV64-FP-LABEL: bitcast_f64_v4f16:
-; RV64-FP:       # %bb.0:
-; RV64-FP-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
-; RV64-FP-NEXT:    vmv.s.x v8, a0
-; RV64-FP-NEXT:    ret
+; CHECK-LABEL: bitcast_f64_v4f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
+; CHECK-NEXT:    vfmv.s.f v8, fa0
+; CHECK-NEXT:    ret
   %b = bitcast double %a to <4 x half>
   ret <4 x half> %b
 }
 
 define <2 x i32> @bitcast_f64_v2i32(double %a) {
-; RV32-FP-LABEL: bitcast_f64_v2i32:
-; RV32-FP:       # %bb.0:
-; RV32-FP-NEXT:    addi sp, sp, -16
-; RV32-FP-NEXT:    .cfi_def_cfa_offset 16
-; RV32-FP-NEXT:    sw a0, 8(sp)
-; RV32-FP-NEXT:    sw a1, 12(sp)
-; RV32-FP-NEXT:    fld ft0, 8(sp)
-; RV32-FP-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
-; RV32-FP-NEXT:    vfmv.s.f v8, ft0
-; RV32-FP-NEXT:    addi sp, sp, 16
-; RV32-FP-NEXT:    ret
-;
-; RV64-FP-LABEL: bitcast_f64_v2i32:
-; RV64-FP:       # %bb.0:
-; RV64-FP-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
-; RV64-FP-NEXT:    vmv.s.x v8, a0
-; RV64-FP-NEXT:    ret
+; CHECK-LABEL: bitcast_f64_v2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
+; CHECK-NEXT:    vfmv.s.f v8, fa0
+; CHECK-NEXT:    ret
   %b = bitcast double %a to <2 x i32>
   ret <2 x i32> %b
 }
 
 define <2 x float> @bitcast_f64_v2f32(double %a) {
-; RV32-FP-LABEL: bitcast_f64_v2f32:
-; RV32-FP:       # %bb.0:
-; RV32-FP-NEXT:    addi sp, sp, -16
-; RV32-FP-NEXT:    .cfi_def_cfa_offset 16
-; RV32-FP-NEXT:    sw a0, 8(sp)
-; RV32-FP-NEXT:    sw a1, 12(sp)
-; RV32-FP-NEXT:    fld ft0, 8(sp)
-; RV32-FP-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
-; RV32-FP-NEXT:    vfmv.s.f v8, ft0
-; RV32-FP-NEXT:    addi sp, sp, 16
-; RV32-FP-NEXT:    ret
-;
-; RV64-FP-LABEL: bitcast_f64_v2f32:
-; RV64-FP:       # %bb.0:
-; RV64-FP-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
-; RV64-FP-NEXT:    vmv.s.x v8, a0
-; RV64-FP-NEXT:    ret
+; CHECK-LABEL: bitcast_f64_v2f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
+; CHECK-NEXT:    vfmv.s.f v8, fa0
+; CHECK-NEXT:    ret
   %b = bitcast double %a to <2 x float>
   ret <2 x float> %b
 }
 
 define <1 x i64> @bitcast_f64_v1i64(double %a) {
-; RV32-FP-LABEL: bitcast_f64_v1i64:
-; RV32-FP:       # %bb.0:
-; RV32-FP-NEXT:    addi sp, sp, -16
-; RV32-FP-NEXT:    .cfi_def_cfa_offset 16
-; RV32-FP-NEXT:    sw a0, 8(sp)
-; RV32-FP-NEXT:    sw a1, 12(sp)
-; RV32-FP-NEXT:    fld ft0, 8(sp)
-; RV32-FP-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
-; RV32-FP-NEXT:    vfmv.s.f v8, ft0
-; RV32-FP-NEXT:    addi sp, sp, 16
-; RV32-FP-NEXT:    ret
-;
-; RV64-FP-LABEL: bitcast_f64_v1i64:
-; RV64-FP:       # %bb.0:
-; RV64-FP-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
-; RV64-FP-NEXT:    vmv.s.x v8, a0
-; RV64-FP-NEXT:    ret
+; CHECK-LABEL: bitcast_f64_v1i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
+; CHECK-NEXT:    vfmv.s.f v8, fa0
+; CHECK-NEXT:    ret
   %b = bitcast double %a to <1 x i64>
   ret <1 x i64> %b
 }
 
 define <1 x double> @bitcast_f64_v1f64(double %a) {
-; RV32-FP-LABEL: bitcast_f64_v1f64:
-; RV32-FP:       # %bb.0:
-; RV32-FP-NEXT:    addi sp, sp, -16
-; RV32-FP-NEXT:    .cfi_def_cfa_offset 16
-; RV32-FP-NEXT:    sw a0, 8(sp)
-; RV32-FP-NEXT:    sw a1, 12(sp)
-; RV32-FP-NEXT:    fld ft0, 8(sp)
-; RV32-FP-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
-; RV32-FP-NEXT:    vfmv.s.f v8, ft0
-; RV32-FP-NEXT:    addi sp, sp, 16
-; RV32-FP-NEXT:    ret
-;
-; RV64-FP-LABEL: bitcast_f64_v1f64:
-; RV64-FP:       # %bb.0:
-; RV64-FP-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
-; RV64-FP-NEXT:    vmv.s.x v8, a0
-; RV64-FP-NEXT:    ret
+; CHECK-LABEL: bitcast_f64_v1f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, mu
+; CHECK-NEXT:    vfmv.s.f v8, fa0
+; CHECK-NEXT:    ret
   %b = bitcast double %a to <1 x double>
   ret <1 x double> %b
 }

diff  --git a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
index dd318c9792465..6dcb7f7a6b793 100644
--- a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v,+f \
+; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v,+f -target-abi=lp64f \
 ; RUN:     -riscv-v-vector-bits-min=128 | FileCheck %s
 
 define void @sink_splat_mul(i32* nocapture %a, i32 signext %x) {
@@ -1295,13 +1295,12 @@ for.body:                                         ; preds = %for.body.preheader,
 define void @sink_splat_fmul(float* nocapture %a, float %x) {
 ; CHECK-LABEL: sink_splat_fmul:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a1
 ; CHECK-NEXT:    li a1, 1024
 ; CHECK-NEXT:  .LBB20_1: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vfmul.vf v8, v8, ft0
+; CHECK-NEXT:    vfmul.vf v8, v8, fa0
 ; CHECK-NEXT:    vse32.v v8, (a0)
 ; CHECK-NEXT:    addi a1, a1, -4
 ; CHECK-NEXT:    addi a0, a0, 16
@@ -1332,13 +1331,12 @@ for.cond.cleanup:                                 ; preds = %vector.body
 define void @sink_splat_fdiv(float* nocapture %a, float %x) {
 ; CHECK-LABEL: sink_splat_fdiv:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a1
 ; CHECK-NEXT:    li a1, 1024
 ; CHECK-NEXT:  .LBB21_1: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vfdiv.vf v8, v8, ft0
+; CHECK-NEXT:    vfdiv.vf v8, v8, fa0
 ; CHECK-NEXT:    vse32.v v8, (a0)
 ; CHECK-NEXT:    addi a1, a1, -4
 ; CHECK-NEXT:    addi a0, a0, 16
@@ -1369,13 +1367,12 @@ for.cond.cleanup:                                 ; preds = %vector.body
 define void @sink_splat_frdiv(float* nocapture %a, float %x) {
 ; CHECK-LABEL: sink_splat_frdiv:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a1
 ; CHECK-NEXT:    li a1, 1024
 ; CHECK-NEXT:  .LBB22_1: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vfrdiv.vf v8, v8, ft0
+; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0
 ; CHECK-NEXT:    vse32.v v8, (a0)
 ; CHECK-NEXT:    addi a1, a1, -4
 ; CHECK-NEXT:    addi a0, a0, 16
@@ -1406,13 +1403,12 @@ for.cond.cleanup:                                 ; preds = %vector.body
 define void @sink_splat_fadd(float* nocapture %a, float %x) {
 ; CHECK-LABEL: sink_splat_fadd:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a1
 ; CHECK-NEXT:    li a1, 1024
 ; CHECK-NEXT:  .LBB23_1: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vfadd.vf v8, v8, ft0
+; CHECK-NEXT:    vfadd.vf v8, v8, fa0
 ; CHECK-NEXT:    vse32.v v8, (a0)
 ; CHECK-NEXT:    addi a1, a1, -4
 ; CHECK-NEXT:    addi a0, a0, 16
@@ -1443,13 +1439,12 @@ for.cond.cleanup:                                 ; preds = %vector.body
 define void @sink_splat_fsub(float* nocapture %a, float %x) {
 ; CHECK-LABEL: sink_splat_fsub:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a1
 ; CHECK-NEXT:    li a1, 1024
 ; CHECK-NEXT:  .LBB24_1: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vfsub.vf v8, v8, ft0
+; CHECK-NEXT:    vfsub.vf v8, v8, fa0
 ; CHECK-NEXT:    vse32.v v8, (a0)
 ; CHECK-NEXT:    addi a1, a1, -4
 ; CHECK-NEXT:    addi a0, a0, 16
@@ -1480,13 +1475,12 @@ for.cond.cleanup:                                 ; preds = %vector.body
 define void @sink_splat_frsub(float* nocapture %a, float %x) {
 ; CHECK-LABEL: sink_splat_frsub:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a1
 ; CHECK-NEXT:    li a1, 1024
 ; CHECK-NEXT:  .LBB25_1: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vfrsub.vf v8, v8, ft0
+; CHECK-NEXT:    vfrsub.vf v8, v8, fa0
 ; CHECK-NEXT:    vse32.v v8, (a0)
 ; CHECK-NEXT:    addi a1, a1, -4
 ; CHECK-NEXT:    addi a0, a0, 16
@@ -1517,43 +1511,42 @@ for.cond.cleanup:                                 ; preds = %vector.body
 define void @sink_splat_fmul_scalable(float* nocapture %a, float %x) {
 ; CHECK-LABEL: sink_splat_fmul_scalable:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    srli a3, a2, 2
-; CHECK-NEXT:    li a6, 1024
-; CHECK-NEXT:    fmv.w.x ft0, a1
-; CHECK-NEXT:    bgeu a6, a3, .LBB26_2
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    srli a2, a1, 2
+; CHECK-NEXT:    li a3, 1024
+; CHECK-NEXT:    bgeu a3, a2, .LBB26_2
 ; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    li a1, 0
+; CHECK-NEXT:    li a3, 0
 ; CHECK-NEXT:    j .LBB26_5
 ; CHECK-NEXT:  .LBB26_2: # %vector.ph
 ; CHECK-NEXT:    li a5, 0
-; CHECK-NEXT:    remu a4, a6, a3
-; CHECK-NEXT:    sub a1, a6, a4
+; CHECK-NEXT:    remu a4, a3, a2
+; CHECK-NEXT:    sub a3, a3, a4
 ; CHECK-NEXT:    mv a6, a0
 ; CHECK-NEXT:  .LBB26_3: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    vl1re32.v v8, (a6)
 ; CHECK-NEXT:    vsetvli a7, zero, e32, m1, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v8, ft0
+; CHECK-NEXT:    vfmul.vf v8, v8, fa0
 ; CHECK-NEXT:    vs1r.v v8, (a6)
-; CHECK-NEXT:    add a5, a5, a3
-; CHECK-NEXT:    add a6, a6, a2
-; CHECK-NEXT:    bne a5, a1, .LBB26_3
+; CHECK-NEXT:    add a5, a5, a2
+; CHECK-NEXT:    add a6, a6, a1
+; CHECK-NEXT:    bne a5, a3, .LBB26_3
 ; CHECK-NEXT:  # %bb.4: # %middle.block
 ; CHECK-NEXT:    beqz a4, .LBB26_7
 ; CHECK-NEXT:  .LBB26_5: # %for.body.preheader
-; CHECK-NEXT:    addi a2, a1, -1024
-; CHECK-NEXT:    slli a1, a1, 2
-; CHECK-NEXT:    add a0, a0, a1
+; CHECK-NEXT:    addi a1, a3, -1024
+; CHECK-NEXT:    slli a2, a3, 2
+; CHECK-NEXT:    add a0, a0, a2
 ; CHECK-NEXT:  .LBB26_6: # %for.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    flw ft1, 0(a0)
-; CHECK-NEXT:    mv a1, a2
-; CHECK-NEXT:    fmul.s ft1, ft1, ft0
-; CHECK-NEXT:    fsw ft1, 0(a0)
-; CHECK-NEXT:    addi a2, a2, 1
+; CHECK-NEXT:    flw ft0, 0(a0)
+; CHECK-NEXT:    mv a2, a1
+; CHECK-NEXT:    fmul.s ft0, ft0, fa0
+; CHECK-NEXT:    fsw ft0, 0(a0)
+; CHECK-NEXT:    addi a1, a1, 1
 ; CHECK-NEXT:    addi a0, a0, 4
-; CHECK-NEXT:    bgeu a2, a1, .LBB26_6
+; CHECK-NEXT:    bgeu a1, a2, .LBB26_6
 ; CHECK-NEXT:  .LBB26_7: # %for.cond.cleanup
 ; CHECK-NEXT:    ret
 entry:
@@ -1610,43 +1603,42 @@ for.body:                                         ; preds = %for.body.preheader,
 define void @sink_splat_fdiv_scalable(float* nocapture %a, float %x) {
 ; CHECK-LABEL: sink_splat_fdiv_scalable:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    srli a3, a2, 2
-; CHECK-NEXT:    li a6, 1024
-; CHECK-NEXT:    fmv.w.x ft0, a1
-; CHECK-NEXT:    bgeu a6, a3, .LBB27_2
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    srli a2, a1, 2
+; CHECK-NEXT:    li a3, 1024
+; CHECK-NEXT:    bgeu a3, a2, .LBB27_2
 ; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    li a1, 0
+; CHECK-NEXT:    li a3, 0
 ; CHECK-NEXT:    j .LBB27_5
 ; CHECK-NEXT:  .LBB27_2: # %vector.ph
 ; CHECK-NEXT:    li a5, 0
-; CHECK-NEXT:    remu a4, a6, a3
-; CHECK-NEXT:    sub a1, a6, a4
+; CHECK-NEXT:    remu a4, a3, a2
+; CHECK-NEXT:    sub a3, a3, a4
 ; CHECK-NEXT:    mv a6, a0
 ; CHECK-NEXT:  .LBB27_3: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    vl1re32.v v8, (a6)
 ; CHECK-NEXT:    vsetvli a7, zero, e32, m1, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v8, ft0
+; CHECK-NEXT:    vfdiv.vf v8, v8, fa0
 ; CHECK-NEXT:    vs1r.v v8, (a6)
-; CHECK-NEXT:    add a5, a5, a3
-; CHECK-NEXT:    add a6, a6, a2
-; CHECK-NEXT:    bne a5, a1, .LBB27_3
+; CHECK-NEXT:    add a5, a5, a2
+; CHECK-NEXT:    add a6, a6, a1
+; CHECK-NEXT:    bne a5, a3, .LBB27_3
 ; CHECK-NEXT:  # %bb.4: # %middle.block
 ; CHECK-NEXT:    beqz a4, .LBB27_7
 ; CHECK-NEXT:  .LBB27_5: # %for.body.preheader
-; CHECK-NEXT:    addi a2, a1, -1024
-; CHECK-NEXT:    slli a1, a1, 2
-; CHECK-NEXT:    add a0, a0, a1
+; CHECK-NEXT:    addi a1, a3, -1024
+; CHECK-NEXT:    slli a2, a3, 2
+; CHECK-NEXT:    add a0, a0, a2
 ; CHECK-NEXT:  .LBB27_6: # %for.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    flw ft1, 0(a0)
-; CHECK-NEXT:    mv a1, a2
-; CHECK-NEXT:    fdiv.s ft1, ft1, ft0
-; CHECK-NEXT:    fsw ft1, 0(a0)
-; CHECK-NEXT:    addi a2, a2, 1
+; CHECK-NEXT:    flw ft0, 0(a0)
+; CHECK-NEXT:    mv a2, a1
+; CHECK-NEXT:    fdiv.s ft0, ft0, fa0
+; CHECK-NEXT:    fsw ft0, 0(a0)
+; CHECK-NEXT:    addi a1, a1, 1
 ; CHECK-NEXT:    addi a0, a0, 4
-; CHECK-NEXT:    bgeu a2, a1, .LBB27_6
+; CHECK-NEXT:    bgeu a1, a2, .LBB27_6
 ; CHECK-NEXT:  .LBB27_7: # %for.cond.cleanup
 ; CHECK-NEXT:    ret
 entry:
@@ -1703,43 +1695,42 @@ for.body:                                         ; preds = %for.body.preheader,
 define void @sink_splat_frdiv_scalable(float* nocapture %a, float %x) {
 ; CHECK-LABEL: sink_splat_frdiv_scalable:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    srli a3, a2, 2
-; CHECK-NEXT:    li a6, 1024
-; CHECK-NEXT:    fmv.w.x ft0, a1
-; CHECK-NEXT:    bgeu a6, a3, .LBB28_2
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    srli a2, a1, 2
+; CHECK-NEXT:    li a3, 1024
+; CHECK-NEXT:    bgeu a3, a2, .LBB28_2
 ; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    li a1, 0
+; CHECK-NEXT:    li a3, 0
 ; CHECK-NEXT:    j .LBB28_5
 ; CHECK-NEXT:  .LBB28_2: # %vector.ph
 ; CHECK-NEXT:    li a5, 0
-; CHECK-NEXT:    remu a4, a6, a3
-; CHECK-NEXT:    sub a1, a6, a4
+; CHECK-NEXT:    remu a4, a3, a2
+; CHECK-NEXT:    sub a3, a3, a4
 ; CHECK-NEXT:    mv a6, a0
 ; CHECK-NEXT:  .LBB28_3: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    vl1re32.v v8, (a6)
 ; CHECK-NEXT:    vsetvli a7, zero, e32, m1, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v8, ft0
+; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0
 ; CHECK-NEXT:    vs1r.v v8, (a6)
-; CHECK-NEXT:    add a5, a5, a3
-; CHECK-NEXT:    add a6, a6, a2
-; CHECK-NEXT:    bne a5, a1, .LBB28_3
+; CHECK-NEXT:    add a5, a5, a2
+; CHECK-NEXT:    add a6, a6, a1
+; CHECK-NEXT:    bne a5, a3, .LBB28_3
 ; CHECK-NEXT:  # %bb.4: # %middle.block
 ; CHECK-NEXT:    beqz a4, .LBB28_7
 ; CHECK-NEXT:  .LBB28_5: # %for.body.preheader
-; CHECK-NEXT:    addi a2, a1, -1024
-; CHECK-NEXT:    slli a1, a1, 2
-; CHECK-NEXT:    add a0, a0, a1
+; CHECK-NEXT:    addi a1, a3, -1024
+; CHECK-NEXT:    slli a2, a3, 2
+; CHECK-NEXT:    add a0, a0, a2
 ; CHECK-NEXT:  .LBB28_6: # %for.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    flw ft1, 0(a0)
-; CHECK-NEXT:    mv a1, a2
-; CHECK-NEXT:    fdiv.s ft1, ft0, ft1
-; CHECK-NEXT:    fsw ft1, 0(a0)
-; CHECK-NEXT:    addi a2, a2, 1
+; CHECK-NEXT:    flw ft0, 0(a0)
+; CHECK-NEXT:    mv a2, a1
+; CHECK-NEXT:    fdiv.s ft0, fa0, ft0
+; CHECK-NEXT:    fsw ft0, 0(a0)
+; CHECK-NEXT:    addi a1, a1, 1
 ; CHECK-NEXT:    addi a0, a0, 4
-; CHECK-NEXT:    bgeu a2, a1, .LBB28_6
+; CHECK-NEXT:    bgeu a1, a2, .LBB28_6
 ; CHECK-NEXT:  .LBB28_7: # %for.cond.cleanup
 ; CHECK-NEXT:    ret
 entry:
@@ -1796,43 +1787,42 @@ for.body:                                         ; preds = %for.body.preheader,
 define void @sink_splat_fadd_scalable(float* nocapture %a, float %x) {
 ; CHECK-LABEL: sink_splat_fadd_scalable:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    srli a3, a2, 2
-; CHECK-NEXT:    li a6, 1024
-; CHECK-NEXT:    fmv.w.x ft0, a1
-; CHECK-NEXT:    bgeu a6, a3, .LBB29_2
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    srli a2, a1, 2
+; CHECK-NEXT:    li a3, 1024
+; CHECK-NEXT:    bgeu a3, a2, .LBB29_2
 ; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    li a1, 0
+; CHECK-NEXT:    li a3, 0
 ; CHECK-NEXT:    j .LBB29_5
 ; CHECK-NEXT:  .LBB29_2: # %vector.ph
 ; CHECK-NEXT:    li a5, 0
-; CHECK-NEXT:    remu a4, a6, a3
-; CHECK-NEXT:    sub a1, a6, a4
+; CHECK-NEXT:    remu a4, a3, a2
+; CHECK-NEXT:    sub a3, a3, a4
 ; CHECK-NEXT:    mv a6, a0
 ; CHECK-NEXT:  .LBB29_3: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    vl1re32.v v8, (a6)
 ; CHECK-NEXT:    vsetvli a7, zero, e32, m1, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v8, ft0
+; CHECK-NEXT:    vfadd.vf v8, v8, fa0
 ; CHECK-NEXT:    vs1r.v v8, (a6)
-; CHECK-NEXT:    add a5, a5, a3
-; CHECK-NEXT:    add a6, a6, a2
-; CHECK-NEXT:    bne a5, a1, .LBB29_3
+; CHECK-NEXT:    add a5, a5, a2
+; CHECK-NEXT:    add a6, a6, a1
+; CHECK-NEXT:    bne a5, a3, .LBB29_3
 ; CHECK-NEXT:  # %bb.4: # %middle.block
 ; CHECK-NEXT:    beqz a4, .LBB29_7
 ; CHECK-NEXT:  .LBB29_5: # %for.body.preheader
-; CHECK-NEXT:    addi a2, a1, -1024
-; CHECK-NEXT:    slli a1, a1, 2
-; CHECK-NEXT:    add a0, a0, a1
+; CHECK-NEXT:    addi a1, a3, -1024
+; CHECK-NEXT:    slli a2, a3, 2
+; CHECK-NEXT:    add a0, a0, a2
 ; CHECK-NEXT:  .LBB29_6: # %for.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    flw ft1, 0(a0)
-; CHECK-NEXT:    mv a1, a2
-; CHECK-NEXT:    fadd.s ft1, ft1, ft0
-; CHECK-NEXT:    fsw ft1, 0(a0)
-; CHECK-NEXT:    addi a2, a2, 1
+; CHECK-NEXT:    flw ft0, 0(a0)
+; CHECK-NEXT:    mv a2, a1
+; CHECK-NEXT:    fadd.s ft0, ft0, fa0
+; CHECK-NEXT:    fsw ft0, 0(a0)
+; CHECK-NEXT:    addi a1, a1, 1
 ; CHECK-NEXT:    addi a0, a0, 4
-; CHECK-NEXT:    bgeu a2, a1, .LBB29_6
+; CHECK-NEXT:    bgeu a1, a2, .LBB29_6
 ; CHECK-NEXT:  .LBB29_7: # %for.cond.cleanup
 ; CHECK-NEXT:    ret
 entry:
@@ -1889,43 +1879,42 @@ for.body:                                         ; preds = %for.body.preheader,
 define void @sink_splat_fsub_scalable(float* nocapture %a, float %x) {
 ; CHECK-LABEL: sink_splat_fsub_scalable:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    srli a3, a2, 2
-; CHECK-NEXT:    li a6, 1024
-; CHECK-NEXT:    fmv.w.x ft0, a1
-; CHECK-NEXT:    bgeu a6, a3, .LBB30_2
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    srli a2, a1, 2
+; CHECK-NEXT:    li a3, 1024
+; CHECK-NEXT:    bgeu a3, a2, .LBB30_2
 ; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    li a1, 0
+; CHECK-NEXT:    li a3, 0
 ; CHECK-NEXT:    j .LBB30_5
 ; CHECK-NEXT:  .LBB30_2: # %vector.ph
 ; CHECK-NEXT:    li a5, 0
-; CHECK-NEXT:    remu a4, a6, a3
-; CHECK-NEXT:    sub a1, a6, a4
+; CHECK-NEXT:    remu a4, a3, a2
+; CHECK-NEXT:    sub a3, a3, a4
 ; CHECK-NEXT:    mv a6, a0
 ; CHECK-NEXT:  .LBB30_3: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    vl1re32.v v8, (a6)
 ; CHECK-NEXT:    vsetvli a7, zero, e32, m1, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v8, ft0
+; CHECK-NEXT:    vfsub.vf v8, v8, fa0
 ; CHECK-NEXT:    vs1r.v v8, (a6)
-; CHECK-NEXT:    add a5, a5, a3
-; CHECK-NEXT:    add a6, a6, a2
-; CHECK-NEXT:    bne a5, a1, .LBB30_3
+; CHECK-NEXT:    add a5, a5, a2
+; CHECK-NEXT:    add a6, a6, a1
+; CHECK-NEXT:    bne a5, a3, .LBB30_3
 ; CHECK-NEXT:  # %bb.4: # %middle.block
 ; CHECK-NEXT:    beqz a4, .LBB30_7
 ; CHECK-NEXT:  .LBB30_5: # %for.body.preheader
-; CHECK-NEXT:    addi a2, a1, -1024
-; CHECK-NEXT:    slli a1, a1, 2
-; CHECK-NEXT:    add a0, a0, a1
+; CHECK-NEXT:    addi a1, a3, -1024
+; CHECK-NEXT:    slli a2, a3, 2
+; CHECK-NEXT:    add a0, a0, a2
 ; CHECK-NEXT:  .LBB30_6: # %for.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    flw ft1, 0(a0)
-; CHECK-NEXT:    mv a1, a2
-; CHECK-NEXT:    fsub.s ft1, ft1, ft0
-; CHECK-NEXT:    fsw ft1, 0(a0)
-; CHECK-NEXT:    addi a2, a2, 1
+; CHECK-NEXT:    flw ft0, 0(a0)
+; CHECK-NEXT:    mv a2, a1
+; CHECK-NEXT:    fsub.s ft0, ft0, fa0
+; CHECK-NEXT:    fsw ft0, 0(a0)
+; CHECK-NEXT:    addi a1, a1, 1
 ; CHECK-NEXT:    addi a0, a0, 4
-; CHECK-NEXT:    bgeu a2, a1, .LBB30_6
+; CHECK-NEXT:    bgeu a1, a2, .LBB30_6
 ; CHECK-NEXT:  .LBB30_7: # %for.cond.cleanup
 ; CHECK-NEXT:    ret
 entry:
@@ -1982,43 +1971,42 @@ for.body:                                         ; preds = %for.body.preheader,
 define void @sink_splat_frsub_scalable(float* nocapture %a, float %x) {
 ; CHECK-LABEL: sink_splat_frsub_scalable:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    srli a3, a2, 2
-; CHECK-NEXT:    li a6, 1024
-; CHECK-NEXT:    fmv.w.x ft0, a1
-; CHECK-NEXT:    bgeu a6, a3, .LBB31_2
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    srli a2, a1, 2
+; CHECK-NEXT:    li a3, 1024
+; CHECK-NEXT:    bgeu a3, a2, .LBB31_2
 ; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    li a1, 0
+; CHECK-NEXT:    li a3, 0
 ; CHECK-NEXT:    j .LBB31_5
 ; CHECK-NEXT:  .LBB31_2: # %vector.ph
 ; CHECK-NEXT:    li a5, 0
-; CHECK-NEXT:    remu a4, a6, a3
-; CHECK-NEXT:    sub a1, a6, a4
+; CHECK-NEXT:    remu a4, a3, a2
+; CHECK-NEXT:    sub a3, a3, a4
 ; CHECK-NEXT:    mv a6, a0
 ; CHECK-NEXT:  .LBB31_3: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    vl1re32.v v8, (a6)
 ; CHECK-NEXT:    vsetvli a7, zero, e32, m1, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v8, ft0
+; CHECK-NEXT:    vfrsub.vf v8, v8, fa0
 ; CHECK-NEXT:    vs1r.v v8, (a6)
-; CHECK-NEXT:    add a5, a5, a3
-; CHECK-NEXT:    add a6, a6, a2
-; CHECK-NEXT:    bne a5, a1, .LBB31_3
+; CHECK-NEXT:    add a5, a5, a2
+; CHECK-NEXT:    add a6, a6, a1
+; CHECK-NEXT:    bne a5, a3, .LBB31_3
 ; CHECK-NEXT:  # %bb.4: # %middle.block
 ; CHECK-NEXT:    beqz a4, .LBB31_7
 ; CHECK-NEXT:  .LBB31_5: # %for.body.preheader
-; CHECK-NEXT:    addi a2, a1, -1024
-; CHECK-NEXT:    slli a1, a1, 2
-; CHECK-NEXT:    add a0, a0, a1
+; CHECK-NEXT:    addi a1, a3, -1024
+; CHECK-NEXT:    slli a2, a3, 2
+; CHECK-NEXT:    add a0, a0, a2
 ; CHECK-NEXT:  .LBB31_6: # %for.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    flw ft1, 0(a0)
-; CHECK-NEXT:    mv a1, a2
-; CHECK-NEXT:    fsub.s ft1, ft0, ft1
-; CHECK-NEXT:    fsw ft1, 0(a0)
-; CHECK-NEXT:    addi a2, a2, 1
+; CHECK-NEXT:    flw ft0, 0(a0)
+; CHECK-NEXT:    mv a2, a1
+; CHECK-NEXT:    fsub.s ft0, fa0, ft0
+; CHECK-NEXT:    fsw ft0, 0(a0)
+; CHECK-NEXT:    addi a1, a1, 1
 ; CHECK-NEXT:    addi a0, a0, 4
-; CHECK-NEXT:    bgeu a2, a1, .LBB31_6
+; CHECK-NEXT:    bgeu a1, a2, .LBB31_6
 ; CHECK-NEXT:  .LBB31_7: # %for.cond.cleanup
 ; CHECK-NEXT:    ret
 entry:
@@ -2075,14 +2063,13 @@ for.body:                                         ; preds = %for.body.preheader,
 define void @sink_splat_fma(float* noalias nocapture %a, float* nocapture readonly %b, float %x) {
 ; CHECK-LABEL: sink_splat_fma:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a2
 ; CHECK-NEXT:    li a2, 1024
 ; CHECK-NEXT:  .LBB32_1: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
 ; CHECK-NEXT:    vle32.v v8, (a0)
 ; CHECK-NEXT:    vle32.v v9, (a1)
-; CHECK-NEXT:    vfmacc.vf v9, ft0, v8
+; CHECK-NEXT:    vfmacc.vf v9, fa0, v8
 ; CHECK-NEXT:    vse32.v v9, (a0)
 ; CHECK-NEXT:    addi a2, a2, -4
 ; CHECK-NEXT:    addi a1, a1, 16
@@ -2117,14 +2104,13 @@ for.cond.cleanup:                                 ; preds = %vector.body
 define void @sink_splat_fma_commute(float* noalias nocapture %a, float* nocapture readonly %b, float %x) {
 ; CHECK-LABEL: sink_splat_fma_commute:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a2
 ; CHECK-NEXT:    li a2, 1024
 ; CHECK-NEXT:  .LBB33_1: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
 ; CHECK-NEXT:    vle32.v v8, (a0)
 ; CHECK-NEXT:    vle32.v v9, (a1)
-; CHECK-NEXT:    vfmacc.vf v9, ft0, v8
+; CHECK-NEXT:    vfmacc.vf v9, fa0, v8
 ; CHECK-NEXT:    vse32.v v9, (a0)
 ; CHECK-NEXT:    addi a2, a2, -4
 ; CHECK-NEXT:    addi a1, a1, 16
@@ -2159,19 +2145,18 @@ for.cond.cleanup:                                 ; preds = %vector.body
 define void @sink_splat_fma_scalable(float* noalias nocapture %a, float* noalias nocapture readonly %b, float %x) {
 ; CHECK-LABEL: sink_splat_fma_scalable:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    csrr a3, vlenb
-; CHECK-NEXT:    srli a4, a3, 2
-; CHECK-NEXT:    li t0, 1024
-; CHECK-NEXT:    fmv.w.x ft0, a2
-; CHECK-NEXT:    bgeu t0, a4, .LBB34_2
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    srli a3, a2, 2
+; CHECK-NEXT:    li a4, 1024
+; CHECK-NEXT:    bgeu a4, a3, .LBB34_2
 ; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    li a2, 0
+; CHECK-NEXT:    li a4, 0
 ; CHECK-NEXT:    j .LBB34_5
 ; CHECK-NEXT:  .LBB34_2: # %vector.ph
 ; CHECK-NEXT:    li a6, 0
 ; CHECK-NEXT:    li a7, 0
-; CHECK-NEXT:    remu a5, t0, a4
-; CHECK-NEXT:    sub a2, t0, a5
+; CHECK-NEXT:    remu a5, a4, a3
+; CHECK-NEXT:    sub a4, a4, a5
 ; CHECK-NEXT:  .LBB34_3: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    add t0, a0, a6
@@ -2179,29 +2164,29 @@ define void @sink_splat_fma_scalable(float* noalias nocapture %a, float* noalias
 ; CHECK-NEXT:    add t1, a1, a6
 ; CHECK-NEXT:    vl1re32.v v9, (t1)
 ; CHECK-NEXT:    vsetvli t1, zero, e32, m1, ta, mu
-; CHECK-NEXT:    vfmacc.vf v9, ft0, v8
+; CHECK-NEXT:    vfmacc.vf v9, fa0, v8
 ; CHECK-NEXT:    vs1r.v v9, (t0)
-; CHECK-NEXT:    add a7, a7, a4
-; CHECK-NEXT:    add a6, a6, a3
-; CHECK-NEXT:    bne a7, a2, .LBB34_3
+; CHECK-NEXT:    add a7, a7, a3
+; CHECK-NEXT:    add a6, a6, a2
+; CHECK-NEXT:    bne a7, a4, .LBB34_3
 ; CHECK-NEXT:  # %bb.4: # %middle.block
 ; CHECK-NEXT:    beqz a5, .LBB34_7
 ; CHECK-NEXT:  .LBB34_5: # %for.body.preheader
-; CHECK-NEXT:    addi a3, a2, -1024
-; CHECK-NEXT:    slli a2, a2, 2
-; CHECK-NEXT:    add a1, a1, a2
-; CHECK-NEXT:    add a0, a0, a2
+; CHECK-NEXT:    addi a2, a4, -1024
+; CHECK-NEXT:    slli a3, a4, 2
+; CHECK-NEXT:    add a1, a1, a3
+; CHECK-NEXT:    add a0, a0, a3
 ; CHECK-NEXT:  .LBB34_6: # %for.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    flw ft1, 0(a0)
-; CHECK-NEXT:    flw ft2, 0(a1)
-; CHECK-NEXT:    mv a2, a3
-; CHECK-NEXT:    fmadd.s ft1, ft1, ft0, ft2
-; CHECK-NEXT:    fsw ft1, 0(a0)
-; CHECK-NEXT:    addi a3, a3, 1
+; CHECK-NEXT:    flw ft0, 0(a0)
+; CHECK-NEXT:    flw ft1, 0(a1)
+; CHECK-NEXT:    mv a3, a2
+; CHECK-NEXT:    fmadd.s ft0, ft0, fa0, ft1
+; CHECK-NEXT:    fsw ft0, 0(a0)
+; CHECK-NEXT:    addi a2, a2, 1
 ; CHECK-NEXT:    addi a1, a1, 4
 ; CHECK-NEXT:    addi a0, a0, 4
-; CHECK-NEXT:    bgeu a3, a2, .LBB34_6
+; CHECK-NEXT:    bgeu a2, a3, .LBB34_6
 ; CHECK-NEXT:  .LBB34_7: # %for.cond.cleanup
 ; CHECK-NEXT:    ret
 entry:
@@ -2263,19 +2248,18 @@ for.body:                                         ; preds = %for.body.preheader,
 define void @sink_splat_fma_commute_scalable(float* noalias nocapture %a, float* noalias nocapture readonly %b, float %x) {
 ; CHECK-LABEL: sink_splat_fma_commute_scalable:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    csrr a3, vlenb
-; CHECK-NEXT:    srli a4, a3, 2
-; CHECK-NEXT:    li t0, 1024
-; CHECK-NEXT:    fmv.w.x ft0, a2
-; CHECK-NEXT:    bgeu t0, a4, .LBB35_2
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    srli a3, a2, 2
+; CHECK-NEXT:    li a4, 1024
+; CHECK-NEXT:    bgeu a4, a3, .LBB35_2
 ; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    li a2, 0
+; CHECK-NEXT:    li a4, 0
 ; CHECK-NEXT:    j .LBB35_5
 ; CHECK-NEXT:  .LBB35_2: # %vector.ph
 ; CHECK-NEXT:    li a6, 0
 ; CHECK-NEXT:    li a7, 0
-; CHECK-NEXT:    remu a5, t0, a4
-; CHECK-NEXT:    sub a2, t0, a5
+; CHECK-NEXT:    remu a5, a4, a3
+; CHECK-NEXT:    sub a4, a4, a5
 ; CHECK-NEXT:  .LBB35_3: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    add t0, a0, a6
@@ -2283,29 +2267,29 @@ define void @sink_splat_fma_commute_scalable(float* noalias nocapture %a, float*
 ; CHECK-NEXT:    add t1, a1, a6
 ; CHECK-NEXT:    vl1re32.v v9, (t1)
 ; CHECK-NEXT:    vsetvli t1, zero, e32, m1, ta, mu
-; CHECK-NEXT:    vfmacc.vf v9, ft0, v8
+; CHECK-NEXT:    vfmacc.vf v9, fa0, v8
 ; CHECK-NEXT:    vs1r.v v9, (t0)
-; CHECK-NEXT:    add a7, a7, a4
-; CHECK-NEXT:    add a6, a6, a3
-; CHECK-NEXT:    bne a7, a2, .LBB35_3
+; CHECK-NEXT:    add a7, a7, a3
+; CHECK-NEXT:    add a6, a6, a2
+; CHECK-NEXT:    bne a7, a4, .LBB35_3
 ; CHECK-NEXT:  # %bb.4: # %middle.block
 ; CHECK-NEXT:    beqz a5, .LBB35_7
 ; CHECK-NEXT:  .LBB35_5: # %for.body.preheader
-; CHECK-NEXT:    addi a3, a2, -1024
-; CHECK-NEXT:    slli a2, a2, 2
-; CHECK-NEXT:    add a1, a1, a2
-; CHECK-NEXT:    add a0, a0, a2
+; CHECK-NEXT:    addi a2, a4, -1024
+; CHECK-NEXT:    slli a3, a4, 2
+; CHECK-NEXT:    add a1, a1, a3
+; CHECK-NEXT:    add a0, a0, a3
 ; CHECK-NEXT:  .LBB35_6: # %for.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    flw ft1, 0(a0)
-; CHECK-NEXT:    flw ft2, 0(a1)
-; CHECK-NEXT:    mv a2, a3
-; CHECK-NEXT:    fmadd.s ft1, ft0, ft1, ft2
-; CHECK-NEXT:    fsw ft1, 0(a0)
-; CHECK-NEXT:    addi a3, a3, 1
+; CHECK-NEXT:    flw ft0, 0(a0)
+; CHECK-NEXT:    flw ft1, 0(a1)
+; CHECK-NEXT:    mv a3, a2
+; CHECK-NEXT:    fmadd.s ft0, fa0, ft0, ft1
+; CHECK-NEXT:    fsw ft0, 0(a0)
+; CHECK-NEXT:    addi a2, a2, 1
 ; CHECK-NEXT:    addi a1, a1, 4
 ; CHECK-NEXT:    addi a0, a0, 4
-; CHECK-NEXT:    bgeu a3, a2, .LBB35_6
+; CHECK-NEXT:    bgeu a2, a3, .LBB35_6
 ; CHECK-NEXT:  .LBB35_7: # %for.cond.cleanup
 ; CHECK-NEXT:    ret
 entry:
@@ -2410,14 +2394,13 @@ declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32, <4 x i
 define void @sink_splat_fcmp(float* nocapture %x, float %y) {
 ; CHECK-LABEL: sink_splat_fcmp:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a1
 ; CHECK-NEXT:    li a1, 1024
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
 ; CHECK-NEXT:    vmv.v.i v8, 0
 ; CHECK-NEXT:  .LBB37_1: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    vle32.v v9, (a0)
-; CHECK-NEXT:    vmfeq.vf v0, v9, ft0
+; CHECK-NEXT:    vmfeq.vf v0, v9, fa0
 ; CHECK-NEXT:    vse32.v v8, (a0), v0.t
 ; CHECK-NEXT:    addi a1, a1, -4
 ; CHECK-NEXT:    addi a0, a0, 16
@@ -3287,19 +3270,18 @@ declare <4 x float> @llvm.vp.fmul.v4i32(<4 x float>, <4 x float>, <4 x i1>, i32)
 define void @sink_splat_vp_fmul(float* nocapture %a, float %x, <4 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: sink_splat_vp_fmul:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a1
-; CHECK-NEXT:    li a1, 1024
+; CHECK-NEXT:    li a2, 1024
 ; CHECK-NEXT:  .LBB54_1: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v8, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
+; CHECK-NEXT:    vfmul.vf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
 ; CHECK-NEXT:    vse32.v v8, (a0)
-; CHECK-NEXT:    addi a1, a1, -4
+; CHECK-NEXT:    addi a2, a2, -4
 ; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    bnez a1, .LBB54_1
+; CHECK-NEXT:    bnez a2, .LBB54_1
 ; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
 ; CHECK-NEXT:    ret
 entry:
@@ -3328,19 +3310,18 @@ declare <4 x float> @llvm.vp.fdiv.v4i32(<4 x float>, <4 x float>, <4 x i1>, i32)
 define void @sink_splat_vp_fdiv(float* nocapture %a, float %x, <4 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: sink_splat_vp_fdiv:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a1
-; CHECK-NEXT:    li a1, 1024
+; CHECK-NEXT:    li a2, 1024
 ; CHECK-NEXT:  .LBB55_1: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v8, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
+; CHECK-NEXT:    vfdiv.vf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
 ; CHECK-NEXT:    vse32.v v8, (a0)
-; CHECK-NEXT:    addi a1, a1, -4
+; CHECK-NEXT:    addi a2, a2, -4
 ; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    bnez a1, .LBB55_1
+; CHECK-NEXT:    bnez a2, .LBB55_1
 ; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
 ; CHECK-NEXT:    ret
 entry:
@@ -3367,19 +3348,18 @@ for.cond.cleanup:                                 ; preds = %vector.body
 define void @sink_splat_vp_frdiv(float* nocapture %a, float %x, <4 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: sink_splat_vp_frdiv:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a1
-; CHECK-NEXT:    li a1, 1024
+; CHECK-NEXT:    li a2, 1024
 ; CHECK-NEXT:  .LBB56_1: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v8, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
+; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
 ; CHECK-NEXT:    vse32.v v8, (a0)
-; CHECK-NEXT:    addi a1, a1, -4
+; CHECK-NEXT:    addi a2, a2, -4
 ; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    bnez a1, .LBB56_1
+; CHECK-NEXT:    bnez a2, .LBB56_1
 ; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
 ; CHECK-NEXT:    ret
 entry:
@@ -3408,19 +3388,18 @@ declare <4 x float> @llvm.vp.fadd.v4i32(<4 x float>, <4 x float>, <4 x i1>, i32)
 define void @sink_splat_vp_fadd(float* nocapture %a, float %x, <4 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: sink_splat_vp_fadd:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a1
-; CHECK-NEXT:    li a1, 1024
+; CHECK-NEXT:    li a2, 1024
 ; CHECK-NEXT:  .LBB57_1: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v8, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
+; CHECK-NEXT:    vfadd.vf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
 ; CHECK-NEXT:    vse32.v v8, (a0)
-; CHECK-NEXT:    addi a1, a1, -4
+; CHECK-NEXT:    addi a2, a2, -4
 ; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    bnez a1, .LBB57_1
+; CHECK-NEXT:    bnez a2, .LBB57_1
 ; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
 ; CHECK-NEXT:    ret
 entry:
@@ -3449,19 +3428,18 @@ declare <4 x float> @llvm.vp.fsub.v4i32(<4 x float>, <4 x float>, <4 x i1>, i32)
 define void @sink_splat_vp_fsub(float* nocapture %a, float %x, <4 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: sink_splat_vp_fsub:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a1
-; CHECK-NEXT:    li a1, 1024
+; CHECK-NEXT:    li a2, 1024
 ; CHECK-NEXT:  .LBB58_1: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v8, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
+; CHECK-NEXT:    vfsub.vf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
 ; CHECK-NEXT:    vse32.v v8, (a0)
-; CHECK-NEXT:    addi a1, a1, -4
+; CHECK-NEXT:    addi a2, a2, -4
 ; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    bnez a1, .LBB58_1
+; CHECK-NEXT:    bnez a2, .LBB58_1
 ; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
 ; CHECK-NEXT:    ret
 entry:
@@ -3490,19 +3468,18 @@ declare <4 x float> @llvm.vp.frsub.v4i32(<4 x float>, <4 x float>, <4 x i1>, i32
 define void @sink_splat_vp_frsub(float* nocapture %a, float %x, <4 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: sink_splat_vp_frsub:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a1
-; CHECK-NEXT:    li a1, 1024
+; CHECK-NEXT:    li a2, 1024
 ; CHECK-NEXT:  .LBB59_1: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v8, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
+; CHECK-NEXT:    vfrsub.vf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
 ; CHECK-NEXT:    vse32.v v8, (a0)
-; CHECK-NEXT:    addi a1, a1, -4
+; CHECK-NEXT:    addi a2, a2, -4
 ; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    bnez a1, .LBB59_1
+; CHECK-NEXT:    bnez a2, .LBB59_1
 ; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
 ; CHECK-NEXT:    ret
 entry:
@@ -3732,21 +3709,20 @@ declare <4 x float> @llvm.vp.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, <4
 define void @sink_splat_vp_fma(float* noalias nocapture %a, float* nocapture readonly %b, float %x, <4 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: sink_splat_vp_fma:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a2
-; CHECK-NEXT:    li a2, 1024
+; CHECK-NEXT:    li a3, 1024
 ; CHECK-NEXT:  .LBB65_1: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
 ; CHECK-NEXT:    vle32.v v8, (a0)
 ; CHECK-NEXT:    vle32.v v9, (a1)
-; CHECK-NEXT:    vsetvli zero, a3, e32, m1, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a2, e32, m1, tu, mu
+; CHECK-NEXT:    vfmadd.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
 ; CHECK-NEXT:    vse32.v v8, (a0)
-; CHECK-NEXT:    addi a2, a2, -4
+; CHECK-NEXT:    addi a3, a3, -4
 ; CHECK-NEXT:    addi a1, a1, 16
 ; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    bnez a2, .LBB65_1
+; CHECK-NEXT:    bnez a3, .LBB65_1
 ; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
 ; CHECK-NEXT:    ret
 entry:
@@ -3776,21 +3752,20 @@ for.cond.cleanup:                                 ; preds = %vector.body
 define void @sink_splat_vp_fma_commute(float* noalias nocapture %a, float* nocapture readonly %b, float %x, <4 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: sink_splat_vp_fma_commute:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a2
-; CHECK-NEXT:    li a2, 1024
+; CHECK-NEXT:    li a3, 1024
 ; CHECK-NEXT:  .LBB66_1: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
 ; CHECK-NEXT:    vle32.v v8, (a0)
 ; CHECK-NEXT:    vle32.v v9, (a1)
-; CHECK-NEXT:    vsetvli zero, a3, e32, m1, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a2, e32, m1, tu, mu
+; CHECK-NEXT:    vfmadd.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
 ; CHECK-NEXT:    vse32.v v8, (a0)
-; CHECK-NEXT:    addi a2, a2, -4
+; CHECK-NEXT:    addi a3, a3, -4
 ; CHECK-NEXT:    addi a1, a1, 16
 ; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    bnez a2, .LBB66_1
+; CHECK-NEXT:    bnez a3, .LBB66_1
 ; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
 ; CHECK-NEXT:    ret
 entry:

diff  --git a/llvm/test/CodeGen/RISCV/rvv/vmfeq-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmfeq-rv32.ll
index 553e1b6930f65..ce679449f5a83 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmfeq-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfeq-rv32.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=ilp32d < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmfeq.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
@@ -633,9 +633,8 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfeq.nxv1f16.f16(
 define <vscale x 1 x i1> @intrinsic_vmfeq_vf_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_vf_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vmfeq.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vmfeq.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmfeq.nxv1f16.f16(
@@ -657,10 +656,9 @@ define <vscale x 1 x i1> @intrinsic_vmfeq_mask_vf_nxv1f16_f16(<vscale x 1 x i1>
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmfeq.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmfeq.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -682,9 +680,8 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfeq.nxv2f16.f16(
 define <vscale x 2 x i1> @intrinsic_vmfeq_vf_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_vf_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vmfeq.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vmfeq.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmfeq.nxv2f16.f16(
@@ -706,10 +703,9 @@ define <vscale x 2 x i1> @intrinsic_vmfeq_mask_vf_nxv2f16_f16(<vscale x 2 x i1>
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmfeq.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmfeq.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -731,9 +727,8 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfeq.nxv4f16.f16(
 define <vscale x 4 x i1> @intrinsic_vmfeq_vf_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_vf_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vmfeq.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vmfeq.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfeq.nxv4f16.f16(
@@ -755,10 +750,9 @@ define <vscale x 4 x i1> @intrinsic_vmfeq_mask_vf_nxv4f16_f16(<vscale x 4 x i1>
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmfeq.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmfeq.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv.v.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -780,9 +774,8 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfeq.nxv8f16.f16(
 define <vscale x 8 x i1> @intrinsic_vmfeq_vf_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_vf_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vmfeq.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmfeq.nxv8f16.f16(
@@ -804,10 +797,9 @@ define <vscale x 8 x i1> @intrinsic_vmfeq_mask_vf_nxv8f16_f16(<vscale x 8 x i1>
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v11, v0
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfeq.vf v11, v8, ft0, v0.t
+; CHECK-NEXT:    vmfeq.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v11
 ; CHECK-NEXT:    ret
 entry:
@@ -829,9 +821,8 @@ declare <vscale x 16 x i1> @llvm.riscv.vmfeq.nxv16f16.f16(
 define <vscale x 16 x i1> @intrinsic_vmfeq_vf_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_vf_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vmfeq.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmfeq.nxv16f16.f16(
@@ -853,10 +844,9 @@ define <vscale x 16 x i1> @intrinsic_vmfeq_mask_vf_nxv16f16_f16(<vscale x 16 x i
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v13, v0
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfeq.vf v13, v8, ft0, v0.t
+; CHECK-NEXT:    vmfeq.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v13
 ; CHECK-NEXT:    ret
 entry:
@@ -878,9 +868,8 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfeq.nxv1f32.f32(
 define <vscale x 1 x i1> @intrinsic_vmfeq_vf_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_vf_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vmfeq.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vmfeq.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmfeq.nxv1f32.f32(
@@ -902,10 +891,9 @@ define <vscale x 1 x i1> @intrinsic_vmfeq_mask_vf_nxv1f32_f32(<vscale x 1 x i1>
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmfeq.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmfeq.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -927,9 +915,8 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfeq.nxv2f32.f32(
 define <vscale x 2 x i1> @intrinsic_vmfeq_vf_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_vf_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vmfeq.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vmfeq.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmfeq.nxv2f32.f32(
@@ -951,10 +938,9 @@ define <vscale x 2 x i1> @intrinsic_vmfeq_mask_vf_nxv2f32_f32(<vscale x 2 x i1>
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmfeq.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmfeq.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv.v.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -976,9 +962,8 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfeq.nxv4f32.f32(
 define <vscale x 4 x i1> @intrinsic_vmfeq_vf_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_vf_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vmfeq.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfeq.nxv4f32.f32(
@@ -1000,10 +985,9 @@ define <vscale x 4 x i1> @intrinsic_vmfeq_mask_vf_nxv4f32_f32(<vscale x 4 x i1>
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v11, v0
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfeq.vf v11, v8, ft0, v0.t
+; CHECK-NEXT:    vmfeq.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v11
 ; CHECK-NEXT:    ret
 entry:
@@ -1025,9 +1009,8 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfeq.nxv8f32.f32(
 define <vscale x 8 x i1> @intrinsic_vmfeq_vf_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_vf_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vmfeq.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmfeq.nxv8f32.f32(
@@ -1049,10 +1032,9 @@ define <vscale x 8 x i1> @intrinsic_vmfeq_mask_vf_nxv8f32_f32(<vscale x 8 x i1>
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v13, v0
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfeq.vf v13, v8, ft0, v0.t
+; CHECK-NEXT:    vmfeq.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v13
 ; CHECK-NEXT:    ret
 entry:
@@ -1074,13 +1056,8 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfeq.nxv1f64.f64(
 define <vscale x 1 x i1> @intrinsic_vmfeq_vf_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_vf_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, mu
-; CHECK-NEXT:    vmfeq.vf v0, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vmfeq.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmfeq.nxv1f64.f64(
@@ -1101,16 +1078,11 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfeq.mask.nxv1f64.f64(
 define <vscale x 1 x i1> @intrinsic_vmfeq_mask_vf_nxv1f64_f64(<vscale x 1 x i1> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmfeq.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmfeq.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv.v.v v0, v10
-; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmfeq.mask.nxv1f64.f64(
@@ -1131,13 +1103,8 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfeq.nxv2f64.f64(
 define <vscale x 2 x i1> @intrinsic_vmfeq_vf_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_vf_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vf v0, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vmfeq.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmfeq.nxv2f64.f64(
@@ -1158,16 +1125,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfeq.mask.nxv2f64.f64(
 define <vscale x 2 x i1> @intrinsic_vmfeq_mask_vf_nxv2f64_f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
 ; CHECK-NEXT:    vmv1r.v v11, v0
-; CHECK-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfeq.vf v11, v8, ft0, v0.t
+; CHECK-NEXT:    vmfeq.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v11
-; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmfeq.mask.nxv2f64.f64(
@@ -1188,13 +1150,8 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfeq.nxv4f64.f64(
 define <vscale x 4 x i1> @intrinsic_vmfeq_vf_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_vf_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vf v0, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vmfeq.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfeq.nxv4f64.f64(
@@ -1215,16 +1172,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfeq.mask.nxv4f64.f64(
 define <vscale x 4 x i1> @intrinsic_vmfeq_mask_vf_nxv4f64_f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
 ; CHECK-NEXT:    vmv1r.v v13, v0
-; CHECK-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfeq.vf v13, v8, ft0, v0.t
+; CHECK-NEXT:    vmfeq.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v13
-; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfeq.mask.nxv4f64.f64(

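For reference, the following is a minimal standalone sketch of the pattern the nxv1f64 hunks above exercise; it is illustrative only and not part of the patch, and the declare layout plus the sketch_* function name are assumptions made to mirror the conventions of the file being diffed. With -target-abi=ilp32d the double operand arrives in fa0, so the sw/sw/fld shuffle through a temporary stack slot (and the matching stack adjustment) is no longer expected, vmfeq.vf reads fa0 directly, and the vl argument shifts down to a0:

  ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
  ; RUN:   -target-abi=ilp32d < %s | FileCheck %s
  declare <vscale x 1 x i1> @llvm.riscv.vmfeq.nxv1f64.f64(
    <vscale x 1 x double>,
    double,
    i32)

  define <vscale x 1 x i1> @sketch_vmfeq_vf_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i32 %2) nounwind {
  ; CHECK-LABEL: sketch_vmfeq_vf_nxv1f64_f64:
  ; CHECK:       # %bb.0: # %entry
  ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
  ; CHECK-NEXT:    vmfeq.vf v0, v8, fa0
  ; CHECK-NEXT:    ret
  entry:
    %a = call <vscale x 1 x i1> @llvm.riscv.vmfeq.nxv1f64.f64(
      <vscale x 1 x double> %0,
      double %1,
      i32 %2)
    ret <vscale x 1 x i1> %a
  }
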
diff  --git a/llvm/test/CodeGen/RISCV/rvv/vmfeq-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmfeq-rv64.ll
index 6d4c472a5295d..e9c03b9739a6a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmfeq-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfeq-rv64.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=lp64d < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmfeq.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
@@ -633,9 +633,8 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfeq.nxv1f16.f16(
 define <vscale x 1 x i1> @intrinsic_vmfeq_vf_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_vf_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vmfeq.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vmfeq.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmfeq.nxv1f16.f16(
@@ -657,10 +656,9 @@ define <vscale x 1 x i1> @intrinsic_vmfeq_mask_vf_nxv1f16_f16(<vscale x 1 x i1>
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmfeq.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmfeq.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -682,9 +680,8 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfeq.nxv2f16.f16(
 define <vscale x 2 x i1> @intrinsic_vmfeq_vf_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_vf_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vmfeq.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vmfeq.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmfeq.nxv2f16.f16(
@@ -706,10 +703,9 @@ define <vscale x 2 x i1> @intrinsic_vmfeq_mask_vf_nxv2f16_f16(<vscale x 2 x i1>
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmfeq.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmfeq.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -731,9 +727,8 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfeq.nxv4f16.f16(
 define <vscale x 4 x i1> @intrinsic_vmfeq_vf_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_vf_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vmfeq.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vmfeq.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfeq.nxv4f16.f16(
@@ -755,10 +750,9 @@ define <vscale x 4 x i1> @intrinsic_vmfeq_mask_vf_nxv4f16_f16(<vscale x 4 x i1>
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmfeq.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmfeq.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv.v.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -780,9 +774,8 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfeq.nxv8f16.f16(
 define <vscale x 8 x i1> @intrinsic_vmfeq_vf_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_vf_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vmfeq.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmfeq.nxv8f16.f16(
@@ -804,10 +797,9 @@ define <vscale x 8 x i1> @intrinsic_vmfeq_mask_vf_nxv8f16_f16(<vscale x 8 x i1>
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v11, v0
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfeq.vf v11, v8, ft0, v0.t
+; CHECK-NEXT:    vmfeq.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v11
 ; CHECK-NEXT:    ret
 entry:
@@ -829,9 +821,8 @@ declare <vscale x 16 x i1> @llvm.riscv.vmfeq.nxv16f16.f16(
 define <vscale x 16 x i1> @intrinsic_vmfeq_vf_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_vf_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vmfeq.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmfeq.nxv16f16.f16(
@@ -853,10 +844,9 @@ define <vscale x 16 x i1> @intrinsic_vmfeq_mask_vf_nxv16f16_f16(<vscale x 16 x i
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v13, v0
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfeq.vf v13, v8, ft0, v0.t
+; CHECK-NEXT:    vmfeq.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v13
 ; CHECK-NEXT:    ret
 entry:
@@ -878,9 +868,8 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfeq.nxv1f32.f32(
 define <vscale x 1 x i1> @intrinsic_vmfeq_vf_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_vf_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vmfeq.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vmfeq.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmfeq.nxv1f32.f32(
@@ -902,10 +891,9 @@ define <vscale x 1 x i1> @intrinsic_vmfeq_mask_vf_nxv1f32_f32(<vscale x 1 x i1>
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmfeq.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmfeq.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -927,9 +915,8 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfeq.nxv2f32.f32(
 define <vscale x 2 x i1> @intrinsic_vmfeq_vf_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_vf_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vmfeq.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vmfeq.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmfeq.nxv2f32.f32(
@@ -951,10 +938,9 @@ define <vscale x 2 x i1> @intrinsic_vmfeq_mask_vf_nxv2f32_f32(<vscale x 2 x i1>
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmfeq.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmfeq.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv.v.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -976,9 +962,8 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfeq.nxv4f32.f32(
 define <vscale x 4 x i1> @intrinsic_vmfeq_vf_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_vf_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vmfeq.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfeq.nxv4f32.f32(
@@ -1000,10 +985,9 @@ define <vscale x 4 x i1> @intrinsic_vmfeq_mask_vf_nxv4f32_f32(<vscale x 4 x i1>
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v11, v0
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfeq.vf v11, v8, ft0, v0.t
+; CHECK-NEXT:    vmfeq.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v11
 ; CHECK-NEXT:    ret
 entry:
@@ -1025,9 +1009,8 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfeq.nxv8f32.f32(
 define <vscale x 8 x i1> @intrinsic_vmfeq_vf_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_vf_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vmfeq.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmfeq.nxv8f32.f32(
@@ -1049,10 +1032,9 @@ define <vscale x 8 x i1> @intrinsic_vmfeq_mask_vf_nxv8f32_f32(<vscale x 8 x i1>
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v13, v0
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfeq.vf v13, v8, ft0, v0.t
+; CHECK-NEXT:    vmfeq.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v13
 ; CHECK-NEXT:    ret
 entry:
@@ -1074,9 +1056,8 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfeq.nxv1f64.f64(
 define <vscale x 1 x i1> @intrinsic_vmfeq_vf_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_vf_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
-; CHECK-NEXT:    vmfeq.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vmfeq.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmfeq.nxv1f64.f64(
@@ -1098,10 +1079,9 @@ define <vscale x 1 x i1> @intrinsic_vmfeq_mask_vf_nxv1f64_f64(<vscale x 1 x i1>
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmfeq.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmfeq.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv.v.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -1123,9 +1103,8 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfeq.nxv2f64.f64(
 define <vscale x 2 x i1> @intrinsic_vmfeq_vf_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_vf_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, mu
-; CHECK-NEXT:    vmfeq.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vmfeq.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmfeq.nxv2f64.f64(
@@ -1147,10 +1126,9 @@ define <vscale x 2 x i1> @intrinsic_vmfeq_mask_vf_nxv2f64_f64(<vscale x 2 x i1>
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v11, v0
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfeq.vf v11, v8, ft0, v0.t
+; CHECK-NEXT:    vmfeq.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v11
 ; CHECK-NEXT:    ret
 entry:
@@ -1172,9 +1150,8 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfeq.nxv4f64.f64(
 define <vscale x 4 x i1> @intrinsic_vmfeq_vf_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_vf_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
-; CHECK-NEXT:    vmfeq.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vmfeq.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfeq.nxv4f64.f64(
@@ -1196,10 +1173,9 @@ define <vscale x 4 x i1> @intrinsic_vmfeq_mask_vf_nxv4f64_f64(<vscale x 4 x i1>
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v13, v0
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfeq.vf v13, v8, ft0, v0.t
+; CHECK-NEXT:    vmfeq.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v13
 ; CHECK-NEXT:    ret
 entry:

diff  --git a/llvm/test/CodeGen/RISCV/rvv/vmfge-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmfge-rv32.ll
index e3afd15c6df74..8a74567808f62 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmfge-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfge-rv32.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=ilp32d < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmfge.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
@@ -633,9 +633,8 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfge.nxv1f16.f16(
 define <vscale x 1 x i1> @intrinsic_vmfge_vf_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_vf_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vmfge.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmfge.nxv1f16.f16(
@@ -657,10 +656,9 @@ define <vscale x 1 x i1> @intrinsic_vmfge_mask_vf_nxv1f16_f16(<vscale x 1 x i1>
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmfge.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmfge.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -682,9 +680,8 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfge.nxv2f16.f16(
 define <vscale x 2 x i1> @intrinsic_vmfge_vf_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_vf_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vmfge.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmfge.nxv2f16.f16(
@@ -706,10 +703,9 @@ define <vscale x 2 x i1> @intrinsic_vmfge_mask_vf_nxv2f16_f16(<vscale x 2 x i1>
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmfge.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmfge.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -731,9 +727,8 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfge.nxv4f16.f16(
 define <vscale x 4 x i1> @intrinsic_vmfge_vf_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_vf_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vmfge.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfge.nxv4f16.f16(
@@ -755,10 +750,9 @@ define <vscale x 4 x i1> @intrinsic_vmfge_mask_vf_nxv4f16_f16(<vscale x 4 x i1>
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmfge.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmfge.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv.v.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -780,9 +774,8 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfge.nxv8f16.f16(
 define <vscale x 8 x i1> @intrinsic_vmfge_vf_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_vf_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vmfge.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmfge.nxv8f16.f16(
@@ -804,10 +797,9 @@ define <vscale x 8 x i1> @intrinsic_vmfge_mask_vf_nxv8f16_f16(<vscale x 8 x i1>
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v11, v0
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfge.vf v11, v8, ft0, v0.t
+; CHECK-NEXT:    vmfge.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v11
 ; CHECK-NEXT:    ret
 entry:
@@ -829,9 +821,8 @@ declare <vscale x 16 x i1> @llvm.riscv.vmfge.nxv16f16.f16(
 define <vscale x 16 x i1> @intrinsic_vmfge_vf_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_vf_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vmfge.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmfge.nxv16f16.f16(
@@ -853,10 +844,9 @@ define <vscale x 16 x i1> @intrinsic_vmfge_mask_vf_nxv16f16_f16(<vscale x 16 x i
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v13, v0
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfge.vf v13, v8, ft0, v0.t
+; CHECK-NEXT:    vmfge.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v13
 ; CHECK-NEXT:    ret
 entry:
@@ -878,9 +868,8 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfge.nxv1f32.f32(
 define <vscale x 1 x i1> @intrinsic_vmfge_vf_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_vf_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vmfge.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmfge.nxv1f32.f32(
@@ -902,10 +891,9 @@ define <vscale x 1 x i1> @intrinsic_vmfge_mask_vf_nxv1f32_f32(<vscale x 1 x i1>
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmfge.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmfge.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -927,9 +915,8 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfge.nxv2f32.f32(
 define <vscale x 2 x i1> @intrinsic_vmfge_vf_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_vf_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vmfge.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmfge.nxv2f32.f32(
@@ -951,10 +938,9 @@ define <vscale x 2 x i1> @intrinsic_vmfge_mask_vf_nxv2f32_f32(<vscale x 2 x i1>
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmfge.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmfge.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv.v.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -976,9 +962,8 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfge.nxv4f32.f32(
 define <vscale x 4 x i1> @intrinsic_vmfge_vf_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_vf_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vmfge.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfge.nxv4f32.f32(
@@ -1000,10 +985,9 @@ define <vscale x 4 x i1> @intrinsic_vmfge_mask_vf_nxv4f32_f32(<vscale x 4 x i1>
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v11, v0
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfge.vf v11, v8, ft0, v0.t
+; CHECK-NEXT:    vmfge.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v11
 ; CHECK-NEXT:    ret
 entry:
@@ -1025,9 +1009,8 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfge.nxv8f32.f32(
 define <vscale x 8 x i1> @intrinsic_vmfge_vf_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_vf_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vmfge.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmfge.nxv8f32.f32(
@@ -1049,10 +1032,9 @@ define <vscale x 8 x i1> @intrinsic_vmfge_mask_vf_nxv8f32_f32(<vscale x 8 x i1>
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v13, v0
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfge.vf v13, v8, ft0, v0.t
+; CHECK-NEXT:    vmfge.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v13
 ; CHECK-NEXT:    ret
 entry:
@@ -1074,13 +1056,8 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfge.nxv1f64.f64(
 define <vscale x 1 x i1> @intrinsic_vmfge_vf_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_vf_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, mu
-; CHECK-NEXT:    vmfge.vf v0, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmfge.nxv1f64.f64(
@@ -1101,16 +1078,11 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfge.mask.nxv1f64.f64(
 define <vscale x 1 x i1> @intrinsic_vmfge_mask_vf_nxv1f64_f64(<vscale x 1 x i1> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmfge.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmfge.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv.v.v v0, v10
-; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmfge.mask.nxv1f64.f64(
@@ -1131,13 +1103,8 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfge.nxv2f64.f64(
 define <vscale x 2 x i1> @intrinsic_vmfge_vf_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_vf_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
-; CHECK-NEXT:    vmfge.vf v0, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmfge.nxv2f64.f64(
@@ -1158,16 +1125,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfge.mask.nxv2f64.f64(
 define <vscale x 2 x i1> @intrinsic_vmfge_mask_vf_nxv2f64_f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
 ; CHECK-NEXT:    vmv1r.v v11, v0
-; CHECK-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfge.vf v11, v8, ft0, v0.t
+; CHECK-NEXT:    vmfge.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v11
-; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmfge.mask.nxv2f64.f64(
@@ -1188,13 +1150,8 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfge.nxv4f64.f64(
 define <vscale x 4 x i1> @intrinsic_vmfge_vf_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_vf_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
-; CHECK-NEXT:    vmfge.vf v0, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfge.nxv4f64.f64(
@@ -1215,16 +1172,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfge.mask.nxv4f64.f64(
 define <vscale x 4 x i1> @intrinsic_vmfge_mask_vf_nxv4f64_f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
 ; CHECK-NEXT:    vmv1r.v v13, v0
-; CHECK-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfge.vf v13, v8, ft0, v0.t
+; CHECK-NEXT:    vmfge.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v13
-; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfge.mask.nxv4f64.f64(

diff  --git a/llvm/test/CodeGen/RISCV/rvv/vmfge-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmfge-rv64.ll
index ea8e2fcf19af9..f1b29f7e30b21 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmfge-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfge-rv64.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=lp64d < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmfge.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
@@ -633,9 +633,8 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfge.nxv1f16.f16(
 define <vscale x 1 x i1> @intrinsic_vmfge_vf_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_vf_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vmfge.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmfge.nxv1f16.f16(
@@ -657,10 +656,9 @@ define <vscale x 1 x i1> @intrinsic_vmfge_mask_vf_nxv1f16_f16(<vscale x 1 x i1>
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmfge.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmfge.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -682,9 +680,8 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfge.nxv2f16.f16(
 define <vscale x 2 x i1> @intrinsic_vmfge_vf_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_vf_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vmfge.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmfge.nxv2f16.f16(
@@ -706,10 +703,9 @@ define <vscale x 2 x i1> @intrinsic_vmfge_mask_vf_nxv2f16_f16(<vscale x 2 x i1>
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmfge.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmfge.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -731,9 +727,8 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfge.nxv4f16.f16(
 define <vscale x 4 x i1> @intrinsic_vmfge_vf_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_vf_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vmfge.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfge.nxv4f16.f16(
@@ -755,10 +750,9 @@ define <vscale x 4 x i1> @intrinsic_vmfge_mask_vf_nxv4f16_f16(<vscale x 4 x i1>
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmfge.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmfge.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv.v.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -780,9 +774,8 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfge.nxv8f16.f16(
 define <vscale x 8 x i1> @intrinsic_vmfge_vf_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_vf_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vmfge.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmfge.nxv8f16.f16(
@@ -804,10 +797,9 @@ define <vscale x 8 x i1> @intrinsic_vmfge_mask_vf_nxv8f16_f16(<vscale x 8 x i1>
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v11, v0
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfge.vf v11, v8, ft0, v0.t
+; CHECK-NEXT:    vmfge.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v11
 ; CHECK-NEXT:    ret
 entry:
@@ -829,9 +821,8 @@ declare <vscale x 16 x i1> @llvm.riscv.vmfge.nxv16f16.f16(
 define <vscale x 16 x i1> @intrinsic_vmfge_vf_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_vf_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vmfge.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmfge.nxv16f16.f16(
@@ -853,10 +844,9 @@ define <vscale x 16 x i1> @intrinsic_vmfge_mask_vf_nxv16f16_f16(<vscale x 16 x i
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v13, v0
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfge.vf v13, v8, ft0, v0.t
+; CHECK-NEXT:    vmfge.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v13
 ; CHECK-NEXT:    ret
 entry:
@@ -878,9 +868,8 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfge.nxv1f32.f32(
 define <vscale x 1 x i1> @intrinsic_vmfge_vf_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_vf_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vmfge.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmfge.nxv1f32.f32(
@@ -902,10 +891,9 @@ define <vscale x 1 x i1> @intrinsic_vmfge_mask_vf_nxv1f32_f32(<vscale x 1 x i1>
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmfge.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmfge.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -927,9 +915,8 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfge.nxv2f32.f32(
 define <vscale x 2 x i1> @intrinsic_vmfge_vf_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_vf_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vmfge.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmfge.nxv2f32.f32(
@@ -951,10 +938,9 @@ define <vscale x 2 x i1> @intrinsic_vmfge_mask_vf_nxv2f32_f32(<vscale x 2 x i1>
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmfge.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmfge.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv.v.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -976,9 +962,8 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfge.nxv4f32.f32(
 define <vscale x 4 x i1> @intrinsic_vmfge_vf_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_vf_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vmfge.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfge.nxv4f32.f32(
@@ -1000,10 +985,9 @@ define <vscale x 4 x i1> @intrinsic_vmfge_mask_vf_nxv4f32_f32(<vscale x 4 x i1>
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v11, v0
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfge.vf v11, v8, ft0, v0.t
+; CHECK-NEXT:    vmfge.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v11
 ; CHECK-NEXT:    ret
 entry:
@@ -1025,9 +1009,8 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfge.nxv8f32.f32(
 define <vscale x 8 x i1> @intrinsic_vmfge_vf_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_vf_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vmfge.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmfge.nxv8f32.f32(
@@ -1049,10 +1032,9 @@ define <vscale x 8 x i1> @intrinsic_vmfge_mask_vf_nxv8f32_f32(<vscale x 8 x i1>
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v13, v0
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfge.vf v13, v8, ft0, v0.t
+; CHECK-NEXT:    vmfge.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v13
 ; CHECK-NEXT:    ret
 entry:
@@ -1074,9 +1056,8 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfge.nxv1f64.f64(
 define <vscale x 1 x i1> @intrinsic_vmfge_vf_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_vf_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
-; CHECK-NEXT:    vmfge.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmfge.nxv1f64.f64(
@@ -1098,10 +1079,9 @@ define <vscale x 1 x i1> @intrinsic_vmfge_mask_vf_nxv1f64_f64(<vscale x 1 x i1>
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmfge.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmfge.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv.v.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -1123,9 +1103,8 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfge.nxv2f64.f64(
 define <vscale x 2 x i1> @intrinsic_vmfge_vf_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_vf_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, mu
-; CHECK-NEXT:    vmfge.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmfge.nxv2f64.f64(
@@ -1147,10 +1126,9 @@ define <vscale x 2 x i1> @intrinsic_vmfge_mask_vf_nxv2f64_f64(<vscale x 2 x i1>
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v11, v0
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfge.vf v11, v8, ft0, v0.t
+; CHECK-NEXT:    vmfge.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v11
 ; CHECK-NEXT:    ret
 entry:
@@ -1172,9 +1150,8 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfge.nxv4f64.f64(
 define <vscale x 4 x i1> @intrinsic_vmfge_vf_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_vf_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
-; CHECK-NEXT:    vmfge.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vmfge.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfge.nxv4f64.f64(
@@ -1196,10 +1173,9 @@ define <vscale x 4 x i1> @intrinsic_vmfge_mask_vf_nxv4f64_f64(<vscale x 4 x i1>
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v13, v0
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfge.vf v13, v8, ft0, v0.t
+; CHECK-NEXT:    vmfge.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v13
 ; CHECK-NEXT:    ret
 entry:

diff  --git a/llvm/test/CodeGen/RISCV/rvv/vmfgt-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmfgt-rv32.ll
index a8246870ac786..2d2567a6b644c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmfgt-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfgt-rv32.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=ilp32d < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmfgt.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
@@ -633,9 +633,8 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfgt.nxv1f16.f16(
 define <vscale x 1 x i1> @intrinsic_vmfgt_vf_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_vf_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vmfgt.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmfgt.nxv1f16.f16(
@@ -657,10 +656,9 @@ define <vscale x 1 x i1> @intrinsic_vmfgt_mask_vf_nxv1f16_f16(<vscale x 1 x i1>
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmfgt.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -682,9 +680,8 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfgt.nxv2f16.f16(
 define <vscale x 2 x i1> @intrinsic_vmfgt_vf_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_vf_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vmfgt.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmfgt.nxv2f16.f16(
@@ -706,10 +703,9 @@ define <vscale x 2 x i1> @intrinsic_vmfgt_mask_vf_nxv2f16_f16(<vscale x 2 x i1>
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmfgt.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -731,9 +727,8 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfgt.nxv4f16.f16(
 define <vscale x 4 x i1> @intrinsic_vmfgt_vf_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_vf_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vmfgt.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfgt.nxv4f16.f16(
@@ -755,10 +750,9 @@ define <vscale x 4 x i1> @intrinsic_vmfgt_mask_vf_nxv4f16_f16(<vscale x 4 x i1>
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmfgt.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv.v.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -780,9 +774,8 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfgt.nxv8f16.f16(
 define <vscale x 8 x i1> @intrinsic_vmfgt_vf_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_vf_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vmfgt.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmfgt.nxv8f16.f16(
@@ -804,10 +797,9 @@ define <vscale x 8 x i1> @intrinsic_vmfgt_mask_vf_nxv8f16_f16(<vscale x 8 x i1>
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v11, v0
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v11, v8, ft0, v0.t
+; CHECK-NEXT:    vmfgt.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v11
 ; CHECK-NEXT:    ret
 entry:
@@ -829,9 +821,8 @@ declare <vscale x 16 x i1> @llvm.riscv.vmfgt.nxv16f16.f16(
 define <vscale x 16 x i1> @intrinsic_vmfgt_vf_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_vf_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vmfgt.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmfgt.nxv16f16.f16(
@@ -853,10 +844,9 @@ define <vscale x 16 x i1> @intrinsic_vmfgt_mask_vf_nxv16f16_f16(<vscale x 16 x i
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v13, v0
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v13, v8, ft0, v0.t
+; CHECK-NEXT:    vmfgt.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v13
 ; CHECK-NEXT:    ret
 entry:
@@ -878,9 +868,8 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfgt.nxv1f32.f32(
 define <vscale x 1 x i1> @intrinsic_vmfgt_vf_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_vf_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vmfgt.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmfgt.nxv1f32.f32(
@@ -902,10 +891,9 @@ define <vscale x 1 x i1> @intrinsic_vmfgt_mask_vf_nxv1f32_f32(<vscale x 1 x i1>
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmfgt.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -927,9 +915,8 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfgt.nxv2f32.f32(
 define <vscale x 2 x i1> @intrinsic_vmfgt_vf_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_vf_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vmfgt.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmfgt.nxv2f32.f32(
@@ -951,10 +938,9 @@ define <vscale x 2 x i1> @intrinsic_vmfgt_mask_vf_nxv2f32_f32(<vscale x 2 x i1>
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmfgt.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv.v.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -976,9 +962,8 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfgt.nxv4f32.f32(
 define <vscale x 4 x i1> @intrinsic_vmfgt_vf_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_vf_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vmfgt.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfgt.nxv4f32.f32(
@@ -1000,10 +985,9 @@ define <vscale x 4 x i1> @intrinsic_vmfgt_mask_vf_nxv4f32_f32(<vscale x 4 x i1>
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v11, v0
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v11, v8, ft0, v0.t
+; CHECK-NEXT:    vmfgt.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v11
 ; CHECK-NEXT:    ret
 entry:
@@ -1025,9 +1009,8 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfgt.nxv8f32.f32(
 define <vscale x 8 x i1> @intrinsic_vmfgt_vf_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_vf_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vmfgt.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmfgt.nxv8f32.f32(
@@ -1049,10 +1032,9 @@ define <vscale x 8 x i1> @intrinsic_vmfgt_mask_vf_nxv8f32_f32(<vscale x 8 x i1>
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v13, v0
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v13, v8, ft0, v0.t
+; CHECK-NEXT:    vmfgt.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v13
 ; CHECK-NEXT:    ret
 entry:
@@ -1074,13 +1056,8 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfgt.nxv1f64.f64(
 define <vscale x 1 x i1> @intrinsic_vmfgt_vf_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_vf_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, mu
-; CHECK-NEXT:    vmfgt.vf v0, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmfgt.nxv1f64.f64(
@@ -1101,16 +1078,11 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfgt.mask.nxv1f64.f64(
 define <vscale x 1 x i1> @intrinsic_vmfgt_mask_vf_nxv1f64_f64(<vscale x 1 x i1> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmfgt.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv.v.v v0, v10
-; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmfgt.mask.nxv1f64.f64(
@@ -1131,13 +1103,8 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfgt.nxv2f64.f64(
 define <vscale x 2 x i1> @intrinsic_vmfgt_vf_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_vf_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
-; CHECK-NEXT:    vmfgt.vf v0, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmfgt.nxv2f64.f64(
@@ -1158,16 +1125,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfgt.mask.nxv2f64.f64(
 define <vscale x 2 x i1> @intrinsic_vmfgt_mask_vf_nxv2f64_f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
 ; CHECK-NEXT:    vmv1r.v v11, v0
-; CHECK-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v11, v8, ft0, v0.t
+; CHECK-NEXT:    vmfgt.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v11
-; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmfgt.mask.nxv2f64.f64(
@@ -1188,13 +1150,8 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfgt.nxv4f64.f64(
 define <vscale x 4 x i1> @intrinsic_vmfgt_vf_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_vf_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
-; CHECK-NEXT:    vmfgt.vf v0, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfgt.nxv4f64.f64(
@@ -1215,16 +1172,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfgt.mask.nxv4f64.f64(
 define <vscale x 4 x i1> @intrinsic_vmfgt_mask_vf_nxv4f64_f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
 ; CHECK-NEXT:    vmv1r.v v13, v0
-; CHECK-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v13, v8, ft0, v0.t
+; CHECK-NEXT:    vmfgt.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v13
-; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfgt.mask.nxv4f64.f64(

diff  --git a/llvm/test/CodeGen/RISCV/rvv/vmfgt-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmfgt-rv64.ll
index 457ea76560ddd..89866057c90d3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmfgt-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfgt-rv64.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=lp64d < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmfgt.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
@@ -633,9 +633,8 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfgt.nxv1f16.f16(
 define <vscale x 1 x i1> @intrinsic_vmfgt_vf_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_vf_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vmfgt.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmfgt.nxv1f16.f16(
@@ -657,10 +656,9 @@ define <vscale x 1 x i1> @intrinsic_vmfgt_mask_vf_nxv1f16_f16(<vscale x 1 x i1>
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmfgt.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -682,9 +680,8 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfgt.nxv2f16.f16(
 define <vscale x 2 x i1> @intrinsic_vmfgt_vf_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_vf_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vmfgt.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmfgt.nxv2f16.f16(
@@ -706,10 +703,9 @@ define <vscale x 2 x i1> @intrinsic_vmfgt_mask_vf_nxv2f16_f16(<vscale x 2 x i1>
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmfgt.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -731,9 +727,8 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfgt.nxv4f16.f16(
 define <vscale x 4 x i1> @intrinsic_vmfgt_vf_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_vf_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vmfgt.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfgt.nxv4f16.f16(
@@ -755,10 +750,9 @@ define <vscale x 4 x i1> @intrinsic_vmfgt_mask_vf_nxv4f16_f16(<vscale x 4 x i1>
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmfgt.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv.v.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -780,9 +774,8 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfgt.nxv8f16.f16(
 define <vscale x 8 x i1> @intrinsic_vmfgt_vf_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_vf_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vmfgt.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmfgt.nxv8f16.f16(
@@ -804,10 +797,9 @@ define <vscale x 8 x i1> @intrinsic_vmfgt_mask_vf_nxv8f16_f16(<vscale x 8 x i1>
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v11, v0
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v11, v8, ft0, v0.t
+; CHECK-NEXT:    vmfgt.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v11
 ; CHECK-NEXT:    ret
 entry:
@@ -829,9 +821,8 @@ declare <vscale x 16 x i1> @llvm.riscv.vmfgt.nxv16f16.f16(
 define <vscale x 16 x i1> @intrinsic_vmfgt_vf_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_vf_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vmfgt.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmfgt.nxv16f16.f16(
@@ -853,10 +844,9 @@ define <vscale x 16 x i1> @intrinsic_vmfgt_mask_vf_nxv16f16_f16(<vscale x 16 x i
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v13, v0
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v13, v8, ft0, v0.t
+; CHECK-NEXT:    vmfgt.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v13
 ; CHECK-NEXT:    ret
 entry:
@@ -878,9 +868,8 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfgt.nxv1f32.f32(
 define <vscale x 1 x i1> @intrinsic_vmfgt_vf_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_vf_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vmfgt.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmfgt.nxv1f32.f32(
@@ -902,10 +891,9 @@ define <vscale x 1 x i1> @intrinsic_vmfgt_mask_vf_nxv1f32_f32(<vscale x 1 x i1>
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmfgt.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -927,9 +915,8 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfgt.nxv2f32.f32(
 define <vscale x 2 x i1> @intrinsic_vmfgt_vf_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_vf_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vmfgt.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmfgt.nxv2f32.f32(
@@ -951,10 +938,9 @@ define <vscale x 2 x i1> @intrinsic_vmfgt_mask_vf_nxv2f32_f32(<vscale x 2 x i1>
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmfgt.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv.v.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -976,9 +962,8 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfgt.nxv4f32.f32(
 define <vscale x 4 x i1> @intrinsic_vmfgt_vf_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_vf_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vmfgt.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfgt.nxv4f32.f32(
@@ -1000,10 +985,9 @@ define <vscale x 4 x i1> @intrinsic_vmfgt_mask_vf_nxv4f32_f32(<vscale x 4 x i1>
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v11, v0
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v11, v8, ft0, v0.t
+; CHECK-NEXT:    vmfgt.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v11
 ; CHECK-NEXT:    ret
 entry:
@@ -1025,9 +1009,8 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfgt.nxv8f32.f32(
 define <vscale x 8 x i1> @intrinsic_vmfgt_vf_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_vf_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vmfgt.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmfgt.nxv8f32.f32(
@@ -1049,10 +1032,9 @@ define <vscale x 8 x i1> @intrinsic_vmfgt_mask_vf_nxv8f32_f32(<vscale x 8 x i1>
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v13, v0
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v13, v8, ft0, v0.t
+; CHECK-NEXT:    vmfgt.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v13
 ; CHECK-NEXT:    ret
 entry:
@@ -1074,9 +1056,8 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfgt.nxv1f64.f64(
 define <vscale x 1 x i1> @intrinsic_vmfgt_vf_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_vf_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
-; CHECK-NEXT:    vmfgt.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmfgt.nxv1f64.f64(
@@ -1098,10 +1079,9 @@ define <vscale x 1 x i1> @intrinsic_vmfgt_mask_vf_nxv1f64_f64(<vscale x 1 x i1>
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmfgt.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmfgt.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv.v.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -1123,9 +1103,8 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfgt.nxv2f64.f64(
 define <vscale x 2 x i1> @intrinsic_vmfgt_vf_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_vf_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, mu
-; CHECK-NEXT:    vmfgt.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmfgt.nxv2f64.f64(
@@ -1147,10 +1126,9 @@ define <vscale x 2 x i1> @intrinsic_vmfgt_mask_vf_nxv2f64_f64(<vscale x 2 x i1>
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v11, v0
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfgt.vf v11, v8, ft0, v0.t
+; CHECK-NEXT:    vmfgt.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v11
 ; CHECK-NEXT:    ret
 entry:
@@ -1172,9 +1150,8 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfgt.nxv4f64.f64(
 define <vscale x 4 x i1> @intrinsic_vmfgt_vf_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_vf_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
-; CHECK-NEXT:    vmfgt.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vmfgt.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfgt.nxv4f64.f64(
@@ -1196,10 +1173,9 @@ define <vscale x 4 x i1> @intrinsic_vmfgt_mask_vf_nxv4f64_f64(<vscale x 4 x i1>
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v13, v0
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfgt.vf v13, v8, ft0, v0.t
+; CHECK-NEXT:    vmfgt.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v13
 ; CHECK-NEXT:    ret
 entry:

diff  --git a/llvm/test/CodeGen/RISCV/rvv/vmfle-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmfle-rv32.ll
index 803eb1ceaab13..ba8e5c560958d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmfle-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfle-rv32.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=ilp32d < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmfle.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
@@ -633,9 +633,8 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfle.nxv1f16.f16(
 define <vscale x 1 x i1> @intrinsic_vmfle_vf_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_vf_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vmfle.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmfle.nxv1f16.f16(
@@ -657,10 +656,9 @@ define <vscale x 1 x i1> @intrinsic_vmfle_mask_vf_nxv1f16_f16(<vscale x 1 x i1>
 ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmfle.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmfle.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -682,9 +680,8 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfle.nxv2f16.f16(
 define <vscale x 2 x i1> @intrinsic_vmfle_vf_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_vf_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vmfle.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmfle.nxv2f16.f16(
@@ -706,10 +703,9 @@ define <vscale x 2 x i1> @intrinsic_vmfle_mask_vf_nxv2f16_f16(<vscale x 2 x i1>
 ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmfle.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmfle.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -731,9 +727,8 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfle.nxv4f16.f16(
 define <vscale x 4 x i1> @intrinsic_vmfle_vf_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_vf_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vmfle.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfle.nxv4f16.f16(
@@ -755,10 +750,9 @@ define <vscale x 4 x i1> @intrinsic_vmfle_mask_vf_nxv4f16_f16(<vscale x 4 x i1>
 ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmfle.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmfle.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv.v.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -780,9 +774,8 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfle.nxv8f16.f16(
 define <vscale x 8 x i1> @intrinsic_vmfle_vf_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_vf_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vmfle.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmfle.nxv8f16.f16(
@@ -804,10 +797,9 @@ define <vscale x 8 x i1> @intrinsic_vmfle_mask_vf_nxv8f16_f16(<vscale x 8 x i1>
 ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v11, v0
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfle.vf v11, v8, ft0, v0.t
+; CHECK-NEXT:    vmfle.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v11
 ; CHECK-NEXT:    ret
 entry:
@@ -829,9 +821,8 @@ declare <vscale x 16 x i1> @llvm.riscv.vmfle.nxv16f16.f16(
 define <vscale x 16 x i1> @intrinsic_vmfle_vf_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_vf_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vmfle.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmfle.nxv16f16.f16(
@@ -853,10 +844,9 @@ define <vscale x 16 x i1> @intrinsic_vmfle_mask_vf_nxv16f16_f16(<vscale x 16 x i
 ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v13, v0
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vf v13, v8, ft0, v0.t
+; CHECK-NEXT:    vmfle.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v13
 ; CHECK-NEXT:    ret
 entry:
@@ -878,9 +868,8 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfle.nxv1f32.f32(
 define <vscale x 1 x i1> @intrinsic_vmfle_vf_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_vf_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vmfle.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmfle.nxv1f32.f32(
@@ -902,10 +891,9 @@ define <vscale x 1 x i1> @intrinsic_vmfle_mask_vf_nxv1f32_f32(<vscale x 1 x i1>
 ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmfle.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmfle.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -927,9 +915,8 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfle.nxv2f32.f32(
 define <vscale x 2 x i1> @intrinsic_vmfle_vf_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_vf_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vmfle.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmfle.nxv2f32.f32(
@@ -951,10 +938,9 @@ define <vscale x 2 x i1> @intrinsic_vmfle_mask_vf_nxv2f32_f32(<vscale x 2 x i1>
 ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmfle.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmfle.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv.v.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -976,9 +962,8 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfle.nxv4f32.f32(
 define <vscale x 4 x i1> @intrinsic_vmfle_vf_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_vf_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vmfle.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfle.nxv4f32.f32(
@@ -1000,10 +985,9 @@ define <vscale x 4 x i1> @intrinsic_vmfle_mask_vf_nxv4f32_f32(<vscale x 4 x i1>
 ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v11, v0
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfle.vf v11, v8, ft0, v0.t
+; CHECK-NEXT:    vmfle.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v11
 ; CHECK-NEXT:    ret
 entry:
@@ -1025,9 +1009,8 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfle.nxv8f32.f32(
 define <vscale x 8 x i1> @intrinsic_vmfle_vf_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_vf_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vmfle.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmfle.nxv8f32.f32(
@@ -1049,10 +1032,9 @@ define <vscale x 8 x i1> @intrinsic_vmfle_mask_vf_nxv8f32_f32(<vscale x 8 x i1>
 ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v13, v0
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vf v13, v8, ft0, v0.t
+; CHECK-NEXT:    vmfle.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v13
 ; CHECK-NEXT:    ret
 entry:
@@ -1074,13 +1056,8 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfle.nxv1f64.f64(
 define <vscale x 1 x i1> @intrinsic_vmfle_vf_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_vf_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, mu
-; CHECK-NEXT:    vmfle.vf v0, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmfle.nxv1f64.f64(
@@ -1101,16 +1078,11 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfle.mask.nxv1f64.f64(
 define <vscale x 1 x i1> @intrinsic_vmfle_mask_vf_nxv1f64_f64(<vscale x 1 x i1> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmfle.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmfle.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv.v.v v0, v10
-; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmfle.mask.nxv1f64.f64(
@@ -1131,13 +1103,8 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfle.nxv2f64.f64(
 define <vscale x 2 x i1> @intrinsic_vmfle_vf_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_vf_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
-; CHECK-NEXT:    vmfle.vf v0, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmfle.nxv2f64.f64(
@@ -1158,16 +1125,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfle.mask.nxv2f64.f64(
 define <vscale x 2 x i1> @intrinsic_vmfle_mask_vf_nxv2f64_f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
 ; CHECK-NEXT:    vmv1r.v v11, v0
-; CHECK-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfle.vf v11, v8, ft0, v0.t
+; CHECK-NEXT:    vmfle.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v11
-; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmfle.mask.nxv2f64.f64(
@@ -1188,13 +1150,8 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfle.nxv4f64.f64(
 define <vscale x 4 x i1> @intrinsic_vmfle_vf_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_vf_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
-; CHECK-NEXT:    vmfle.vf v0, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfle.nxv4f64.f64(
@@ -1215,16 +1172,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfle.mask.nxv4f64.f64(
 define <vscale x 4 x i1> @intrinsic_vmfle_mask_vf_nxv4f64_f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
 ; CHECK-NEXT:    vmv1r.v v13, v0
-; CHECK-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vf v13, v8, ft0, v0.t
+; CHECK-NEXT:    vmfle.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v13
-; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfle.mask.nxv4f64.f64(

diff  --git a/llvm/test/CodeGen/RISCV/rvv/vmfle-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmfle-rv64.ll
index d28d8d73b1960..0fa7e7ae6c265 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmfle-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfle-rv64.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=lp64d < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmfle.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
@@ -633,9 +633,8 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfle.nxv1f16.f16(
 define <vscale x 1 x i1> @intrinsic_vmfle_vf_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_vf_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vmfle.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmfle.nxv1f16.f16(
@@ -657,10 +656,9 @@ define <vscale x 1 x i1> @intrinsic_vmfle_mask_vf_nxv1f16_f16(<vscale x 1 x i1>
 ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmfle.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmfle.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -682,9 +680,8 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfle.nxv2f16.f16(
 define <vscale x 2 x i1> @intrinsic_vmfle_vf_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_vf_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vmfle.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmfle.nxv2f16.f16(
@@ -706,10 +703,9 @@ define <vscale x 2 x i1> @intrinsic_vmfle_mask_vf_nxv2f16_f16(<vscale x 2 x i1>
 ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmfle.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmfle.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -731,9 +727,8 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfle.nxv4f16.f16(
 define <vscale x 4 x i1> @intrinsic_vmfle_vf_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_vf_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vmfle.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfle.nxv4f16.f16(
@@ -755,10 +750,9 @@ define <vscale x 4 x i1> @intrinsic_vmfle_mask_vf_nxv4f16_f16(<vscale x 4 x i1>
 ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmfle.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmfle.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv.v.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -780,9 +774,8 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfle.nxv8f16.f16(
 define <vscale x 8 x i1> @intrinsic_vmfle_vf_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_vf_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vmfle.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmfle.nxv8f16.f16(
@@ -804,10 +797,9 @@ define <vscale x 8 x i1> @intrinsic_vmfle_mask_vf_nxv8f16_f16(<vscale x 8 x i1>
 ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v11, v0
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfle.vf v11, v8, ft0, v0.t
+; CHECK-NEXT:    vmfle.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v11
 ; CHECK-NEXT:    ret
 entry:
@@ -829,9 +821,8 @@ declare <vscale x 16 x i1> @llvm.riscv.vmfle.nxv16f16.f16(
 define <vscale x 16 x i1> @intrinsic_vmfle_vf_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_vf_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vmfle.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmfle.nxv16f16.f16(
@@ -853,10 +844,9 @@ define <vscale x 16 x i1> @intrinsic_vmfle_mask_vf_nxv16f16_f16(<vscale x 16 x i
 ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v13, v0
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vf v13, v8, ft0, v0.t
+; CHECK-NEXT:    vmfle.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v13
 ; CHECK-NEXT:    ret
 entry:
@@ -878,9 +868,8 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfle.nxv1f32.f32(
 define <vscale x 1 x i1> @intrinsic_vmfle_vf_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_vf_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vmfle.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmfle.nxv1f32.f32(
@@ -902,10 +891,9 @@ define <vscale x 1 x i1> @intrinsic_vmfle_mask_vf_nxv1f32_f32(<vscale x 1 x i1>
 ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmfle.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmfle.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -927,9 +915,8 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfle.nxv2f32.f32(
 define <vscale x 2 x i1> @intrinsic_vmfle_vf_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_vf_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vmfle.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmfle.nxv2f32.f32(
@@ -951,10 +938,9 @@ define <vscale x 2 x i1> @intrinsic_vmfle_mask_vf_nxv2f32_f32(<vscale x 2 x i1>
 ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmfle.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmfle.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv.v.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -976,9 +962,8 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfle.nxv4f32.f32(
 define <vscale x 4 x i1> @intrinsic_vmfle_vf_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_vf_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vmfle.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfle.nxv4f32.f32(
@@ -1000,10 +985,9 @@ define <vscale x 4 x i1> @intrinsic_vmfle_mask_vf_nxv4f32_f32(<vscale x 4 x i1>
 ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v11, v0
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfle.vf v11, v8, ft0, v0.t
+; CHECK-NEXT:    vmfle.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v11
 ; CHECK-NEXT:    ret
 entry:
@@ -1025,9 +1009,8 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfle.nxv8f32.f32(
 define <vscale x 8 x i1> @intrinsic_vmfle_vf_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_vf_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vmfle.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmfle.nxv8f32.f32(
@@ -1049,10 +1032,9 @@ define <vscale x 8 x i1> @intrinsic_vmfle_mask_vf_nxv8f32_f32(<vscale x 8 x i1>
 ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v13, v0
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vf v13, v8, ft0, v0.t
+; CHECK-NEXT:    vmfle.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v13
 ; CHECK-NEXT:    ret
 entry:
@@ -1074,9 +1056,8 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfle.nxv1f64.f64(
 define <vscale x 1 x i1> @intrinsic_vmfle_vf_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_vf_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
-; CHECK-NEXT:    vmfle.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmfle.nxv1f64.f64(
@@ -1098,10 +1079,9 @@ define <vscale x 1 x i1> @intrinsic_vmfle_mask_vf_nxv1f64_f64(<vscale x 1 x i1>
 ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmfle.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmfle.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv.v.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -1123,9 +1103,8 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfle.nxv2f64.f64(
 define <vscale x 2 x i1> @intrinsic_vmfle_vf_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_vf_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, mu
-; CHECK-NEXT:    vmfle.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmfle.nxv2f64.f64(
@@ -1147,10 +1126,9 @@ define <vscale x 2 x i1> @intrinsic_vmfle_mask_vf_nxv2f64_f64(<vscale x 2 x i1>
 ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v11, v0
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfle.vf v11, v8, ft0, v0.t
+; CHECK-NEXT:    vmfle.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v11
 ; CHECK-NEXT:    ret
 entry:
@@ -1172,9 +1150,8 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfle.nxv4f64.f64(
 define <vscale x 4 x i1> @intrinsic_vmfle_vf_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_vf_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
-; CHECK-NEXT:    vmfle.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vmfle.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfle.nxv4f64.f64(
@@ -1196,10 +1173,9 @@ define <vscale x 4 x i1> @intrinsic_vmfle_mask_vf_nxv4f64_f64(<vscale x 4 x i1>
 ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v13, v0
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfle.vf v13, v8, ft0, v0.t
+; CHECK-NEXT:    vmfle.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v13
 ; CHECK-NEXT:    ret
 entry:

diff  --git a/llvm/test/CodeGen/RISCV/rvv/vmflt-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmflt-rv32.ll
index 9485c178df504..80bb12e6db9f6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmflt-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmflt-rv32.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=ilp32d < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmflt.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
@@ -633,9 +633,8 @@ declare <vscale x 1 x i1> @llvm.riscv.vmflt.nxv1f16.f16(
 define <vscale x 1 x i1> @intrinsic_vmflt_vf_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_vf_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vmflt.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmflt.nxv1f16.f16(
@@ -657,10 +656,9 @@ define <vscale x 1 x i1> @intrinsic_vmflt_mask_vf_nxv1f16_f16(<vscale x 1 x i1>
 ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmflt.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -682,9 +680,8 @@ declare <vscale x 2 x i1> @llvm.riscv.vmflt.nxv2f16.f16(
 define <vscale x 2 x i1> @intrinsic_vmflt_vf_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_vf_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vmflt.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmflt.nxv2f16.f16(
@@ -706,10 +703,9 @@ define <vscale x 2 x i1> @intrinsic_vmflt_mask_vf_nxv2f16_f16(<vscale x 2 x i1>
 ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmflt.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -731,9 +727,8 @@ declare <vscale x 4 x i1> @llvm.riscv.vmflt.nxv4f16.f16(
 define <vscale x 4 x i1> @intrinsic_vmflt_vf_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_vf_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vmflt.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmflt.nxv4f16.f16(
@@ -755,10 +750,9 @@ define <vscale x 4 x i1> @intrinsic_vmflt_mask_vf_nxv4f16_f16(<vscale x 4 x i1>
 ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmflt.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv.v.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -780,9 +774,8 @@ declare <vscale x 8 x i1> @llvm.riscv.vmflt.nxv8f16.f16(
 define <vscale x 8 x i1> @intrinsic_vmflt_vf_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_vf_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmflt.nxv8f16.f16(
@@ -804,10 +797,9 @@ define <vscale x 8 x i1> @intrinsic_vmflt_mask_vf_nxv8f16_f16(<vscale x 8 x i1>
 ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v11, v0
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v11, v8, ft0, v0.t
+; CHECK-NEXT:    vmflt.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v11
 ; CHECK-NEXT:    ret
 entry:
@@ -829,9 +821,8 @@ declare <vscale x 16 x i1> @llvm.riscv.vmflt.nxv16f16.f16(
 define <vscale x 16 x i1> @intrinsic_vmflt_vf_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_vf_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmflt.nxv16f16.f16(
@@ -853,10 +844,9 @@ define <vscale x 16 x i1> @intrinsic_vmflt_mask_vf_nxv16f16_f16(<vscale x 16 x i
 ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v13, v0
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v13, v8, ft0, v0.t
+; CHECK-NEXT:    vmflt.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v13
 ; CHECK-NEXT:    ret
 entry:
@@ -878,9 +868,8 @@ declare <vscale x 1 x i1> @llvm.riscv.vmflt.nxv1f32.f32(
 define <vscale x 1 x i1> @intrinsic_vmflt_vf_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_vf_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vmflt.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmflt.nxv1f32.f32(
@@ -902,10 +891,9 @@ define <vscale x 1 x i1> @intrinsic_vmflt_mask_vf_nxv1f32_f32(<vscale x 1 x i1>
 ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmflt.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -927,9 +915,8 @@ declare <vscale x 2 x i1> @llvm.riscv.vmflt.nxv2f32.f32(
 define <vscale x 2 x i1> @intrinsic_vmflt_vf_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_vf_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vmflt.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmflt.nxv2f32.f32(
@@ -951,10 +938,9 @@ define <vscale x 2 x i1> @intrinsic_vmflt_mask_vf_nxv2f32_f32(<vscale x 2 x i1>
 ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmflt.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv.v.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -976,9 +962,8 @@ declare <vscale x 4 x i1> @llvm.riscv.vmflt.nxv4f32.f32(
 define <vscale x 4 x i1> @intrinsic_vmflt_vf_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_vf_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmflt.nxv4f32.f32(
@@ -1000,10 +985,9 @@ define <vscale x 4 x i1> @intrinsic_vmflt_mask_vf_nxv4f32_f32(<vscale x 4 x i1>
 ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v11, v0
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v11, v8, ft0, v0.t
+; CHECK-NEXT:    vmflt.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v11
 ; CHECK-NEXT:    ret
 entry:
@@ -1025,9 +1009,8 @@ declare <vscale x 8 x i1> @llvm.riscv.vmflt.nxv8f32.f32(
 define <vscale x 8 x i1> @intrinsic_vmflt_vf_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_vf_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmflt.nxv8f32.f32(
@@ -1049,10 +1032,9 @@ define <vscale x 8 x i1> @intrinsic_vmflt_mask_vf_nxv8f32_f32(<vscale x 8 x i1>
 ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v13, v0
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v13, v8, ft0, v0.t
+; CHECK-NEXT:    vmflt.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v13
 ; CHECK-NEXT:    ret
 entry:
@@ -1074,13 +1056,8 @@ declare <vscale x 1 x i1> @llvm.riscv.vmflt.nxv1f64.f64(
 define <vscale x 1 x i1> @intrinsic_vmflt_vf_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_vf_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, mu
-; CHECK-NEXT:    vmflt.vf v0, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmflt.nxv1f64.f64(
@@ -1101,16 +1078,11 @@ declare <vscale x 1 x i1> @llvm.riscv.vmflt.mask.nxv1f64.f64(
 define <vscale x 1 x i1> @intrinsic_vmflt_mask_vf_nxv1f64_f64(<vscale x 1 x i1> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmflt.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv.v.v v0, v10
-; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmflt.mask.nxv1f64.f64(
@@ -1131,13 +1103,8 @@ declare <vscale x 2 x i1> @llvm.riscv.vmflt.nxv2f64.f64(
 define <vscale x 2 x i1> @intrinsic_vmflt_vf_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_vf_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v0, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmflt.nxv2f64.f64(
@@ -1158,16 +1125,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmflt.mask.nxv2f64.f64(
 define <vscale x 2 x i1> @intrinsic_vmflt_mask_vf_nxv2f64_f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
 ; CHECK-NEXT:    vmv1r.v v11, v0
-; CHECK-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v11, v8, ft0, v0.t
+; CHECK-NEXT:    vmflt.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v11
-; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmflt.mask.nxv2f64.f64(
@@ -1188,13 +1150,8 @@ declare <vscale x 4 x i1> @llvm.riscv.vmflt.nxv4f64.f64(
 define <vscale x 4 x i1> @intrinsic_vmflt_vf_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_vf_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v0, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmflt.nxv4f64.f64(
@@ -1215,16 +1172,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmflt.mask.nxv4f64.f64(
 define <vscale x 4 x i1> @intrinsic_vmflt_mask_vf_nxv4f64_f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
 ; CHECK-NEXT:    vmv1r.v v13, v0
-; CHECK-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v13, v8, ft0, v0.t
+; CHECK-NEXT:    vmflt.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v13
-; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmflt.mask.nxv4f64.f64(
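
The rv32 f64 hunks above show the payoff of the ABI switch most clearly: under the default ilp32 ABI the double scalar is split across a0/a1 and has to be stored to the stack and reloaded with fld before the compare can use it, while under -target-abi=ilp32d it already arrives in fa0. A minimal stand-alone sketch of that pattern, distilled from the tests in this patch (the function name demo_vf_f64 is invented, and the CHECK lines simply mirror the updated expectations above):

; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
; RUN:   -target-abi=ilp32d < %s | FileCheck %s
declare <vscale x 1 x i1> @llvm.riscv.vmflt.nxv1f64.f64(
  <vscale x 1 x double>,
  double,
  i32)

define <vscale x 1 x i1> @demo_vf_f64(<vscale x 1 x double> %0, double %1, i32 %2) nounwind {
; CHECK-LABEL: demo_vf_f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
; CHECK-NEXT:    vmflt.vf v0, v8, fa0
; CHECK-NEXT:    ret
entry:
  ; The double operand %1 is passed in fa0 under ilp32d, so no
  ; sw/sw/fld reassembly through the stack is needed before vmflt.vf.
  %a = call <vscale x 1 x i1> @llvm.riscv.vmflt.nxv1f64.f64(
    <vscale x 1 x double> %0,
    double %1,
    i32 %2)
  ret <vscale x 1 x i1> %a
}

Without the -target-abi flag the same function would instead open with the addi sp/sw/sw/fld sequence that the hunks above delete.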

diff  --git a/llvm/test/CodeGen/RISCV/rvv/vmflt-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmflt-rv64.ll
index b55f491944323..55bb51365de74 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmflt-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmflt-rv64.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=lp64d < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmflt.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
@@ -633,9 +633,8 @@ declare <vscale x 1 x i1> @llvm.riscv.vmflt.nxv1f16.f16(
 define <vscale x 1 x i1> @intrinsic_vmflt_vf_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_vf_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vmflt.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmflt.nxv1f16.f16(
@@ -657,10 +656,9 @@ define <vscale x 1 x i1> @intrinsic_vmflt_mask_vf_nxv1f16_f16(<vscale x 1 x i1>
 ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmflt.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -682,9 +680,8 @@ declare <vscale x 2 x i1> @llvm.riscv.vmflt.nxv2f16.f16(
 define <vscale x 2 x i1> @intrinsic_vmflt_vf_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_vf_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vmflt.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmflt.nxv2f16.f16(
@@ -706,10 +703,9 @@ define <vscale x 2 x i1> @intrinsic_vmflt_mask_vf_nxv2f16_f16(<vscale x 2 x i1>
 ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmflt.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -731,9 +727,8 @@ declare <vscale x 4 x i1> @llvm.riscv.vmflt.nxv4f16.f16(
 define <vscale x 4 x i1> @intrinsic_vmflt_vf_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_vf_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vmflt.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmflt.nxv4f16.f16(
@@ -755,10 +750,9 @@ define <vscale x 4 x i1> @intrinsic_vmflt_mask_vf_nxv4f16_f16(<vscale x 4 x i1>
 ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmflt.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv.v.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -780,9 +774,8 @@ declare <vscale x 8 x i1> @llvm.riscv.vmflt.nxv8f16.f16(
 define <vscale x 8 x i1> @intrinsic_vmflt_vf_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_vf_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmflt.nxv8f16.f16(
@@ -804,10 +797,9 @@ define <vscale x 8 x i1> @intrinsic_vmflt_mask_vf_nxv8f16_f16(<vscale x 8 x i1>
 ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v11, v0
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v11, v8, ft0, v0.t
+; CHECK-NEXT:    vmflt.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v11
 ; CHECK-NEXT:    ret
 entry:
@@ -829,9 +821,8 @@ declare <vscale x 16 x i1> @llvm.riscv.vmflt.nxv16f16.f16(
 define <vscale x 16 x i1> @intrinsic_vmflt_vf_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_vf_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmflt.nxv16f16.f16(
@@ -853,10 +844,9 @@ define <vscale x 16 x i1> @intrinsic_vmflt_mask_vf_nxv16f16_f16(<vscale x 16 x i
 ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v13, v0
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v13, v8, ft0, v0.t
+; CHECK-NEXT:    vmflt.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v13
 ; CHECK-NEXT:    ret
 entry:
@@ -878,9 +868,8 @@ declare <vscale x 1 x i1> @llvm.riscv.vmflt.nxv1f32.f32(
 define <vscale x 1 x i1> @intrinsic_vmflt_vf_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_vf_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vmflt.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmflt.nxv1f32.f32(
@@ -902,10 +891,9 @@ define <vscale x 1 x i1> @intrinsic_vmflt_mask_vf_nxv1f32_f32(<vscale x 1 x i1>
 ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmflt.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -927,9 +915,8 @@ declare <vscale x 2 x i1> @llvm.riscv.vmflt.nxv2f32.f32(
 define <vscale x 2 x i1> @intrinsic_vmflt_vf_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_vf_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vmflt.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmflt.nxv2f32.f32(
@@ -951,10 +938,9 @@ define <vscale x 2 x i1> @intrinsic_vmflt_mask_vf_nxv2f32_f32(<vscale x 2 x i1>
 ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmflt.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv.v.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -976,9 +962,8 @@ declare <vscale x 4 x i1> @llvm.riscv.vmflt.nxv4f32.f32(
 define <vscale x 4 x i1> @intrinsic_vmflt_vf_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_vf_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmflt.nxv4f32.f32(
@@ -1000,10 +985,9 @@ define <vscale x 4 x i1> @intrinsic_vmflt_mask_vf_nxv4f32_f32(<vscale x 4 x i1>
 ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v11, v0
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v11, v8, ft0, v0.t
+; CHECK-NEXT:    vmflt.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v11
 ; CHECK-NEXT:    ret
 entry:
@@ -1025,9 +1009,8 @@ declare <vscale x 8 x i1> @llvm.riscv.vmflt.nxv8f32.f32(
 define <vscale x 8 x i1> @intrinsic_vmflt_vf_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_vf_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmflt.nxv8f32.f32(
@@ -1049,10 +1032,9 @@ define <vscale x 8 x i1> @intrinsic_vmflt_mask_vf_nxv8f32_f32(<vscale x 8 x i1>
 ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v13, v0
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v13, v8, ft0, v0.t
+; CHECK-NEXT:    vmflt.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v13
 ; CHECK-NEXT:    ret
 entry:
@@ -1074,9 +1056,8 @@ declare <vscale x 1 x i1> @llvm.riscv.vmflt.nxv1f64.f64(
 define <vscale x 1 x i1> @intrinsic_vmflt_vf_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_vf_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
-; CHECK-NEXT:    vmflt.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmflt.nxv1f64.f64(
@@ -1098,10 +1079,9 @@ define <vscale x 1 x i1> @intrinsic_vmflt_mask_vf_nxv1f64_f64(<vscale x 1 x i1>
 ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmflt.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmflt.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv.v.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -1123,9 +1103,8 @@ declare <vscale x 2 x i1> @llvm.riscv.vmflt.nxv2f64.f64(
 define <vscale x 2 x i1> @intrinsic_vmflt_vf_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_vf_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, mu
-; CHECK-NEXT:    vmflt.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmflt.nxv2f64.f64(
@@ -1147,10 +1126,9 @@ define <vscale x 2 x i1> @intrinsic_vmflt_mask_vf_nxv2f64_f64(<vscale x 2 x i1>
 ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v11, v0
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmflt.vf v11, v8, ft0, v0.t
+; CHECK-NEXT:    vmflt.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v11
 ; CHECK-NEXT:    ret
 entry:
@@ -1172,9 +1150,8 @@ declare <vscale x 4 x i1> @llvm.riscv.vmflt.nxv4f64.f64(
 define <vscale x 4 x i1> @intrinsic_vmflt_vf_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_vf_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
-; CHECK-NEXT:    vmflt.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vmflt.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmflt.nxv4f64.f64(
@@ -1196,10 +1173,9 @@ define <vscale x 4 x i1> @intrinsic_vmflt_mask_vf_nxv4f64_f64(<vscale x 4 x i1>
 ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v13, v0
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmflt.vf v13, v8, ft0, v0.t
+; CHECK-NEXT:    vmflt.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v13
 ; CHECK-NEXT:    ret
 entry:

diff  --git a/llvm/test/CodeGen/RISCV/rvv/vmfne-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmfne-rv32.ll
index 30c8be8e6fe0c..ac45efea41597 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmfne-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfne-rv32.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=ilp32d < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmfne.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
@@ -633,9 +633,8 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfne.nxv1f16.f16(
 define <vscale x 1 x i1> @intrinsic_vmfne_vf_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_vf_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vmfne.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vmfne.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmfne.nxv1f16.f16(
@@ -657,10 +656,9 @@ define <vscale x 1 x i1> @intrinsic_vmfne_mask_vf_nxv1f16_f16(<vscale x 1 x i1>
 ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmfne.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmfne.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -682,9 +680,8 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfne.nxv2f16.f16(
 define <vscale x 2 x i1> @intrinsic_vmfne_vf_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_vf_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vmfne.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vmfne.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmfne.nxv2f16.f16(
@@ -706,10 +703,9 @@ define <vscale x 2 x i1> @intrinsic_vmfne_mask_vf_nxv2f16_f16(<vscale x 2 x i1>
 ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmfne.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmfne.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -731,9 +727,8 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfne.nxv4f16.f16(
 define <vscale x 4 x i1> @intrinsic_vmfne_vf_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_vf_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vmfne.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vmfne.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfne.nxv4f16.f16(
@@ -755,10 +750,9 @@ define <vscale x 4 x i1> @intrinsic_vmfne_mask_vf_nxv4f16_f16(<vscale x 4 x i1>
 ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmfne.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmfne.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv.v.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -780,9 +774,8 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfne.nxv8f16.f16(
 define <vscale x 8 x i1> @intrinsic_vmfne_vf_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_vf_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vmfne.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vmfne.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmfne.nxv8f16.f16(
@@ -804,10 +797,9 @@ define <vscale x 8 x i1> @intrinsic_vmfne_mask_vf_nxv8f16_f16(<vscale x 8 x i1>
 ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v11, v0
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfne.vf v11, v8, ft0, v0.t
+; CHECK-NEXT:    vmfne.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v11
 ; CHECK-NEXT:    ret
 entry:
@@ -829,9 +821,8 @@ declare <vscale x 16 x i1> @llvm.riscv.vmfne.nxv16f16.f16(
 define <vscale x 16 x i1> @intrinsic_vmfne_vf_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_vf_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vmfne.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vmfne.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmfne.nxv16f16.f16(
@@ -853,10 +844,9 @@ define <vscale x 16 x i1> @intrinsic_vmfne_mask_vf_nxv16f16_f16(<vscale x 16 x i
 ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v13, v0
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfne.vf v13, v8, ft0, v0.t
+; CHECK-NEXT:    vmfne.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v13
 ; CHECK-NEXT:    ret
 entry:
@@ -878,9 +868,8 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfne.nxv1f32.f32(
 define <vscale x 1 x i1> @intrinsic_vmfne_vf_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_vf_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vmfne.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vmfne.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmfne.nxv1f32.f32(
@@ -902,10 +891,9 @@ define <vscale x 1 x i1> @intrinsic_vmfne_mask_vf_nxv1f32_f32(<vscale x 1 x i1>
 ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmfne.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmfne.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -927,9 +915,8 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfne.nxv2f32.f32(
 define <vscale x 2 x i1> @intrinsic_vmfne_vf_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_vf_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vmfne.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vmfne.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmfne.nxv2f32.f32(
@@ -951,10 +938,9 @@ define <vscale x 2 x i1> @intrinsic_vmfne_mask_vf_nxv2f32_f32(<vscale x 2 x i1>
 ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmfne.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmfne.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv.v.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -976,9 +962,8 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfne.nxv4f32.f32(
 define <vscale x 4 x i1> @intrinsic_vmfne_vf_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_vf_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vmfne.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vmfne.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfne.nxv4f32.f32(
@@ -1000,10 +985,9 @@ define <vscale x 4 x i1> @intrinsic_vmfne_mask_vf_nxv4f32_f32(<vscale x 4 x i1>
 ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v11, v0
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfne.vf v11, v8, ft0, v0.t
+; CHECK-NEXT:    vmfne.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v11
 ; CHECK-NEXT:    ret
 entry:
@@ -1025,9 +1009,8 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfne.nxv8f32.f32(
 define <vscale x 8 x i1> @intrinsic_vmfne_vf_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_vf_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vmfne.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vmfne.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmfne.nxv8f32.f32(
@@ -1049,10 +1032,9 @@ define <vscale x 8 x i1> @intrinsic_vmfne_mask_vf_nxv8f32_f32(<vscale x 8 x i1>
 ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v13, v0
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfne.vf v13, v8, ft0, v0.t
+; CHECK-NEXT:    vmfne.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v13
 ; CHECK-NEXT:    ret
 entry:
@@ -1074,13 +1056,8 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfne.nxv1f64.f64(
 define <vscale x 1 x i1> @intrinsic_vmfne_vf_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_vf_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, mu
-; CHECK-NEXT:    vmfne.vf v0, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vmfne.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmfne.nxv1f64.f64(
@@ -1101,16 +1078,11 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfne.mask.nxv1f64.f64(
 define <vscale x 1 x i1> @intrinsic_vmfne_mask_vf_nxv1f64_f64(<vscale x 1 x i1> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmfne.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmfne.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv.v.v v0, v10
-; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmfne.mask.nxv1f64.f64(
@@ -1131,13 +1103,8 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfne.nxv2f64.f64(
 define <vscale x 2 x i1> @intrinsic_vmfne_vf_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_vf_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
-; CHECK-NEXT:    vmfne.vf v0, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vmfne.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmfne.nxv2f64.f64(
@@ -1158,16 +1125,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfne.mask.nxv2f64.f64(
 define <vscale x 2 x i1> @intrinsic_vmfne_mask_vf_nxv2f64_f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
 ; CHECK-NEXT:    vmv1r.v v11, v0
-; CHECK-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfne.vf v11, v8, ft0, v0.t
+; CHECK-NEXT:    vmfne.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v11
-; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmfne.mask.nxv2f64.f64(
@@ -1188,13 +1150,8 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfne.nxv4f64.f64(
 define <vscale x 4 x i1> @intrinsic_vmfne_vf_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_vf_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
-; CHECK-NEXT:    vmfne.vf v0, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vmfne.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfne.nxv4f64.f64(
@@ -1215,16 +1172,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfne.mask.nxv4f64.f64(
 define <vscale x 4 x i1> @intrinsic_vmfne_mask_vf_nxv4f64_f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
 ; CHECK-NEXT:    vmv1r.v v13, v0
-; CHECK-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfne.vf v13, v8, ft0, v0.t
+; CHECK-NEXT:    vmfne.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v13
-; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfne.mask.nxv4f64.f64(

diff  --git a/llvm/test/CodeGen/RISCV/rvv/vmfne-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmfne-rv64.ll
index e0d9c231516b0..26e2a5fcdc1dd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmfne-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfne-rv64.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=lp64d < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmfne.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
@@ -633,9 +633,8 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfne.nxv1f16.f16(
 define <vscale x 1 x i1> @intrinsic_vmfne_vf_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_vf_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vmfne.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vmfne.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmfne.nxv1f16.f16(
@@ -657,10 +656,9 @@ define <vscale x 1 x i1> @intrinsic_vmfne_mask_vf_nxv1f16_f16(<vscale x 1 x i1>
 ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmfne.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmfne.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -682,9 +680,8 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfne.nxv2f16.f16(
 define <vscale x 2 x i1> @intrinsic_vmfne_vf_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_vf_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vmfne.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vmfne.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmfne.nxv2f16.f16(
@@ -706,10 +703,9 @@ define <vscale x 2 x i1> @intrinsic_vmfne_mask_vf_nxv2f16_f16(<vscale x 2 x i1>
 ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmfne.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmfne.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -731,9 +727,8 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfne.nxv4f16.f16(
 define <vscale x 4 x i1> @intrinsic_vmfne_vf_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_vf_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vmfne.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vmfne.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfne.nxv4f16.f16(
@@ -755,10 +750,9 @@ define <vscale x 4 x i1> @intrinsic_vmfne_mask_vf_nxv4f16_f16(<vscale x 4 x i1>
 ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmfne.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmfne.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv.v.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -780,9 +774,8 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfne.nxv8f16.f16(
 define <vscale x 8 x i1> @intrinsic_vmfne_vf_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_vf_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vmfne.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vmfne.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmfne.nxv8f16.f16(
@@ -804,10 +797,9 @@ define <vscale x 8 x i1> @intrinsic_vmfne_mask_vf_nxv8f16_f16(<vscale x 8 x i1>
 ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v11, v0
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfne.vf v11, v8, ft0, v0.t
+; CHECK-NEXT:    vmfne.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v11
 ; CHECK-NEXT:    ret
 entry:
@@ -829,9 +821,8 @@ declare <vscale x 16 x i1> @llvm.riscv.vmfne.nxv16f16.f16(
 define <vscale x 16 x i1> @intrinsic_vmfne_vf_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_vf_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vmfne.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vmfne.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmfne.nxv16f16.f16(
@@ -853,10 +844,9 @@ define <vscale x 16 x i1> @intrinsic_vmfne_mask_vf_nxv16f16_f16(<vscale x 16 x i
 ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v13, v0
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfne.vf v13, v8, ft0, v0.t
+; CHECK-NEXT:    vmfne.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v13
 ; CHECK-NEXT:    ret
 entry:
@@ -878,9 +868,8 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfne.nxv1f32.f32(
 define <vscale x 1 x i1> @intrinsic_vmfne_vf_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_vf_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vmfne.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vmfne.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmfne.nxv1f32.f32(
@@ -902,10 +891,9 @@ define <vscale x 1 x i1> @intrinsic_vmfne_mask_vf_nxv1f32_f32(<vscale x 1 x i1>
 ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmfne.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmfne.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -927,9 +915,8 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfne.nxv2f32.f32(
 define <vscale x 2 x i1> @intrinsic_vmfne_vf_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_vf_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vmfne.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vmfne.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmfne.nxv2f32.f32(
@@ -951,10 +938,9 @@ define <vscale x 2 x i1> @intrinsic_vmfne_mask_vf_nxv2f32_f32(<vscale x 2 x i1>
 ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmfne.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmfne.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv.v.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -976,9 +962,8 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfne.nxv4f32.f32(
 define <vscale x 4 x i1> @intrinsic_vmfne_vf_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_vf_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vmfne.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vmfne.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfne.nxv4f32.f32(
@@ -1000,10 +985,9 @@ define <vscale x 4 x i1> @intrinsic_vmfne_mask_vf_nxv4f32_f32(<vscale x 4 x i1>
 ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v11, v0
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfne.vf v11, v8, ft0, v0.t
+; CHECK-NEXT:    vmfne.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v11
 ; CHECK-NEXT:    ret
 entry:
@@ -1025,9 +1009,8 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfne.nxv8f32.f32(
 define <vscale x 8 x i1> @intrinsic_vmfne_vf_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_vf_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vmfne.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vmfne.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmfne.nxv8f32.f32(
@@ -1049,10 +1032,9 @@ define <vscale x 8 x i1> @intrinsic_vmfne_mask_vf_nxv8f32_f32(<vscale x 8 x i1>
 ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v13, v0
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfne.vf v13, v8, ft0, v0.t
+; CHECK-NEXT:    vmfne.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v13
 ; CHECK-NEXT:    ret
 entry:
@@ -1074,9 +1056,8 @@ declare <vscale x 1 x i1> @llvm.riscv.vmfne.nxv1f64.f64(
 define <vscale x 1 x i1> @intrinsic_vmfne_vf_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_vf_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
-; CHECK-NEXT:    vmfne.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vmfne.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmfne.nxv1f64.f64(
@@ -1098,10 +1079,9 @@ define <vscale x 1 x i1> @intrinsic_vmfne_mask_vf_nxv1f64_f64(<vscale x 1 x i1>
 ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmfne.vf v10, v8, ft0, v0.t
+; CHECK-NEXT:    vmfne.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv.v.v v0, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -1123,9 +1103,8 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfne.nxv2f64.f64(
 define <vscale x 2 x i1> @intrinsic_vmfne_vf_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_vf_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, mu
-; CHECK-NEXT:    vmfne.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vmfne.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmfne.nxv2f64.f64(
@@ -1147,10 +1126,9 @@ define <vscale x 2 x i1> @intrinsic_vmfne_mask_vf_nxv2f64_f64(<vscale x 2 x i1>
 ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v11, v0
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    vmfne.vf v11, v8, ft0, v0.t
+; CHECK-NEXT:    vmfne.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v11
 ; CHECK-NEXT:    ret
 entry:
@@ -1172,9 +1150,8 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfne.nxv4f64.f64(
 define <vscale x 4 x i1> @intrinsic_vmfne_vf_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_vf_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
-; CHECK-NEXT:    vmfne.vf v0, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vmfne.vf v0, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfne.nxv4f64.f64(
@@ -1196,10 +1173,9 @@ define <vscale x 4 x i1> @intrinsic_vmfne_mask_vf_nxv4f64_f64(<vscale x 4 x i1>
 ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v13, v0
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v12
-; CHECK-NEXT:    vmfne.vf v13, v8, ft0, v0.t
+; CHECK-NEXT:    vmfne.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT:    vmv1r.v v0, v13
 ; CHECK-NEXT:    ret
 entry:

diff  --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
index fdd30fbdab3bf..c8390e645c07c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv64 -mattr=+m,+f,+d,+a,+c,+v \
-; RUN:    -verify-machineinstrs -O2 < %s | FileCheck %s
+; RUN:    -target-abi=lp64d -verify-machineinstrs -O2 < %s | FileCheck %s
 
 ; The following tests check whether inserting VSETVLI avoids inserting
 ; unneeded vsetvlis across basic blocks.
@@ -445,24 +445,22 @@ if.end:                                           ; preds = %if.else, %if.then
 define void @saxpy_vec(i64 %n, float %a, float* nocapture readonly %x, float* nocapture %y) {
 ; CHECK-LABEL: saxpy_vec:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a4, a0, e32, m8, ta, mu
-; CHECK-NEXT:    beqz a4, .LBB8_3
-; CHECK-NEXT:  # %bb.1: # %for.body.preheader
-; CHECK-NEXT:    fmv.w.x ft0, a1
-; CHECK-NEXT:  .LBB8_2: # %for.body
+; CHECK-NEXT:    vsetvli a3, a0, e32, m8, ta, mu
+; CHECK-NEXT:    beqz a3, .LBB8_2
+; CHECK-NEXT:  .LBB8_1: # %for.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vle32.v v8, (a2)
-; CHECK-NEXT:    vle32.v v16, (a3)
-; CHECK-NEXT:    slli a1, a4, 2
-; CHECK-NEXT:    add a2, a2, a1
-; CHECK-NEXT:    vsetvli zero, a4, e32, m8, tu, mu
-; CHECK-NEXT:    vfmacc.vf v16, ft0, v8
-; CHECK-NEXT:    vse32.v v16, (a3)
-; CHECK-NEXT:    sub a0, a0, a4
-; CHECK-NEXT:    vsetvli a4, a0, e32, m8, ta, mu
-; CHECK-NEXT:    add a3, a3, a1
-; CHECK-NEXT:    bnez a4, .LBB8_2
-; CHECK-NEXT:  .LBB8_3: # %for.end
+; CHECK-NEXT:    vle32.v v8, (a1)
+; CHECK-NEXT:    vle32.v v16, (a2)
+; CHECK-NEXT:    slli a4, a3, 2
+; CHECK-NEXT:    add a1, a1, a4
+; CHECK-NEXT:    vsetvli zero, a3, e32, m8, tu, mu
+; CHECK-NEXT:    vfmacc.vf v16, fa0, v8
+; CHECK-NEXT:    vse32.v v16, (a2)
+; CHECK-NEXT:    sub a0, a0, a3
+; CHECK-NEXT:    vsetvli a3, a0, e32, m8, ta, mu
+; CHECK-NEXT:    add a2, a2, a4
+; CHECK-NEXT:    bnez a3, .LBB8_1
+; CHECK-NEXT:  .LBB8_2: # %for.end
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.riscv.vsetvli.i64(i64 %n, i64 2, i64 3)

diff  --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll
index f08cf4f840947..41b179e6612dc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv64 -mattr=+m,+f,+d,+a,+c,+v \
-; RUN:    -verify-machineinstrs -O2 < %s | FileCheck %s
+; RUN:   -target-abi=lp64d -verify-machineinstrs -O2 < %s | FileCheck %s
 
 declare i64 @llvm.riscv.vsetvli(i64, i64, i64)
 declare i64 @llvm.riscv.vsetvlimax(i64, i64)
@@ -196,9 +196,8 @@ entry:
 define <vscale x 1 x double> @test10(<vscale x 1 x double> %a, double %b) nounwind {
 ; CHECK-LABEL: test10:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m1, tu, mu
-; CHECK-NEXT:    vfmv.s.f v8, ft0
+; CHECK-NEXT:    vfmv.s.f v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %x = tail call i64 @llvm.riscv.vsetvlimax(i64 3, i64 0)
@@ -210,9 +209,8 @@ entry:
 define <vscale x 1 x double> @test11(<vscale x 1 x double> %a, double %b) nounwind {
 ; CHECK-LABEL: test11:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
 ; CHECK-NEXT:    vsetivli a0, 6, e64, m1, tu, mu
-; CHECK-NEXT:    vfmv.s.f v8, ft0
+; CHECK-NEXT:    vfmv.s.f v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %x = tail call i64 @llvm.riscv.vsetvli(i64 6, i64 3, i64 0)
@@ -224,10 +222,9 @@ entry:
 define <vscale x 1 x double> @test12(<vscale x 1 x double> %a, double %b, <vscale x 1 x i1> %mask) nounwind {
 ; CHECK-LABEL: test12:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
 ; CHECK-NEXT:    vsetivli zero, 9, e64, m1, tu, mu
 ; CHECK-NEXT:    vfadd.vv v8, v8, v8, v0.t
-; CHECK-NEXT:    vfmv.s.f v8, ft0
+; CHECK-NEXT:    vfmv.s.f v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %x = call <vscale x 1 x double> @llvm.riscv.vfadd.mask.nxv1f64.f64(


        

