[llvm] [RISCV] Pass f32/f64 directly without a bitcast for Zfinx/Zdinx. (PR #107464)

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 6 14:42:20 PDT 2024


https://github.com/topperc updated https://github.com/llvm/llvm-project/pull/107464

>From 322983089d7a13e6933b3fa91aa3778952a2226e Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Thu, 5 Sep 2024 13:48:43 -0700
Subject: [PATCH 1/3] [RISCV] Pass f32/f64 directly without a bitcast for
 Zfinx/Zdinx.

With Zfinx/Zdinx, f32/f64 are legal types for a GPR, we don't need
a bitcast.

This avoids turning fneg/fabs into bitwise operations purely because
of these bitcasts. If the bitwise operations are faster for some
reason on a Zfinx CPU, then that seems like it should be done for
all fneg/fabs, not just ones near function arguments/returns.

I don't have much interest in Zfinx, this just makes the code more similar
to what I proposed for Zhinx in #107446.
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   |  10 +
 llvm/test/CodeGen/RISCV/double-arith.ll       |  12 +-
 .../RISCV/double-bitmanip-dagcombines.ll      |  10 +-
 llvm/test/CodeGen/RISCV/double-imm.ll         |   8 +-
 llvm/test/CodeGen/RISCV/double-intrinsics.ll  |   3 +-
 llvm/test/CodeGen/RISCV/float-arith.ll        |  39 +-
 .../RISCV/float-bitmanip-dagcombines.ll       |   9 +-
 llvm/test/CodeGen/RISCV/float-intrinsics.ll   |   3 +-
 .../CodeGen/RISCV/float-round-conv-sat.ll     | 204 ++++-----
 llvm/test/CodeGen/RISCV/half-convert.ll       | 224 ++++-----
 .../test/CodeGen/RISCV/half-round-conv-sat.ll | 432 +++++++++---------
 11 files changed, 449 insertions(+), 505 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 6b4219b4623847..ad8a5b163ec315 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -19227,6 +19227,16 @@ bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
 
   ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs(ABI);
 
+  const RISCVSubtarget &STI =
+      State.getMachineFunction().getSubtarget<RISCVSubtarget>();
+  if ((ValVT == MVT::f32 && XLen == 32 && STI.hasStdExtZfinx()) ||
+      (ValVT == MVT::f64 && XLen == 64 && STI.hasStdExtZdinx())) {
+    if (MCRegister Reg = State.AllocateReg(ArgGPRs)) {
+      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+      return false;
+    }
+  }
+
   if (UseGPRForF16_F32 && (ValVT == MVT::f16 || ValVT == MVT::bf16 ||
                            (ValVT == MVT::f32 && XLen == 64))) {
     MCRegister Reg = State.AllocateReg(ArgGPRs);
diff --git a/llvm/test/CodeGen/RISCV/double-arith.ll b/llvm/test/CodeGen/RISCV/double-arith.ll
index ee54501fe59a8c..fa74dcb4810060 100644
--- a/llvm/test/CodeGen/RISCV/double-arith.ll
+++ b/llvm/test/CodeGen/RISCV/double-arith.ll
@@ -320,8 +320,7 @@ define double @fsgnjn_d(double %a, double %b) nounwind {
 ;
 ; RV64IZFINXZDINX-LABEL: fsgnjn_d:
 ; RV64IZFINXZDINX:       # %bb.0:
-; RV64IZFINXZDINX-NEXT:    not a1, a1
-; RV64IZFINXZDINX-NEXT:    fsgnj.d a0, a0, a1
+; RV64IZFINXZDINX-NEXT:    fsgnjn.d a0, a0, a1
 ; RV64IZFINXZDINX-NEXT:    ret
 ;
 ; RV32I-LABEL: fsgnjn_d:
@@ -852,9 +851,7 @@ define double @fnmadd_d_3(double %a, double %b, double %c) nounwind {
 ; RV64IZFINXZDINX-LABEL: fnmadd_d_3:
 ; RV64IZFINXZDINX:       # %bb.0:
 ; RV64IZFINXZDINX-NEXT:    fmadd.d a0, a0, a1, a2
-; RV64IZFINXZDINX-NEXT:    li a1, -1
-; RV64IZFINXZDINX-NEXT:    slli a1, a1, 63
-; RV64IZFINXZDINX-NEXT:    xor a0, a0, a1
+; RV64IZFINXZDINX-NEXT:    fneg.d a0, a0
 ; RV64IZFINXZDINX-NEXT:    ret
 ;
 ; RV32I-LABEL: fnmadd_d_3:
@@ -900,10 +897,7 @@ define double @fnmadd_nsz(double %a, double %b, double %c) nounwind {
 ;
 ; RV64IZFINXZDINX-LABEL: fnmadd_nsz:
 ; RV64IZFINXZDINX:       # %bb.0:
-; RV64IZFINXZDINX-NEXT:    fmadd.d a0, a0, a1, a2
-; RV64IZFINXZDINX-NEXT:    li a1, -1
-; RV64IZFINXZDINX-NEXT:    slli a1, a1, 63
-; RV64IZFINXZDINX-NEXT:    xor a0, a0, a1
+; RV64IZFINXZDINX-NEXT:    fnmadd.d a0, a0, a1, a2
 ; RV64IZFINXZDINX-NEXT:    ret
 ;
 ; RV32I-LABEL: fnmadd_nsz:
diff --git a/llvm/test/CodeGen/RISCV/double-bitmanip-dagcombines.ll b/llvm/test/CodeGen/RISCV/double-bitmanip-dagcombines.ll
index 70d3291d3a8402..f7d57178b03d41 100644
--- a/llvm/test/CodeGen/RISCV/double-bitmanip-dagcombines.ll
+++ b/llvm/test/CodeGen/RISCV/double-bitmanip-dagcombines.ll
@@ -56,9 +56,7 @@ define double @fneg(double %a) nounwind {
 ;
 ; RV64IZFINXZDINX-LABEL: fneg:
 ; RV64IZFINXZDINX:       # %bb.0:
-; RV64IZFINXZDINX-NEXT:    li a1, -1
-; RV64IZFINXZDINX-NEXT:    slli a1, a1, 63
-; RV64IZFINXZDINX-NEXT:    xor a0, a0, a1
+; RV64IZFINXZDINX-NEXT:    fneg.d a0, a0
 ; RV64IZFINXZDINX-NEXT:    ret
   %1 = fneg double %a
   ret double %1
@@ -99,8 +97,7 @@ define double @fabs(double %a) nounwind {
 ;
 ; RV64IZFINXZDINX-LABEL: fabs:
 ; RV64IZFINXZDINX:       # %bb.0:
-; RV64IZFINXZDINX-NEXT:    slli a0, a0, 1
-; RV64IZFINXZDINX-NEXT:    srli a0, a0, 1
+; RV64IZFINXZDINX-NEXT:    fabs.d a0, a0
 ; RV64IZFINXZDINX-NEXT:    ret
   %1 = call double @llvm.fabs.f64(double %a)
   ret double %1
@@ -165,8 +162,7 @@ define double @fcopysign_fneg(double %a, double %b) nounwind {
 ;
 ; RV64IZFINXZDINX-LABEL: fcopysign_fneg:
 ; RV64IZFINXZDINX:       # %bb.0:
-; RV64IZFINXZDINX-NEXT:    not a1, a1
-; RV64IZFINXZDINX-NEXT:    fsgnj.d a0, a0, a1
+; RV64IZFINXZDINX-NEXT:    fsgnjn.d a0, a0, a1
 ; RV64IZFINXZDINX-NEXT:    ret
   %1 = fneg double %b
   %2 = call double @llvm.copysign.f64(double %a, double %1)
diff --git a/llvm/test/CodeGen/RISCV/double-imm.ll b/llvm/test/CodeGen/RISCV/double-imm.ll
index 827f034f143fb5..2294171d95ab2c 100644
--- a/llvm/test/CodeGen/RISCV/double-imm.ll
+++ b/llvm/test/CodeGen/RISCV/double-imm.ll
@@ -115,8 +115,7 @@ define double @double_negative_zero(ptr %pd) nounwind {
 ;
 ; CHECKRV64ZDINX-LABEL: double_negative_zero:
 ; CHECKRV64ZDINX:       # %bb.0:
-; CHECKRV64ZDINX-NEXT:    li a0, -1
-; CHECKRV64ZDINX-NEXT:    slli a0, a0, 63
+; CHECKRV64ZDINX-NEXT:    fneg.d a0, zero
 ; CHECKRV64ZDINX-NEXT:    ret
   ret double -0.0
 }
@@ -160,12 +159,11 @@ define dso_local double @negzero_sel(i16 noundef %a, double noundef %d) nounwind
 ; CHECKRV64ZDINX-LABEL: negzero_sel:
 ; CHECKRV64ZDINX:       # %bb.0: # %entry
 ; CHECKRV64ZDINX-NEXT:    slli a2, a0, 48
+; CHECKRV64ZDINX-NEXT:    mv a0, a1
 ; CHECKRV64ZDINX-NEXT:    beqz a2, .LBB4_2
 ; CHECKRV64ZDINX-NEXT:  # %bb.1: # %entry
 ; CHECKRV64ZDINX-NEXT:    fneg.d a0, zero
-; CHECKRV64ZDINX-NEXT:    ret
-; CHECKRV64ZDINX-NEXT:  .LBB4_2:
-; CHECKRV64ZDINX-NEXT:    mv a0, a1
+; CHECKRV64ZDINX-NEXT:  .LBB4_2: # %entry
 ; CHECKRV64ZDINX-NEXT:    ret
 entry:
   %tobool.not = icmp eq i16 %a, 0
diff --git a/llvm/test/CodeGen/RISCV/double-intrinsics.ll b/llvm/test/CodeGen/RISCV/double-intrinsics.ll
index 94b3b1f1b199c2..ea4af4cb60cd3d 100644
--- a/llvm/test/CodeGen/RISCV/double-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/double-intrinsics.ll
@@ -684,8 +684,7 @@ define double @fabs_f64(double %a) nounwind {
 ;
 ; RV64IZFINXZDINX-LABEL: fabs_f64:
 ; RV64IZFINXZDINX:       # %bb.0:
-; RV64IZFINXZDINX-NEXT:    slli a0, a0, 1
-; RV64IZFINXZDINX-NEXT:    srli a0, a0, 1
+; RV64IZFINXZDINX-NEXT:    fabs.d a0, a0
 ; RV64IZFINXZDINX-NEXT:    ret
 ;
 ; RV32I-LABEL: fabs_f64:
diff --git a/llvm/test/CodeGen/RISCV/float-arith.ll b/llvm/test/CodeGen/RISCV/float-arith.ll
index 931f73a94170a6..3f32734db0ba71 100644
--- a/llvm/test/CodeGen/RISCV/float-arith.ll
+++ b/llvm/test/CodeGen/RISCV/float-arith.ll
@@ -4,9 +4,9 @@
 ; RUN: llc -mtriple=riscv64 -mattr=+f -verify-machineinstrs < %s \
 ; RUN:   -target-abi=lp64f | FileCheck -check-prefix=CHECKIF %s
 ; RUN: llc -mtriple=riscv32 -mattr=+zfinx -verify-machineinstrs < %s \
-; RUN:   -target-abi=ilp32 | FileCheck -check-prefix=CHECKIZFINX %s
+; RUN:   -target-abi=ilp32 | FileCheck -check-prefixes=CHECKIZFINX,RV32IZFINX %s
 ; RUN: llc -mtriple=riscv64 -mattr=+zfinx -verify-machineinstrs < %s \
-; RUN:   -target-abi=lp64 | FileCheck -check-prefix=CHECKIZFINX %s
+; RUN:   -target-abi=lp64 | FileCheck -check-prefixes=CHECKIZFINX,RV64IZFINX %s
 ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck -check-prefix=RV32I %s
 ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
@@ -706,12 +706,18 @@ define float @fnmadd_s_3(float %a, float %b, float %c) nounwind {
 ; CHECKIF-NEXT:    fneg.s fa0, fa5
 ; CHECKIF-NEXT:    ret
 ;
-; CHECKIZFINX-LABEL: fnmadd_s_3:
-; CHECKIZFINX:       # %bb.0:
-; CHECKIZFINX-NEXT:    fmadd.s a0, a0, a1, a2
-; CHECKIZFINX-NEXT:    lui a1, 524288
-; CHECKIZFINX-NEXT:    xor a0, a0, a1
-; CHECKIZFINX-NEXT:    ret
+; RV32IZFINX-LABEL: fnmadd_s_3:
+; RV32IZFINX:       # %bb.0:
+; RV32IZFINX-NEXT:    fmadd.s a0, a0, a1, a2
+; RV32IZFINX-NEXT:    fneg.s a0, a0
+; RV32IZFINX-NEXT:    ret
+;
+; RV64IZFINX-LABEL: fnmadd_s_3:
+; RV64IZFINX:       # %bb.0:
+; RV64IZFINX-NEXT:    fmadd.s a0, a0, a1, a2
+; RV64IZFINX-NEXT:    lui a1, 524288
+; RV64IZFINX-NEXT:    xor a0, a0, a1
+; RV64IZFINX-NEXT:    ret
 ;
 ; RV32I-LABEL: fnmadd_s_3:
 ; RV32I:       # %bb.0:
@@ -755,12 +761,17 @@ define float @fnmadd_nsz(float %a, float %b, float %c) nounwind {
 ; CHECKIF-NEXT:    fnmadd.s fa0, fa0, fa1, fa2
 ; CHECKIF-NEXT:    ret
 ;
-; CHECKIZFINX-LABEL: fnmadd_nsz:
-; CHECKIZFINX:       # %bb.0:
-; CHECKIZFINX-NEXT:    fmadd.s a0, a0, a1, a2
-; CHECKIZFINX-NEXT:    lui a1, 524288
-; CHECKIZFINX-NEXT:    xor a0, a0, a1
-; CHECKIZFINX-NEXT:    ret
+; RV32IZFINX-LABEL: fnmadd_nsz:
+; RV32IZFINX:       # %bb.0:
+; RV32IZFINX-NEXT:    fnmadd.s a0, a0, a1, a2
+; RV32IZFINX-NEXT:    ret
+;
+; RV64IZFINX-LABEL: fnmadd_nsz:
+; RV64IZFINX:       # %bb.0:
+; RV64IZFINX-NEXT:    fmadd.s a0, a0, a1, a2
+; RV64IZFINX-NEXT:    lui a1, 524288
+; RV64IZFINX-NEXT:    xor a0, a0, a1
+; RV64IZFINX-NEXT:    ret
 ;
 ; RV32I-LABEL: fnmadd_nsz:
 ; RV32I:       # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/float-bitmanip-dagcombines.ll b/llvm/test/CodeGen/RISCV/float-bitmanip-dagcombines.ll
index 53df588bf1c374..2338219687ef75 100644
--- a/llvm/test/CodeGen/RISCV/float-bitmanip-dagcombines.ll
+++ b/llvm/test/CodeGen/RISCV/float-bitmanip-dagcombines.ll
@@ -33,8 +33,7 @@ define float @fneg(float %a) nounwind {
 ;
 ; RV32IZFINX-LABEL: fneg:
 ; RV32IZFINX:       # %bb.0:
-; RV32IZFINX-NEXT:    lui a1, 524288
-; RV32IZFINX-NEXT:    xor a0, a0, a1
+; RV32IZFINX-NEXT:    fneg.s a0, a0
 ; RV32IZFINX-NEXT:    ret
 ;
 ; RV64I-LABEL: fneg:
@@ -75,8 +74,7 @@ define float @fabs(float %a) nounwind {
 ;
 ; RV32IZFINX-LABEL: fabs:
 ; RV32IZFINX:       # %bb.0:
-; RV32IZFINX-NEXT:    slli a0, a0, 1
-; RV32IZFINX-NEXT:    srli a0, a0, 1
+; RV32IZFINX-NEXT:    fabs.s a0, a0
 ; RV32IZFINX-NEXT:    ret
 ;
 ; RV64I-LABEL: fabs:
@@ -128,8 +126,7 @@ define float @fcopysign_fneg(float %a, float %b) nounwind {
 ;
 ; RV32IZFINX-LABEL: fcopysign_fneg:
 ; RV32IZFINX:       # %bb.0:
-; RV32IZFINX-NEXT:    not a1, a1
-; RV32IZFINX-NEXT:    fsgnj.s a0, a0, a1
+; RV32IZFINX-NEXT:    fsgnjn.s a0, a0, a1
 ; RV32IZFINX-NEXT:    ret
 ;
 ; RV64I-LABEL: fcopysign_fneg:
diff --git a/llvm/test/CodeGen/RISCV/float-intrinsics.ll b/llvm/test/CodeGen/RISCV/float-intrinsics.ll
index 9f1578ce158f95..52442026dab502 100644
--- a/llvm/test/CodeGen/RISCV/float-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/float-intrinsics.ll
@@ -688,8 +688,7 @@ define float @fabs_f32(float %a) nounwind {
 ;
 ; RV32IZFINX-LABEL: fabs_f32:
 ; RV32IZFINX:       # %bb.0:
-; RV32IZFINX-NEXT:    slli a0, a0, 1
-; RV32IZFINX-NEXT:    srli a0, a0, 1
+; RV32IZFINX-NEXT:    fabs.s a0, a0
 ; RV32IZFINX-NEXT:    ret
 ;
 ; RV64IF-LABEL: fabs_f32:
diff --git a/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll b/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll
index 42ac20286a8920..198b18c75272a9 100644
--- a/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll
+++ b/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll
@@ -96,7 +96,6 @@ define i64 @test_floor_si64(float %x) nounwind {
 ; RV32IZFINX-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32IZFINX-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
 ; RV32IZFINX-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
-; RV32IZFINX-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
 ; RV32IZFINX-NEXT:    mv s0, a0
 ; RV32IZFINX-NEXT:    lui a0, 307200
 ; RV32IZFINX-NEXT:    fabs.s a1, s0
@@ -109,33 +108,32 @@ define i64 @test_floor_si64(float %x) nounwind {
 ; RV32IZFINX-NEXT:  .LBB1_2:
 ; RV32IZFINX-NEXT:    lui a0, 913408
 ; RV32IZFINX-NEXT:    fle.s s1, a0, s0
-; RV32IZFINX-NEXT:    neg s2, s1
 ; RV32IZFINX-NEXT:    mv a0, s0
 ; RV32IZFINX-NEXT:    call __fixsfdi
-; RV32IZFINX-NEXT:    and a0, s2, a0
-; RV32IZFINX-NEXT:    lui a2, 389120
-; RV32IZFINX-NEXT:    addi a2, a2, -1
-; RV32IZFINX-NEXT:    flt.s a4, a2, s0
-; RV32IZFINX-NEXT:    neg a2, a4
-; RV32IZFINX-NEXT:    or a0, a2, a0
-; RV32IZFINX-NEXT:    feq.s a2, s0, s0
-; RV32IZFINX-NEXT:    neg a2, a2
-; RV32IZFINX-NEXT:    lui a5, 524288
-; RV32IZFINX-NEXT:    lui a3, 524288
+; RV32IZFINX-NEXT:    lui a4, 524288
+; RV32IZFINX-NEXT:    lui a2, 524288
 ; RV32IZFINX-NEXT:    beqz s1, .LBB1_4
 ; RV32IZFINX-NEXT:  # %bb.3:
-; RV32IZFINX-NEXT:    mv a3, a1
+; RV32IZFINX-NEXT:    mv a2, a1
 ; RV32IZFINX-NEXT:  .LBB1_4:
-; RV32IZFINX-NEXT:    and a0, a2, a0
-; RV32IZFINX-NEXT:    beqz a4, .LBB1_6
+; RV32IZFINX-NEXT:    lui a1, 389120
+; RV32IZFINX-NEXT:    addi a1, a1, -1
+; RV32IZFINX-NEXT:    flt.s a3, a1, s0
+; RV32IZFINX-NEXT:    beqz a3, .LBB1_6
 ; RV32IZFINX-NEXT:  # %bb.5:
-; RV32IZFINX-NEXT:    addi a3, a5, -1
+; RV32IZFINX-NEXT:    addi a2, a4, -1
 ; RV32IZFINX-NEXT:  .LBB1_6:
-; RV32IZFINX-NEXT:    and a1, a2, a3
+; RV32IZFINX-NEXT:    feq.s a1, s0, s0
+; RV32IZFINX-NEXT:    neg a4, a1
+; RV32IZFINX-NEXT:    and a1, a4, a2
+; RV32IZFINX-NEXT:    neg a2, s1
+; RV32IZFINX-NEXT:    and a0, a2, a0
+; RV32IZFINX-NEXT:    neg a2, a3
+; RV32IZFINX-NEXT:    or a0, a2, a0
+; RV32IZFINX-NEXT:    and a0, a4, a0
 ; RV32IZFINX-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IZFINX-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IZFINX-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32IZFINX-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
 ; RV32IZFINX-NEXT:    addi sp, sp, 16
 ; RV32IZFINX-NEXT:    ret
 ;
@@ -356,7 +354,6 @@ define i64 @test_ceil_si64(float %x) nounwind {
 ; RV32IZFINX-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32IZFINX-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
 ; RV32IZFINX-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
-; RV32IZFINX-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
 ; RV32IZFINX-NEXT:    mv s0, a0
 ; RV32IZFINX-NEXT:    lui a0, 307200
 ; RV32IZFINX-NEXT:    fabs.s a1, s0
@@ -369,33 +366,32 @@ define i64 @test_ceil_si64(float %x) nounwind {
 ; RV32IZFINX-NEXT:  .LBB5_2:
 ; RV32IZFINX-NEXT:    lui a0, 913408
 ; RV32IZFINX-NEXT:    fle.s s1, a0, s0
-; RV32IZFINX-NEXT:    neg s2, s1
 ; RV32IZFINX-NEXT:    mv a0, s0
 ; RV32IZFINX-NEXT:    call __fixsfdi
-; RV32IZFINX-NEXT:    and a0, s2, a0
-; RV32IZFINX-NEXT:    lui a2, 389120
-; RV32IZFINX-NEXT:    addi a2, a2, -1
-; RV32IZFINX-NEXT:    flt.s a4, a2, s0
-; RV32IZFINX-NEXT:    neg a2, a4
-; RV32IZFINX-NEXT:    or a0, a2, a0
-; RV32IZFINX-NEXT:    feq.s a2, s0, s0
-; RV32IZFINX-NEXT:    neg a2, a2
-; RV32IZFINX-NEXT:    lui a5, 524288
-; RV32IZFINX-NEXT:    lui a3, 524288
+; RV32IZFINX-NEXT:    lui a4, 524288
+; RV32IZFINX-NEXT:    lui a2, 524288
 ; RV32IZFINX-NEXT:    beqz s1, .LBB5_4
 ; RV32IZFINX-NEXT:  # %bb.3:
-; RV32IZFINX-NEXT:    mv a3, a1
+; RV32IZFINX-NEXT:    mv a2, a1
 ; RV32IZFINX-NEXT:  .LBB5_4:
-; RV32IZFINX-NEXT:    and a0, a2, a0
-; RV32IZFINX-NEXT:    beqz a4, .LBB5_6
+; RV32IZFINX-NEXT:    lui a1, 389120
+; RV32IZFINX-NEXT:    addi a1, a1, -1
+; RV32IZFINX-NEXT:    flt.s a3, a1, s0
+; RV32IZFINX-NEXT:    beqz a3, .LBB5_6
 ; RV32IZFINX-NEXT:  # %bb.5:
-; RV32IZFINX-NEXT:    addi a3, a5, -1
+; RV32IZFINX-NEXT:    addi a2, a4, -1
 ; RV32IZFINX-NEXT:  .LBB5_6:
-; RV32IZFINX-NEXT:    and a1, a2, a3
+; RV32IZFINX-NEXT:    feq.s a1, s0, s0
+; RV32IZFINX-NEXT:    neg a4, a1
+; RV32IZFINX-NEXT:    and a1, a4, a2
+; RV32IZFINX-NEXT:    neg a2, s1
+; RV32IZFINX-NEXT:    and a0, a2, a0
+; RV32IZFINX-NEXT:    neg a2, a3
+; RV32IZFINX-NEXT:    or a0, a2, a0
+; RV32IZFINX-NEXT:    and a0, a4, a0
 ; RV32IZFINX-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IZFINX-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IZFINX-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32IZFINX-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
 ; RV32IZFINX-NEXT:    addi sp, sp, 16
 ; RV32IZFINX-NEXT:    ret
 ;
@@ -616,7 +612,6 @@ define i64 @test_trunc_si64(float %x) nounwind {
 ; RV32IZFINX-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32IZFINX-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
 ; RV32IZFINX-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
-; RV32IZFINX-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
 ; RV32IZFINX-NEXT:    mv s0, a0
 ; RV32IZFINX-NEXT:    lui a0, 307200
 ; RV32IZFINX-NEXT:    fabs.s a1, s0
@@ -629,33 +624,32 @@ define i64 @test_trunc_si64(float %x) nounwind {
 ; RV32IZFINX-NEXT:  .LBB9_2:
 ; RV32IZFINX-NEXT:    lui a0, 913408
 ; RV32IZFINX-NEXT:    fle.s s1, a0, s0
-; RV32IZFINX-NEXT:    neg s2, s1
 ; RV32IZFINX-NEXT:    mv a0, s0
 ; RV32IZFINX-NEXT:    call __fixsfdi
-; RV32IZFINX-NEXT:    and a0, s2, a0
-; RV32IZFINX-NEXT:    lui a2, 389120
-; RV32IZFINX-NEXT:    addi a2, a2, -1
-; RV32IZFINX-NEXT:    flt.s a4, a2, s0
-; RV32IZFINX-NEXT:    neg a2, a4
-; RV32IZFINX-NEXT:    or a0, a2, a0
-; RV32IZFINX-NEXT:    feq.s a2, s0, s0
-; RV32IZFINX-NEXT:    neg a2, a2
-; RV32IZFINX-NEXT:    lui a5, 524288
-; RV32IZFINX-NEXT:    lui a3, 524288
+; RV32IZFINX-NEXT:    lui a4, 524288
+; RV32IZFINX-NEXT:    lui a2, 524288
 ; RV32IZFINX-NEXT:    beqz s1, .LBB9_4
 ; RV32IZFINX-NEXT:  # %bb.3:
-; RV32IZFINX-NEXT:    mv a3, a1
+; RV32IZFINX-NEXT:    mv a2, a1
 ; RV32IZFINX-NEXT:  .LBB9_4:
-; RV32IZFINX-NEXT:    and a0, a2, a0
-; RV32IZFINX-NEXT:    beqz a4, .LBB9_6
+; RV32IZFINX-NEXT:    lui a1, 389120
+; RV32IZFINX-NEXT:    addi a1, a1, -1
+; RV32IZFINX-NEXT:    flt.s a3, a1, s0
+; RV32IZFINX-NEXT:    beqz a3, .LBB9_6
 ; RV32IZFINX-NEXT:  # %bb.5:
-; RV32IZFINX-NEXT:    addi a3, a5, -1
+; RV32IZFINX-NEXT:    addi a2, a4, -1
 ; RV32IZFINX-NEXT:  .LBB9_6:
-; RV32IZFINX-NEXT:    and a1, a2, a3
+; RV32IZFINX-NEXT:    feq.s a1, s0, s0
+; RV32IZFINX-NEXT:    neg a4, a1
+; RV32IZFINX-NEXT:    and a1, a4, a2
+; RV32IZFINX-NEXT:    neg a2, s1
+; RV32IZFINX-NEXT:    and a0, a2, a0
+; RV32IZFINX-NEXT:    neg a2, a3
+; RV32IZFINX-NEXT:    or a0, a2, a0
+; RV32IZFINX-NEXT:    and a0, a4, a0
 ; RV32IZFINX-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IZFINX-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IZFINX-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32IZFINX-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
 ; RV32IZFINX-NEXT:    addi sp, sp, 16
 ; RV32IZFINX-NEXT:    ret
 ;
@@ -876,7 +870,6 @@ define i64 @test_round_si64(float %x) nounwind {
 ; RV32IZFINX-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32IZFINX-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
 ; RV32IZFINX-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
-; RV32IZFINX-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
 ; RV32IZFINX-NEXT:    mv s0, a0
 ; RV32IZFINX-NEXT:    lui a0, 307200
 ; RV32IZFINX-NEXT:    fabs.s a1, s0
@@ -889,33 +882,32 @@ define i64 @test_round_si64(float %x) nounwind {
 ; RV32IZFINX-NEXT:  .LBB13_2:
 ; RV32IZFINX-NEXT:    lui a0, 913408
 ; RV32IZFINX-NEXT:    fle.s s1, a0, s0
-; RV32IZFINX-NEXT:    neg s2, s1
 ; RV32IZFINX-NEXT:    mv a0, s0
 ; RV32IZFINX-NEXT:    call __fixsfdi
-; RV32IZFINX-NEXT:    and a0, s2, a0
-; RV32IZFINX-NEXT:    lui a2, 389120
-; RV32IZFINX-NEXT:    addi a2, a2, -1
-; RV32IZFINX-NEXT:    flt.s a4, a2, s0
-; RV32IZFINX-NEXT:    neg a2, a4
-; RV32IZFINX-NEXT:    or a0, a2, a0
-; RV32IZFINX-NEXT:    feq.s a2, s0, s0
-; RV32IZFINX-NEXT:    neg a2, a2
-; RV32IZFINX-NEXT:    lui a5, 524288
-; RV32IZFINX-NEXT:    lui a3, 524288
+; RV32IZFINX-NEXT:    lui a4, 524288
+; RV32IZFINX-NEXT:    lui a2, 524288
 ; RV32IZFINX-NEXT:    beqz s1, .LBB13_4
 ; RV32IZFINX-NEXT:  # %bb.3:
-; RV32IZFINX-NEXT:    mv a3, a1
+; RV32IZFINX-NEXT:    mv a2, a1
 ; RV32IZFINX-NEXT:  .LBB13_4:
-; RV32IZFINX-NEXT:    and a0, a2, a0
-; RV32IZFINX-NEXT:    beqz a4, .LBB13_6
+; RV32IZFINX-NEXT:    lui a1, 389120
+; RV32IZFINX-NEXT:    addi a1, a1, -1
+; RV32IZFINX-NEXT:    flt.s a3, a1, s0
+; RV32IZFINX-NEXT:    beqz a3, .LBB13_6
 ; RV32IZFINX-NEXT:  # %bb.5:
-; RV32IZFINX-NEXT:    addi a3, a5, -1
+; RV32IZFINX-NEXT:    addi a2, a4, -1
 ; RV32IZFINX-NEXT:  .LBB13_6:
-; RV32IZFINX-NEXT:    and a1, a2, a3
+; RV32IZFINX-NEXT:    feq.s a1, s0, s0
+; RV32IZFINX-NEXT:    neg a4, a1
+; RV32IZFINX-NEXT:    and a1, a4, a2
+; RV32IZFINX-NEXT:    neg a2, s1
+; RV32IZFINX-NEXT:    and a0, a2, a0
+; RV32IZFINX-NEXT:    neg a2, a3
+; RV32IZFINX-NEXT:    or a0, a2, a0
+; RV32IZFINX-NEXT:    and a0, a4, a0
 ; RV32IZFINX-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IZFINX-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IZFINX-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32IZFINX-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
 ; RV32IZFINX-NEXT:    addi sp, sp, 16
 ; RV32IZFINX-NEXT:    ret
 ;
@@ -1136,7 +1128,6 @@ define i64 @test_roundeven_si64(float %x) nounwind {
 ; RV32IZFINX-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32IZFINX-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
 ; RV32IZFINX-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
-; RV32IZFINX-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
 ; RV32IZFINX-NEXT:    mv s0, a0
 ; RV32IZFINX-NEXT:    lui a0, 307200
 ; RV32IZFINX-NEXT:    fabs.s a1, s0
@@ -1149,33 +1140,32 @@ define i64 @test_roundeven_si64(float %x) nounwind {
 ; RV32IZFINX-NEXT:  .LBB17_2:
 ; RV32IZFINX-NEXT:    lui a0, 913408
 ; RV32IZFINX-NEXT:    fle.s s1, a0, s0
-; RV32IZFINX-NEXT:    neg s2, s1
 ; RV32IZFINX-NEXT:    mv a0, s0
 ; RV32IZFINX-NEXT:    call __fixsfdi
-; RV32IZFINX-NEXT:    and a0, s2, a0
-; RV32IZFINX-NEXT:    lui a2, 389120
-; RV32IZFINX-NEXT:    addi a2, a2, -1
-; RV32IZFINX-NEXT:    flt.s a4, a2, s0
-; RV32IZFINX-NEXT:    neg a2, a4
-; RV32IZFINX-NEXT:    or a0, a2, a0
-; RV32IZFINX-NEXT:    feq.s a2, s0, s0
-; RV32IZFINX-NEXT:    neg a2, a2
-; RV32IZFINX-NEXT:    lui a5, 524288
-; RV32IZFINX-NEXT:    lui a3, 524288
+; RV32IZFINX-NEXT:    lui a4, 524288
+; RV32IZFINX-NEXT:    lui a2, 524288
 ; RV32IZFINX-NEXT:    beqz s1, .LBB17_4
 ; RV32IZFINX-NEXT:  # %bb.3:
-; RV32IZFINX-NEXT:    mv a3, a1
+; RV32IZFINX-NEXT:    mv a2, a1
 ; RV32IZFINX-NEXT:  .LBB17_4:
-; RV32IZFINX-NEXT:    and a0, a2, a0
-; RV32IZFINX-NEXT:    beqz a4, .LBB17_6
+; RV32IZFINX-NEXT:    lui a1, 389120
+; RV32IZFINX-NEXT:    addi a1, a1, -1
+; RV32IZFINX-NEXT:    flt.s a3, a1, s0
+; RV32IZFINX-NEXT:    beqz a3, .LBB17_6
 ; RV32IZFINX-NEXT:  # %bb.5:
-; RV32IZFINX-NEXT:    addi a3, a5, -1
+; RV32IZFINX-NEXT:    addi a2, a4, -1
 ; RV32IZFINX-NEXT:  .LBB17_6:
-; RV32IZFINX-NEXT:    and a1, a2, a3
+; RV32IZFINX-NEXT:    feq.s a1, s0, s0
+; RV32IZFINX-NEXT:    neg a4, a1
+; RV32IZFINX-NEXT:    and a1, a4, a2
+; RV32IZFINX-NEXT:    neg a2, s1
+; RV32IZFINX-NEXT:    and a0, a2, a0
+; RV32IZFINX-NEXT:    neg a2, a3
+; RV32IZFINX-NEXT:    or a0, a2, a0
+; RV32IZFINX-NEXT:    and a0, a4, a0
 ; RV32IZFINX-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IZFINX-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IZFINX-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32IZFINX-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
 ; RV32IZFINX-NEXT:    addi sp, sp, 16
 ; RV32IZFINX-NEXT:    ret
 ;
@@ -1396,7 +1386,6 @@ define i64 @test_rint_si64(float %x) nounwind {
 ; RV32IZFINX-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32IZFINX-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
 ; RV32IZFINX-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
-; RV32IZFINX-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
 ; RV32IZFINX-NEXT:    mv s0, a0
 ; RV32IZFINX-NEXT:    lui a0, 307200
 ; RV32IZFINX-NEXT:    fabs.s a1, s0
@@ -1409,33 +1398,32 @@ define i64 @test_rint_si64(float %x) nounwind {
 ; RV32IZFINX-NEXT:  .LBB21_2:
 ; RV32IZFINX-NEXT:    lui a0, 913408
 ; RV32IZFINX-NEXT:    fle.s s1, a0, s0
-; RV32IZFINX-NEXT:    neg s2, s1
 ; RV32IZFINX-NEXT:    mv a0, s0
 ; RV32IZFINX-NEXT:    call __fixsfdi
-; RV32IZFINX-NEXT:    and a0, s2, a0
-; RV32IZFINX-NEXT:    lui a2, 389120
-; RV32IZFINX-NEXT:    addi a2, a2, -1
-; RV32IZFINX-NEXT:    flt.s a4, a2, s0
-; RV32IZFINX-NEXT:    neg a2, a4
-; RV32IZFINX-NEXT:    or a0, a2, a0
-; RV32IZFINX-NEXT:    feq.s a2, s0, s0
-; RV32IZFINX-NEXT:    neg a2, a2
-; RV32IZFINX-NEXT:    lui a5, 524288
-; RV32IZFINX-NEXT:    lui a3, 524288
+; RV32IZFINX-NEXT:    lui a4, 524288
+; RV32IZFINX-NEXT:    lui a2, 524288
 ; RV32IZFINX-NEXT:    beqz s1, .LBB21_4
 ; RV32IZFINX-NEXT:  # %bb.3:
-; RV32IZFINX-NEXT:    mv a3, a1
+; RV32IZFINX-NEXT:    mv a2, a1
 ; RV32IZFINX-NEXT:  .LBB21_4:
-; RV32IZFINX-NEXT:    and a0, a2, a0
-; RV32IZFINX-NEXT:    beqz a4, .LBB21_6
+; RV32IZFINX-NEXT:    lui a1, 389120
+; RV32IZFINX-NEXT:    addi a1, a1, -1
+; RV32IZFINX-NEXT:    flt.s a3, a1, s0
+; RV32IZFINX-NEXT:    beqz a3, .LBB21_6
 ; RV32IZFINX-NEXT:  # %bb.5:
-; RV32IZFINX-NEXT:    addi a3, a5, -1
+; RV32IZFINX-NEXT:    addi a2, a4, -1
 ; RV32IZFINX-NEXT:  .LBB21_6:
-; RV32IZFINX-NEXT:    and a1, a2, a3
+; RV32IZFINX-NEXT:    feq.s a1, s0, s0
+; RV32IZFINX-NEXT:    neg a4, a1
+; RV32IZFINX-NEXT:    and a1, a4, a2
+; RV32IZFINX-NEXT:    neg a2, s1
+; RV32IZFINX-NEXT:    and a0, a2, a0
+; RV32IZFINX-NEXT:    neg a2, a3
+; RV32IZFINX-NEXT:    or a0, a2, a0
+; RV32IZFINX-NEXT:    and a0, a4, a0
 ; RV32IZFINX-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IZFINX-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IZFINX-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32IZFINX-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
 ; RV32IZFINX-NEXT:    addi sp, sp, 16
 ; RV32IZFINX-NEXT:    ret
 ;
diff --git a/llvm/test/CodeGen/RISCV/half-convert.ll b/llvm/test/CodeGen/RISCV/half-convert.ll
index 32f7dfaee8837c..4c5097b0a53cc2 100644
--- a/llvm/test/CodeGen/RISCV/half-convert.ll
+++ b/llvm/test/CodeGen/RISCV/half-convert.ll
@@ -2241,46 +2241,40 @@ define i64 @fcvt_l_h_sat(half %a) nounwind {
 ;
 ; RV32IZHINX-LABEL: fcvt_l_h_sat:
 ; RV32IZHINX:       # %bb.0: # %start
-; RV32IZHINX-NEXT:    addi sp, sp, -32
-; RV32IZHINX-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
-; RV32IZHINX-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
-; RV32IZHINX-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
-; RV32IZHINX-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
-; RV32IZHINX-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
-; RV32IZHINX-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
+; RV32IZHINX-NEXT:    addi sp, sp, -16
+; RV32IZHINX-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZHINX-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IZHINX-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
 ; RV32IZHINX-NEXT:    fcvt.s.h s0, a0
-; RV32IZHINX-NEXT:    lui a0, 389120
-; RV32IZHINX-NEXT:    addi a0, a0, -1
-; RV32IZHINX-NEXT:    flt.s s1, a0, s0
-; RV32IZHINX-NEXT:    neg s2, s1
 ; RV32IZHINX-NEXT:    lui a0, 913408
-; RV32IZHINX-NEXT:    fle.s s3, a0, s0
-; RV32IZHINX-NEXT:    neg s4, s3
+; RV32IZHINX-NEXT:    fle.s s1, a0, s0
 ; RV32IZHINX-NEXT:    mv a0, s0
 ; RV32IZHINX-NEXT:    call __fixsfdi
-; RV32IZHINX-NEXT:    and a0, s4, a0
-; RV32IZHINX-NEXT:    or a0, s2, a0
-; RV32IZHINX-NEXT:    feq.s a2, s0, s0
-; RV32IZHINX-NEXT:    neg a2, a2
 ; RV32IZHINX-NEXT:    lui a4, 524288
-; RV32IZHINX-NEXT:    lui a3, 524288
-; RV32IZHINX-NEXT:    beqz s3, .LBB10_2
+; RV32IZHINX-NEXT:    lui a2, 524288
+; RV32IZHINX-NEXT:    beqz s1, .LBB10_2
 ; RV32IZHINX-NEXT:  # %bb.1: # %start
-; RV32IZHINX-NEXT:    mv a3, a1
+; RV32IZHINX-NEXT:    mv a2, a1
 ; RV32IZHINX-NEXT:  .LBB10_2: # %start
-; RV32IZHINX-NEXT:    and a0, a2, a0
-; RV32IZHINX-NEXT:    beqz s1, .LBB10_4
+; RV32IZHINX-NEXT:    lui a1, 389120
+; RV32IZHINX-NEXT:    addi a1, a1, -1
+; RV32IZHINX-NEXT:    flt.s a3, a1, s0
+; RV32IZHINX-NEXT:    beqz a3, .LBB10_4
 ; RV32IZHINX-NEXT:  # %bb.3:
-; RV32IZHINX-NEXT:    addi a3, a4, -1
+; RV32IZHINX-NEXT:    addi a2, a4, -1
 ; RV32IZHINX-NEXT:  .LBB10_4: # %start
-; RV32IZHINX-NEXT:    and a1, a2, a3
-; RV32IZHINX-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
-; RV32IZHINX-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
-; RV32IZHINX-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IZHINX-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32IZHINX-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32IZHINX-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
-; RV32IZHINX-NEXT:    addi sp, sp, 32
+; RV32IZHINX-NEXT:    feq.s a1, s0, s0
+; RV32IZHINX-NEXT:    neg a4, a1
+; RV32IZHINX-NEXT:    and a1, a4, a2
+; RV32IZHINX-NEXT:    neg a2, a3
+; RV32IZHINX-NEXT:    neg a3, s1
+; RV32IZHINX-NEXT:    and a0, a3, a0
+; RV32IZHINX-NEXT:    or a0, a2, a0
+; RV32IZHINX-NEXT:    and a0, a4, a0
+; RV32IZHINX-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IZHINX-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IZHINX-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
+; RV32IZHINX-NEXT:    addi sp, sp, 16
 ; RV32IZHINX-NEXT:    ret
 ;
 ; RV64IZHINX-LABEL: fcvt_l_h_sat:
@@ -2294,46 +2288,40 @@ define i64 @fcvt_l_h_sat(half %a) nounwind {
 ;
 ; RV32IZDINXZHINX-LABEL: fcvt_l_h_sat:
 ; RV32IZDINXZHINX:       # %bb.0: # %start
-; RV32IZDINXZHINX-NEXT:    addi sp, sp, -32
-; RV32IZDINXZHINX-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
-; RV32IZDINXZHINX-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
-; RV32IZDINXZHINX-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
-; RV32IZDINXZHINX-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
-; RV32IZDINXZHINX-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
-; RV32IZDINXZHINX-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
+; RV32IZDINXZHINX-NEXT:    addi sp, sp, -16
+; RV32IZDINXZHINX-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZDINXZHINX-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IZDINXZHINX-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
 ; RV32IZDINXZHINX-NEXT:    fcvt.s.h s0, a0
-; RV32IZDINXZHINX-NEXT:    lui a0, 389120
-; RV32IZDINXZHINX-NEXT:    addi a0, a0, -1
-; RV32IZDINXZHINX-NEXT:    flt.s s1, a0, s0
-; RV32IZDINXZHINX-NEXT:    neg s2, s1
 ; RV32IZDINXZHINX-NEXT:    lui a0, 913408
-; RV32IZDINXZHINX-NEXT:    fle.s s3, a0, s0
-; RV32IZDINXZHINX-NEXT:    neg s4, s3
+; RV32IZDINXZHINX-NEXT:    fle.s s1, a0, s0
 ; RV32IZDINXZHINX-NEXT:    mv a0, s0
 ; RV32IZDINXZHINX-NEXT:    call __fixsfdi
-; RV32IZDINXZHINX-NEXT:    and a0, s4, a0
-; RV32IZDINXZHINX-NEXT:    or a0, s2, a0
-; RV32IZDINXZHINX-NEXT:    feq.s a2, s0, s0
-; RV32IZDINXZHINX-NEXT:    neg a2, a2
 ; RV32IZDINXZHINX-NEXT:    lui a4, 524288
-; RV32IZDINXZHINX-NEXT:    lui a3, 524288
-; RV32IZDINXZHINX-NEXT:    beqz s3, .LBB10_2
+; RV32IZDINXZHINX-NEXT:    lui a2, 524288
+; RV32IZDINXZHINX-NEXT:    beqz s1, .LBB10_2
 ; RV32IZDINXZHINX-NEXT:  # %bb.1: # %start
-; RV32IZDINXZHINX-NEXT:    mv a3, a1
+; RV32IZDINXZHINX-NEXT:    mv a2, a1
 ; RV32IZDINXZHINX-NEXT:  .LBB10_2: # %start
-; RV32IZDINXZHINX-NEXT:    and a0, a2, a0
-; RV32IZDINXZHINX-NEXT:    beqz s1, .LBB10_4
+; RV32IZDINXZHINX-NEXT:    lui a1, 389120
+; RV32IZDINXZHINX-NEXT:    addi a1, a1, -1
+; RV32IZDINXZHINX-NEXT:    flt.s a3, a1, s0
+; RV32IZDINXZHINX-NEXT:    beqz a3, .LBB10_4
 ; RV32IZDINXZHINX-NEXT:  # %bb.3:
-; RV32IZDINXZHINX-NEXT:    addi a3, a4, -1
+; RV32IZDINXZHINX-NEXT:    addi a2, a4, -1
 ; RV32IZDINXZHINX-NEXT:  .LBB10_4: # %start
-; RV32IZDINXZHINX-NEXT:    and a1, a2, a3
-; RV32IZDINXZHINX-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
-; RV32IZDINXZHINX-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
-; RV32IZDINXZHINX-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IZDINXZHINX-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32IZDINXZHINX-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32IZDINXZHINX-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
-; RV32IZDINXZHINX-NEXT:    addi sp, sp, 32
+; RV32IZDINXZHINX-NEXT:    feq.s a1, s0, s0
+; RV32IZDINXZHINX-NEXT:    neg a4, a1
+; RV32IZDINXZHINX-NEXT:    and a1, a4, a2
+; RV32IZDINXZHINX-NEXT:    neg a2, a3
+; RV32IZDINXZHINX-NEXT:    neg a3, s1
+; RV32IZDINXZHINX-NEXT:    and a0, a3, a0
+; RV32IZDINXZHINX-NEXT:    or a0, a2, a0
+; RV32IZDINXZHINX-NEXT:    and a0, a4, a0
+; RV32IZDINXZHINX-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IZDINXZHINX-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IZDINXZHINX-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
+; RV32IZDINXZHINX-NEXT:    addi sp, sp, 16
 ; RV32IZDINXZHINX-NEXT:    ret
 ;
 ; RV64IZDINXZHINX-LABEL: fcvt_l_h_sat:
@@ -2644,46 +2632,40 @@ define i64 @fcvt_l_h_sat(half %a) nounwind {
 ;
 ; CHECK32-IZHINXMIN-LABEL: fcvt_l_h_sat:
 ; CHECK32-IZHINXMIN:       # %bb.0: # %start
-; CHECK32-IZHINXMIN-NEXT:    addi sp, sp, -32
-; CHECK32-IZHINXMIN-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
-; CHECK32-IZHINXMIN-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
-; CHECK32-IZHINXMIN-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
-; CHECK32-IZHINXMIN-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
-; CHECK32-IZHINXMIN-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
-; CHECK32-IZHINXMIN-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
+; CHECK32-IZHINXMIN-NEXT:    addi sp, sp, -16
+; CHECK32-IZHINXMIN-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; CHECK32-IZHINXMIN-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; CHECK32-IZHINXMIN-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
 ; CHECK32-IZHINXMIN-NEXT:    fcvt.s.h s0, a0
-; CHECK32-IZHINXMIN-NEXT:    lui a0, 389120
-; CHECK32-IZHINXMIN-NEXT:    addi a0, a0, -1
-; CHECK32-IZHINXMIN-NEXT:    flt.s s1, a0, s0
-; CHECK32-IZHINXMIN-NEXT:    neg s2, s1
 ; CHECK32-IZHINXMIN-NEXT:    lui a0, 913408
-; CHECK32-IZHINXMIN-NEXT:    fle.s s3, a0, s0
-; CHECK32-IZHINXMIN-NEXT:    neg s4, s3
+; CHECK32-IZHINXMIN-NEXT:    fle.s s1, a0, s0
 ; CHECK32-IZHINXMIN-NEXT:    mv a0, s0
 ; CHECK32-IZHINXMIN-NEXT:    call __fixsfdi
-; CHECK32-IZHINXMIN-NEXT:    and a0, s4, a0
-; CHECK32-IZHINXMIN-NEXT:    or a0, s2, a0
-; CHECK32-IZHINXMIN-NEXT:    feq.s a2, s0, s0
-; CHECK32-IZHINXMIN-NEXT:    neg a2, a2
 ; CHECK32-IZHINXMIN-NEXT:    lui a4, 524288
-; CHECK32-IZHINXMIN-NEXT:    lui a3, 524288
-; CHECK32-IZHINXMIN-NEXT:    beqz s3, .LBB10_2
+; CHECK32-IZHINXMIN-NEXT:    lui a2, 524288
+; CHECK32-IZHINXMIN-NEXT:    beqz s1, .LBB10_2
 ; CHECK32-IZHINXMIN-NEXT:  # %bb.1: # %start
-; CHECK32-IZHINXMIN-NEXT:    mv a3, a1
+; CHECK32-IZHINXMIN-NEXT:    mv a2, a1
 ; CHECK32-IZHINXMIN-NEXT:  .LBB10_2: # %start
-; CHECK32-IZHINXMIN-NEXT:    and a0, a2, a0
-; CHECK32-IZHINXMIN-NEXT:    beqz s1, .LBB10_4
+; CHECK32-IZHINXMIN-NEXT:    lui a1, 389120
+; CHECK32-IZHINXMIN-NEXT:    addi a1, a1, -1
+; CHECK32-IZHINXMIN-NEXT:    flt.s a3, a1, s0
+; CHECK32-IZHINXMIN-NEXT:    beqz a3, .LBB10_4
 ; CHECK32-IZHINXMIN-NEXT:  # %bb.3:
-; CHECK32-IZHINXMIN-NEXT:    addi a3, a4, -1
+; CHECK32-IZHINXMIN-NEXT:    addi a2, a4, -1
 ; CHECK32-IZHINXMIN-NEXT:  .LBB10_4: # %start
-; CHECK32-IZHINXMIN-NEXT:    and a1, a2, a3
-; CHECK32-IZHINXMIN-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
-; CHECK32-IZHINXMIN-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
-; CHECK32-IZHINXMIN-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; CHECK32-IZHINXMIN-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; CHECK32-IZHINXMIN-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; CHECK32-IZHINXMIN-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
-; CHECK32-IZHINXMIN-NEXT:    addi sp, sp, 32
+; CHECK32-IZHINXMIN-NEXT:    feq.s a1, s0, s0
+; CHECK32-IZHINXMIN-NEXT:    neg a4, a1
+; CHECK32-IZHINXMIN-NEXT:    and a1, a4, a2
+; CHECK32-IZHINXMIN-NEXT:    neg a2, a3
+; CHECK32-IZHINXMIN-NEXT:    neg a3, s1
+; CHECK32-IZHINXMIN-NEXT:    and a0, a3, a0
+; CHECK32-IZHINXMIN-NEXT:    or a0, a2, a0
+; CHECK32-IZHINXMIN-NEXT:    and a0, a4, a0
+; CHECK32-IZHINXMIN-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; CHECK32-IZHINXMIN-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; CHECK32-IZHINXMIN-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
+; CHECK32-IZHINXMIN-NEXT:    addi sp, sp, 16
 ; CHECK32-IZHINXMIN-NEXT:    ret
 ;
 ; CHECK64-IZHINXMIN-LABEL: fcvt_l_h_sat:
@@ -2698,46 +2680,40 @@ define i64 @fcvt_l_h_sat(half %a) nounwind {
 ;
 ; CHECK32-IZDINXZHINXMIN-LABEL: fcvt_l_h_sat:
 ; CHECK32-IZDINXZHINXMIN:       # %bb.0: # %start
-; CHECK32-IZDINXZHINXMIN-NEXT:    addi sp, sp, -32
-; CHECK32-IZDINXZHINXMIN-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
-; CHECK32-IZDINXZHINXMIN-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
-; CHECK32-IZDINXZHINXMIN-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
-; CHECK32-IZDINXZHINXMIN-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
-; CHECK32-IZDINXZHINXMIN-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
-; CHECK32-IZDINXZHINXMIN-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
+; CHECK32-IZDINXZHINXMIN-NEXT:    addi sp, sp, -16
+; CHECK32-IZDINXZHINXMIN-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; CHECK32-IZDINXZHINXMIN-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; CHECK32-IZDINXZHINXMIN-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
 ; CHECK32-IZDINXZHINXMIN-NEXT:    fcvt.s.h s0, a0
-; CHECK32-IZDINXZHINXMIN-NEXT:    lui a0, 389120
-; CHECK32-IZDINXZHINXMIN-NEXT:    addi a0, a0, -1
-; CHECK32-IZDINXZHINXMIN-NEXT:    flt.s s1, a0, s0
-; CHECK32-IZDINXZHINXMIN-NEXT:    neg s2, s1
 ; CHECK32-IZDINXZHINXMIN-NEXT:    lui a0, 913408
-; CHECK32-IZDINXZHINXMIN-NEXT:    fle.s s3, a0, s0
-; CHECK32-IZDINXZHINXMIN-NEXT:    neg s4, s3
+; CHECK32-IZDINXZHINXMIN-NEXT:    fle.s s1, a0, s0
 ; CHECK32-IZDINXZHINXMIN-NEXT:    mv a0, s0
 ; CHECK32-IZDINXZHINXMIN-NEXT:    call __fixsfdi
-; CHECK32-IZDINXZHINXMIN-NEXT:    and a0, s4, a0
-; CHECK32-IZDINXZHINXMIN-NEXT:    or a0, s2, a0
-; CHECK32-IZDINXZHINXMIN-NEXT:    feq.s a2, s0, s0
-; CHECK32-IZDINXZHINXMIN-NEXT:    neg a2, a2
 ; CHECK32-IZDINXZHINXMIN-NEXT:    lui a4, 524288
-; CHECK32-IZDINXZHINXMIN-NEXT:    lui a3, 524288
-; CHECK32-IZDINXZHINXMIN-NEXT:    beqz s3, .LBB10_2
+; CHECK32-IZDINXZHINXMIN-NEXT:    lui a2, 524288
+; CHECK32-IZDINXZHINXMIN-NEXT:    beqz s1, .LBB10_2
 ; CHECK32-IZDINXZHINXMIN-NEXT:  # %bb.1: # %start
-; CHECK32-IZDINXZHINXMIN-NEXT:    mv a3, a1
+; CHECK32-IZDINXZHINXMIN-NEXT:    mv a2, a1
 ; CHECK32-IZDINXZHINXMIN-NEXT:  .LBB10_2: # %start
-; CHECK32-IZDINXZHINXMIN-NEXT:    and a0, a2, a0
-; CHECK32-IZDINXZHINXMIN-NEXT:    beqz s1, .LBB10_4
+; CHECK32-IZDINXZHINXMIN-NEXT:    lui a1, 389120
+; CHECK32-IZDINXZHINXMIN-NEXT:    addi a1, a1, -1
+; CHECK32-IZDINXZHINXMIN-NEXT:    flt.s a3, a1, s0
+; CHECK32-IZDINXZHINXMIN-NEXT:    beqz a3, .LBB10_4
 ; CHECK32-IZDINXZHINXMIN-NEXT:  # %bb.3:
-; CHECK32-IZDINXZHINXMIN-NEXT:    addi a3, a4, -1
+; CHECK32-IZDINXZHINXMIN-NEXT:    addi a2, a4, -1
 ; CHECK32-IZDINXZHINXMIN-NEXT:  .LBB10_4: # %start
-; CHECK32-IZDINXZHINXMIN-NEXT:    and a1, a2, a3
-; CHECK32-IZDINXZHINXMIN-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
-; CHECK32-IZDINXZHINXMIN-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
-; CHECK32-IZDINXZHINXMIN-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; CHECK32-IZDINXZHINXMIN-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; CHECK32-IZDINXZHINXMIN-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; CHECK32-IZDINXZHINXMIN-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
-; CHECK32-IZDINXZHINXMIN-NEXT:    addi sp, sp, 32
+; CHECK32-IZDINXZHINXMIN-NEXT:    feq.s a1, s0, s0
+; CHECK32-IZDINXZHINXMIN-NEXT:    neg a4, a1
+; CHECK32-IZDINXZHINXMIN-NEXT:    and a1, a4, a2
+; CHECK32-IZDINXZHINXMIN-NEXT:    neg a2, a3
+; CHECK32-IZDINXZHINXMIN-NEXT:    neg a3, s1
+; CHECK32-IZDINXZHINXMIN-NEXT:    and a0, a3, a0
+; CHECK32-IZDINXZHINXMIN-NEXT:    or a0, a2, a0
+; CHECK32-IZDINXZHINXMIN-NEXT:    and a0, a4, a0
+; CHECK32-IZDINXZHINXMIN-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; CHECK32-IZDINXZHINXMIN-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; CHECK32-IZDINXZHINXMIN-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
+; CHECK32-IZDINXZHINXMIN-NEXT:    addi sp, sp, 16
 ; CHECK32-IZDINXZHINXMIN-NEXT:    ret
 ;
 ; CHECK64-IZDINXZHINXMIN-LABEL: fcvt_l_h_sat:
diff --git a/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll b/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll
index 0b93c8789fca5e..9e1a26e74d70b9 100644
--- a/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll
+++ b/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll
@@ -167,38 +167,36 @@ define i64 @test_floor_si64(half %x) nounwind {
 ; RV32IZHINX-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32IZHINX-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
 ; RV32IZHINX-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
-; RV32IZHINX-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
 ; RV32IZHINX-NEXT:    fcvt.s.h s0, a0
 ; RV32IZHINX-NEXT:    lui a0, 913408
 ; RV32IZHINX-NEXT:    fle.s s1, a0, s0
-; RV32IZHINX-NEXT:    neg s2, s1
 ; RV32IZHINX-NEXT:    mv a0, s0
 ; RV32IZHINX-NEXT:    call __fixsfdi
-; RV32IZHINX-NEXT:    and a0, s2, a0
-; RV32IZHINX-NEXT:    lui a2, 389120
-; RV32IZHINX-NEXT:    addi a2, a2, -1
-; RV32IZHINX-NEXT:    flt.s a4, a2, s0
-; RV32IZHINX-NEXT:    neg a2, a4
-; RV32IZHINX-NEXT:    or a0, a2, a0
-; RV32IZHINX-NEXT:    feq.s a2, s0, s0
-; RV32IZHINX-NEXT:    neg a2, a2
-; RV32IZHINX-NEXT:    lui a5, 524288
-; RV32IZHINX-NEXT:    lui a3, 524288
+; RV32IZHINX-NEXT:    lui a4, 524288
+; RV32IZHINX-NEXT:    lui a2, 524288
 ; RV32IZHINX-NEXT:    beqz s1, .LBB1_4
 ; RV32IZHINX-NEXT:  # %bb.3:
-; RV32IZHINX-NEXT:    mv a3, a1
+; RV32IZHINX-NEXT:    mv a2, a1
 ; RV32IZHINX-NEXT:  .LBB1_4:
+; RV32IZHINX-NEXT:    lui a1, 389120
+; RV32IZHINX-NEXT:    addi a1, a1, -1
+; RV32IZHINX-NEXT:    flt.s a3, a1, s0
+; RV32IZHINX-NEXT:    beqz a3, .LBB1_6
+; RV32IZHINX-NEXT:  # %bb.5:
+; RV32IZHINX-NEXT:    addi a2, a4, -1
+; RV32IZHINX-NEXT:  .LBB1_6:
+; RV32IZHINX-NEXT:    feq.s a1, s0, s0
+; RV32IZHINX-NEXT:    neg a4, a1
+; RV32IZHINX-NEXT:    and a1, a4, a2
+; RV32IZHINX-NEXT:    neg a2, s1
 ; RV32IZHINX-NEXT:    and a0, a2, a0
+; RV32IZHINX-NEXT:    neg a2, a3
+; RV32IZHINX-NEXT:    or a0, a2, a0
+; RV32IZHINX-NEXT:    and a0, a4, a0
 ; RV32IZHINX-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IZHINX-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IZHINX-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32IZHINX-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
 ; RV32IZHINX-NEXT:    addi sp, sp, 16
-; RV32IZHINX-NEXT:    beqz a4, .LBB1_6
-; RV32IZHINX-NEXT:  # %bb.5:
-; RV32IZHINX-NEXT:    addi a3, a5, -1
-; RV32IZHINX-NEXT:  .LBB1_6:
-; RV32IZHINX-NEXT:    and a1, a2, a3
 ; RV32IZHINX-NEXT:    ret
 ;
 ; RV64IZHINX-LABEL: test_floor_si64:
@@ -309,39 +307,37 @@ define i64 @test_floor_si64(half %x) nounwind {
 ; RV32IZHINXMIN-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32IZHINXMIN-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
 ; RV32IZHINXMIN-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
-; RV32IZHINXMIN-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
 ; RV32IZHINXMIN-NEXT:    fcvt.h.s a0, a0
 ; RV32IZHINXMIN-NEXT:    fcvt.s.h s0, a0
 ; RV32IZHINXMIN-NEXT:    lui a0, 913408
 ; RV32IZHINXMIN-NEXT:    fle.s s1, a0, s0
-; RV32IZHINXMIN-NEXT:    neg s2, s1
 ; RV32IZHINXMIN-NEXT:    mv a0, s0
 ; RV32IZHINXMIN-NEXT:    call __fixsfdi
-; RV32IZHINXMIN-NEXT:    and a0, s2, a0
-; RV32IZHINXMIN-NEXT:    lui a2, 389120
-; RV32IZHINXMIN-NEXT:    addi a2, a2, -1
-; RV32IZHINXMIN-NEXT:    flt.s a4, a2, s0
-; RV32IZHINXMIN-NEXT:    neg a2, a4
-; RV32IZHINXMIN-NEXT:    or a0, a2, a0
-; RV32IZHINXMIN-NEXT:    feq.s a2, s0, s0
-; RV32IZHINXMIN-NEXT:    neg a2, a2
-; RV32IZHINXMIN-NEXT:    lui a5, 524288
-; RV32IZHINXMIN-NEXT:    lui a3, 524288
+; RV32IZHINXMIN-NEXT:    lui a4, 524288
+; RV32IZHINXMIN-NEXT:    lui a2, 524288
 ; RV32IZHINXMIN-NEXT:    beqz s1, .LBB1_4
 ; RV32IZHINXMIN-NEXT:  # %bb.3:
-; RV32IZHINXMIN-NEXT:    mv a3, a1
+; RV32IZHINXMIN-NEXT:    mv a2, a1
 ; RV32IZHINXMIN-NEXT:  .LBB1_4:
+; RV32IZHINXMIN-NEXT:    lui a1, 389120
+; RV32IZHINXMIN-NEXT:    addi a1, a1, -1
+; RV32IZHINXMIN-NEXT:    flt.s a3, a1, s0
+; RV32IZHINXMIN-NEXT:    beqz a3, .LBB1_6
+; RV32IZHINXMIN-NEXT:  # %bb.5:
+; RV32IZHINXMIN-NEXT:    addi a2, a4, -1
+; RV32IZHINXMIN-NEXT:  .LBB1_6:
+; RV32IZHINXMIN-NEXT:    feq.s a1, s0, s0
+; RV32IZHINXMIN-NEXT:    neg a4, a1
+; RV32IZHINXMIN-NEXT:    and a1, a4, a2
+; RV32IZHINXMIN-NEXT:    neg a2, s1
 ; RV32IZHINXMIN-NEXT:    and a0, a2, a0
+; RV32IZHINXMIN-NEXT:    neg a2, a3
+; RV32IZHINXMIN-NEXT:    or a0, a2, a0
+; RV32IZHINXMIN-NEXT:    and a0, a4, a0
 ; RV32IZHINXMIN-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IZHINXMIN-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IZHINXMIN-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32IZHINXMIN-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
 ; RV32IZHINXMIN-NEXT:    addi sp, sp, 16
-; RV32IZHINXMIN-NEXT:    beqz a4, .LBB1_6
-; RV32IZHINXMIN-NEXT:  # %bb.5:
-; RV32IZHINXMIN-NEXT:    addi a3, a5, -1
-; RV32IZHINXMIN-NEXT:  .LBB1_6:
-; RV32IZHINXMIN-NEXT:    and a1, a2, a3
 ; RV32IZHINXMIN-NEXT:    ret
 ;
 ; RV64IZHINXMIN-LABEL: test_floor_si64:
@@ -879,38 +875,36 @@ define i64 @test_ceil_si64(half %x) nounwind {
 ; RV32IZHINX-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32IZHINX-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
 ; RV32IZHINX-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
-; RV32IZHINX-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
 ; RV32IZHINX-NEXT:    fcvt.s.h s0, a0
 ; RV32IZHINX-NEXT:    lui a0, 913408
 ; RV32IZHINX-NEXT:    fle.s s1, a0, s0
-; RV32IZHINX-NEXT:    neg s2, s1
 ; RV32IZHINX-NEXT:    mv a0, s0
 ; RV32IZHINX-NEXT:    call __fixsfdi
-; RV32IZHINX-NEXT:    and a0, s2, a0
-; RV32IZHINX-NEXT:    lui a2, 389120
-; RV32IZHINX-NEXT:    addi a2, a2, -1
-; RV32IZHINX-NEXT:    flt.s a4, a2, s0
-; RV32IZHINX-NEXT:    neg a2, a4
-; RV32IZHINX-NEXT:    or a0, a2, a0
-; RV32IZHINX-NEXT:    feq.s a2, s0, s0
-; RV32IZHINX-NEXT:    neg a2, a2
-; RV32IZHINX-NEXT:    lui a5, 524288
-; RV32IZHINX-NEXT:    lui a3, 524288
+; RV32IZHINX-NEXT:    lui a4, 524288
+; RV32IZHINX-NEXT:    lui a2, 524288
 ; RV32IZHINX-NEXT:    beqz s1, .LBB5_4
 ; RV32IZHINX-NEXT:  # %bb.3:
-; RV32IZHINX-NEXT:    mv a3, a1
+; RV32IZHINX-NEXT:    mv a2, a1
 ; RV32IZHINX-NEXT:  .LBB5_4:
+; RV32IZHINX-NEXT:    lui a1, 389120
+; RV32IZHINX-NEXT:    addi a1, a1, -1
+; RV32IZHINX-NEXT:    flt.s a3, a1, s0
+; RV32IZHINX-NEXT:    beqz a3, .LBB5_6
+; RV32IZHINX-NEXT:  # %bb.5:
+; RV32IZHINX-NEXT:    addi a2, a4, -1
+; RV32IZHINX-NEXT:  .LBB5_6:
+; RV32IZHINX-NEXT:    feq.s a1, s0, s0
+; RV32IZHINX-NEXT:    neg a4, a1
+; RV32IZHINX-NEXT:    and a1, a4, a2
+; RV32IZHINX-NEXT:    neg a2, s1
 ; RV32IZHINX-NEXT:    and a0, a2, a0
+; RV32IZHINX-NEXT:    neg a2, a3
+; RV32IZHINX-NEXT:    or a0, a2, a0
+; RV32IZHINX-NEXT:    and a0, a4, a0
 ; RV32IZHINX-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IZHINX-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IZHINX-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32IZHINX-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
 ; RV32IZHINX-NEXT:    addi sp, sp, 16
-; RV32IZHINX-NEXT:    beqz a4, .LBB5_6
-; RV32IZHINX-NEXT:  # %bb.5:
-; RV32IZHINX-NEXT:    addi a3, a5, -1
-; RV32IZHINX-NEXT:  .LBB5_6:
-; RV32IZHINX-NEXT:    and a1, a2, a3
 ; RV32IZHINX-NEXT:    ret
 ;
 ; RV64IZHINX-LABEL: test_ceil_si64:
@@ -1021,39 +1015,37 @@ define i64 @test_ceil_si64(half %x) nounwind {
 ; RV32IZHINXMIN-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32IZHINXMIN-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
 ; RV32IZHINXMIN-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
-; RV32IZHINXMIN-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
 ; RV32IZHINXMIN-NEXT:    fcvt.h.s a0, a0
 ; RV32IZHINXMIN-NEXT:    fcvt.s.h s0, a0
 ; RV32IZHINXMIN-NEXT:    lui a0, 913408
 ; RV32IZHINXMIN-NEXT:    fle.s s1, a0, s0
-; RV32IZHINXMIN-NEXT:    neg s2, s1
 ; RV32IZHINXMIN-NEXT:    mv a0, s0
 ; RV32IZHINXMIN-NEXT:    call __fixsfdi
-; RV32IZHINXMIN-NEXT:    and a0, s2, a0
-; RV32IZHINXMIN-NEXT:    lui a2, 389120
-; RV32IZHINXMIN-NEXT:    addi a2, a2, -1
-; RV32IZHINXMIN-NEXT:    flt.s a4, a2, s0
-; RV32IZHINXMIN-NEXT:    neg a2, a4
-; RV32IZHINXMIN-NEXT:    or a0, a2, a0
-; RV32IZHINXMIN-NEXT:    feq.s a2, s0, s0
-; RV32IZHINXMIN-NEXT:    neg a2, a2
-; RV32IZHINXMIN-NEXT:    lui a5, 524288
-; RV32IZHINXMIN-NEXT:    lui a3, 524288
+; RV32IZHINXMIN-NEXT:    lui a4, 524288
+; RV32IZHINXMIN-NEXT:    lui a2, 524288
 ; RV32IZHINXMIN-NEXT:    beqz s1, .LBB5_4
 ; RV32IZHINXMIN-NEXT:  # %bb.3:
-; RV32IZHINXMIN-NEXT:    mv a3, a1
+; RV32IZHINXMIN-NEXT:    mv a2, a1
 ; RV32IZHINXMIN-NEXT:  .LBB5_4:
+; RV32IZHINXMIN-NEXT:    lui a1, 389120
+; RV32IZHINXMIN-NEXT:    addi a1, a1, -1
+; RV32IZHINXMIN-NEXT:    flt.s a3, a1, s0
+; RV32IZHINXMIN-NEXT:    beqz a3, .LBB5_6
+; RV32IZHINXMIN-NEXT:  # %bb.5:
+; RV32IZHINXMIN-NEXT:    addi a2, a4, -1
+; RV32IZHINXMIN-NEXT:  .LBB5_6:
+; RV32IZHINXMIN-NEXT:    feq.s a1, s0, s0
+; RV32IZHINXMIN-NEXT:    neg a4, a1
+; RV32IZHINXMIN-NEXT:    and a1, a4, a2
+; RV32IZHINXMIN-NEXT:    neg a2, s1
 ; RV32IZHINXMIN-NEXT:    and a0, a2, a0
+; RV32IZHINXMIN-NEXT:    neg a2, a3
+; RV32IZHINXMIN-NEXT:    or a0, a2, a0
+; RV32IZHINXMIN-NEXT:    and a0, a4, a0
 ; RV32IZHINXMIN-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IZHINXMIN-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IZHINXMIN-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32IZHINXMIN-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
 ; RV32IZHINXMIN-NEXT:    addi sp, sp, 16
-; RV32IZHINXMIN-NEXT:    beqz a4, .LBB5_6
-; RV32IZHINXMIN-NEXT:  # %bb.5:
-; RV32IZHINXMIN-NEXT:    addi a3, a5, -1
-; RV32IZHINXMIN-NEXT:  .LBB5_6:
-; RV32IZHINXMIN-NEXT:    and a1, a2, a3
 ; RV32IZHINXMIN-NEXT:    ret
 ;
 ; RV64IZHINXMIN-LABEL: test_ceil_si64:
@@ -1591,38 +1583,36 @@ define i64 @test_trunc_si64(half %x) nounwind {
 ; RV32IZHINX-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32IZHINX-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
 ; RV32IZHINX-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
-; RV32IZHINX-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
 ; RV32IZHINX-NEXT:    fcvt.s.h s0, a0
 ; RV32IZHINX-NEXT:    lui a0, 913408
 ; RV32IZHINX-NEXT:    fle.s s1, a0, s0
-; RV32IZHINX-NEXT:    neg s2, s1
 ; RV32IZHINX-NEXT:    mv a0, s0
 ; RV32IZHINX-NEXT:    call __fixsfdi
-; RV32IZHINX-NEXT:    and a0, s2, a0
-; RV32IZHINX-NEXT:    lui a2, 389120
-; RV32IZHINX-NEXT:    addi a2, a2, -1
-; RV32IZHINX-NEXT:    flt.s a4, a2, s0
-; RV32IZHINX-NEXT:    neg a2, a4
-; RV32IZHINX-NEXT:    or a0, a2, a0
-; RV32IZHINX-NEXT:    feq.s a2, s0, s0
-; RV32IZHINX-NEXT:    neg a2, a2
-; RV32IZHINX-NEXT:    lui a5, 524288
-; RV32IZHINX-NEXT:    lui a3, 524288
+; RV32IZHINX-NEXT:    lui a4, 524288
+; RV32IZHINX-NEXT:    lui a2, 524288
 ; RV32IZHINX-NEXT:    beqz s1, .LBB9_4
 ; RV32IZHINX-NEXT:  # %bb.3:
-; RV32IZHINX-NEXT:    mv a3, a1
+; RV32IZHINX-NEXT:    mv a2, a1
 ; RV32IZHINX-NEXT:  .LBB9_4:
+; RV32IZHINX-NEXT:    lui a1, 389120
+; RV32IZHINX-NEXT:    addi a1, a1, -1
+; RV32IZHINX-NEXT:    flt.s a3, a1, s0
+; RV32IZHINX-NEXT:    beqz a3, .LBB9_6
+; RV32IZHINX-NEXT:  # %bb.5:
+; RV32IZHINX-NEXT:    addi a2, a4, -1
+; RV32IZHINX-NEXT:  .LBB9_6:
+; RV32IZHINX-NEXT:    feq.s a1, s0, s0
+; RV32IZHINX-NEXT:    neg a4, a1
+; RV32IZHINX-NEXT:    and a1, a4, a2
+; RV32IZHINX-NEXT:    neg a2, s1
 ; RV32IZHINX-NEXT:    and a0, a2, a0
+; RV32IZHINX-NEXT:    neg a2, a3
+; RV32IZHINX-NEXT:    or a0, a2, a0
+; RV32IZHINX-NEXT:    and a0, a4, a0
 ; RV32IZHINX-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IZHINX-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IZHINX-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32IZHINX-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
 ; RV32IZHINX-NEXT:    addi sp, sp, 16
-; RV32IZHINX-NEXT:    beqz a4, .LBB9_6
-; RV32IZHINX-NEXT:  # %bb.5:
-; RV32IZHINX-NEXT:    addi a3, a5, -1
-; RV32IZHINX-NEXT:  .LBB9_6:
-; RV32IZHINX-NEXT:    and a1, a2, a3
 ; RV32IZHINX-NEXT:    ret
 ;
 ; RV64IZHINX-LABEL: test_trunc_si64:
@@ -1733,39 +1723,37 @@ define i64 @test_trunc_si64(half %x) nounwind {
 ; RV32IZHINXMIN-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32IZHINXMIN-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
 ; RV32IZHINXMIN-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
-; RV32IZHINXMIN-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
 ; RV32IZHINXMIN-NEXT:    fcvt.h.s a0, a0
 ; RV32IZHINXMIN-NEXT:    fcvt.s.h s0, a0
 ; RV32IZHINXMIN-NEXT:    lui a0, 913408
 ; RV32IZHINXMIN-NEXT:    fle.s s1, a0, s0
-; RV32IZHINXMIN-NEXT:    neg s2, s1
 ; RV32IZHINXMIN-NEXT:    mv a0, s0
 ; RV32IZHINXMIN-NEXT:    call __fixsfdi
-; RV32IZHINXMIN-NEXT:    and a0, s2, a0
-; RV32IZHINXMIN-NEXT:    lui a2, 389120
-; RV32IZHINXMIN-NEXT:    addi a2, a2, -1
-; RV32IZHINXMIN-NEXT:    flt.s a4, a2, s0
-; RV32IZHINXMIN-NEXT:    neg a2, a4
-; RV32IZHINXMIN-NEXT:    or a0, a2, a0
-; RV32IZHINXMIN-NEXT:    feq.s a2, s0, s0
-; RV32IZHINXMIN-NEXT:    neg a2, a2
-; RV32IZHINXMIN-NEXT:    lui a5, 524288
-; RV32IZHINXMIN-NEXT:    lui a3, 524288
+; RV32IZHINXMIN-NEXT:    lui a4, 524288
+; RV32IZHINXMIN-NEXT:    lui a2, 524288
 ; RV32IZHINXMIN-NEXT:    beqz s1, .LBB9_4
 ; RV32IZHINXMIN-NEXT:  # %bb.3:
-; RV32IZHINXMIN-NEXT:    mv a3, a1
+; RV32IZHINXMIN-NEXT:    mv a2, a1
 ; RV32IZHINXMIN-NEXT:  .LBB9_4:
+; RV32IZHINXMIN-NEXT:    lui a1, 389120
+; RV32IZHINXMIN-NEXT:    addi a1, a1, -1
+; RV32IZHINXMIN-NEXT:    flt.s a3, a1, s0
+; RV32IZHINXMIN-NEXT:    beqz a3, .LBB9_6
+; RV32IZHINXMIN-NEXT:  # %bb.5:
+; RV32IZHINXMIN-NEXT:    addi a2, a4, -1
+; RV32IZHINXMIN-NEXT:  .LBB9_6:
+; RV32IZHINXMIN-NEXT:    feq.s a1, s0, s0
+; RV32IZHINXMIN-NEXT:    neg a4, a1
+; RV32IZHINXMIN-NEXT:    and a1, a4, a2
+; RV32IZHINXMIN-NEXT:    neg a2, s1
 ; RV32IZHINXMIN-NEXT:    and a0, a2, a0
+; RV32IZHINXMIN-NEXT:    neg a2, a3
+; RV32IZHINXMIN-NEXT:    or a0, a2, a0
+; RV32IZHINXMIN-NEXT:    and a0, a4, a0
 ; RV32IZHINXMIN-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IZHINXMIN-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IZHINXMIN-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32IZHINXMIN-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
 ; RV32IZHINXMIN-NEXT:    addi sp, sp, 16
-; RV32IZHINXMIN-NEXT:    beqz a4, .LBB9_6
-; RV32IZHINXMIN-NEXT:  # %bb.5:
-; RV32IZHINXMIN-NEXT:    addi a3, a5, -1
-; RV32IZHINXMIN-NEXT:  .LBB9_6:
-; RV32IZHINXMIN-NEXT:    and a1, a2, a3
 ; RV32IZHINXMIN-NEXT:    ret
 ;
 ; RV64IZHINXMIN-LABEL: test_trunc_si64:
@@ -2303,38 +2291,36 @@ define i64 @test_round_si64(half %x) nounwind {
 ; RV32IZHINX-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32IZHINX-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
 ; RV32IZHINX-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
-; RV32IZHINX-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
 ; RV32IZHINX-NEXT:    fcvt.s.h s0, a0
 ; RV32IZHINX-NEXT:    lui a0, 913408
 ; RV32IZHINX-NEXT:    fle.s s1, a0, s0
-; RV32IZHINX-NEXT:    neg s2, s1
 ; RV32IZHINX-NEXT:    mv a0, s0
 ; RV32IZHINX-NEXT:    call __fixsfdi
-; RV32IZHINX-NEXT:    and a0, s2, a0
-; RV32IZHINX-NEXT:    lui a2, 389120
-; RV32IZHINX-NEXT:    addi a2, a2, -1
-; RV32IZHINX-NEXT:    flt.s a4, a2, s0
-; RV32IZHINX-NEXT:    neg a2, a4
-; RV32IZHINX-NEXT:    or a0, a2, a0
-; RV32IZHINX-NEXT:    feq.s a2, s0, s0
-; RV32IZHINX-NEXT:    neg a2, a2
-; RV32IZHINX-NEXT:    lui a5, 524288
-; RV32IZHINX-NEXT:    lui a3, 524288
+; RV32IZHINX-NEXT:    lui a4, 524288
+; RV32IZHINX-NEXT:    lui a2, 524288
 ; RV32IZHINX-NEXT:    beqz s1, .LBB13_4
 ; RV32IZHINX-NEXT:  # %bb.3:
-; RV32IZHINX-NEXT:    mv a3, a1
+; RV32IZHINX-NEXT:    mv a2, a1
 ; RV32IZHINX-NEXT:  .LBB13_4:
+; RV32IZHINX-NEXT:    lui a1, 389120
+; RV32IZHINX-NEXT:    addi a1, a1, -1
+; RV32IZHINX-NEXT:    flt.s a3, a1, s0
+; RV32IZHINX-NEXT:    beqz a3, .LBB13_6
+; RV32IZHINX-NEXT:  # %bb.5:
+; RV32IZHINX-NEXT:    addi a2, a4, -1
+; RV32IZHINX-NEXT:  .LBB13_6:
+; RV32IZHINX-NEXT:    feq.s a1, s0, s0
+; RV32IZHINX-NEXT:    neg a4, a1
+; RV32IZHINX-NEXT:    and a1, a4, a2
+; RV32IZHINX-NEXT:    neg a2, s1
 ; RV32IZHINX-NEXT:    and a0, a2, a0
+; RV32IZHINX-NEXT:    neg a2, a3
+; RV32IZHINX-NEXT:    or a0, a2, a0
+; RV32IZHINX-NEXT:    and a0, a4, a0
 ; RV32IZHINX-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IZHINX-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IZHINX-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32IZHINX-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
 ; RV32IZHINX-NEXT:    addi sp, sp, 16
-; RV32IZHINX-NEXT:    beqz a4, .LBB13_6
-; RV32IZHINX-NEXT:  # %bb.5:
-; RV32IZHINX-NEXT:    addi a3, a5, -1
-; RV32IZHINX-NEXT:  .LBB13_6:
-; RV32IZHINX-NEXT:    and a1, a2, a3
 ; RV32IZHINX-NEXT:    ret
 ;
 ; RV64IZHINX-LABEL: test_round_si64:
@@ -2445,39 +2431,37 @@ define i64 @test_round_si64(half %x) nounwind {
 ; RV32IZHINXMIN-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32IZHINXMIN-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
 ; RV32IZHINXMIN-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
-; RV32IZHINXMIN-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
 ; RV32IZHINXMIN-NEXT:    fcvt.h.s a0, a0
 ; RV32IZHINXMIN-NEXT:    fcvt.s.h s0, a0
 ; RV32IZHINXMIN-NEXT:    lui a0, 913408
 ; RV32IZHINXMIN-NEXT:    fle.s s1, a0, s0
-; RV32IZHINXMIN-NEXT:    neg s2, s1
 ; RV32IZHINXMIN-NEXT:    mv a0, s0
 ; RV32IZHINXMIN-NEXT:    call __fixsfdi
-; RV32IZHINXMIN-NEXT:    and a0, s2, a0
-; RV32IZHINXMIN-NEXT:    lui a2, 389120
-; RV32IZHINXMIN-NEXT:    addi a2, a2, -1
-; RV32IZHINXMIN-NEXT:    flt.s a4, a2, s0
-; RV32IZHINXMIN-NEXT:    neg a2, a4
-; RV32IZHINXMIN-NEXT:    or a0, a2, a0
-; RV32IZHINXMIN-NEXT:    feq.s a2, s0, s0
-; RV32IZHINXMIN-NEXT:    neg a2, a2
-; RV32IZHINXMIN-NEXT:    lui a5, 524288
-; RV32IZHINXMIN-NEXT:    lui a3, 524288
+; RV32IZHINXMIN-NEXT:    lui a4, 524288
+; RV32IZHINXMIN-NEXT:    lui a2, 524288
 ; RV32IZHINXMIN-NEXT:    beqz s1, .LBB13_4
 ; RV32IZHINXMIN-NEXT:  # %bb.3:
-; RV32IZHINXMIN-NEXT:    mv a3, a1
+; RV32IZHINXMIN-NEXT:    mv a2, a1
 ; RV32IZHINXMIN-NEXT:  .LBB13_4:
+; RV32IZHINXMIN-NEXT:    lui a1, 389120
+; RV32IZHINXMIN-NEXT:    addi a1, a1, -1
+; RV32IZHINXMIN-NEXT:    flt.s a3, a1, s0
+; RV32IZHINXMIN-NEXT:    beqz a3, .LBB13_6
+; RV32IZHINXMIN-NEXT:  # %bb.5:
+; RV32IZHINXMIN-NEXT:    addi a2, a4, -1
+; RV32IZHINXMIN-NEXT:  .LBB13_6:
+; RV32IZHINXMIN-NEXT:    feq.s a1, s0, s0
+; RV32IZHINXMIN-NEXT:    neg a4, a1
+; RV32IZHINXMIN-NEXT:    and a1, a4, a2
+; RV32IZHINXMIN-NEXT:    neg a2, s1
 ; RV32IZHINXMIN-NEXT:    and a0, a2, a0
+; RV32IZHINXMIN-NEXT:    neg a2, a3
+; RV32IZHINXMIN-NEXT:    or a0, a2, a0
+; RV32IZHINXMIN-NEXT:    and a0, a4, a0
 ; RV32IZHINXMIN-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IZHINXMIN-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IZHINXMIN-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32IZHINXMIN-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
 ; RV32IZHINXMIN-NEXT:    addi sp, sp, 16
-; RV32IZHINXMIN-NEXT:    beqz a4, .LBB13_6
-; RV32IZHINXMIN-NEXT:  # %bb.5:
-; RV32IZHINXMIN-NEXT:    addi a3, a5, -1
-; RV32IZHINXMIN-NEXT:  .LBB13_6:
-; RV32IZHINXMIN-NEXT:    and a1, a2, a3
 ; RV32IZHINXMIN-NEXT:    ret
 ;
 ; RV64IZHINXMIN-LABEL: test_round_si64:
@@ -3015,38 +2999,36 @@ define i64 @test_roundeven_si64(half %x) nounwind {
 ; RV32IZHINX-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32IZHINX-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
 ; RV32IZHINX-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
-; RV32IZHINX-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
 ; RV32IZHINX-NEXT:    fcvt.s.h s0, a0
 ; RV32IZHINX-NEXT:    lui a0, 913408
 ; RV32IZHINX-NEXT:    fle.s s1, a0, s0
-; RV32IZHINX-NEXT:    neg s2, s1
 ; RV32IZHINX-NEXT:    mv a0, s0
 ; RV32IZHINX-NEXT:    call __fixsfdi
-; RV32IZHINX-NEXT:    and a0, s2, a0
-; RV32IZHINX-NEXT:    lui a2, 389120
-; RV32IZHINX-NEXT:    addi a2, a2, -1
-; RV32IZHINX-NEXT:    flt.s a4, a2, s0
-; RV32IZHINX-NEXT:    neg a2, a4
-; RV32IZHINX-NEXT:    or a0, a2, a0
-; RV32IZHINX-NEXT:    feq.s a2, s0, s0
-; RV32IZHINX-NEXT:    neg a2, a2
-; RV32IZHINX-NEXT:    lui a5, 524288
-; RV32IZHINX-NEXT:    lui a3, 524288
+; RV32IZHINX-NEXT:    lui a4, 524288
+; RV32IZHINX-NEXT:    lui a2, 524288
 ; RV32IZHINX-NEXT:    beqz s1, .LBB17_4
 ; RV32IZHINX-NEXT:  # %bb.3:
-; RV32IZHINX-NEXT:    mv a3, a1
+; RV32IZHINX-NEXT:    mv a2, a1
 ; RV32IZHINX-NEXT:  .LBB17_4:
+; RV32IZHINX-NEXT:    lui a1, 389120
+; RV32IZHINX-NEXT:    addi a1, a1, -1
+; RV32IZHINX-NEXT:    flt.s a3, a1, s0
+; RV32IZHINX-NEXT:    beqz a3, .LBB17_6
+; RV32IZHINX-NEXT:  # %bb.5:
+; RV32IZHINX-NEXT:    addi a2, a4, -1
+; RV32IZHINX-NEXT:  .LBB17_6:
+; RV32IZHINX-NEXT:    feq.s a1, s0, s0
+; RV32IZHINX-NEXT:    neg a4, a1
+; RV32IZHINX-NEXT:    and a1, a4, a2
+; RV32IZHINX-NEXT:    neg a2, s1
 ; RV32IZHINX-NEXT:    and a0, a2, a0
+; RV32IZHINX-NEXT:    neg a2, a3
+; RV32IZHINX-NEXT:    or a0, a2, a0
+; RV32IZHINX-NEXT:    and a0, a4, a0
 ; RV32IZHINX-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IZHINX-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IZHINX-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32IZHINX-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
 ; RV32IZHINX-NEXT:    addi sp, sp, 16
-; RV32IZHINX-NEXT:    beqz a4, .LBB17_6
-; RV32IZHINX-NEXT:  # %bb.5:
-; RV32IZHINX-NEXT:    addi a3, a5, -1
-; RV32IZHINX-NEXT:  .LBB17_6:
-; RV32IZHINX-NEXT:    and a1, a2, a3
 ; RV32IZHINX-NEXT:    ret
 ;
 ; RV64IZHINX-LABEL: test_roundeven_si64:
@@ -3157,39 +3139,37 @@ define i64 @test_roundeven_si64(half %x) nounwind {
 ; RV32IZHINXMIN-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32IZHINXMIN-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
 ; RV32IZHINXMIN-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
-; RV32IZHINXMIN-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
 ; RV32IZHINXMIN-NEXT:    fcvt.h.s a0, a0
 ; RV32IZHINXMIN-NEXT:    fcvt.s.h s0, a0
 ; RV32IZHINXMIN-NEXT:    lui a0, 913408
 ; RV32IZHINXMIN-NEXT:    fle.s s1, a0, s0
-; RV32IZHINXMIN-NEXT:    neg s2, s1
 ; RV32IZHINXMIN-NEXT:    mv a0, s0
 ; RV32IZHINXMIN-NEXT:    call __fixsfdi
-; RV32IZHINXMIN-NEXT:    and a0, s2, a0
-; RV32IZHINXMIN-NEXT:    lui a2, 389120
-; RV32IZHINXMIN-NEXT:    addi a2, a2, -1
-; RV32IZHINXMIN-NEXT:    flt.s a4, a2, s0
-; RV32IZHINXMIN-NEXT:    neg a2, a4
-; RV32IZHINXMIN-NEXT:    or a0, a2, a0
-; RV32IZHINXMIN-NEXT:    feq.s a2, s0, s0
-; RV32IZHINXMIN-NEXT:    neg a2, a2
-; RV32IZHINXMIN-NEXT:    lui a5, 524288
-; RV32IZHINXMIN-NEXT:    lui a3, 524288
+; RV32IZHINXMIN-NEXT:    lui a4, 524288
+; RV32IZHINXMIN-NEXT:    lui a2, 524288
 ; RV32IZHINXMIN-NEXT:    beqz s1, .LBB17_4
 ; RV32IZHINXMIN-NEXT:  # %bb.3:
-; RV32IZHINXMIN-NEXT:    mv a3, a1
+; RV32IZHINXMIN-NEXT:    mv a2, a1
 ; RV32IZHINXMIN-NEXT:  .LBB17_4:
+; RV32IZHINXMIN-NEXT:    lui a1, 389120
+; RV32IZHINXMIN-NEXT:    addi a1, a1, -1
+; RV32IZHINXMIN-NEXT:    flt.s a3, a1, s0
+; RV32IZHINXMIN-NEXT:    beqz a3, .LBB17_6
+; RV32IZHINXMIN-NEXT:  # %bb.5:
+; RV32IZHINXMIN-NEXT:    addi a2, a4, -1
+; RV32IZHINXMIN-NEXT:  .LBB17_6:
+; RV32IZHINXMIN-NEXT:    feq.s a1, s0, s0
+; RV32IZHINXMIN-NEXT:    neg a4, a1
+; RV32IZHINXMIN-NEXT:    and a1, a4, a2
+; RV32IZHINXMIN-NEXT:    neg a2, s1
 ; RV32IZHINXMIN-NEXT:    and a0, a2, a0
+; RV32IZHINXMIN-NEXT:    neg a2, a3
+; RV32IZHINXMIN-NEXT:    or a0, a2, a0
+; RV32IZHINXMIN-NEXT:    and a0, a4, a0
 ; RV32IZHINXMIN-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IZHINXMIN-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IZHINXMIN-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32IZHINXMIN-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
 ; RV32IZHINXMIN-NEXT:    addi sp, sp, 16
-; RV32IZHINXMIN-NEXT:    beqz a4, .LBB17_6
-; RV32IZHINXMIN-NEXT:  # %bb.5:
-; RV32IZHINXMIN-NEXT:    addi a3, a5, -1
-; RV32IZHINXMIN-NEXT:  .LBB17_6:
-; RV32IZHINXMIN-NEXT:    and a1, a2, a3
 ; RV32IZHINXMIN-NEXT:    ret
 ;
 ; RV64IZHINXMIN-LABEL: test_roundeven_si64:
@@ -3727,38 +3707,36 @@ define i64 @test_rint_si64(half %x) nounwind {
 ; RV32IZHINX-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32IZHINX-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
 ; RV32IZHINX-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
-; RV32IZHINX-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
 ; RV32IZHINX-NEXT:    fcvt.s.h s0, a0
 ; RV32IZHINX-NEXT:    lui a0, 913408
 ; RV32IZHINX-NEXT:    fle.s s1, a0, s0
-; RV32IZHINX-NEXT:    neg s2, s1
 ; RV32IZHINX-NEXT:    mv a0, s0
 ; RV32IZHINX-NEXT:    call __fixsfdi
-; RV32IZHINX-NEXT:    and a0, s2, a0
-; RV32IZHINX-NEXT:    lui a2, 389120
-; RV32IZHINX-NEXT:    addi a2, a2, -1
-; RV32IZHINX-NEXT:    flt.s a4, a2, s0
-; RV32IZHINX-NEXT:    neg a2, a4
-; RV32IZHINX-NEXT:    or a0, a2, a0
-; RV32IZHINX-NEXT:    feq.s a2, s0, s0
-; RV32IZHINX-NEXT:    neg a2, a2
-; RV32IZHINX-NEXT:    lui a5, 524288
-; RV32IZHINX-NEXT:    lui a3, 524288
+; RV32IZHINX-NEXT:    lui a4, 524288
+; RV32IZHINX-NEXT:    lui a2, 524288
 ; RV32IZHINX-NEXT:    beqz s1, .LBB21_4
 ; RV32IZHINX-NEXT:  # %bb.3:
-; RV32IZHINX-NEXT:    mv a3, a1
+; RV32IZHINX-NEXT:    mv a2, a1
 ; RV32IZHINX-NEXT:  .LBB21_4:
+; RV32IZHINX-NEXT:    lui a1, 389120
+; RV32IZHINX-NEXT:    addi a1, a1, -1
+; RV32IZHINX-NEXT:    flt.s a3, a1, s0
+; RV32IZHINX-NEXT:    beqz a3, .LBB21_6
+; RV32IZHINX-NEXT:  # %bb.5:
+; RV32IZHINX-NEXT:    addi a2, a4, -1
+; RV32IZHINX-NEXT:  .LBB21_6:
+; RV32IZHINX-NEXT:    feq.s a1, s0, s0
+; RV32IZHINX-NEXT:    neg a4, a1
+; RV32IZHINX-NEXT:    and a1, a4, a2
+; RV32IZHINX-NEXT:    neg a2, s1
 ; RV32IZHINX-NEXT:    and a0, a2, a0
+; RV32IZHINX-NEXT:    neg a2, a3
+; RV32IZHINX-NEXT:    or a0, a2, a0
+; RV32IZHINX-NEXT:    and a0, a4, a0
 ; RV32IZHINX-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IZHINX-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IZHINX-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32IZHINX-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
 ; RV32IZHINX-NEXT:    addi sp, sp, 16
-; RV32IZHINX-NEXT:    beqz a4, .LBB21_6
-; RV32IZHINX-NEXT:  # %bb.5:
-; RV32IZHINX-NEXT:    addi a3, a5, -1
-; RV32IZHINX-NEXT:  .LBB21_6:
-; RV32IZHINX-NEXT:    and a1, a2, a3
 ; RV32IZHINX-NEXT:    ret
 ;
 ; RV64IZHINX-LABEL: test_rint_si64:
@@ -3869,39 +3847,37 @@ define i64 @test_rint_si64(half %x) nounwind {
 ; RV32IZHINXMIN-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32IZHINXMIN-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
 ; RV32IZHINXMIN-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
-; RV32IZHINXMIN-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
 ; RV32IZHINXMIN-NEXT:    fcvt.h.s a0, a0
 ; RV32IZHINXMIN-NEXT:    fcvt.s.h s0, a0
 ; RV32IZHINXMIN-NEXT:    lui a0, 913408
 ; RV32IZHINXMIN-NEXT:    fle.s s1, a0, s0
-; RV32IZHINXMIN-NEXT:    neg s2, s1
 ; RV32IZHINXMIN-NEXT:    mv a0, s0
 ; RV32IZHINXMIN-NEXT:    call __fixsfdi
-; RV32IZHINXMIN-NEXT:    and a0, s2, a0
-; RV32IZHINXMIN-NEXT:    lui a2, 389120
-; RV32IZHINXMIN-NEXT:    addi a2, a2, -1
-; RV32IZHINXMIN-NEXT:    flt.s a4, a2, s0
-; RV32IZHINXMIN-NEXT:    neg a2, a4
-; RV32IZHINXMIN-NEXT:    or a0, a2, a0
-; RV32IZHINXMIN-NEXT:    feq.s a2, s0, s0
-; RV32IZHINXMIN-NEXT:    neg a2, a2
-; RV32IZHINXMIN-NEXT:    lui a5, 524288
-; RV32IZHINXMIN-NEXT:    lui a3, 524288
+; RV32IZHINXMIN-NEXT:    lui a4, 524288
+; RV32IZHINXMIN-NEXT:    lui a2, 524288
 ; RV32IZHINXMIN-NEXT:    beqz s1, .LBB21_4
 ; RV32IZHINXMIN-NEXT:  # %bb.3:
-; RV32IZHINXMIN-NEXT:    mv a3, a1
+; RV32IZHINXMIN-NEXT:    mv a2, a1
 ; RV32IZHINXMIN-NEXT:  .LBB21_4:
+; RV32IZHINXMIN-NEXT:    lui a1, 389120
+; RV32IZHINXMIN-NEXT:    addi a1, a1, -1
+; RV32IZHINXMIN-NEXT:    flt.s a3, a1, s0
+; RV32IZHINXMIN-NEXT:    beqz a3, .LBB21_6
+; RV32IZHINXMIN-NEXT:  # %bb.5:
+; RV32IZHINXMIN-NEXT:    addi a2, a4, -1
+; RV32IZHINXMIN-NEXT:  .LBB21_6:
+; RV32IZHINXMIN-NEXT:    feq.s a1, s0, s0
+; RV32IZHINXMIN-NEXT:    neg a4, a1
+; RV32IZHINXMIN-NEXT:    and a1, a4, a2
+; RV32IZHINXMIN-NEXT:    neg a2, s1
 ; RV32IZHINXMIN-NEXT:    and a0, a2, a0
+; RV32IZHINXMIN-NEXT:    neg a2, a3
+; RV32IZHINXMIN-NEXT:    or a0, a2, a0
+; RV32IZHINXMIN-NEXT:    and a0, a4, a0
 ; RV32IZHINXMIN-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IZHINXMIN-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IZHINXMIN-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32IZHINXMIN-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
 ; RV32IZHINXMIN-NEXT:    addi sp, sp, 16
-; RV32IZHINXMIN-NEXT:    beqz a4, .LBB21_6
-; RV32IZHINXMIN-NEXT:  # %bb.5:
-; RV32IZHINXMIN-NEXT:    addi a3, a5, -1
-; RV32IZHINXMIN-NEXT:  .LBB21_6:
-; RV32IZHINXMIN-NEXT:    and a1, a2, a3
 ; RV32IZHINXMIN-NEXT:    ret
 ;
 ; RV64IZHINXMIN-LABEL: test_rint_si64:

>From e41d70d7e890c889922cafad2e4746cf3ea78c2e Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Thu, 5 Sep 2024 17:25:28 -0700
Subject: [PATCH 2/3] fixup! Update CC_RISCV_FastCC too.

---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   |   2 -
 .../CodeGen/RISCV/fastcc-without-f-reg.ll     | 372 ++++++++++--------
 2 files changed, 198 insertions(+), 176 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index ad8a5b163ec315..33313799752802 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -19706,8 +19706,6 @@ bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
             CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
         return false;
       }
-      LocVT = Subtarget.getXLenVT();
-      LocInfo = CCValAssign::BCvt;
       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
       return false;
     }
diff --git a/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll b/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll
index 2e7ca025314705..8e2fdfc4ba94c3 100644
--- a/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll
+++ b/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll
@@ -832,28 +832,32 @@ define fastcc float @callee_float_32(<32 x float> %A) nounwind {
 define float @caller_float_32(<32 x float> %A) nounwind {
 ; ZHINX32-LABEL: caller_float_32:
 ; ZHINX32:       # %bb.0:
-; ZHINX32-NEXT:    addi sp, sp, -144
-; ZHINX32-NEXT:    sw ra, 140(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    sw s0, 136(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    sw s1, 132(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    sw s2, 128(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    sw s3, 124(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    sw s4, 120(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    sw s5, 116(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    sw s6, 112(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    sw s7, 108(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    sw s8, 104(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    sw s9, 100(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    sw s10, 96(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    sw s11, 92(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    addi sp, sp, -160
+; ZHINX32-NEXT:    sw ra, 156(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s0, 152(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s1, 148(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s2, 144(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s3, 140(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s4, 136(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s5, 132(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s6, 128(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s7, 124(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s8, 120(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s9, 116(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s10, 112(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s11, 108(sp) # 4-byte Folded Spill
 ; ZHINX32-NEXT:    lw t0, 160(sp)
-; ZHINX32-NEXT:    lw t1, 164(sp)
-; ZHINX32-NEXT:    lw t2, 168(sp)
-; ZHINX32-NEXT:    lw s0, 172(sp)
-; ZHINX32-NEXT:    lw t3, 176(sp)
-; ZHINX32-NEXT:    lw t4, 180(sp)
-; ZHINX32-NEXT:    lw t5, 184(sp)
-; ZHINX32-NEXT:    lw t6, 188(sp)
+; ZHINX32-NEXT:    sw t0, 104(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    lw t0, 164(sp)
+; ZHINX32-NEXT:    sw t0, 100(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    lw t0, 168(sp)
+; ZHINX32-NEXT:    sw t0, 96(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    lw t0, 172(sp)
+; ZHINX32-NEXT:    sw t0, 92(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    lw t6, 176(sp)
+; ZHINX32-NEXT:    lw t5, 180(sp)
+; ZHINX32-NEXT:    lw t4, 184(sp)
+; ZHINX32-NEXT:    lw s0, 188(sp)
 ; ZHINX32-NEXT:    lw s1, 192(sp)
 ; ZHINX32-NEXT:    lw s2, 196(sp)
 ; ZHINX32-NEXT:    lw s3, 200(sp)
@@ -866,45 +870,49 @@ define float @caller_float_32(<32 x float> %A) nounwind {
 ; ZHINX32-NEXT:    lw s10, 228(sp)
 ; ZHINX32-NEXT:    lw s11, 232(sp)
 ; ZHINX32-NEXT:    lw ra, 236(sp)
-; ZHINX32-NEXT:    sw ra, 76(sp)
-; ZHINX32-NEXT:    sw s11, 72(sp)
-; ZHINX32-NEXT:    sw s10, 68(sp)
-; ZHINX32-NEXT:    sw s9, 64(sp)
-; ZHINX32-NEXT:    sw s8, 60(sp)
-; ZHINX32-NEXT:    sw s7, 56(sp)
-; ZHINX32-NEXT:    sw s6, 52(sp)
-; ZHINX32-NEXT:    sw s5, 48(sp)
-; ZHINX32-NEXT:    sw s4, 44(sp)
-; ZHINX32-NEXT:    sw s3, 40(sp)
-; ZHINX32-NEXT:    sw s2, 36(sp)
-; ZHINX32-NEXT:    sw s1, 32(sp)
-; ZHINX32-NEXT:    sw t6, 28(sp)
-; ZHINX32-NEXT:    sw t5, 24(sp)
-; ZHINX32-NEXT:    sw t4, 20(sp)
-; ZHINX32-NEXT:    sw t3, 16(sp)
-; ZHINX32-NEXT:    lw t3, 144(sp)
-; ZHINX32-NEXT:    lw t4, 148(sp)
-; ZHINX32-NEXT:    lw t5, 152(sp)
-; ZHINX32-NEXT:    lw t6, 156(sp)
+; ZHINX32-NEXT:    lw t3, 240(sp)
+; ZHINX32-NEXT:    lw t2, 244(sp)
+; ZHINX32-NEXT:    lw t1, 248(sp)
+; ZHINX32-NEXT:    lw t0, 252(sp)
+; ZHINX32-NEXT:    sw t0, 76(sp)
+; ZHINX32-NEXT:    sw t1, 72(sp)
+; ZHINX32-NEXT:    sw t2, 68(sp)
+; ZHINX32-NEXT:    sw t3, 64(sp)
+; ZHINX32-NEXT:    sw ra, 60(sp)
+; ZHINX32-NEXT:    sw s11, 56(sp)
+; ZHINX32-NEXT:    sw s10, 52(sp)
+; ZHINX32-NEXT:    sw s9, 48(sp)
+; ZHINX32-NEXT:    sw s8, 44(sp)
+; ZHINX32-NEXT:    sw s7, 40(sp)
+; ZHINX32-NEXT:    sw s6, 36(sp)
+; ZHINX32-NEXT:    sw s5, 32(sp)
+; ZHINX32-NEXT:    sw s4, 28(sp)
+; ZHINX32-NEXT:    sw s3, 24(sp)
+; ZHINX32-NEXT:    sw s2, 20(sp)
+; ZHINX32-NEXT:    sw s1, 16(sp)
 ; ZHINX32-NEXT:    sw s0, 12(sp)
-; ZHINX32-NEXT:    sw t2, 8(sp)
-; ZHINX32-NEXT:    sw t1, 4(sp)
-; ZHINX32-NEXT:    sw t0, 0(sp)
+; ZHINX32-NEXT:    sw t4, 8(sp)
+; ZHINX32-NEXT:    sw t5, 4(sp)
+; ZHINX32-NEXT:    sw t6, 0(sp)
+; ZHINX32-NEXT:    lw t3, 104(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw t4, 100(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw t5, 96(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw t6, 92(sp) # 4-byte Folded Reload
 ; ZHINX32-NEXT:    call callee_float_32
-; ZHINX32-NEXT:    lw ra, 140(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw s0, 136(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw s1, 132(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw s2, 128(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw s3, 124(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw s4, 120(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw s5, 116(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw s6, 112(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw s7, 108(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw s8, 104(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw s9, 100(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw s10, 96(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw s11, 92(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    addi sp, sp, 144
+; ZHINX32-NEXT:    lw ra, 156(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s0, 152(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s1, 148(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s2, 144(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s3, 140(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s4, 136(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s5, 132(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s6, 128(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s7, 124(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s8, 120(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s9, 116(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s10, 112(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s11, 108(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    addi sp, sp, 160
 ; ZHINX32-NEXT:    ret
 ;
 ; ZHINX64-LABEL: caller_float_32:
@@ -994,28 +1002,32 @@ define float @caller_float_32(<32 x float> %A) nounwind {
 ;
 ; ZFINX32-LABEL: caller_float_32:
 ; ZFINX32:       # %bb.0:
-; ZFINX32-NEXT:    addi sp, sp, -144
-; ZFINX32-NEXT:    sw ra, 140(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT:    sw s0, 136(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT:    sw s1, 132(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT:    sw s2, 128(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT:    sw s3, 124(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT:    sw s4, 120(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT:    sw s5, 116(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT:    sw s6, 112(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT:    sw s7, 108(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT:    sw s8, 104(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT:    sw s9, 100(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT:    sw s10, 96(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT:    sw s11, 92(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    addi sp, sp, -160
+; ZFINX32-NEXT:    sw ra, 156(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s0, 152(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s1, 148(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s2, 144(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s3, 140(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s4, 136(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s5, 132(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s6, 128(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s7, 124(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s8, 120(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s9, 116(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s10, 112(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s11, 108(sp) # 4-byte Folded Spill
 ; ZFINX32-NEXT:    lw t0, 160(sp)
-; ZFINX32-NEXT:    lw t1, 164(sp)
-; ZFINX32-NEXT:    lw t2, 168(sp)
-; ZFINX32-NEXT:    lw s0, 172(sp)
-; ZFINX32-NEXT:    lw t3, 176(sp)
-; ZFINX32-NEXT:    lw t4, 180(sp)
-; ZFINX32-NEXT:    lw t5, 184(sp)
-; ZFINX32-NEXT:    lw t6, 188(sp)
+; ZFINX32-NEXT:    sw t0, 104(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    lw t0, 164(sp)
+; ZFINX32-NEXT:    sw t0, 100(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    lw t0, 168(sp)
+; ZFINX32-NEXT:    sw t0, 96(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    lw t0, 172(sp)
+; ZFINX32-NEXT:    sw t0, 92(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    lw t6, 176(sp)
+; ZFINX32-NEXT:    lw t5, 180(sp)
+; ZFINX32-NEXT:    lw t4, 184(sp)
+; ZFINX32-NEXT:    lw s0, 188(sp)
 ; ZFINX32-NEXT:    lw s1, 192(sp)
 ; ZFINX32-NEXT:    lw s2, 196(sp)
 ; ZFINX32-NEXT:    lw s3, 200(sp)
@@ -1028,45 +1040,49 @@ define float @caller_float_32(<32 x float> %A) nounwind {
 ; ZFINX32-NEXT:    lw s10, 228(sp)
 ; ZFINX32-NEXT:    lw s11, 232(sp)
 ; ZFINX32-NEXT:    lw ra, 236(sp)
-; ZFINX32-NEXT:    sw ra, 76(sp)
-; ZFINX32-NEXT:    sw s11, 72(sp)
-; ZFINX32-NEXT:    sw s10, 68(sp)
-; ZFINX32-NEXT:    sw s9, 64(sp)
-; ZFINX32-NEXT:    sw s8, 60(sp)
-; ZFINX32-NEXT:    sw s7, 56(sp)
-; ZFINX32-NEXT:    sw s6, 52(sp)
-; ZFINX32-NEXT:    sw s5, 48(sp)
-; ZFINX32-NEXT:    sw s4, 44(sp)
-; ZFINX32-NEXT:    sw s3, 40(sp)
-; ZFINX32-NEXT:    sw s2, 36(sp)
-; ZFINX32-NEXT:    sw s1, 32(sp)
-; ZFINX32-NEXT:    sw t6, 28(sp)
-; ZFINX32-NEXT:    sw t5, 24(sp)
-; ZFINX32-NEXT:    sw t4, 20(sp)
-; ZFINX32-NEXT:    sw t3, 16(sp)
-; ZFINX32-NEXT:    lw t3, 144(sp)
-; ZFINX32-NEXT:    lw t4, 148(sp)
-; ZFINX32-NEXT:    lw t5, 152(sp)
-; ZFINX32-NEXT:    lw t6, 156(sp)
+; ZFINX32-NEXT:    lw t3, 240(sp)
+; ZFINX32-NEXT:    lw t2, 244(sp)
+; ZFINX32-NEXT:    lw t1, 248(sp)
+; ZFINX32-NEXT:    lw t0, 252(sp)
+; ZFINX32-NEXT:    sw t0, 76(sp)
+; ZFINX32-NEXT:    sw t1, 72(sp)
+; ZFINX32-NEXT:    sw t2, 68(sp)
+; ZFINX32-NEXT:    sw t3, 64(sp)
+; ZFINX32-NEXT:    sw ra, 60(sp)
+; ZFINX32-NEXT:    sw s11, 56(sp)
+; ZFINX32-NEXT:    sw s10, 52(sp)
+; ZFINX32-NEXT:    sw s9, 48(sp)
+; ZFINX32-NEXT:    sw s8, 44(sp)
+; ZFINX32-NEXT:    sw s7, 40(sp)
+; ZFINX32-NEXT:    sw s6, 36(sp)
+; ZFINX32-NEXT:    sw s5, 32(sp)
+; ZFINX32-NEXT:    sw s4, 28(sp)
+; ZFINX32-NEXT:    sw s3, 24(sp)
+; ZFINX32-NEXT:    sw s2, 20(sp)
+; ZFINX32-NEXT:    sw s1, 16(sp)
 ; ZFINX32-NEXT:    sw s0, 12(sp)
-; ZFINX32-NEXT:    sw t2, 8(sp)
-; ZFINX32-NEXT:    sw t1, 4(sp)
-; ZFINX32-NEXT:    sw t0, 0(sp)
+; ZFINX32-NEXT:    sw t4, 8(sp)
+; ZFINX32-NEXT:    sw t5, 4(sp)
+; ZFINX32-NEXT:    sw t6, 0(sp)
+; ZFINX32-NEXT:    lw t3, 104(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw t4, 100(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw t5, 96(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw t6, 92(sp) # 4-byte Folded Reload
 ; ZFINX32-NEXT:    call callee_float_32
-; ZFINX32-NEXT:    lw ra, 140(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT:    lw s0, 136(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT:    lw s1, 132(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT:    lw s2, 128(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT:    lw s3, 124(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT:    lw s4, 120(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT:    lw s5, 116(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT:    lw s6, 112(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT:    lw s7, 108(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT:    lw s8, 104(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT:    lw s9, 100(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT:    lw s10, 96(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT:    lw s11, 92(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT:    addi sp, sp, 144
+; ZFINX32-NEXT:    lw ra, 156(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s0, 152(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s1, 148(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s2, 144(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s3, 140(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s4, 136(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s5, 132(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s6, 128(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s7, 124(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s8, 120(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s9, 116(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s10, 112(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s11, 108(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    addi sp, sp, 160
 ; ZFINX32-NEXT:    ret
 ;
 ; ZFINX64-LABEL: caller_float_32:
@@ -1156,28 +1172,32 @@ define float @caller_float_32(<32 x float> %A) nounwind {
 ;
 ; ZDINX32-LABEL: caller_float_32:
 ; ZDINX32:       # %bb.0:
-; ZDINX32-NEXT:    addi sp, sp, -144
-; ZDINX32-NEXT:    sw ra, 140(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT:    sw s0, 136(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT:    sw s1, 132(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT:    sw s2, 128(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT:    sw s3, 124(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT:    sw s4, 120(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT:    sw s5, 116(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT:    sw s6, 112(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT:    sw s7, 108(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT:    sw s8, 104(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT:    sw s9, 100(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT:    sw s10, 96(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT:    sw s11, 92(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    addi sp, sp, -160
+; ZDINX32-NEXT:    sw ra, 156(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s0, 152(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s1, 148(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s2, 144(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s3, 140(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s4, 136(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s5, 132(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s6, 128(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s7, 124(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s8, 120(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s9, 116(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s10, 112(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s11, 108(sp) # 4-byte Folded Spill
 ; ZDINX32-NEXT:    lw t0, 160(sp)
-; ZDINX32-NEXT:    lw t1, 164(sp)
-; ZDINX32-NEXT:    lw t2, 168(sp)
-; ZDINX32-NEXT:    lw s0, 172(sp)
-; ZDINX32-NEXT:    lw t3, 176(sp)
-; ZDINX32-NEXT:    lw t4, 180(sp)
-; ZDINX32-NEXT:    lw t5, 184(sp)
-; ZDINX32-NEXT:    lw t6, 188(sp)
+; ZDINX32-NEXT:    sw t0, 104(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    lw t0, 164(sp)
+; ZDINX32-NEXT:    sw t0, 100(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    lw t0, 168(sp)
+; ZDINX32-NEXT:    sw t0, 96(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    lw t0, 172(sp)
+; ZDINX32-NEXT:    sw t0, 92(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    lw t6, 176(sp)
+; ZDINX32-NEXT:    lw t5, 180(sp)
+; ZDINX32-NEXT:    lw t4, 184(sp)
+; ZDINX32-NEXT:    lw s0, 188(sp)
 ; ZDINX32-NEXT:    lw s1, 192(sp)
 ; ZDINX32-NEXT:    lw s2, 196(sp)
 ; ZDINX32-NEXT:    lw s3, 200(sp)
@@ -1190,45 +1210,49 @@ define float @caller_float_32(<32 x float> %A) nounwind {
 ; ZDINX32-NEXT:    lw s10, 228(sp)
 ; ZDINX32-NEXT:    lw s11, 232(sp)
 ; ZDINX32-NEXT:    lw ra, 236(sp)
-; ZDINX32-NEXT:    sw ra, 76(sp)
-; ZDINX32-NEXT:    sw s11, 72(sp)
-; ZDINX32-NEXT:    sw s10, 68(sp)
-; ZDINX32-NEXT:    sw s9, 64(sp)
-; ZDINX32-NEXT:    sw s8, 60(sp)
-; ZDINX32-NEXT:    sw s7, 56(sp)
-; ZDINX32-NEXT:    sw s6, 52(sp)
-; ZDINX32-NEXT:    sw s5, 48(sp)
-; ZDINX32-NEXT:    sw s4, 44(sp)
-; ZDINX32-NEXT:    sw s3, 40(sp)
-; ZDINX32-NEXT:    sw s2, 36(sp)
-; ZDINX32-NEXT:    sw s1, 32(sp)
-; ZDINX32-NEXT:    sw t6, 28(sp)
-; ZDINX32-NEXT:    sw t5, 24(sp)
-; ZDINX32-NEXT:    sw t4, 20(sp)
-; ZDINX32-NEXT:    sw t3, 16(sp)
-; ZDINX32-NEXT:    lw t3, 144(sp)
-; ZDINX32-NEXT:    lw t4, 148(sp)
-; ZDINX32-NEXT:    lw t5, 152(sp)
-; ZDINX32-NEXT:    lw t6, 156(sp)
+; ZDINX32-NEXT:    lw t3, 240(sp)
+; ZDINX32-NEXT:    lw t2, 244(sp)
+; ZDINX32-NEXT:    lw t1, 248(sp)
+; ZDINX32-NEXT:    lw t0, 252(sp)
+; ZDINX32-NEXT:    sw t0, 76(sp)
+; ZDINX32-NEXT:    sw t1, 72(sp)
+; ZDINX32-NEXT:    sw t2, 68(sp)
+; ZDINX32-NEXT:    sw t3, 64(sp)
+; ZDINX32-NEXT:    sw ra, 60(sp)
+; ZDINX32-NEXT:    sw s11, 56(sp)
+; ZDINX32-NEXT:    sw s10, 52(sp)
+; ZDINX32-NEXT:    sw s9, 48(sp)
+; ZDINX32-NEXT:    sw s8, 44(sp)
+; ZDINX32-NEXT:    sw s7, 40(sp)
+; ZDINX32-NEXT:    sw s6, 36(sp)
+; ZDINX32-NEXT:    sw s5, 32(sp)
+; ZDINX32-NEXT:    sw s4, 28(sp)
+; ZDINX32-NEXT:    sw s3, 24(sp)
+; ZDINX32-NEXT:    sw s2, 20(sp)
+; ZDINX32-NEXT:    sw s1, 16(sp)
 ; ZDINX32-NEXT:    sw s0, 12(sp)
-; ZDINX32-NEXT:    sw t2, 8(sp)
-; ZDINX32-NEXT:    sw t1, 4(sp)
-; ZDINX32-NEXT:    sw t0, 0(sp)
+; ZDINX32-NEXT:    sw t4, 8(sp)
+; ZDINX32-NEXT:    sw t5, 4(sp)
+; ZDINX32-NEXT:    sw t6, 0(sp)
+; ZDINX32-NEXT:    lw t3, 104(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw t4, 100(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw t5, 96(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw t6, 92(sp) # 4-byte Folded Reload
 ; ZDINX32-NEXT:    call callee_float_32
-; ZDINX32-NEXT:    lw ra, 140(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT:    lw s0, 136(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT:    lw s1, 132(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT:    lw s2, 128(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT:    lw s3, 124(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT:    lw s4, 120(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT:    lw s5, 116(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT:    lw s6, 112(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT:    lw s7, 108(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT:    lw s8, 104(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT:    lw s9, 100(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT:    lw s10, 96(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT:    lw s11, 92(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT:    addi sp, sp, 144
+; ZDINX32-NEXT:    lw ra, 156(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s0, 152(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s1, 148(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s2, 144(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s3, 140(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s4, 136(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s5, 132(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s6, 128(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s7, 124(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s8, 120(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s9, 116(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s10, 112(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s11, 108(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    addi sp, sp, 160
 ; ZDINX32-NEXT:    ret
 ;
 ; ZDINX64-LABEL: caller_float_32:

>From 8d73b7d2482466f2e440aee44e82f78d9f56848c Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Fri, 6 Sep 2024 14:41:20 -0700
Subject: [PATCH 3/3] fixup! Reuse Subtarget variable.

---
 llvm/lib/Target/RISCV/RISCVCallingConv.cpp | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVCallingConv.cpp b/llvm/lib/Target/RISCV/RISCVCallingConv.cpp
index 0c84e10cbb1cf0..48c2e526dd845e 100644
--- a/llvm/lib/Target/RISCV/RISCVCallingConv.cpp
+++ b/llvm/lib/Target/RISCV/RISCVCallingConv.cpp
@@ -312,10 +312,8 @@ bool llvm::CC_RISCV(unsigned ValNo, MVT ValVT, MVT LocVT,
 
   ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs(ABI);
 
-  const RISCVSubtarget &STI =
-      State.getMachineFunction().getSubtarget<RISCVSubtarget>();
-  if ((ValVT == MVT::f32 && XLen == 32 && STI.hasStdExtZfinx()) ||
-      (ValVT == MVT::f64 && XLen == 64 && STI.hasStdExtZdinx())) {
+  if ((ValVT == MVT::f32 && XLen == 32 && Subtarget.hasStdExtZfinx()) ||
+      (ValVT == MVT::f64 && XLen == 64 && Subtarget.hasStdExtZdinx())) {
     if (MCRegister Reg = State.AllocateReg(ArgGPRs)) {
       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
       return false;



More information about the llvm-commits mailing list