[llvm] [RISCV] Insert a freeze before converting select to AND/OR. (PR #84232)
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 6 12:51:53 PST 2024
https://github.com/topperc created https://github.com/llvm/llvm-project/pull/84232
Select blocks poison from its unselected operand, but AND/OR do not. We
need to insert a freeze to block poison propagation.

This creates suboptimal codegen, which I will try to fix with follow-up
patches, but we should prioritize the correctness fix.
Fixes #84200.
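
To make the semantics concrete, here is a minimal LLVM IR sketch of the
(select c, 0, y) -> (c-1) & y case handled by the patch; the value names
%c and %y are hypothetical and only for illustration:

  ; select blocks poison from the unselected arm: if %c is true, the
  ; result is 0 even when %y is poison.
  %r = select i1 %c, i64 0, i64 %y

  ; The AND form propagates poison from %y regardless of %c
  ; (and i64 0, poison is poison), so the transform must freeze %y first:
  %c.ext = zext i1 %c to i64
  %mask  = add i64 %c.ext, -1    ; 1 -> 0, 0 -> all-ones
  %y.fr  = freeze i64 %y         ; arbitrary fixed value if %y was poison
  %r2    = and i64 %mask, %y.fr  ; now matches the select semantics

With the freeze, %r2 is 0 when %c is true and freeze(%y) otherwise, which
is a valid refinement of the original select.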
From 284b3ca8b5cfdd40575c9b3e92c11a5cf950c20e Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Wed, 6 Mar 2024 12:46:21 -0800
Subject: [PATCH 1/2] [RISCV] Add test case for pr84200. NFC
---
llvm/test/CodeGen/RISCV/pr84200.ll | 23 +++++++++++++++++++++++
1 file changed, 23 insertions(+)
create mode 100644 llvm/test/CodeGen/RISCV/pr84200.ll
diff --git a/llvm/test/CodeGen/RISCV/pr84200.ll b/llvm/test/CodeGen/RISCV/pr84200.ll
new file mode 100644
index 00000000000000..a971527e9567fd
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/pr84200.ll
@@ -0,0 +1,23 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s -mtriple=riscv64 | FileCheck %s
+
+; The sub nuw produces poison if the input is not 0 or 1. We must insert a
+; freeze before converting the sub to AND so that we don't propagate poison.
+define i64 @foo(i64 %1) {
+; CHECK-LABEL: foo:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: sub a1, a1, a0
+; CHECK-NEXT: sltiu a0, a0, 2
+; CHECK-NEXT: not a0, a0
+; CHECK-NEXT: xori a1, a1, 1
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: and a0, a0, a1
+; CHECK-NEXT: ret
+entry:
+ %.urem.i = sub nuw i64 1, %1
+ %.cmp.i = icmp ugt i64 %1, 1
+ %2 = xor i64 %.urem.i, 1
+ %3 = select i1 %.cmp.i, i64 0, i64 %2
+ ret i64 %3
+}
From fc7fcdb147f10f30414a2b757d4e3e663db9599b Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Wed, 6 Mar 2024 12:48:57 -0800
Subject: [PATCH 2/2] [RISCV] Insert a freeze before converting select to
AND/OR.
Select blocks poison from its unselected operand, but AND/OR do not. We
need to insert a freeze to block poison propagation.

This creates suboptimal codegen, which I will try to fix with follow-up
patches, but we should prioritize the correctness fix.
Fixes #84200.
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 17 +-
llvm/test/CodeGen/RISCV/alu64.ll | 3 +-
.../CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll | 16 +-
llvm/test/CodeGen/RISCV/bfloat-convert.ll | 169 +-
llvm/test/CodeGen/RISCV/double-convert.ll | 137 +-
.../CodeGen/RISCV/double-round-conv-sat.ll | 804 +++++-----
llvm/test/CodeGen/RISCV/float-convert.ll | 236 +--
.../CodeGen/RISCV/float-round-conv-sat.ll | 636 ++++----
llvm/test/CodeGen/RISCV/forced-atomics.ll | 3 +-
llvm/test/CodeGen/RISCV/fpclamptosat.ll | 410 ++---
llvm/test/CodeGen/RISCV/half-convert.ll | 370 +++--
.../test/CodeGen/RISCV/half-round-conv-sat.ll | 1356 +++++++++--------
llvm/test/CodeGen/RISCV/iabs.ll | 88 +-
llvm/test/CodeGen/RISCV/pr84200.ll | 3 +-
llvm/test/CodeGen/RISCV/rv32zbb-zbkb.ll | 16 +-
llvm/test/CodeGen/RISCV/rv32zbs.ll | 40 +-
.../RISCV/rv64-legal-i32/rv64xtheadbb.ll | 16 +-
.../CodeGen/RISCV/rv64-legal-i32/rv64zbb.ll | 13 +-
.../CodeGen/RISCV/rvv/fpclamptosat_vec.ll | 617 ++++----
.../CodeGen/RISCV/rvv/vec3-setcc-crash.ll | 26 +-
.../CodeGen/RISCV/signed-truncation-check.ll | 9 +-
21 files changed, 2599 insertions(+), 2386 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 4c3dc63afd878d..1efb6ff409379a 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -7246,25 +7246,25 @@ static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
// (select c, -1, y) -> -c | y
if (isAllOnesConstant(TrueV)) {
SDValue Neg = DAG.getNegative(CondV, DL, VT);
- return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
+ return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
}
// (select c, y, -1) -> (c-1) | y
if (isAllOnesConstant(FalseV)) {
SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
DAG.getAllOnesConstant(DL, VT));
- return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
+ return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
}
// (select c, 0, y) -> (c-1) & y
if (isNullConstant(TrueV)) {
SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
DAG.getAllOnesConstant(DL, VT));
- return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
+ return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
}
// (select c, y, 0) -> -c & y
if (isNullConstant(FalseV)) {
SDValue Neg = DAG.getNegative(CondV, DL, VT);
- return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
+ return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
}
}
@@ -7274,6 +7274,7 @@ static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
const APInt &FalseVal = FalseV->getAsAPIntVal();
if (~TrueVal == FalseVal) {
SDValue Neg = DAG.getNegative(CondV, DL, VT);
+ FalseV = DAG.getFreeze(FalseV);
return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
}
}
@@ -7289,14 +7290,14 @@ static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
// (select x, x, y) -> x | y
// (select !x, x, y) -> x & y
if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
- return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
- FalseV);
+ return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, DAG.getFreeze(TrueV),
+ DAG.getFreeze(FalseV));
}
// (select x, y, x) -> x & y
// (select !x, y, x) -> x | y
if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
- return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT, TrueV,
- FalseV);
+ return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT, DAG.getFreeze(TrueV),
+ DAG.getFreeze(FalseV));
}
}
diff --git a/llvm/test/CodeGen/RISCV/alu64.ll b/llvm/test/CodeGen/RISCV/alu64.ll
index f032756e007b68..e16f6abcca244c 100644
--- a/llvm/test/CodeGen/RISCV/alu64.ll
+++ b/llvm/test/CodeGen/RISCV/alu64.ll
@@ -58,7 +58,8 @@ define i64 @sltiu(i64 %a) nounwind {
; RV32I-LABEL: sltiu:
; RV32I: # %bb.0:
; RV32I-NEXT: sltiu a0, a0, 3
-; RV32I-NEXT: seqz a1, a1
+; RV32I-NEXT: snez a1, a1
+; RV32I-NEXT: addi a1, a1, -1
; RV32I-NEXT: and a0, a1, a0
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll
index aa962d68fc5285..5914e45a153302 100644
--- a/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll
+++ b/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll
@@ -372,10 +372,10 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) {
; RV32IA-NEXT: # =>This Loop Header: Depth=1
; RV32IA-NEXT: # Child Loop BB2_3 Depth 2
; RV32IA-NEXT: mv a3, a2
-; RV32IA-NEXT: addi a2, a2, 1
-; RV32IA-NEXT: sltu a4, a3, a1
-; RV32IA-NEXT: neg a4, a4
-; RV32IA-NEXT: and a4, a4, a2
+; RV32IA-NEXT: addi a4, a2, 1
+; RV32IA-NEXT: sltu a2, a2, a1
+; RV32IA-NEXT: neg a2, a2
+; RV32IA-NEXT: and a4, a2, a4
; RV32IA-NEXT: .LBB2_3: # %atomicrmw.start
; RV32IA-NEXT: # Parent Loop BB2_1 Depth=1
; RV32IA-NEXT: # => This Inner Loop Header: Depth=2
@@ -607,10 +607,10 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) {
; RV64IA-NEXT: # =>This Loop Header: Depth=1
; RV64IA-NEXT: # Child Loop BB3_3 Depth 2
; RV64IA-NEXT: mv a3, a2
-; RV64IA-NEXT: addi a2, a2, 1
-; RV64IA-NEXT: sltu a4, a3, a1
-; RV64IA-NEXT: neg a4, a4
-; RV64IA-NEXT: and a4, a4, a2
+; RV64IA-NEXT: addi a4, a2, 1
+; RV64IA-NEXT: sltu a2, a2, a1
+; RV64IA-NEXT: neg a2, a2
+; RV64IA-NEXT: and a4, a2, a4
; RV64IA-NEXT: .LBB3_3: # %atomicrmw.start
; RV64IA-NEXT: # Parent Loop BB3_1 Depth=1
; RV64IA-NEXT: # => This Inner Loop Header: Depth=2
diff --git a/llvm/test/CodeGen/RISCV/bfloat-convert.ll b/llvm/test/CodeGen/RISCV/bfloat-convert.ll
index d533607ad54e38..0216d00be21854 100644
--- a/llvm/test/CodeGen/RISCV/bfloat-convert.ll
+++ b/llvm/test/CodeGen/RISCV/bfloat-convert.ll
@@ -456,121 +456,142 @@ define i64 @fcvt_l_bf16(bfloat %a) nounwind {
define i64 @fcvt_l_bf16_sat(bfloat %a) nounwind {
; RV32IZFBFMIN-LABEL: fcvt_l_bf16_sat:
; RV32IZFBFMIN: # %bb.0: # %start
-; RV32IZFBFMIN-NEXT: addi sp, sp, -16
-; RV32IZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IZFBFMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZFBFMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFBFMIN-NEXT: addi sp, sp, -32
+; RV32IZFBFMIN-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IZFBFMIN-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32IZFBFMIN-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32IZFBFMIN-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IZFBFMIN-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32IZFBFMIN-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill
+; RV32IZFBFMIN-NEXT: lui a0, %hi(.LCPI10_0)
+; RV32IZFBFMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a0)
; RV32IZFBFMIN-NEXT: fcvt.s.bf16 fs0, fa0
+; RV32IZFBFMIN-NEXT: flt.s s0, fa5, fs0
+; RV32IZFBFMIN-NEXT: neg s1, s0
; RV32IZFBFMIN-NEXT: lui a0, 913408
; RV32IZFBFMIN-NEXT: fmv.w.x fa5, a0
-; RV32IZFBFMIN-NEXT: fle.s s0, fa5, fs0
+; RV32IZFBFMIN-NEXT: fle.s s2, fa5, fs0
+; RV32IZFBFMIN-NEXT: neg s3, s2
; RV32IZFBFMIN-NEXT: fmv.s fa0, fs0
; RV32IZFBFMIN-NEXT: call __fixsfdi
+; RV32IZFBFMIN-NEXT: and a0, s3, a0
+; RV32IZFBFMIN-NEXT: or a0, s1, a0
+; RV32IZFBFMIN-NEXT: feq.s a2, fs0, fs0
+; RV32IZFBFMIN-NEXT: neg a2, a2
; RV32IZFBFMIN-NEXT: lui a4, 524288
-; RV32IZFBFMIN-NEXT: lui a2, 524288
-; RV32IZFBFMIN-NEXT: beqz s0, .LBB10_2
+; RV32IZFBFMIN-NEXT: li a5, 1
+; RV32IZFBFMIN-NEXT: lui a3, 524288
+; RV32IZFBFMIN-NEXT: bne s2, a5, .LBB10_2
; RV32IZFBFMIN-NEXT: # %bb.1: # %start
-; RV32IZFBFMIN-NEXT: mv a2, a1
+; RV32IZFBFMIN-NEXT: mv a3, a1
; RV32IZFBFMIN-NEXT: .LBB10_2: # %start
-; RV32IZFBFMIN-NEXT: lui a1, %hi(.LCPI10_0)
-; RV32IZFBFMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a1)
-; RV32IZFBFMIN-NEXT: flt.s a3, fa5, fs0
-; RV32IZFBFMIN-NEXT: beqz a3, .LBB10_4
+; RV32IZFBFMIN-NEXT: and a0, a2, a0
+; RV32IZFBFMIN-NEXT: beqz s0, .LBB10_4
; RV32IZFBFMIN-NEXT: # %bb.3:
-; RV32IZFBFMIN-NEXT: addi a2, a4, -1
+; RV32IZFBFMIN-NEXT: addi a3, a4, -1
; RV32IZFBFMIN-NEXT: .LBB10_4: # %start
-; RV32IZFBFMIN-NEXT: feq.s a1, fs0, fs0
-; RV32IZFBFMIN-NEXT: neg a4, a1
-; RV32IZFBFMIN-NEXT: and a1, a4, a2
-; RV32IZFBFMIN-NEXT: neg a2, a3
-; RV32IZFBFMIN-NEXT: neg a3, s0
-; RV32IZFBFMIN-NEXT: and a0, a3, a0
-; RV32IZFBFMIN-NEXT: or a0, a2, a0
-; RV32IZFBFMIN-NEXT: and a0, a4, a0
-; RV32IZFBFMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IZFBFMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IZFBFMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
-; RV32IZFBFMIN-NEXT: addi sp, sp, 16
+; RV32IZFBFMIN-NEXT: and a1, a2, a3
+; RV32IZFBFMIN-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IZFBFMIN-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IZFBFMIN-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IZFBFMIN-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32IZFBFMIN-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32IZFBFMIN-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload
+; RV32IZFBFMIN-NEXT: addi sp, sp, 32
; RV32IZFBFMIN-NEXT: ret
;
; R32IDZFBFMIN-LABEL: fcvt_l_bf16_sat:
; R32IDZFBFMIN: # %bb.0: # %start
-; R32IDZFBFMIN-NEXT: addi sp, sp, -16
-; R32IDZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; R32IDZFBFMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; R32IDZFBFMIN-NEXT: addi sp, sp, -32
+; R32IDZFBFMIN-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; R32IDZFBFMIN-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; R32IDZFBFMIN-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; R32IDZFBFMIN-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; R32IDZFBFMIN-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; R32IDZFBFMIN-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill
+; R32IDZFBFMIN-NEXT: lui a0, %hi(.LCPI10_0)
+; R32IDZFBFMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a0)
; R32IDZFBFMIN-NEXT: fcvt.s.bf16 fs0, fa0
+; R32IDZFBFMIN-NEXT: flt.s s0, fa5, fs0
+; R32IDZFBFMIN-NEXT: neg s1, s0
; R32IDZFBFMIN-NEXT: lui a0, 913408
; R32IDZFBFMIN-NEXT: fmv.w.x fa5, a0
-; R32IDZFBFMIN-NEXT: fle.s s0, fa5, fs0
+; R32IDZFBFMIN-NEXT: fle.s s2, fa5, fs0
+; R32IDZFBFMIN-NEXT: neg s3, s2
; R32IDZFBFMIN-NEXT: fmv.s fa0, fs0
; R32IDZFBFMIN-NEXT: call __fixsfdi
+; R32IDZFBFMIN-NEXT: and a0, s3, a0
+; R32IDZFBFMIN-NEXT: or a0, s1, a0
+; R32IDZFBFMIN-NEXT: feq.s a2, fs0, fs0
+; R32IDZFBFMIN-NEXT: neg a2, a2
; R32IDZFBFMIN-NEXT: lui a4, 524288
-; R32IDZFBFMIN-NEXT: lui a2, 524288
-; R32IDZFBFMIN-NEXT: beqz s0, .LBB10_2
+; R32IDZFBFMIN-NEXT: li a5, 1
+; R32IDZFBFMIN-NEXT: lui a3, 524288
+; R32IDZFBFMIN-NEXT: bne s2, a5, .LBB10_2
; R32IDZFBFMIN-NEXT: # %bb.1: # %start
-; R32IDZFBFMIN-NEXT: mv a2, a1
+; R32IDZFBFMIN-NEXT: mv a3, a1
; R32IDZFBFMIN-NEXT: .LBB10_2: # %start
-; R32IDZFBFMIN-NEXT: lui a1, %hi(.LCPI10_0)
-; R32IDZFBFMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a1)
-; R32IDZFBFMIN-NEXT: flt.s a3, fa5, fs0
-; R32IDZFBFMIN-NEXT: beqz a3, .LBB10_4
+; R32IDZFBFMIN-NEXT: and a0, a2, a0
+; R32IDZFBFMIN-NEXT: beqz s0, .LBB10_4
; R32IDZFBFMIN-NEXT: # %bb.3:
-; R32IDZFBFMIN-NEXT: addi a2, a4, -1
+; R32IDZFBFMIN-NEXT: addi a3, a4, -1
; R32IDZFBFMIN-NEXT: .LBB10_4: # %start
-; R32IDZFBFMIN-NEXT: feq.s a1, fs0, fs0
-; R32IDZFBFMIN-NEXT: neg a4, a1
-; R32IDZFBFMIN-NEXT: and a1, a4, a2
-; R32IDZFBFMIN-NEXT: neg a2, a3
-; R32IDZFBFMIN-NEXT: neg a3, s0
-; R32IDZFBFMIN-NEXT: and a0, a3, a0
-; R32IDZFBFMIN-NEXT: or a0, a2, a0
-; R32IDZFBFMIN-NEXT: and a0, a4, a0
-; R32IDZFBFMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; R32IDZFBFMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; R32IDZFBFMIN-NEXT: and a1, a2, a3
+; R32IDZFBFMIN-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; R32IDZFBFMIN-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; R32IDZFBFMIN-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; R32IDZFBFMIN-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; R32IDZFBFMIN-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; R32IDZFBFMIN-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
-; R32IDZFBFMIN-NEXT: addi sp, sp, 16
+; R32IDZFBFMIN-NEXT: addi sp, sp, 32
; R32IDZFBFMIN-NEXT: ret
;
; RV32ID-LABEL: fcvt_l_bf16_sat:
; RV32ID: # %bb.0: # %start
-; RV32ID-NEXT: addi sp, sp, -16
-; RV32ID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32ID-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32ID-NEXT: addi sp, sp, -32
+; RV32ID-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32ID-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32ID-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32ID-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32ID-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32ID-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill
+; RV32ID-NEXT: lui a0, %hi(.LCPI10_0)
+; RV32ID-NEXT: flw fa5, %lo(.LCPI10_0)(a0)
; RV32ID-NEXT: fmv.x.w a0, fa0
; RV32ID-NEXT: slli a0, a0, 16
; RV32ID-NEXT: fmv.w.x fs0, a0
+; RV32ID-NEXT: flt.s s0, fa5, fs0
+; RV32ID-NEXT: neg s1, s0
; RV32ID-NEXT: lui a0, 913408
; RV32ID-NEXT: fmv.w.x fa5, a0
-; RV32ID-NEXT: fle.s s0, fa5, fs0
+; RV32ID-NEXT: fle.s s2, fa5, fs0
+; RV32ID-NEXT: neg s3, s2
; RV32ID-NEXT: fmv.s fa0, fs0
; RV32ID-NEXT: call __fixsfdi
+; RV32ID-NEXT: and a0, s3, a0
+; RV32ID-NEXT: or a0, s1, a0
+; RV32ID-NEXT: feq.s a2, fs0, fs0
+; RV32ID-NEXT: neg a2, a2
; RV32ID-NEXT: lui a4, 524288
-; RV32ID-NEXT: lui a2, 524288
-; RV32ID-NEXT: beqz s0, .LBB10_2
+; RV32ID-NEXT: li a5, 1
+; RV32ID-NEXT: lui a3, 524288
+; RV32ID-NEXT: bne s2, a5, .LBB10_2
; RV32ID-NEXT: # %bb.1: # %start
-; RV32ID-NEXT: mv a2, a1
+; RV32ID-NEXT: mv a3, a1
; RV32ID-NEXT: .LBB10_2: # %start
-; RV32ID-NEXT: lui a1, %hi(.LCPI10_0)
-; RV32ID-NEXT: flw fa5, %lo(.LCPI10_0)(a1)
-; RV32ID-NEXT: flt.s a3, fa5, fs0
-; RV32ID-NEXT: beqz a3, .LBB10_4
+; RV32ID-NEXT: and a0, a2, a0
+; RV32ID-NEXT: beqz s0, .LBB10_4
; RV32ID-NEXT: # %bb.3:
-; RV32ID-NEXT: addi a2, a4, -1
+; RV32ID-NEXT: addi a3, a4, -1
; RV32ID-NEXT: .LBB10_4: # %start
-; RV32ID-NEXT: feq.s a1, fs0, fs0
-; RV32ID-NEXT: neg a4, a1
-; RV32ID-NEXT: and a1, a4, a2
-; RV32ID-NEXT: neg a2, a3
-; RV32ID-NEXT: neg a3, s0
-; RV32ID-NEXT: and a0, a3, a0
-; RV32ID-NEXT: or a0, a2, a0
-; RV32ID-NEXT: and a0, a4, a0
-; RV32ID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32ID-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32ID-NEXT: and a1, a2, a3
+; RV32ID-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32ID-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32ID-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32ID-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32ID-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32ID-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
-; RV32ID-NEXT: addi sp, sp, 16
+; RV32ID-NEXT: addi sp, sp, 32
; RV32ID-NEXT: ret
;
; CHECK64ZFBFMIN-LABEL: fcvt_l_bf16_sat:
@@ -654,7 +675,8 @@ define i64 @fcvt_lu_bf16_sat(bfloat %a) nounwind {
; CHECK32ZFBFMIN-NEXT: neg s0, a0
; CHECK32ZFBFMIN-NEXT: fmv.w.x fa5, zero
; CHECK32ZFBFMIN-NEXT: fle.s a0, fa5, fa0
-; CHECK32ZFBFMIN-NEXT: neg s1, a0
+; CHECK32ZFBFMIN-NEXT: xori a0, a0, 1
+; CHECK32ZFBFMIN-NEXT: addi s1, a0, -1
; CHECK32ZFBFMIN-NEXT: call __fixunssfdi
; CHECK32ZFBFMIN-NEXT: and a0, s1, a0
; CHECK32ZFBFMIN-NEXT: or a0, s0, a0
@@ -681,7 +703,8 @@ define i64 @fcvt_lu_bf16_sat(bfloat %a) nounwind {
; RV32ID-NEXT: neg s0, a0
; RV32ID-NEXT: fmv.w.x fa5, zero
; RV32ID-NEXT: fle.s a0, fa5, fa0
-; RV32ID-NEXT: neg s1, a0
+; RV32ID-NEXT: xori a0, a0, 1
+; RV32ID-NEXT: addi s1, a0, -1
; RV32ID-NEXT: call __fixunssfdi
; RV32ID-NEXT: and a0, s1, a0
; RV32ID-NEXT: or a0, s0, a0
diff --git a/llvm/test/CodeGen/RISCV/double-convert.ll b/llvm/test/CodeGen/RISCV/double-convert.ll
index eb8ffe75ef7697..f2e37f55521bac 100644
--- a/llvm/test/CodeGen/RISCV/double-convert.ll
+++ b/llvm/test/CodeGen/RISCV/double-convert.ll
@@ -749,40 +749,47 @@ define i64 @fcvt_l_d(double %a) nounwind {
define i64 @fcvt_l_d_sat(double %a) nounwind {
; RV32IFD-LABEL: fcvt_l_d_sat:
; RV32IFD: # %bb.0: # %start
-; RV32IFD-NEXT: addi sp, sp, -16
-; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IFD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: addi sp, sp, -32
+; RV32IFD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill
; RV32IFD-NEXT: lui a0, %hi(.LCPI12_0)
; RV32IFD-NEXT: fld fa5, %lo(.LCPI12_0)(a0)
+; RV32IFD-NEXT: lui a0, %hi(.LCPI12_1)
+; RV32IFD-NEXT: fld fa4, %lo(.LCPI12_1)(a0)
; RV32IFD-NEXT: fmv.d fs0, fa0
-; RV32IFD-NEXT: fle.d s0, fa5, fa0
+; RV32IFD-NEXT: flt.d s0, fa5, fa0
+; RV32IFD-NEXT: neg s1, s0
+; RV32IFD-NEXT: fle.d s2, fa4, fa0
+; RV32IFD-NEXT: neg s3, s2
; RV32IFD-NEXT: call __fixdfdi
+; RV32IFD-NEXT: and a0, s3, a0
+; RV32IFD-NEXT: or a0, s1, a0
+; RV32IFD-NEXT: feq.d a2, fs0, fs0
+; RV32IFD-NEXT: neg a2, a2
; RV32IFD-NEXT: lui a4, 524288
-; RV32IFD-NEXT: lui a2, 524288
-; RV32IFD-NEXT: beqz s0, .LBB12_2
+; RV32IFD-NEXT: li a5, 1
+; RV32IFD-NEXT: lui a3, 524288
+; RV32IFD-NEXT: bne s2, a5, .LBB12_2
; RV32IFD-NEXT: # %bb.1: # %start
-; RV32IFD-NEXT: mv a2, a1
+; RV32IFD-NEXT: mv a3, a1
; RV32IFD-NEXT: .LBB12_2: # %start
-; RV32IFD-NEXT: lui a1, %hi(.LCPI12_1)
-; RV32IFD-NEXT: fld fa5, %lo(.LCPI12_1)(a1)
-; RV32IFD-NEXT: flt.d a3, fa5, fs0
-; RV32IFD-NEXT: beqz a3, .LBB12_4
+; RV32IFD-NEXT: and a0, a2, a0
+; RV32IFD-NEXT: beqz s0, .LBB12_4
; RV32IFD-NEXT: # %bb.3:
-; RV32IFD-NEXT: addi a2, a4, -1
+; RV32IFD-NEXT: addi a3, a4, -1
; RV32IFD-NEXT: .LBB12_4: # %start
-; RV32IFD-NEXT: feq.d a1, fs0, fs0
-; RV32IFD-NEXT: neg a4, a1
-; RV32IFD-NEXT: and a1, a4, a2
-; RV32IFD-NEXT: neg a2, a3
-; RV32IFD-NEXT: neg a3, s0
-; RV32IFD-NEXT: and a0, a3, a0
-; RV32IFD-NEXT: or a0, a2, a0
-; RV32IFD-NEXT: and a0, a4, a0
-; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: and a1, a2, a3
+; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
-; RV32IFD-NEXT: addi sp, sp, 16
+; RV32IFD-NEXT: addi sp, sp, 32
; RV32IFD-NEXT: ret
;
; RV64IFD-LABEL: fcvt_l_d_sat:
@@ -800,40 +807,45 @@ define i64 @fcvt_l_d_sat(double %a) nounwind {
; RV32IZFINXZDINX-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT: sw a0, 8(sp)
-; RV32IZFINXZDINX-NEXT: sw a1, 12(sp)
-; RV32IZFINXZDINX-NEXT: lw s0, 8(sp)
-; RV32IZFINXZDINX-NEXT: lw s1, 12(sp)
-; RV32IZFINXZDINX-NEXT: call __fixdfdi
+; RV32IZFINXZDINX-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT: sw a0, 0(sp)
+; RV32IZFINXZDINX-NEXT: sw a1, 4(sp)
+; RV32IZFINXZDINX-NEXT: lw s0, 0(sp)
+; RV32IZFINXZDINX-NEXT: lw s1, 4(sp)
; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI12_0)
; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI12_0+4)(a2)
; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI12_0)(a2)
-; RV32IZFINXZDINX-NEXT: fle.d a2, a2, s0
+; RV32IZFINXZDINX-NEXT: fle.d s2, a2, s0
+; RV32IZFINXZDINX-NEXT: neg s3, s2
+; RV32IZFINXZDINX-NEXT: call __fixdfdi
+; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI12_1)
+; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI12_1+4)(a2)
+; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI12_1)(a2)
+; RV32IZFINXZDINX-NEXT: and a0, s3, a0
+; RV32IZFINXZDINX-NEXT: flt.d a3, a2, s0
+; RV32IZFINXZDINX-NEXT: neg a2, a3
+; RV32IZFINXZDINX-NEXT: or a0, a2, a0
+; RV32IZFINXZDINX-NEXT: feq.d a2, s0, s0
+; RV32IZFINXZDINX-NEXT: neg a2, a2
; RV32IZFINXZDINX-NEXT: lui a5, 524288
-; RV32IZFINXZDINX-NEXT: lui a3, 524288
-; RV32IZFINXZDINX-NEXT: beqz a2, .LBB12_2
+; RV32IZFINXZDINX-NEXT: li a6, 1
+; RV32IZFINXZDINX-NEXT: lui a4, 524288
+; RV32IZFINXZDINX-NEXT: bne s2, a6, .LBB12_2
; RV32IZFINXZDINX-NEXT: # %bb.1: # %start
-; RV32IZFINXZDINX-NEXT: mv a3, a1
+; RV32IZFINXZDINX-NEXT: mv a4, a1
; RV32IZFINXZDINX-NEXT: .LBB12_2: # %start
-; RV32IZFINXZDINX-NEXT: lui a1, %hi(.LCPI12_1)
-; RV32IZFINXZDINX-NEXT: lw a6, %lo(.LCPI12_1)(a1)
-; RV32IZFINXZDINX-NEXT: lw a7, %lo(.LCPI12_1+4)(a1)
-; RV32IZFINXZDINX-NEXT: flt.d a4, a6, s0
-; RV32IZFINXZDINX-NEXT: beqz a4, .LBB12_4
+; RV32IZFINXZDINX-NEXT: and a0, a2, a0
+; RV32IZFINXZDINX-NEXT: beqz a3, .LBB12_4
; RV32IZFINXZDINX-NEXT: # %bb.3:
-; RV32IZFINXZDINX-NEXT: addi a3, a5, -1
+; RV32IZFINXZDINX-NEXT: addi a4, a5, -1
; RV32IZFINXZDINX-NEXT: .LBB12_4: # %start
-; RV32IZFINXZDINX-NEXT: feq.d a1, s0, s0
-; RV32IZFINXZDINX-NEXT: neg a5, a1
-; RV32IZFINXZDINX-NEXT: and a1, a5, a3
-; RV32IZFINXZDINX-NEXT: neg a2, a2
-; RV32IZFINXZDINX-NEXT: and a0, a2, a0
-; RV32IZFINXZDINX-NEXT: neg a2, a4
-; RV32IZFINXZDINX-NEXT: or a0, a2, a0
-; RV32IZFINXZDINX-NEXT: and a0, a5, a0
+; RV32IZFINXZDINX-NEXT: and a1, a2, a4
; RV32IZFINXZDINX-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: addi sp, sp, 32
; RV32IZFINXZDINX-NEXT: ret
;
@@ -1013,23 +1025,23 @@ define i64 @fcvt_lu_d_sat(double %a) nounwind {
; RV32IFD-NEXT: addi sp, sp, -16
; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IFD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill
-; RV32IFD-NEXT: fmv.d fs0, fa0
+; RV32IFD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: lui a0, %hi(.LCPI14_0)
+; RV32IFD-NEXT: fld fa5, %lo(.LCPI14_0)(a0)
+; RV32IFD-NEXT: flt.d a0, fa5, fa0
+; RV32IFD-NEXT: neg s0, a0
; RV32IFD-NEXT: fcvt.d.w fa5, zero
; RV32IFD-NEXT: fle.d a0, fa5, fa0
-; RV32IFD-NEXT: neg s0, a0
+; RV32IFD-NEXT: xori a0, a0, 1
+; RV32IFD-NEXT: addi s1, a0, -1
; RV32IFD-NEXT: call __fixunsdfdi
-; RV32IFD-NEXT: lui a2, %hi(.LCPI14_0)
-; RV32IFD-NEXT: fld fa5, %lo(.LCPI14_0)(a2)
-; RV32IFD-NEXT: and a0, s0, a0
-; RV32IFD-NEXT: flt.d a2, fa5, fs0
-; RV32IFD-NEXT: neg a2, a2
-; RV32IFD-NEXT: or a0, a2, a0
-; RV32IFD-NEXT: and a1, s0, a1
-; RV32IFD-NEXT: or a1, a2, a1
+; RV32IFD-NEXT: and a0, s1, a0
+; RV32IFD-NEXT: or a0, s0, a0
+; RV32IFD-NEXT: and a1, s1, a1
+; RV32IFD-NEXT: or a1, s0, a1
; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
+; RV32IFD-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: addi sp, sp, 16
; RV32IFD-NEXT: ret
;
@@ -1054,11 +1066,12 @@ define i64 @fcvt_lu_d_sat(double %a) nounwind {
; RV32IZFINXZDINX-NEXT: lw s1, 12(sp)
; RV32IZFINXZDINX-NEXT: call __fixunsdfdi
; RV32IZFINXZDINX-NEXT: fcvt.d.w a2, zero
-; RV32IZFINXZDINX-NEXT: lui a4, %hi(.LCPI14_0)
-; RV32IZFINXZDINX-NEXT: lw a5, %lo(.LCPI14_0+4)(a4)
-; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI14_0)(a4)
; RV32IZFINXZDINX-NEXT: fle.d a2, a2, s0
-; RV32IZFINXZDINX-NEXT: neg a2, a2
+; RV32IZFINXZDINX-NEXT: lui a3, %hi(.LCPI14_0)
+; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI14_0)(a3)
+; RV32IZFINXZDINX-NEXT: lw a5, %lo(.LCPI14_0+4)(a3)
+; RV32IZFINXZDINX-NEXT: xori a2, a2, 1
+; RV32IZFINXZDINX-NEXT: addi a2, a2, -1
; RV32IZFINXZDINX-NEXT: and a0, a2, a0
; RV32IZFINXZDINX-NEXT: flt.d a3, a4, s0
; RV32IZFINXZDINX-NEXT: neg a3, a3
diff --git a/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll b/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll
index b8c6e84502408f..ff2d8e00630071 100644
--- a/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll
+++ b/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll
@@ -50,41 +50,48 @@ define signext i32 @test_floor_si32(double %x) {
define i64 @test_floor_si64(double %x) nounwind {
; RV32IFD-LABEL: test_floor_si64:
; RV32IFD: # %bb.0:
-; RV32IFD-NEXT: addi sp, sp, -16
-; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IFD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: addi sp, sp, -32
+; RV32IFD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill
; RV32IFD-NEXT: call floor
; RV32IFD-NEXT: lui a0, %hi(.LCPI1_0)
; RV32IFD-NEXT: fld fa5, %lo(.LCPI1_0)(a0)
+; RV32IFD-NEXT: lui a0, %hi(.LCPI1_1)
+; RV32IFD-NEXT: fld fa4, %lo(.LCPI1_1)(a0)
; RV32IFD-NEXT: fmv.d fs0, fa0
-; RV32IFD-NEXT: fle.d s0, fa5, fa0
+; RV32IFD-NEXT: flt.d s0, fa5, fa0
+; RV32IFD-NEXT: neg s1, s0
+; RV32IFD-NEXT: fle.d s2, fa4, fa0
+; RV32IFD-NEXT: neg s3, s2
; RV32IFD-NEXT: call __fixdfdi
+; RV32IFD-NEXT: and a0, s3, a0
+; RV32IFD-NEXT: or a0, s1, a0
+; RV32IFD-NEXT: feq.d a2, fs0, fs0
+; RV32IFD-NEXT: neg a2, a2
; RV32IFD-NEXT: lui a4, 524288
-; RV32IFD-NEXT: lui a2, 524288
-; RV32IFD-NEXT: beqz s0, .LBB1_2
+; RV32IFD-NEXT: li a5, 1
+; RV32IFD-NEXT: lui a3, 524288
+; RV32IFD-NEXT: bne s2, a5, .LBB1_2
; RV32IFD-NEXT: # %bb.1:
-; RV32IFD-NEXT: mv a2, a1
+; RV32IFD-NEXT: mv a3, a1
; RV32IFD-NEXT: .LBB1_2:
-; RV32IFD-NEXT: lui a1, %hi(.LCPI1_1)
-; RV32IFD-NEXT: fld fa5, %lo(.LCPI1_1)(a1)
-; RV32IFD-NEXT: flt.d a3, fa5, fs0
-; RV32IFD-NEXT: beqz a3, .LBB1_4
+; RV32IFD-NEXT: and a0, a2, a0
+; RV32IFD-NEXT: beqz s0, .LBB1_4
; RV32IFD-NEXT: # %bb.3:
-; RV32IFD-NEXT: addi a2, a4, -1
+; RV32IFD-NEXT: addi a3, a4, -1
; RV32IFD-NEXT: .LBB1_4:
-; RV32IFD-NEXT: feq.d a1, fs0, fs0
-; RV32IFD-NEXT: neg a4, a1
-; RV32IFD-NEXT: and a1, a4, a2
-; RV32IFD-NEXT: neg a2, a3
-; RV32IFD-NEXT: neg a3, s0
-; RV32IFD-NEXT: and a0, a3, a0
-; RV32IFD-NEXT: or a0, a2, a0
-; RV32IFD-NEXT: and a0, a4, a0
-; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: and a1, a2, a3
+; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
-; RV32IFD-NEXT: addi sp, sp, 16
+; RV32IFD-NEXT: addi sp, sp, 32
; RV32IFD-NEXT: ret
;
; RV64IFD-LABEL: test_floor_si64:
@@ -101,44 +108,47 @@ define i64 @test_floor_si64(double %x) nounwind {
; RV32IZFINXZDINX-NEXT: addi sp, sp, -32
; RV32IZFINXZDINX-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT: sw s2, 20(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT: sw s3, 16(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: call floor
-; RV32IZFINXZDINX-NEXT: sw a0, 8(sp)
-; RV32IZFINXZDINX-NEXT: sw a1, 12(sp)
-; RV32IZFINXZDINX-NEXT: lw s2, 8(sp)
-; RV32IZFINXZDINX-NEXT: lw s3, 12(sp)
+; RV32IZFINXZDINX-NEXT: sw a0, 0(sp)
+; RV32IZFINXZDINX-NEXT: sw a1, 4(sp)
+; RV32IZFINXZDINX-NEXT: lw s0, 0(sp)
+; RV32IZFINXZDINX-NEXT: lw s1, 4(sp)
; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI1_0)
; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI1_0+4)(a2)
; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI1_0)(a2)
-; RV32IZFINXZDINX-NEXT: fle.d s0, a2, s2
+; RV32IZFINXZDINX-NEXT: fle.d s2, a2, s0
+; RV32IZFINXZDINX-NEXT: neg s3, s2
; RV32IZFINXZDINX-NEXT: call __fixdfdi
+; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI1_1)
+; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI1_1+4)(a2)
+; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI1_1)(a2)
+; RV32IZFINXZDINX-NEXT: and a0, s3, a0
+; RV32IZFINXZDINX-NEXT: flt.d a3, a2, s0
+; RV32IZFINXZDINX-NEXT: neg a2, a3
+; RV32IZFINXZDINX-NEXT: or a0, a2, a0
+; RV32IZFINXZDINX-NEXT: feq.d a2, s0, s0
+; RV32IZFINXZDINX-NEXT: neg a2, a2
+; RV32IZFINXZDINX-NEXT: lui a5, 524288
+; RV32IZFINXZDINX-NEXT: li a6, 1
; RV32IZFINXZDINX-NEXT: lui a4, 524288
-; RV32IZFINXZDINX-NEXT: lui a2, 524288
-; RV32IZFINXZDINX-NEXT: beqz s0, .LBB1_2
+; RV32IZFINXZDINX-NEXT: bne s2, a6, .LBB1_2
; RV32IZFINXZDINX-NEXT: # %bb.1:
-; RV32IZFINXZDINX-NEXT: mv a2, a1
+; RV32IZFINXZDINX-NEXT: mv a4, a1
; RV32IZFINXZDINX-NEXT: .LBB1_2:
-; RV32IZFINXZDINX-NEXT: lui a1, %hi(.LCPI1_1)
-; RV32IZFINXZDINX-NEXT: lw a6, %lo(.LCPI1_1)(a1)
-; RV32IZFINXZDINX-NEXT: lw a7, %lo(.LCPI1_1+4)(a1)
-; RV32IZFINXZDINX-NEXT: flt.d a3, a6, s2
+; RV32IZFINXZDINX-NEXT: and a0, a2, a0
; RV32IZFINXZDINX-NEXT: beqz a3, .LBB1_4
; RV32IZFINXZDINX-NEXT: # %bb.3:
-; RV32IZFINXZDINX-NEXT: addi a2, a4, -1
+; RV32IZFINXZDINX-NEXT: addi a4, a5, -1
; RV32IZFINXZDINX-NEXT: .LBB1_4:
-; RV32IZFINXZDINX-NEXT: feq.d a1, s2, s2
-; RV32IZFINXZDINX-NEXT: neg a4, a1
-; RV32IZFINXZDINX-NEXT: and a1, a4, a2
-; RV32IZFINXZDINX-NEXT: neg a2, s0
-; RV32IZFINXZDINX-NEXT: and a0, a2, a0
-; RV32IZFINXZDINX-NEXT: neg a2, a3
-; RV32IZFINXZDINX-NEXT: or a0, a2, a0
-; RV32IZFINXZDINX-NEXT: and a0, a4, a0
+; RV32IZFINXZDINX-NEXT: and a1, a2, a4
; RV32IZFINXZDINX-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32IZFINXZDINX-NEXT: lw s2, 20(sp) # 4-byte Folded Reload
-; RV32IZFINXZDINX-NEXT: lw s3, 16(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: addi sp, sp, 32
; RV32IZFINXZDINX-NEXT: ret
;
@@ -208,7 +218,8 @@ define i64 @test_floor_ui64(double %x) nounwind {
; RV32IFD-NEXT: neg s0, a0
; RV32IFD-NEXT: fcvt.d.w fa5, zero
; RV32IFD-NEXT: fle.d a0, fa5, fa0
-; RV32IFD-NEXT: neg s1, a0
+; RV32IFD-NEXT: xori a0, a0, 1
+; RV32IFD-NEXT: addi s1, a0, -1
; RV32IFD-NEXT: call __fixunsdfdi
; RV32IFD-NEXT: and a0, s1, a0
; RV32IFD-NEXT: or a0, s0, a0
@@ -235,29 +246,28 @@ define i64 @test_floor_ui64(double %x) nounwind {
; RV32IZFINXZDINX-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: call floor
; RV32IZFINXZDINX-NEXT: sw a0, 8(sp)
; RV32IZFINXZDINX-NEXT: sw a1, 12(sp)
; RV32IZFINXZDINX-NEXT: lw s0, 8(sp)
; RV32IZFINXZDINX-NEXT: lw s1, 12(sp)
+; RV32IZFINXZDINX-NEXT: call __fixunsdfdi
; RV32IZFINXZDINX-NEXT: fcvt.d.w a2, zero
; RV32IZFINXZDINX-NEXT: fle.d a2, a2, s0
-; RV32IZFINXZDINX-NEXT: neg s2, a2
-; RV32IZFINXZDINX-NEXT: call __fixunsdfdi
-; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI3_0)
-; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI3_0+4)(a2)
-; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI3_0)(a2)
-; RV32IZFINXZDINX-NEXT: and a0, s2, a0
-; RV32IZFINXZDINX-NEXT: flt.d a2, a2, s0
-; RV32IZFINXZDINX-NEXT: neg a2, a2
-; RV32IZFINXZDINX-NEXT: or a0, a2, a0
-; RV32IZFINXZDINX-NEXT: and a1, s2, a1
-; RV32IZFINXZDINX-NEXT: or a1, a2, a1
+; RV32IZFINXZDINX-NEXT: lui a3, %hi(.LCPI3_0)
+; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI3_0)(a3)
+; RV32IZFINXZDINX-NEXT: lw a5, %lo(.LCPI3_0+4)(a3)
+; RV32IZFINXZDINX-NEXT: xori a2, a2, 1
+; RV32IZFINXZDINX-NEXT: addi a2, a2, -1
+; RV32IZFINXZDINX-NEXT: and a0, a2, a0
+; RV32IZFINXZDINX-NEXT: flt.d a3, a4, s0
+; RV32IZFINXZDINX-NEXT: neg a3, a3
+; RV32IZFINXZDINX-NEXT: or a0, a3, a0
+; RV32IZFINXZDINX-NEXT: and a1, a2, a1
+; RV32IZFINXZDINX-NEXT: or a1, a3, a1
; RV32IZFINXZDINX-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IZFINXZDINX-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: addi sp, sp, 32
; RV32IZFINXZDINX-NEXT: ret
;
@@ -316,41 +326,48 @@ define signext i32 @test_ceil_si32(double %x) {
define i64 @test_ceil_si64(double %x) nounwind {
; RV32IFD-LABEL: test_ceil_si64:
; RV32IFD: # %bb.0:
-; RV32IFD-NEXT: addi sp, sp, -16
-; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IFD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: addi sp, sp, -32
+; RV32IFD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill
; RV32IFD-NEXT: call ceil
; RV32IFD-NEXT: lui a0, %hi(.LCPI5_0)
; RV32IFD-NEXT: fld fa5, %lo(.LCPI5_0)(a0)
+; RV32IFD-NEXT: lui a0, %hi(.LCPI5_1)
+; RV32IFD-NEXT: fld fa4, %lo(.LCPI5_1)(a0)
; RV32IFD-NEXT: fmv.d fs0, fa0
-; RV32IFD-NEXT: fle.d s0, fa5, fa0
+; RV32IFD-NEXT: flt.d s0, fa5, fa0
+; RV32IFD-NEXT: neg s1, s0
+; RV32IFD-NEXT: fle.d s2, fa4, fa0
+; RV32IFD-NEXT: neg s3, s2
; RV32IFD-NEXT: call __fixdfdi
+; RV32IFD-NEXT: and a0, s3, a0
+; RV32IFD-NEXT: or a0, s1, a0
+; RV32IFD-NEXT: feq.d a2, fs0, fs0
+; RV32IFD-NEXT: neg a2, a2
; RV32IFD-NEXT: lui a4, 524288
-; RV32IFD-NEXT: lui a2, 524288
-; RV32IFD-NEXT: beqz s0, .LBB5_2
+; RV32IFD-NEXT: li a5, 1
+; RV32IFD-NEXT: lui a3, 524288
+; RV32IFD-NEXT: bne s2, a5, .LBB5_2
; RV32IFD-NEXT: # %bb.1:
-; RV32IFD-NEXT: mv a2, a1
+; RV32IFD-NEXT: mv a3, a1
; RV32IFD-NEXT: .LBB5_2:
-; RV32IFD-NEXT: lui a1, %hi(.LCPI5_1)
-; RV32IFD-NEXT: fld fa5, %lo(.LCPI5_1)(a1)
-; RV32IFD-NEXT: flt.d a3, fa5, fs0
-; RV32IFD-NEXT: beqz a3, .LBB5_4
+; RV32IFD-NEXT: and a0, a2, a0
+; RV32IFD-NEXT: beqz s0, .LBB5_4
; RV32IFD-NEXT: # %bb.3:
-; RV32IFD-NEXT: addi a2, a4, -1
+; RV32IFD-NEXT: addi a3, a4, -1
; RV32IFD-NEXT: .LBB5_4:
-; RV32IFD-NEXT: feq.d a1, fs0, fs0
-; RV32IFD-NEXT: neg a4, a1
-; RV32IFD-NEXT: and a1, a4, a2
-; RV32IFD-NEXT: neg a2, a3
-; RV32IFD-NEXT: neg a3, s0
-; RV32IFD-NEXT: and a0, a3, a0
-; RV32IFD-NEXT: or a0, a2, a0
-; RV32IFD-NEXT: and a0, a4, a0
-; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: and a1, a2, a3
+; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
-; RV32IFD-NEXT: addi sp, sp, 16
+; RV32IFD-NEXT: addi sp, sp, 32
; RV32IFD-NEXT: ret
;
; RV64IFD-LABEL: test_ceil_si64:
@@ -367,44 +384,47 @@ define i64 @test_ceil_si64(double %x) nounwind {
; RV32IZFINXZDINX-NEXT: addi sp, sp, -32
; RV32IZFINXZDINX-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT: sw s2, 20(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT: sw s3, 16(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: call ceil
-; RV32IZFINXZDINX-NEXT: sw a0, 8(sp)
-; RV32IZFINXZDINX-NEXT: sw a1, 12(sp)
-; RV32IZFINXZDINX-NEXT: lw s2, 8(sp)
-; RV32IZFINXZDINX-NEXT: lw s3, 12(sp)
+; RV32IZFINXZDINX-NEXT: sw a0, 0(sp)
+; RV32IZFINXZDINX-NEXT: sw a1, 4(sp)
+; RV32IZFINXZDINX-NEXT: lw s0, 0(sp)
+; RV32IZFINXZDINX-NEXT: lw s1, 4(sp)
; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI5_0)
; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI5_0+4)(a2)
; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI5_0)(a2)
-; RV32IZFINXZDINX-NEXT: fle.d s0, a2, s2
+; RV32IZFINXZDINX-NEXT: fle.d s2, a2, s0
+; RV32IZFINXZDINX-NEXT: neg s3, s2
; RV32IZFINXZDINX-NEXT: call __fixdfdi
+; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI5_1)
+; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI5_1+4)(a2)
+; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI5_1)(a2)
+; RV32IZFINXZDINX-NEXT: and a0, s3, a0
+; RV32IZFINXZDINX-NEXT: flt.d a3, a2, s0
+; RV32IZFINXZDINX-NEXT: neg a2, a3
+; RV32IZFINXZDINX-NEXT: or a0, a2, a0
+; RV32IZFINXZDINX-NEXT: feq.d a2, s0, s0
+; RV32IZFINXZDINX-NEXT: neg a2, a2
+; RV32IZFINXZDINX-NEXT: lui a5, 524288
+; RV32IZFINXZDINX-NEXT: li a6, 1
; RV32IZFINXZDINX-NEXT: lui a4, 524288
-; RV32IZFINXZDINX-NEXT: lui a2, 524288
-; RV32IZFINXZDINX-NEXT: beqz s0, .LBB5_2
+; RV32IZFINXZDINX-NEXT: bne s2, a6, .LBB5_2
; RV32IZFINXZDINX-NEXT: # %bb.1:
-; RV32IZFINXZDINX-NEXT: mv a2, a1
+; RV32IZFINXZDINX-NEXT: mv a4, a1
; RV32IZFINXZDINX-NEXT: .LBB5_2:
-; RV32IZFINXZDINX-NEXT: lui a1, %hi(.LCPI5_1)
-; RV32IZFINXZDINX-NEXT: lw a6, %lo(.LCPI5_1)(a1)
-; RV32IZFINXZDINX-NEXT: lw a7, %lo(.LCPI5_1+4)(a1)
-; RV32IZFINXZDINX-NEXT: flt.d a3, a6, s2
+; RV32IZFINXZDINX-NEXT: and a0, a2, a0
; RV32IZFINXZDINX-NEXT: beqz a3, .LBB5_4
; RV32IZFINXZDINX-NEXT: # %bb.3:
-; RV32IZFINXZDINX-NEXT: addi a2, a4, -1
+; RV32IZFINXZDINX-NEXT: addi a4, a5, -1
; RV32IZFINXZDINX-NEXT: .LBB5_4:
-; RV32IZFINXZDINX-NEXT: feq.d a1, s2, s2
-; RV32IZFINXZDINX-NEXT: neg a4, a1
-; RV32IZFINXZDINX-NEXT: and a1, a4, a2
-; RV32IZFINXZDINX-NEXT: neg a2, s0
-; RV32IZFINXZDINX-NEXT: and a0, a2, a0
-; RV32IZFINXZDINX-NEXT: neg a2, a3
-; RV32IZFINXZDINX-NEXT: or a0, a2, a0
-; RV32IZFINXZDINX-NEXT: and a0, a4, a0
+; RV32IZFINXZDINX-NEXT: and a1, a2, a4
; RV32IZFINXZDINX-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32IZFINXZDINX-NEXT: lw s2, 20(sp) # 4-byte Folded Reload
-; RV32IZFINXZDINX-NEXT: lw s3, 16(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: addi sp, sp, 32
; RV32IZFINXZDINX-NEXT: ret
;
@@ -474,7 +494,8 @@ define i64 @test_ceil_ui64(double %x) nounwind {
; RV32IFD-NEXT: neg s0, a0
; RV32IFD-NEXT: fcvt.d.w fa5, zero
; RV32IFD-NEXT: fle.d a0, fa5, fa0
-; RV32IFD-NEXT: neg s1, a0
+; RV32IFD-NEXT: xori a0, a0, 1
+; RV32IFD-NEXT: addi s1, a0, -1
; RV32IFD-NEXT: call __fixunsdfdi
; RV32IFD-NEXT: and a0, s1, a0
; RV32IFD-NEXT: or a0, s0, a0
@@ -501,29 +522,28 @@ define i64 @test_ceil_ui64(double %x) nounwind {
; RV32IZFINXZDINX-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: call ceil
; RV32IZFINXZDINX-NEXT: sw a0, 8(sp)
; RV32IZFINXZDINX-NEXT: sw a1, 12(sp)
; RV32IZFINXZDINX-NEXT: lw s0, 8(sp)
; RV32IZFINXZDINX-NEXT: lw s1, 12(sp)
+; RV32IZFINXZDINX-NEXT: call __fixunsdfdi
; RV32IZFINXZDINX-NEXT: fcvt.d.w a2, zero
; RV32IZFINXZDINX-NEXT: fle.d a2, a2, s0
-; RV32IZFINXZDINX-NEXT: neg s2, a2
-; RV32IZFINXZDINX-NEXT: call __fixunsdfdi
-; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI7_0)
-; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI7_0+4)(a2)
-; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI7_0)(a2)
-; RV32IZFINXZDINX-NEXT: and a0, s2, a0
-; RV32IZFINXZDINX-NEXT: flt.d a2, a2, s0
-; RV32IZFINXZDINX-NEXT: neg a2, a2
-; RV32IZFINXZDINX-NEXT: or a0, a2, a0
-; RV32IZFINXZDINX-NEXT: and a1, s2, a1
-; RV32IZFINXZDINX-NEXT: or a1, a2, a1
+; RV32IZFINXZDINX-NEXT: lui a3, %hi(.LCPI7_0)
+; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI7_0)(a3)
+; RV32IZFINXZDINX-NEXT: lw a5, %lo(.LCPI7_0+4)(a3)
+; RV32IZFINXZDINX-NEXT: xori a2, a2, 1
+; RV32IZFINXZDINX-NEXT: addi a2, a2, -1
+; RV32IZFINXZDINX-NEXT: and a0, a2, a0
+; RV32IZFINXZDINX-NEXT: flt.d a3, a4, s0
+; RV32IZFINXZDINX-NEXT: neg a3, a3
+; RV32IZFINXZDINX-NEXT: or a0, a3, a0
+; RV32IZFINXZDINX-NEXT: and a1, a2, a1
+; RV32IZFINXZDINX-NEXT: or a1, a3, a1
; RV32IZFINXZDINX-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IZFINXZDINX-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: addi sp, sp, 32
; RV32IZFINXZDINX-NEXT: ret
;
@@ -582,41 +602,48 @@ define signext i32 @test_trunc_si32(double %x) {
define i64 @test_trunc_si64(double %x) nounwind {
; RV32IFD-LABEL: test_trunc_si64:
; RV32IFD: # %bb.0:
-; RV32IFD-NEXT: addi sp, sp, -16
-; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IFD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: addi sp, sp, -32
+; RV32IFD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill
; RV32IFD-NEXT: call trunc
; RV32IFD-NEXT: lui a0, %hi(.LCPI9_0)
; RV32IFD-NEXT: fld fa5, %lo(.LCPI9_0)(a0)
+; RV32IFD-NEXT: lui a0, %hi(.LCPI9_1)
+; RV32IFD-NEXT: fld fa4, %lo(.LCPI9_1)(a0)
; RV32IFD-NEXT: fmv.d fs0, fa0
-; RV32IFD-NEXT: fle.d s0, fa5, fa0
+; RV32IFD-NEXT: flt.d s0, fa5, fa0
+; RV32IFD-NEXT: neg s1, s0
+; RV32IFD-NEXT: fle.d s2, fa4, fa0
+; RV32IFD-NEXT: neg s3, s2
; RV32IFD-NEXT: call __fixdfdi
+; RV32IFD-NEXT: and a0, s3, a0
+; RV32IFD-NEXT: or a0, s1, a0
+; RV32IFD-NEXT: feq.d a2, fs0, fs0
+; RV32IFD-NEXT: neg a2, a2
; RV32IFD-NEXT: lui a4, 524288
-; RV32IFD-NEXT: lui a2, 524288
-; RV32IFD-NEXT: beqz s0, .LBB9_2
+; RV32IFD-NEXT: li a5, 1
+; RV32IFD-NEXT: lui a3, 524288
+; RV32IFD-NEXT: bne s2, a5, .LBB9_2
; RV32IFD-NEXT: # %bb.1:
-; RV32IFD-NEXT: mv a2, a1
+; RV32IFD-NEXT: mv a3, a1
; RV32IFD-NEXT: .LBB9_2:
-; RV32IFD-NEXT: lui a1, %hi(.LCPI9_1)
-; RV32IFD-NEXT: fld fa5, %lo(.LCPI9_1)(a1)
-; RV32IFD-NEXT: flt.d a3, fa5, fs0
-; RV32IFD-NEXT: beqz a3, .LBB9_4
+; RV32IFD-NEXT: and a0, a2, a0
+; RV32IFD-NEXT: beqz s0, .LBB9_4
; RV32IFD-NEXT: # %bb.3:
-; RV32IFD-NEXT: addi a2, a4, -1
+; RV32IFD-NEXT: addi a3, a4, -1
; RV32IFD-NEXT: .LBB9_4:
-; RV32IFD-NEXT: feq.d a1, fs0, fs0
-; RV32IFD-NEXT: neg a4, a1
-; RV32IFD-NEXT: and a1, a4, a2
-; RV32IFD-NEXT: neg a2, a3
-; RV32IFD-NEXT: neg a3, s0
-; RV32IFD-NEXT: and a0, a3, a0
-; RV32IFD-NEXT: or a0, a2, a0
-; RV32IFD-NEXT: and a0, a4, a0
-; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: and a1, a2, a3
+; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
-; RV32IFD-NEXT: addi sp, sp, 16
+; RV32IFD-NEXT: addi sp, sp, 32
; RV32IFD-NEXT: ret
;
; RV64IFD-LABEL: test_trunc_si64:
@@ -633,44 +660,47 @@ define i64 @test_trunc_si64(double %x) nounwind {
; RV32IZFINXZDINX-NEXT: addi sp, sp, -32
; RV32IZFINXZDINX-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT: sw s2, 20(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT: sw s3, 16(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: call trunc
-; RV32IZFINXZDINX-NEXT: sw a0, 8(sp)
-; RV32IZFINXZDINX-NEXT: sw a1, 12(sp)
-; RV32IZFINXZDINX-NEXT: lw s2, 8(sp)
-; RV32IZFINXZDINX-NEXT: lw s3, 12(sp)
+; RV32IZFINXZDINX-NEXT: sw a0, 0(sp)
+; RV32IZFINXZDINX-NEXT: sw a1, 4(sp)
+; RV32IZFINXZDINX-NEXT: lw s0, 0(sp)
+; RV32IZFINXZDINX-NEXT: lw s1, 4(sp)
; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI9_0)
; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI9_0+4)(a2)
; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI9_0)(a2)
-; RV32IZFINXZDINX-NEXT: fle.d s0, a2, s2
+; RV32IZFINXZDINX-NEXT: fle.d s2, a2, s0
+; RV32IZFINXZDINX-NEXT: neg s3, s2
; RV32IZFINXZDINX-NEXT: call __fixdfdi
+; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI9_1)
+; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI9_1+4)(a2)
+; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI9_1)(a2)
+; RV32IZFINXZDINX-NEXT: and a0, s3, a0
+; RV32IZFINXZDINX-NEXT: flt.d a3, a2, s0
+; RV32IZFINXZDINX-NEXT: neg a2, a3
+; RV32IZFINXZDINX-NEXT: or a0, a2, a0
+; RV32IZFINXZDINX-NEXT: feq.d a2, s0, s0
+; RV32IZFINXZDINX-NEXT: neg a2, a2
+; RV32IZFINXZDINX-NEXT: lui a5, 524288
+; RV32IZFINXZDINX-NEXT: li a6, 1
; RV32IZFINXZDINX-NEXT: lui a4, 524288
-; RV32IZFINXZDINX-NEXT: lui a2, 524288
-; RV32IZFINXZDINX-NEXT: beqz s0, .LBB9_2
+; RV32IZFINXZDINX-NEXT: bne s2, a6, .LBB9_2
; RV32IZFINXZDINX-NEXT: # %bb.1:
-; RV32IZFINXZDINX-NEXT: mv a2, a1
+; RV32IZFINXZDINX-NEXT: mv a4, a1
; RV32IZFINXZDINX-NEXT: .LBB9_2:
-; RV32IZFINXZDINX-NEXT: lui a1, %hi(.LCPI9_1)
-; RV32IZFINXZDINX-NEXT: lw a6, %lo(.LCPI9_1)(a1)
-; RV32IZFINXZDINX-NEXT: lw a7, %lo(.LCPI9_1+4)(a1)
-; RV32IZFINXZDINX-NEXT: flt.d a3, a6, s2
+; RV32IZFINXZDINX-NEXT: and a0, a2, a0
; RV32IZFINXZDINX-NEXT: beqz a3, .LBB9_4
; RV32IZFINXZDINX-NEXT: # %bb.3:
-; RV32IZFINXZDINX-NEXT: addi a2, a4, -1
+; RV32IZFINXZDINX-NEXT: addi a4, a5, -1
; RV32IZFINXZDINX-NEXT: .LBB9_4:
-; RV32IZFINXZDINX-NEXT: feq.d a1, s2, s2
-; RV32IZFINXZDINX-NEXT: neg a4, a1
-; RV32IZFINXZDINX-NEXT: and a1, a4, a2
-; RV32IZFINXZDINX-NEXT: neg a2, s0
-; RV32IZFINXZDINX-NEXT: and a0, a2, a0
-; RV32IZFINXZDINX-NEXT: neg a2, a3
-; RV32IZFINXZDINX-NEXT: or a0, a2, a0
-; RV32IZFINXZDINX-NEXT: and a0, a4, a0
+; RV32IZFINXZDINX-NEXT: and a1, a2, a4
; RV32IZFINXZDINX-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32IZFINXZDINX-NEXT: lw s2, 20(sp) # 4-byte Folded Reload
-; RV32IZFINXZDINX-NEXT: lw s3, 16(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: addi sp, sp, 32
; RV32IZFINXZDINX-NEXT: ret
;
@@ -740,7 +770,8 @@ define i64 @test_trunc_ui64(double %x) nounwind {
; RV32IFD-NEXT: neg s0, a0
; RV32IFD-NEXT: fcvt.d.w fa5, zero
; RV32IFD-NEXT: fle.d a0, fa5, fa0
-; RV32IFD-NEXT: neg s1, a0
+; RV32IFD-NEXT: xori a0, a0, 1
+; RV32IFD-NEXT: addi s1, a0, -1
; RV32IFD-NEXT: call __fixunsdfdi
; RV32IFD-NEXT: and a0, s1, a0
; RV32IFD-NEXT: or a0, s0, a0
@@ -767,29 +798,28 @@ define i64 @test_trunc_ui64(double %x) nounwind {
; RV32IZFINXZDINX-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: call trunc
; RV32IZFINXZDINX-NEXT: sw a0, 8(sp)
; RV32IZFINXZDINX-NEXT: sw a1, 12(sp)
; RV32IZFINXZDINX-NEXT: lw s0, 8(sp)
; RV32IZFINXZDINX-NEXT: lw s1, 12(sp)
+; RV32IZFINXZDINX-NEXT: call __fixunsdfdi
; RV32IZFINXZDINX-NEXT: fcvt.d.w a2, zero
; RV32IZFINXZDINX-NEXT: fle.d a2, a2, s0
-; RV32IZFINXZDINX-NEXT: neg s2, a2
-; RV32IZFINXZDINX-NEXT: call __fixunsdfdi
-; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI11_0)
-; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI11_0+4)(a2)
-; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI11_0)(a2)
-; RV32IZFINXZDINX-NEXT: and a0, s2, a0
-; RV32IZFINXZDINX-NEXT: flt.d a2, a2, s0
-; RV32IZFINXZDINX-NEXT: neg a2, a2
-; RV32IZFINXZDINX-NEXT: or a0, a2, a0
-; RV32IZFINXZDINX-NEXT: and a1, s2, a1
-; RV32IZFINXZDINX-NEXT: or a1, a2, a1
+; RV32IZFINXZDINX-NEXT: lui a3, %hi(.LCPI11_0)
+; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI11_0)(a3)
+; RV32IZFINXZDINX-NEXT: lw a5, %lo(.LCPI11_0+4)(a3)
+; RV32IZFINXZDINX-NEXT: xori a2, a2, 1
+; RV32IZFINXZDINX-NEXT: addi a2, a2, -1
+; RV32IZFINXZDINX-NEXT: and a0, a2, a0
+; RV32IZFINXZDINX-NEXT: flt.d a3, a4, s0
+; RV32IZFINXZDINX-NEXT: neg a3, a3
+; RV32IZFINXZDINX-NEXT: or a0, a3, a0
+; RV32IZFINXZDINX-NEXT: and a1, a2, a1
+; RV32IZFINXZDINX-NEXT: or a1, a3, a1
; RV32IZFINXZDINX-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IZFINXZDINX-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: addi sp, sp, 32
; RV32IZFINXZDINX-NEXT: ret
;
@@ -848,41 +878,48 @@ define signext i32 @test_round_si32(double %x) {
define i64 @test_round_si64(double %x) nounwind {
; RV32IFD-LABEL: test_round_si64:
; RV32IFD: # %bb.0:
-; RV32IFD-NEXT: addi sp, sp, -16
-; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IFD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: addi sp, sp, -32
+; RV32IFD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill
; RV32IFD-NEXT: call round
; RV32IFD-NEXT: lui a0, %hi(.LCPI13_0)
; RV32IFD-NEXT: fld fa5, %lo(.LCPI13_0)(a0)
+; RV32IFD-NEXT: lui a0, %hi(.LCPI13_1)
+; RV32IFD-NEXT: fld fa4, %lo(.LCPI13_1)(a0)
; RV32IFD-NEXT: fmv.d fs0, fa0
-; RV32IFD-NEXT: fle.d s0, fa5, fa0
+; RV32IFD-NEXT: flt.d s0, fa5, fa0
+; RV32IFD-NEXT: neg s1, s0
+; RV32IFD-NEXT: fle.d s2, fa4, fa0
+; RV32IFD-NEXT: neg s3, s2
; RV32IFD-NEXT: call __fixdfdi
+; RV32IFD-NEXT: and a0, s3, a0
+; RV32IFD-NEXT: or a0, s1, a0
+; RV32IFD-NEXT: feq.d a2, fs0, fs0
+; RV32IFD-NEXT: neg a2, a2
; RV32IFD-NEXT: lui a4, 524288
-; RV32IFD-NEXT: lui a2, 524288
-; RV32IFD-NEXT: beqz s0, .LBB13_2
+; RV32IFD-NEXT: li a5, 1
+; RV32IFD-NEXT: lui a3, 524288
+; RV32IFD-NEXT: bne s2, a5, .LBB13_2
; RV32IFD-NEXT: # %bb.1:
-; RV32IFD-NEXT: mv a2, a1
+; RV32IFD-NEXT: mv a3, a1
; RV32IFD-NEXT: .LBB13_2:
-; RV32IFD-NEXT: lui a1, %hi(.LCPI13_1)
-; RV32IFD-NEXT: fld fa5, %lo(.LCPI13_1)(a1)
-; RV32IFD-NEXT: flt.d a3, fa5, fs0
-; RV32IFD-NEXT: beqz a3, .LBB13_4
+; RV32IFD-NEXT: and a0, a2, a0
+; RV32IFD-NEXT: beqz s0, .LBB13_4
; RV32IFD-NEXT: # %bb.3:
-; RV32IFD-NEXT: addi a2, a4, -1
+; RV32IFD-NEXT: addi a3, a4, -1
; RV32IFD-NEXT: .LBB13_4:
-; RV32IFD-NEXT: feq.d a1, fs0, fs0
-; RV32IFD-NEXT: neg a4, a1
-; RV32IFD-NEXT: and a1, a4, a2
-; RV32IFD-NEXT: neg a2, a3
-; RV32IFD-NEXT: neg a3, s0
-; RV32IFD-NEXT: and a0, a3, a0
-; RV32IFD-NEXT: or a0, a2, a0
-; RV32IFD-NEXT: and a0, a4, a0
-; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: and a1, a2, a3
+; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
-; RV32IFD-NEXT: addi sp, sp, 16
+; RV32IFD-NEXT: addi sp, sp, 32
; RV32IFD-NEXT: ret
;
; RV64IFD-LABEL: test_round_si64:
@@ -899,44 +936,47 @@ define i64 @test_round_si64(double %x) nounwind {
; RV32IZFINXZDINX-NEXT: addi sp, sp, -32
; RV32IZFINXZDINX-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT: sw s2, 20(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT: sw s3, 16(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: call round
-; RV32IZFINXZDINX-NEXT: sw a0, 8(sp)
-; RV32IZFINXZDINX-NEXT: sw a1, 12(sp)
-; RV32IZFINXZDINX-NEXT: lw s2, 8(sp)
-; RV32IZFINXZDINX-NEXT: lw s3, 12(sp)
+; RV32IZFINXZDINX-NEXT: sw a0, 0(sp)
+; RV32IZFINXZDINX-NEXT: sw a1, 4(sp)
+; RV32IZFINXZDINX-NEXT: lw s0, 0(sp)
+; RV32IZFINXZDINX-NEXT: lw s1, 4(sp)
; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI13_0)
; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI13_0+4)(a2)
; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI13_0)(a2)
-; RV32IZFINXZDINX-NEXT: fle.d s0, a2, s2
+; RV32IZFINXZDINX-NEXT: fle.d s2, a2, s0
+; RV32IZFINXZDINX-NEXT: neg s3, s2
; RV32IZFINXZDINX-NEXT: call __fixdfdi
+; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI13_1)
+; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI13_1+4)(a2)
+; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI13_1)(a2)
+; RV32IZFINXZDINX-NEXT: and a0, s3, a0
+; RV32IZFINXZDINX-NEXT: flt.d a3, a2, s0
+; RV32IZFINXZDINX-NEXT: neg a2, a3
+; RV32IZFINXZDINX-NEXT: or a0, a2, a0
+; RV32IZFINXZDINX-NEXT: feq.d a2, s0, s0
+; RV32IZFINXZDINX-NEXT: neg a2, a2
+; RV32IZFINXZDINX-NEXT: lui a5, 524288
+; RV32IZFINXZDINX-NEXT: li a6, 1
; RV32IZFINXZDINX-NEXT: lui a4, 524288
-; RV32IZFINXZDINX-NEXT: lui a2, 524288
-; RV32IZFINXZDINX-NEXT: beqz s0, .LBB13_2
+; RV32IZFINXZDINX-NEXT: bne s2, a6, .LBB13_2
; RV32IZFINXZDINX-NEXT: # %bb.1:
-; RV32IZFINXZDINX-NEXT: mv a2, a1
+; RV32IZFINXZDINX-NEXT: mv a4, a1
; RV32IZFINXZDINX-NEXT: .LBB13_2:
-; RV32IZFINXZDINX-NEXT: lui a1, %hi(.LCPI13_1)
-; RV32IZFINXZDINX-NEXT: lw a6, %lo(.LCPI13_1)(a1)
-; RV32IZFINXZDINX-NEXT: lw a7, %lo(.LCPI13_1+4)(a1)
-; RV32IZFINXZDINX-NEXT: flt.d a3, a6, s2
+; RV32IZFINXZDINX-NEXT: and a0, a2, a0
; RV32IZFINXZDINX-NEXT: beqz a3, .LBB13_4
; RV32IZFINXZDINX-NEXT: # %bb.3:
-; RV32IZFINXZDINX-NEXT: addi a2, a4, -1
+; RV32IZFINXZDINX-NEXT: addi a4, a5, -1
; RV32IZFINXZDINX-NEXT: .LBB13_4:
-; RV32IZFINXZDINX-NEXT: feq.d a1, s2, s2
-; RV32IZFINXZDINX-NEXT: neg a4, a1
-; RV32IZFINXZDINX-NEXT: and a1, a4, a2
-; RV32IZFINXZDINX-NEXT: neg a2, s0
-; RV32IZFINXZDINX-NEXT: and a0, a2, a0
-; RV32IZFINXZDINX-NEXT: neg a2, a3
-; RV32IZFINXZDINX-NEXT: or a0, a2, a0
-; RV32IZFINXZDINX-NEXT: and a0, a4, a0
+; RV32IZFINXZDINX-NEXT: and a1, a2, a4
; RV32IZFINXZDINX-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32IZFINXZDINX-NEXT: lw s2, 20(sp) # 4-byte Folded Reload
-; RV32IZFINXZDINX-NEXT: lw s3, 16(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: addi sp, sp, 32
; RV32IZFINXZDINX-NEXT: ret
;
@@ -1006,7 +1046,8 @@ define i64 @test_round_ui64(double %x) nounwind {
; RV32IFD-NEXT: neg s0, a0
; RV32IFD-NEXT: fcvt.d.w fa5, zero
; RV32IFD-NEXT: fle.d a0, fa5, fa0
-; RV32IFD-NEXT: neg s1, a0
+; RV32IFD-NEXT: xori a0, a0, 1
+; RV32IFD-NEXT: addi s1, a0, -1
; RV32IFD-NEXT: call __fixunsdfdi
; RV32IFD-NEXT: and a0, s1, a0
; RV32IFD-NEXT: or a0, s0, a0
@@ -1033,29 +1074,28 @@ define i64 @test_round_ui64(double %x) nounwind {
; RV32IZFINXZDINX-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: call round
; RV32IZFINXZDINX-NEXT: sw a0, 8(sp)
; RV32IZFINXZDINX-NEXT: sw a1, 12(sp)
; RV32IZFINXZDINX-NEXT: lw s0, 8(sp)
; RV32IZFINXZDINX-NEXT: lw s1, 12(sp)
+; RV32IZFINXZDINX-NEXT: call __fixunsdfdi
; RV32IZFINXZDINX-NEXT: fcvt.d.w a2, zero
; RV32IZFINXZDINX-NEXT: fle.d a2, a2, s0
-; RV32IZFINXZDINX-NEXT: neg s2, a2
-; RV32IZFINXZDINX-NEXT: call __fixunsdfdi
-; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI15_0)
-; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI15_0+4)(a2)
-; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI15_0)(a2)
-; RV32IZFINXZDINX-NEXT: and a0, s2, a0
-; RV32IZFINXZDINX-NEXT: flt.d a2, a2, s0
-; RV32IZFINXZDINX-NEXT: neg a2, a2
-; RV32IZFINXZDINX-NEXT: or a0, a2, a0
-; RV32IZFINXZDINX-NEXT: and a1, s2, a1
-; RV32IZFINXZDINX-NEXT: or a1, a2, a1
+; RV32IZFINXZDINX-NEXT: lui a3, %hi(.LCPI15_0)
+; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI15_0)(a3)
+; RV32IZFINXZDINX-NEXT: lw a5, %lo(.LCPI15_0+4)(a3)
+; RV32IZFINXZDINX-NEXT: xori a2, a2, 1
+; RV32IZFINXZDINX-NEXT: addi a2, a2, -1
+; RV32IZFINXZDINX-NEXT: and a0, a2, a0
+; RV32IZFINXZDINX-NEXT: flt.d a3, a4, s0
+; RV32IZFINXZDINX-NEXT: neg a3, a3
+; RV32IZFINXZDINX-NEXT: or a0, a3, a0
+; RV32IZFINXZDINX-NEXT: and a1, a2, a1
+; RV32IZFINXZDINX-NEXT: or a1, a3, a1
; RV32IZFINXZDINX-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IZFINXZDINX-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: addi sp, sp, 32
; RV32IZFINXZDINX-NEXT: ret
;
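In the unsigned variants the zero-lower-bound mask changes from a single neg
to an xori/addi pair. For a compare result %b that is 0 or 1 the two
sequences agree; in IR terms:

  %neg = sub i64 0, %b     ; old mask: -%b
  %t   = xor i64 %b, 1
  %alt = add i64 %t, -1    ; new mask: (%b ^ 1) - 1, equal to -%b for %b in {0, 1}

Both compute the same all-ones/all-zeros mask; the longer form presumably
falls out of the freeze hiding the 0/1 range from the combiner, another
instance of the correct-but-suboptimal codegen noted above.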
@@ -1114,41 +1154,48 @@ define signext i32 @test_roundeven_si32(double %x) {
define i64 @test_roundeven_si64(double %x) nounwind {
; RV32IFD-LABEL: test_roundeven_si64:
; RV32IFD: # %bb.0:
-; RV32IFD-NEXT: addi sp, sp, -16
-; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IFD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: addi sp, sp, -32
+; RV32IFD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill
; RV32IFD-NEXT: call roundeven
; RV32IFD-NEXT: lui a0, %hi(.LCPI17_0)
; RV32IFD-NEXT: fld fa5, %lo(.LCPI17_0)(a0)
+; RV32IFD-NEXT: lui a0, %hi(.LCPI17_1)
+; RV32IFD-NEXT: fld fa4, %lo(.LCPI17_1)(a0)
; RV32IFD-NEXT: fmv.d fs0, fa0
-; RV32IFD-NEXT: fle.d s0, fa5, fa0
+; RV32IFD-NEXT: flt.d s0, fa5, fa0
+; RV32IFD-NEXT: neg s1, s0
+; RV32IFD-NEXT: fle.d s2, fa4, fa0
+; RV32IFD-NEXT: neg s3, s2
; RV32IFD-NEXT: call __fixdfdi
+; RV32IFD-NEXT: and a0, s3, a0
+; RV32IFD-NEXT: or a0, s1, a0
+; RV32IFD-NEXT: feq.d a2, fs0, fs0
+; RV32IFD-NEXT: neg a2, a2
; RV32IFD-NEXT: lui a4, 524288
-; RV32IFD-NEXT: lui a2, 524288
-; RV32IFD-NEXT: beqz s0, .LBB17_2
+; RV32IFD-NEXT: li a5, 1
+; RV32IFD-NEXT: lui a3, 524288
+; RV32IFD-NEXT: bne s2, a5, .LBB17_2
; RV32IFD-NEXT: # %bb.1:
-; RV32IFD-NEXT: mv a2, a1
+; RV32IFD-NEXT: mv a3, a1
; RV32IFD-NEXT: .LBB17_2:
-; RV32IFD-NEXT: lui a1, %hi(.LCPI17_1)
-; RV32IFD-NEXT: fld fa5, %lo(.LCPI17_1)(a1)
-; RV32IFD-NEXT: flt.d a3, fa5, fs0
-; RV32IFD-NEXT: beqz a3, .LBB17_4
+; RV32IFD-NEXT: and a0, a2, a0
+; RV32IFD-NEXT: beqz s0, .LBB17_4
; RV32IFD-NEXT: # %bb.3:
-; RV32IFD-NEXT: addi a2, a4, -1
+; RV32IFD-NEXT: addi a3, a4, -1
; RV32IFD-NEXT: .LBB17_4:
-; RV32IFD-NEXT: feq.d a1, fs0, fs0
-; RV32IFD-NEXT: neg a4, a1
-; RV32IFD-NEXT: and a1, a4, a2
-; RV32IFD-NEXT: neg a2, a3
-; RV32IFD-NEXT: neg a3, s0
-; RV32IFD-NEXT: and a0, a3, a0
-; RV32IFD-NEXT: or a0, a2, a0
-; RV32IFD-NEXT: and a0, a4, a0
-; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: and a1, a2, a3
+; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
-; RV32IFD-NEXT: addi sp, sp, 16
+; RV32IFD-NEXT: addi sp, sp, 32
; RV32IFD-NEXT: ret
;
; RV64IFD-LABEL: test_roundeven_si64:
@@ -1165,44 +1212,47 @@ define i64 @test_roundeven_si64(double %x) nounwind {
; RV32IZFINXZDINX-NEXT: addi sp, sp, -32
; RV32IZFINXZDINX-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT: sw s2, 20(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT: sw s3, 16(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: call roundeven
-; RV32IZFINXZDINX-NEXT: sw a0, 8(sp)
-; RV32IZFINXZDINX-NEXT: sw a1, 12(sp)
-; RV32IZFINXZDINX-NEXT: lw s2, 8(sp)
-; RV32IZFINXZDINX-NEXT: lw s3, 12(sp)
+; RV32IZFINXZDINX-NEXT: sw a0, 0(sp)
+; RV32IZFINXZDINX-NEXT: sw a1, 4(sp)
+; RV32IZFINXZDINX-NEXT: lw s0, 0(sp)
+; RV32IZFINXZDINX-NEXT: lw s1, 4(sp)
; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI17_0)
; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI17_0+4)(a2)
; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI17_0)(a2)
-; RV32IZFINXZDINX-NEXT: fle.d s0, a2, s2
+; RV32IZFINXZDINX-NEXT: fle.d s2, a2, s0
+; RV32IZFINXZDINX-NEXT: neg s3, s2
; RV32IZFINXZDINX-NEXT: call __fixdfdi
+; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI17_1)
+; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI17_1+4)(a2)
+; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI17_1)(a2)
+; RV32IZFINXZDINX-NEXT: and a0, s3, a0
+; RV32IZFINXZDINX-NEXT: flt.d a3, a2, s0
+; RV32IZFINXZDINX-NEXT: neg a2, a3
+; RV32IZFINXZDINX-NEXT: or a0, a2, a0
+; RV32IZFINXZDINX-NEXT: feq.d a2, s0, s0
+; RV32IZFINXZDINX-NEXT: neg a2, a2
+; RV32IZFINXZDINX-NEXT: lui a5, 524288
+; RV32IZFINXZDINX-NEXT: li a6, 1
; RV32IZFINXZDINX-NEXT: lui a4, 524288
-; RV32IZFINXZDINX-NEXT: lui a2, 524288
-; RV32IZFINXZDINX-NEXT: beqz s0, .LBB17_2
+; RV32IZFINXZDINX-NEXT: bne s2, a6, .LBB17_2
; RV32IZFINXZDINX-NEXT: # %bb.1:
-; RV32IZFINXZDINX-NEXT: mv a2, a1
+; RV32IZFINXZDINX-NEXT: mv a4, a1
; RV32IZFINXZDINX-NEXT: .LBB17_2:
-; RV32IZFINXZDINX-NEXT: lui a1, %hi(.LCPI17_1)
-; RV32IZFINXZDINX-NEXT: lw a6, %lo(.LCPI17_1)(a1)
-; RV32IZFINXZDINX-NEXT: lw a7, %lo(.LCPI17_1+4)(a1)
-; RV32IZFINXZDINX-NEXT: flt.d a3, a6, s2
+; RV32IZFINXZDINX-NEXT: and a0, a2, a0
; RV32IZFINXZDINX-NEXT: beqz a3, .LBB17_4
; RV32IZFINXZDINX-NEXT: # %bb.3:
-; RV32IZFINXZDINX-NEXT: addi a2, a4, -1
+; RV32IZFINXZDINX-NEXT: addi a4, a5, -1
; RV32IZFINXZDINX-NEXT: .LBB17_4:
-; RV32IZFINXZDINX-NEXT: feq.d a1, s2, s2
-; RV32IZFINXZDINX-NEXT: neg a4, a1
-; RV32IZFINXZDINX-NEXT: and a1, a4, a2
-; RV32IZFINXZDINX-NEXT: neg a2, s0
-; RV32IZFINXZDINX-NEXT: and a0, a2, a0
-; RV32IZFINXZDINX-NEXT: neg a2, a3
-; RV32IZFINXZDINX-NEXT: or a0, a2, a0
-; RV32IZFINXZDINX-NEXT: and a0, a4, a0
+; RV32IZFINXZDINX-NEXT: and a1, a2, a4
; RV32IZFINXZDINX-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32IZFINXZDINX-NEXT: lw s2, 20(sp) # 4-byte Folded Reload
-; RV32IZFINXZDINX-NEXT: lw s3, 16(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: addi sp, sp, 32
; RV32IZFINXZDINX-NEXT: ret
;
@@ -1272,7 +1322,8 @@ define i64 @test_roundeven_ui64(double %x) nounwind {
; RV32IFD-NEXT: neg s0, a0
; RV32IFD-NEXT: fcvt.d.w fa5, zero
; RV32IFD-NEXT: fle.d a0, fa5, fa0
-; RV32IFD-NEXT: neg s1, a0
+; RV32IFD-NEXT: xori a0, a0, 1
+; RV32IFD-NEXT: addi s1, a0, -1
; RV32IFD-NEXT: call __fixunsdfdi
; RV32IFD-NEXT: and a0, s1, a0
; RV32IFD-NEXT: or a0, s0, a0
@@ -1299,29 +1350,28 @@ define i64 @test_roundeven_ui64(double %x) nounwind {
; RV32IZFINXZDINX-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: call roundeven
; RV32IZFINXZDINX-NEXT: sw a0, 8(sp)
; RV32IZFINXZDINX-NEXT: sw a1, 12(sp)
; RV32IZFINXZDINX-NEXT: lw s0, 8(sp)
; RV32IZFINXZDINX-NEXT: lw s1, 12(sp)
+; RV32IZFINXZDINX-NEXT: call __fixunsdfdi
; RV32IZFINXZDINX-NEXT: fcvt.d.w a2, zero
; RV32IZFINXZDINX-NEXT: fle.d a2, a2, s0
-; RV32IZFINXZDINX-NEXT: neg s2, a2
-; RV32IZFINXZDINX-NEXT: call __fixunsdfdi
-; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI19_0)
-; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI19_0+4)(a2)
-; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI19_0)(a2)
-; RV32IZFINXZDINX-NEXT: and a0, s2, a0
-; RV32IZFINXZDINX-NEXT: flt.d a2, a2, s0
-; RV32IZFINXZDINX-NEXT: neg a2, a2
-; RV32IZFINXZDINX-NEXT: or a0, a2, a0
-; RV32IZFINXZDINX-NEXT: and a1, s2, a1
-; RV32IZFINXZDINX-NEXT: or a1, a2, a1
+; RV32IZFINXZDINX-NEXT: lui a3, %hi(.LCPI19_0)
+; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI19_0)(a3)
+; RV32IZFINXZDINX-NEXT: lw a5, %lo(.LCPI19_0+4)(a3)
+; RV32IZFINXZDINX-NEXT: xori a2, a2, 1
+; RV32IZFINXZDINX-NEXT: addi a2, a2, -1
+; RV32IZFINXZDINX-NEXT: and a0, a2, a0
+; RV32IZFINXZDINX-NEXT: flt.d a3, a4, s0
+; RV32IZFINXZDINX-NEXT: neg a3, a3
+; RV32IZFINXZDINX-NEXT: or a0, a3, a0
+; RV32IZFINXZDINX-NEXT: and a1, a2, a1
+; RV32IZFINXZDINX-NEXT: or a1, a3, a1
; RV32IZFINXZDINX-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IZFINXZDINX-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: addi sp, sp, 32
; RV32IZFINXZDINX-NEXT: ret
;
@@ -1380,41 +1430,48 @@ define signext i32 @test_rint_si32(double %x) {
define i64 @test_rint_si64(double %x) nounwind {
; RV32IFD-LABEL: test_rint_si64:
; RV32IFD: # %bb.0:
-; RV32IFD-NEXT: addi sp, sp, -16
-; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IFD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: addi sp, sp, -32
+; RV32IFD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill
; RV32IFD-NEXT: call rint
; RV32IFD-NEXT: lui a0, %hi(.LCPI21_0)
; RV32IFD-NEXT: fld fa5, %lo(.LCPI21_0)(a0)
+; RV32IFD-NEXT: lui a0, %hi(.LCPI21_1)
+; RV32IFD-NEXT: fld fa4, %lo(.LCPI21_1)(a0)
; RV32IFD-NEXT: fmv.d fs0, fa0
-; RV32IFD-NEXT: fle.d s0, fa5, fa0
+; RV32IFD-NEXT: flt.d s0, fa5, fa0
+; RV32IFD-NEXT: neg s1, s0
+; RV32IFD-NEXT: fle.d s2, fa4, fa0
+; RV32IFD-NEXT: neg s3, s2
; RV32IFD-NEXT: call __fixdfdi
+; RV32IFD-NEXT: and a0, s3, a0
+; RV32IFD-NEXT: or a0, s1, a0
+; RV32IFD-NEXT: feq.d a2, fs0, fs0
+; RV32IFD-NEXT: neg a2, a2
; RV32IFD-NEXT: lui a4, 524288
-; RV32IFD-NEXT: lui a2, 524288
-; RV32IFD-NEXT: beqz s0, .LBB21_2
+; RV32IFD-NEXT: li a5, 1
+; RV32IFD-NEXT: lui a3, 524288
+; RV32IFD-NEXT: bne s2, a5, .LBB21_2
; RV32IFD-NEXT: # %bb.1:
-; RV32IFD-NEXT: mv a2, a1
+; RV32IFD-NEXT: mv a3, a1
; RV32IFD-NEXT: .LBB21_2:
-; RV32IFD-NEXT: lui a1, %hi(.LCPI21_1)
-; RV32IFD-NEXT: fld fa5, %lo(.LCPI21_1)(a1)
-; RV32IFD-NEXT: flt.d a3, fa5, fs0
-; RV32IFD-NEXT: beqz a3, .LBB21_4
+; RV32IFD-NEXT: and a0, a2, a0
+; RV32IFD-NEXT: beqz s0, .LBB21_4
; RV32IFD-NEXT: # %bb.3:
-; RV32IFD-NEXT: addi a2, a4, -1
+; RV32IFD-NEXT: addi a3, a4, -1
; RV32IFD-NEXT: .LBB21_4:
-; RV32IFD-NEXT: feq.d a1, fs0, fs0
-; RV32IFD-NEXT: neg a4, a1
-; RV32IFD-NEXT: and a1, a4, a2
-; RV32IFD-NEXT: neg a2, a3
-; RV32IFD-NEXT: neg a3, s0
-; RV32IFD-NEXT: and a0, a3, a0
-; RV32IFD-NEXT: or a0, a2, a0
-; RV32IFD-NEXT: and a0, a4, a0
-; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IFD-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: and a1, a2, a3
+; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
-; RV32IFD-NEXT: addi sp, sp, 16
+; RV32IFD-NEXT: addi sp, sp, 32
; RV32IFD-NEXT: ret
;
; RV64IFD-LABEL: test_rint_si64:
@@ -1431,44 +1488,47 @@ define i64 @test_rint_si64(double %x) nounwind {
; RV32IZFINXZDINX-NEXT: addi sp, sp, -32
; RV32IZFINXZDINX-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT: sw s2, 20(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT: sw s3, 16(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: call rint
-; RV32IZFINXZDINX-NEXT: sw a0, 8(sp)
-; RV32IZFINXZDINX-NEXT: sw a1, 12(sp)
-; RV32IZFINXZDINX-NEXT: lw s2, 8(sp)
-; RV32IZFINXZDINX-NEXT: lw s3, 12(sp)
+; RV32IZFINXZDINX-NEXT: sw a0, 0(sp)
+; RV32IZFINXZDINX-NEXT: sw a1, 4(sp)
+; RV32IZFINXZDINX-NEXT: lw s0, 0(sp)
+; RV32IZFINXZDINX-NEXT: lw s1, 4(sp)
; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI21_0)
; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI21_0+4)(a2)
; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI21_0)(a2)
-; RV32IZFINXZDINX-NEXT: fle.d s0, a2, s2
+; RV32IZFINXZDINX-NEXT: fle.d s2, a2, s0
+; RV32IZFINXZDINX-NEXT: neg s3, s2
; RV32IZFINXZDINX-NEXT: call __fixdfdi
+; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI21_1)
+; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI21_1+4)(a2)
+; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI21_1)(a2)
+; RV32IZFINXZDINX-NEXT: and a0, s3, a0
+; RV32IZFINXZDINX-NEXT: flt.d a3, a2, s0
+; RV32IZFINXZDINX-NEXT: neg a2, a3
+; RV32IZFINXZDINX-NEXT: or a0, a2, a0
+; RV32IZFINXZDINX-NEXT: feq.d a2, s0, s0
+; RV32IZFINXZDINX-NEXT: neg a2, a2
+; RV32IZFINXZDINX-NEXT: lui a5, 524288
+; RV32IZFINXZDINX-NEXT: li a6, 1
; RV32IZFINXZDINX-NEXT: lui a4, 524288
-; RV32IZFINXZDINX-NEXT: lui a2, 524288
-; RV32IZFINXZDINX-NEXT: beqz s0, .LBB21_2
+; RV32IZFINXZDINX-NEXT: bne s2, a6, .LBB21_2
; RV32IZFINXZDINX-NEXT: # %bb.1:
-; RV32IZFINXZDINX-NEXT: mv a2, a1
+; RV32IZFINXZDINX-NEXT: mv a4, a1
; RV32IZFINXZDINX-NEXT: .LBB21_2:
-; RV32IZFINXZDINX-NEXT: lui a1, %hi(.LCPI21_1)
-; RV32IZFINXZDINX-NEXT: lw a6, %lo(.LCPI21_1)(a1)
-; RV32IZFINXZDINX-NEXT: lw a7, %lo(.LCPI21_1+4)(a1)
-; RV32IZFINXZDINX-NEXT: flt.d a3, a6, s2
+; RV32IZFINXZDINX-NEXT: and a0, a2, a0
; RV32IZFINXZDINX-NEXT: beqz a3, .LBB21_4
; RV32IZFINXZDINX-NEXT: # %bb.3:
-; RV32IZFINXZDINX-NEXT: addi a2, a4, -1
+; RV32IZFINXZDINX-NEXT: addi a4, a5, -1
; RV32IZFINXZDINX-NEXT: .LBB21_4:
-; RV32IZFINXZDINX-NEXT: feq.d a1, s2, s2
-; RV32IZFINXZDINX-NEXT: neg a4, a1
-; RV32IZFINXZDINX-NEXT: and a1, a4, a2
-; RV32IZFINXZDINX-NEXT: neg a2, s0
-; RV32IZFINXZDINX-NEXT: and a0, a2, a0
-; RV32IZFINXZDINX-NEXT: neg a2, a3
-; RV32IZFINXZDINX-NEXT: or a0, a2, a0
-; RV32IZFINXZDINX-NEXT: and a0, a4, a0
+; RV32IZFINXZDINX-NEXT: and a1, a2, a4
; RV32IZFINXZDINX-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32IZFINXZDINX-NEXT: lw s2, 20(sp) # 4-byte Folded Reload
-; RV32IZFINXZDINX-NEXT: lw s3, 16(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: addi sp, sp, 32
; RV32IZFINXZDINX-NEXT: ret
;
@@ -1538,7 +1598,8 @@ define i64 @test_rint_ui64(double %x) nounwind {
; RV32IFD-NEXT: neg s0, a0
; RV32IFD-NEXT: fcvt.d.w fa5, zero
; RV32IFD-NEXT: fle.d a0, fa5, fa0
-; RV32IFD-NEXT: neg s1, a0
+; RV32IFD-NEXT: xori a0, a0, 1
+; RV32IFD-NEXT: addi s1, a0, -1
; RV32IFD-NEXT: call __fixunsdfdi
; RV32IFD-NEXT: and a0, s1, a0
; RV32IFD-NEXT: or a0, s0, a0
@@ -1565,29 +1626,28 @@ define i64 @test_rint_ui64(double %x) nounwind {
; RV32IZFINXZDINX-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: call rint
; RV32IZFINXZDINX-NEXT: sw a0, 8(sp)
; RV32IZFINXZDINX-NEXT: sw a1, 12(sp)
; RV32IZFINXZDINX-NEXT: lw s0, 8(sp)
; RV32IZFINXZDINX-NEXT: lw s1, 12(sp)
+; RV32IZFINXZDINX-NEXT: call __fixunsdfdi
; RV32IZFINXZDINX-NEXT: fcvt.d.w a2, zero
; RV32IZFINXZDINX-NEXT: fle.d a2, a2, s0
-; RV32IZFINXZDINX-NEXT: neg s2, a2
-; RV32IZFINXZDINX-NEXT: call __fixunsdfdi
-; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI23_0)
-; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI23_0+4)(a2)
-; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI23_0)(a2)
-; RV32IZFINXZDINX-NEXT: and a0, s2, a0
-; RV32IZFINXZDINX-NEXT: flt.d a2, a2, s0
-; RV32IZFINXZDINX-NEXT: neg a2, a2
-; RV32IZFINXZDINX-NEXT: or a0, a2, a0
-; RV32IZFINXZDINX-NEXT: and a1, s2, a1
-; RV32IZFINXZDINX-NEXT: or a1, a2, a1
+; RV32IZFINXZDINX-NEXT: lui a3, %hi(.LCPI23_0)
+; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI23_0)(a3)
+; RV32IZFINXZDINX-NEXT: lw a5, %lo(.LCPI23_0+4)(a3)
+; RV32IZFINXZDINX-NEXT: xori a2, a2, 1
+; RV32IZFINXZDINX-NEXT: addi a2, a2, -1
+; RV32IZFINXZDINX-NEXT: and a0, a2, a0
+; RV32IZFINXZDINX-NEXT: flt.d a3, a4, s0
+; RV32IZFINXZDINX-NEXT: neg a3, a3
+; RV32IZFINXZDINX-NEXT: or a0, a3, a0
+; RV32IZFINXZDINX-NEXT: and a1, a2, a1
+; RV32IZFINXZDINX-NEXT: or a1, a3, a1
; RV32IZFINXZDINX-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IZFINXZDINX-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: addi sp, sp, 32
; RV32IZFINXZDINX-NEXT: ret
;
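The float-convert.ll changes below exercise the OR side of the transform as
well: the upper-bound compare produces an all-ones mask that saturates the
result, i.e. a select of the form c ? -1 : y. A sketch of that half, again
with illustrative names:

  %r = select i1 %c, i64 -1, i64 %y
  ; converted form, with the freeze this patch inserts:
  %m  = sext i1 %c to i64
  %fy = freeze i64 %y
  %r2 = or i64 %m, %fy

Without the freeze, poison in %y would leak through the OR even when %c
selects the -1 arm, which is exactly the miscompile class this patch blocks.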
diff --git a/llvm/test/CodeGen/RISCV/float-convert.ll b/llvm/test/CodeGen/RISCV/float-convert.ll
index f1e444b5b624b4..1a0e4e18291158 100644
--- a/llvm/test/CodeGen/RISCV/float-convert.ll
+++ b/llvm/test/CodeGen/RISCV/float-convert.ll
@@ -275,24 +275,26 @@ define i32 @fcvt_wu_s_sat(float %a) nounwind {
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
+; RV32I-NEXT: lui a1, 325632
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: call __gtsf2
+; RV32I-NEXT: sgtz a0, a0
+; RV32I-NEXT: neg s1, a0
+; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: call __gesf2
; RV32I-NEXT: slti a0, a0, 0
-; RV32I-NEXT: addi s1, a0, -1
+; RV32I-NEXT: addi s2, a0, -1
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __fixunssfsi
-; RV32I-NEXT: and s1, s1, a0
-; RV32I-NEXT: lui a1, 325632
-; RV32I-NEXT: addi a1, a1, -1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __gtsf2
-; RV32I-NEXT: sgtz a0, a0
-; RV32I-NEXT: neg a0, a0
-; RV32I-NEXT: or a0, a0, s1
+; RV32I-NEXT: and a0, s2, a0
+; RV32I-NEXT: or a0, s1, a0
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
@@ -613,40 +615,47 @@ define i64 @fcvt_l_s(float %a) nounwind {
define i64 @fcvt_l_s_sat(float %a) nounwind {
; RV32IF-LABEL: fcvt_l_s_sat:
; RV32IF: # %bb.0: # %start
-; RV32IF-NEXT: addi sp, sp, -16
-; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IF-NEXT: addi sp, sp, -32
+; RV32IF-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IF-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32IF-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32IF-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IF-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32IF-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill
+; RV32IF-NEXT: lui a0, %hi(.LCPI12_0)
+; RV32IF-NEXT: flw fa5, %lo(.LCPI12_0)(a0)
; RV32IF-NEXT: fmv.s fs0, fa0
+; RV32IF-NEXT: flt.s s0, fa5, fa0
+; RV32IF-NEXT: neg s1, s0
; RV32IF-NEXT: lui a0, 913408
; RV32IF-NEXT: fmv.w.x fa5, a0
-; RV32IF-NEXT: fle.s s0, fa5, fa0
+; RV32IF-NEXT: fle.s s2, fa5, fa0
+; RV32IF-NEXT: neg s3, s2
; RV32IF-NEXT: call __fixsfdi
+; RV32IF-NEXT: and a0, s3, a0
+; RV32IF-NEXT: or a0, s1, a0
+; RV32IF-NEXT: feq.s a2, fs0, fs0
+; RV32IF-NEXT: neg a2, a2
; RV32IF-NEXT: lui a4, 524288
-; RV32IF-NEXT: lui a2, 524288
-; RV32IF-NEXT: beqz s0, .LBB12_2
+; RV32IF-NEXT: li a5, 1
+; RV32IF-NEXT: lui a3, 524288
+; RV32IF-NEXT: bne s2, a5, .LBB12_2
; RV32IF-NEXT: # %bb.1: # %start
-; RV32IF-NEXT: mv a2, a1
+; RV32IF-NEXT: mv a3, a1
; RV32IF-NEXT: .LBB12_2: # %start
-; RV32IF-NEXT: lui a1, %hi(.LCPI12_0)
-; RV32IF-NEXT: flw fa5, %lo(.LCPI12_0)(a1)
-; RV32IF-NEXT: flt.s a3, fa5, fs0
-; RV32IF-NEXT: beqz a3, .LBB12_4
+; RV32IF-NEXT: and a0, a2, a0
+; RV32IF-NEXT: beqz s0, .LBB12_4
; RV32IF-NEXT: # %bb.3:
-; RV32IF-NEXT: addi a2, a4, -1
+; RV32IF-NEXT: addi a3, a4, -1
; RV32IF-NEXT: .LBB12_4: # %start
-; RV32IF-NEXT: feq.s a1, fs0, fs0
-; RV32IF-NEXT: neg a4, a1
-; RV32IF-NEXT: and a1, a4, a2
-; RV32IF-NEXT: neg a2, a3
-; RV32IF-NEXT: neg a3, s0
-; RV32IF-NEXT: and a0, a3, a0
-; RV32IF-NEXT: or a0, a2, a0
-; RV32IF-NEXT: and a0, a4, a0
-; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
-; RV32IF-NEXT: addi sp, sp, 16
+; RV32IF-NEXT: and a1, a2, a3
+; RV32IF-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IF-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IF-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IF-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32IF-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32IF-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload
+; RV32IF-NEXT: addi sp, sp, 32
; RV32IF-NEXT: ret
;
; RV64IF-LABEL: fcvt_l_s_sat:
@@ -664,35 +673,38 @@ define i64 @fcvt_l_s_sat(float %a) nounwind {
; RV32IZFINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32IZFINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32IZFINX-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
; RV32IZFINX-NEXT: mv s0, a0
; RV32IZFINX-NEXT: lui a0, 913408
; RV32IZFINX-NEXT: fle.s s1, a0, s0
+; RV32IZFINX-NEXT: neg s2, s1
; RV32IZFINX-NEXT: mv a0, s0
; RV32IZFINX-NEXT: call __fixsfdi
+; RV32IZFINX-NEXT: lui a2, %hi(.LCPI12_0)
+; RV32IZFINX-NEXT: lw a2, %lo(.LCPI12_0)(a2)
+; RV32IZFINX-NEXT: and a0, s2, a0
+; RV32IZFINX-NEXT: flt.s a3, a2, s0
+; RV32IZFINX-NEXT: neg a2, a3
+; RV32IZFINX-NEXT: or a0, a2, a0
+; RV32IZFINX-NEXT: feq.s a2, s0, s0
+; RV32IZFINX-NEXT: neg a2, a2
+; RV32IZFINX-NEXT: lui a5, 524288
+; RV32IZFINX-NEXT: li a6, 1
; RV32IZFINX-NEXT: lui a4, 524288
-; RV32IZFINX-NEXT: lui a2, 524288
-; RV32IZFINX-NEXT: beqz s1, .LBB12_2
+; RV32IZFINX-NEXT: bne s1, a6, .LBB12_2
; RV32IZFINX-NEXT: # %bb.1: # %start
-; RV32IZFINX-NEXT: mv a2, a1
+; RV32IZFINX-NEXT: mv a4, a1
; RV32IZFINX-NEXT: .LBB12_2: # %start
-; RV32IZFINX-NEXT: lui a1, %hi(.LCPI12_0)
-; RV32IZFINX-NEXT: lw a1, %lo(.LCPI12_0)(a1)
-; RV32IZFINX-NEXT: flt.s a3, a1, s0
+; RV32IZFINX-NEXT: and a0, a2, a0
; RV32IZFINX-NEXT: beqz a3, .LBB12_4
; RV32IZFINX-NEXT: # %bb.3:
-; RV32IZFINX-NEXT: addi a2, a4, -1
+; RV32IZFINX-NEXT: addi a4, a5, -1
; RV32IZFINX-NEXT: .LBB12_4: # %start
-; RV32IZFINX-NEXT: feq.s a1, s0, s0
-; RV32IZFINX-NEXT: neg a4, a1
-; RV32IZFINX-NEXT: and a1, a4, a2
-; RV32IZFINX-NEXT: neg a2, s1
-; RV32IZFINX-NEXT: and a0, a2, a0
-; RV32IZFINX-NEXT: neg a2, a3
-; RV32IZFINX-NEXT: or a0, a2, a0
-; RV32IZFINX-NEXT: and a0, a4, a0
+; RV32IZFINX-NEXT: and a1, a2, a4
; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32IZFINX-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
; RV32IZFINX-NEXT: addi sp, sp, 16
; RV32IZFINX-NEXT: ret
;
@@ -863,23 +875,23 @@ define i64 @fcvt_lu_s_sat(float %a) nounwind {
; RV32IF-NEXT: addi sp, sp, -16
; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
-; RV32IF-NEXT: fmv.s fs0, fa0
+; RV32IF-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32IF-NEXT: lui a0, %hi(.LCPI14_0)
+; RV32IF-NEXT: flw fa5, %lo(.LCPI14_0)(a0)
+; RV32IF-NEXT: flt.s a0, fa5, fa0
+; RV32IF-NEXT: neg s0, a0
; RV32IF-NEXT: fmv.w.x fa5, zero
; RV32IF-NEXT: fle.s a0, fa5, fa0
-; RV32IF-NEXT: neg s0, a0
+; RV32IF-NEXT: xori a0, a0, 1
+; RV32IF-NEXT: addi s1, a0, -1
; RV32IF-NEXT: call __fixunssfdi
-; RV32IF-NEXT: lui a2, %hi(.LCPI14_0)
-; RV32IF-NEXT: flw fa5, %lo(.LCPI14_0)(a2)
-; RV32IF-NEXT: and a0, s0, a0
-; RV32IF-NEXT: flt.s a2, fa5, fs0
-; RV32IF-NEXT: neg a2, a2
-; RV32IF-NEXT: or a0, a2, a0
-; RV32IF-NEXT: and a1, s0, a1
-; RV32IF-NEXT: or a1, a2, a1
+; RV32IF-NEXT: and a0, s1, a0
+; RV32IF-NEXT: or a0, s0, a0
+; RV32IF-NEXT: and a1, s1, a1
+; RV32IF-NEXT: or a1, s0, a1
; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IF-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32IF-NEXT: addi sp, sp, 16
; RV32IF-NEXT: ret
;
@@ -898,19 +910,18 @@ define i64 @fcvt_lu_s_sat(float %a) nounwind {
; RV32IZFINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32IZFINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32IZFINX-NEXT: mv s0, a0
-; RV32IZFINX-NEXT: fle.s a0, zero, a0
-; RV32IZFINX-NEXT: neg s1, a0
-; RV32IZFINX-NEXT: mv a0, s0
+; RV32IZFINX-NEXT: lui a1, %hi(.LCPI14_0)
+; RV32IZFINX-NEXT: lw a1, %lo(.LCPI14_0)(a1)
+; RV32IZFINX-NEXT: flt.s a1, a1, a0
+; RV32IZFINX-NEXT: neg s0, a1
+; RV32IZFINX-NEXT: fle.s a1, zero, a0
+; RV32IZFINX-NEXT: xori a1, a1, 1
+; RV32IZFINX-NEXT: addi s1, a1, -1
; RV32IZFINX-NEXT: call __fixunssfdi
-; RV32IZFINX-NEXT: lui a2, %hi(.LCPI14_0)
-; RV32IZFINX-NEXT: lw a2, %lo(.LCPI14_0)(a2)
; RV32IZFINX-NEXT: and a0, s1, a0
-; RV32IZFINX-NEXT: flt.s a2, a2, s0
-; RV32IZFINX-NEXT: neg a2, a2
-; RV32IZFINX-NEXT: or a0, a2, a0
+; RV32IZFINX-NEXT: or a0, s0, a0
; RV32IZFINX-NEXT: and a1, s1, a1
-; RV32IZFINX-NEXT: or a1, a2, a1
+; RV32IZFINX-NEXT: or a1, s0, a1
; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
@@ -928,36 +939,33 @@ define i64 @fcvt_lu_s_sat(float %a) nounwind {
;
; RV32I-LABEL: fcvt_lu_s_sat:
; RV32I: # %bb.0: # %start
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
+; RV32I-NEXT: lui a1, 391168
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: call __gtsf2
+; RV32I-NEXT: sgtz a0, a0
+; RV32I-NEXT: neg s1, a0
+; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: call __gesf2
; RV32I-NEXT: slti a0, a0, 0
; RV32I-NEXT: addi s2, a0, -1
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __fixunssfdi
-; RV32I-NEXT: mv s1, a1
-; RV32I-NEXT: and s3, s2, a0
-; RV32I-NEXT: lui a1, 391168
-; RV32I-NEXT: addi a1, a1, -1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __gtsf2
-; RV32I-NEXT: sgtz a0, a0
-; RV32I-NEXT: neg a1, a0
-; RV32I-NEXT: or a0, a1, s3
-; RV32I-NEXT: and a2, s2, s1
-; RV32I-NEXT: or a1, a1, a2
-; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: and a0, s2, a0
+; RV32I-NEXT: or a0, s1, a0
+; RV32I-NEXT: and a1, s2, a1
+; RV32I-NEXT: or a1, s1, a1
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV64I-LABEL: fcvt_lu_s_sat:
@@ -966,24 +974,26 @@ define i64 @fcvt_lu_s_sat(float %a) nounwind {
; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
+; RV64I-NEXT: lui a1, 391168
+; RV64I-NEXT: addiw a1, a1, -1
+; RV64I-NEXT: call __gtsf2
+; RV64I-NEXT: sgtz a0, a0
+; RV64I-NEXT: neg s1, a0
+; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: li a1, 0
; RV64I-NEXT: call __gesf2
; RV64I-NEXT: slti a0, a0, 0
-; RV64I-NEXT: addi s1, a0, -1
+; RV64I-NEXT: addi s2, a0, -1
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __fixunssfdi
-; RV64I-NEXT: and s1, s1, a0
-; RV64I-NEXT: lui a1, 391168
-; RV64I-NEXT: addiw a1, a1, -1
-; RV64I-NEXT: mv a0, s0
-; RV64I-NEXT: call __gtsf2
-; RV64I-NEXT: sgtz a0, a0
-; RV64I-NEXT: neg a0, a0
-; RV64I-NEXT: or a0, a0, s1
+; RV64I-NEXT: and a0, s2, a0
+; RV64I-NEXT: or a0, s1, a0
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
start:
@@ -2089,24 +2099,26 @@ define zeroext i32 @fcvt_wu_s_sat_zext(float %a) nounwind {
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
+; RV32I-NEXT: lui a1, 325632
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: call __gtsf2
+; RV32I-NEXT: sgtz a0, a0
+; RV32I-NEXT: neg s1, a0
+; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: call __gesf2
; RV32I-NEXT: slti a0, a0, 0
-; RV32I-NEXT: addi s1, a0, -1
+; RV32I-NEXT: addi s2, a0, -1
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __fixunssfsi
-; RV32I-NEXT: and s1, s1, a0
-; RV32I-NEXT: lui a1, 325632
-; RV32I-NEXT: addi a1, a1, -1
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: call __gtsf2
-; RV32I-NEXT: sgtz a0, a0
-; RV32I-NEXT: neg a0, a0
-; RV32I-NEXT: or a0, a0, s1
+; RV32I-NEXT: and a0, s2, a0
+; RV32I-NEXT: or a0, s1, a0
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
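The RV32I/RV64I soft-float hunks above also show a secondary scheduling
effect: the __gtsf2 upper-bound check is now emitted before the __gesf2 and
conversion calls, so in fcvt_lu_s_sat one fewer callee-saved register is
needed and the frame shrinks from 32 to 16 bytes. These _sat functions are
the saturating conversion tests, which should correspond to bodies along the
lines of:

  start:
    %r = call i64 @llvm.fptoui.sat.i64.f32(float %x)
    ret i64 %r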
diff --git a/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll b/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll
index 5e99c7eb905628..f91aac11876d41 100644
--- a/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll
+++ b/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll
@@ -37,7 +37,8 @@ define i64 @test_floor_si64(float %x) nounwind {
; RV32IF-NEXT: addi sp, sp, -16
; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IF-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32IF-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill
; RV32IF-NEXT: fmv.s fs0, fa0
; RV32IF-NEXT: lui a0, 307200
; RV32IF-NEXT: fmv.w.x fa5, a0
@@ -52,32 +53,34 @@ define i64 @test_floor_si64(float %x) nounwind {
; RV32IF-NEXT: lui a0, 913408
; RV32IF-NEXT: fmv.w.x fa5, a0
; RV32IF-NEXT: fle.s s0, fa5, fs0
+; RV32IF-NEXT: neg s1, s0
; RV32IF-NEXT: fmv.s fa0, fs0
; RV32IF-NEXT: call __fixsfdi
+; RV32IF-NEXT: lui a2, %hi(.LCPI1_0)
+; RV32IF-NEXT: flw fa5, %lo(.LCPI1_0)(a2)
+; RV32IF-NEXT: and a0, s1, a0
+; RV32IF-NEXT: flt.s a3, fa5, fs0
+; RV32IF-NEXT: neg a2, a3
+; RV32IF-NEXT: or a0, a2, a0
+; RV32IF-NEXT: feq.s a2, fs0, fs0
+; RV32IF-NEXT: neg a2, a2
+; RV32IF-NEXT: lui a5, 524288
+; RV32IF-NEXT: li a6, 1
; RV32IF-NEXT: lui a4, 524288
-; RV32IF-NEXT: lui a2, 524288
-; RV32IF-NEXT: beqz s0, .LBB1_4
+; RV32IF-NEXT: bne s0, a6, .LBB1_4
; RV32IF-NEXT: # %bb.3:
-; RV32IF-NEXT: mv a2, a1
+; RV32IF-NEXT: mv a4, a1
; RV32IF-NEXT: .LBB1_4:
-; RV32IF-NEXT: lui a1, %hi(.LCPI1_0)
-; RV32IF-NEXT: flw fa5, %lo(.LCPI1_0)(a1)
-; RV32IF-NEXT: flt.s a3, fa5, fs0
+; RV32IF-NEXT: and a0, a2, a0
; RV32IF-NEXT: beqz a3, .LBB1_6
; RV32IF-NEXT: # %bb.5:
-; RV32IF-NEXT: addi a2, a4, -1
+; RV32IF-NEXT: addi a4, a5, -1
; RV32IF-NEXT: .LBB1_6:
-; RV32IF-NEXT: feq.s a1, fs0, fs0
-; RV32IF-NEXT: neg a4, a1
-; RV32IF-NEXT: and a1, a4, a2
-; RV32IF-NEXT: neg a2, s0
-; RV32IF-NEXT: and a0, a2, a0
-; RV32IF-NEXT: neg a2, a3
-; RV32IF-NEXT: or a0, a2, a0
-; RV32IF-NEXT: and a0, a4, a0
+; RV32IF-NEXT: and a1, a2, a4
; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IF-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32IF-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload
; RV32IF-NEXT: addi sp, sp, 16
; RV32IF-NEXT: ret
;
@@ -115,23 +118,24 @@ define i64 @test_floor_si64(float %x) nounwind {
; RV32IZFINX-NEXT: lui a2, %hi(.LCPI1_0)
; RV32IZFINX-NEXT: lw a2, %lo(.LCPI1_0)(a2)
; RV32IZFINX-NEXT: and a0, s2, a0
-; RV32IZFINX-NEXT: flt.s a4, a2, s0
-; RV32IZFINX-NEXT: neg a2, a4
+; RV32IZFINX-NEXT: flt.s a3, a2, s0
+; RV32IZFINX-NEXT: neg a2, a3
; RV32IZFINX-NEXT: or a0, a2, a0
; RV32IZFINX-NEXT: feq.s a2, s0, s0
; RV32IZFINX-NEXT: neg a2, a2
; RV32IZFINX-NEXT: lui a5, 524288
-; RV32IZFINX-NEXT: lui a3, 524288
-; RV32IZFINX-NEXT: beqz s1, .LBB1_4
+; RV32IZFINX-NEXT: li a6, 1
+; RV32IZFINX-NEXT: lui a4, 524288
+; RV32IZFINX-NEXT: bne s1, a6, .LBB1_4
; RV32IZFINX-NEXT: # %bb.3:
-; RV32IZFINX-NEXT: mv a3, a1
+; RV32IZFINX-NEXT: mv a4, a1
; RV32IZFINX-NEXT: .LBB1_4:
; RV32IZFINX-NEXT: and a0, a2, a0
-; RV32IZFINX-NEXT: beqz a4, .LBB1_6
+; RV32IZFINX-NEXT: beqz a3, .LBB1_6
; RV32IZFINX-NEXT: # %bb.5:
-; RV32IZFINX-NEXT: addi a3, a5, -1
+; RV32IZFINX-NEXT: addi a4, a5, -1
; RV32IZFINX-NEXT: .LBB1_6:
-; RV32IZFINX-NEXT: and a1, a2, a3
+; RV32IZFINX-NEXT: and a1, a2, a4
; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
@@ -180,8 +184,7 @@ define i64 @test_floor_ui64(float %x) nounwind {
; RV32IF: # %bb.0:
; RV32IF-NEXT: addi sp, sp, -16
; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IF-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill
; RV32IF-NEXT: fmv.s fs0, fa0
; RV32IF-NEXT: lui a0, 307200
; RV32IF-NEXT: fmv.w.x fa5, a0
@@ -193,22 +196,22 @@ define i64 @test_floor_ui64(float %x) nounwind {
; RV32IF-NEXT: fcvt.s.w fa5, a0, rdn
; RV32IF-NEXT: fsgnj.s fs0, fa5, fs0
; RV32IF-NEXT: .LBB3_2:
-; RV32IF-NEXT: fmv.w.x fa5, zero
-; RV32IF-NEXT: fle.s a0, fa5, fs0
-; RV32IF-NEXT: neg s0, a0
; RV32IF-NEXT: fmv.s fa0, fs0
; RV32IF-NEXT: call __fixunssfdi
-; RV32IF-NEXT: lui a2, %hi(.LCPI3_0)
-; RV32IF-NEXT: flw fa5, %lo(.LCPI3_0)(a2)
-; RV32IF-NEXT: and a0, s0, a0
-; RV32IF-NEXT: flt.s a2, fa5, fs0
-; RV32IF-NEXT: neg a2, a2
-; RV32IF-NEXT: or a0, a2, a0
-; RV32IF-NEXT: and a1, s0, a1
-; RV32IF-NEXT: or a1, a2, a1
+; RV32IF-NEXT: fmv.w.x fa5, zero
+; RV32IF-NEXT: fle.s a2, fa5, fs0
+; RV32IF-NEXT: lui a3, %hi(.LCPI3_0)
+; RV32IF-NEXT: flw fa5, %lo(.LCPI3_0)(a3)
+; RV32IF-NEXT: xori a2, a2, 1
+; RV32IF-NEXT: addi a2, a2, -1
+; RV32IF-NEXT: and a0, a2, a0
+; RV32IF-NEXT: flt.s a3, fa5, fs0
+; RV32IF-NEXT: neg a3, a3
+; RV32IF-NEXT: or a0, a3, a0
+; RV32IF-NEXT: and a1, a2, a1
+; RV32IF-NEXT: or a1, a3, a1
; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IF-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload
; RV32IF-NEXT: addi sp, sp, 16
; RV32IF-NEXT: ret
;
@@ -226,7 +229,6 @@ define i64 @test_floor_ui64(float %x) nounwind {
; RV32IZFINX-NEXT: addi sp, sp, -16
; RV32IZFINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZFINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32IZFINX-NEXT: mv s0, a0
; RV32IZFINX-NEXT: lui a0, 307200
; RV32IZFINX-NEXT: fabs.s a1, s0
@@ -237,21 +239,21 @@ define i64 @test_floor_ui64(float %x) nounwind {
; RV32IZFINX-NEXT: fcvt.s.w a0, a0, rdn
; RV32IZFINX-NEXT: fsgnj.s s0, a0, s0
; RV32IZFINX-NEXT: .LBB3_2:
-; RV32IZFINX-NEXT: fle.s a0, zero, s0
-; RV32IZFINX-NEXT: neg s1, a0
; RV32IZFINX-NEXT: mv a0, s0
; RV32IZFINX-NEXT: call __fixunssfdi
-; RV32IZFINX-NEXT: lui a2, %hi(.LCPI3_0)
-; RV32IZFINX-NEXT: lw a2, %lo(.LCPI3_0)(a2)
-; RV32IZFINX-NEXT: and a0, s1, a0
-; RV32IZFINX-NEXT: flt.s a2, a2, s0
-; RV32IZFINX-NEXT: neg a2, a2
-; RV32IZFINX-NEXT: or a0, a2, a0
-; RV32IZFINX-NEXT: and a1, s1, a1
-; RV32IZFINX-NEXT: or a1, a2, a1
+; RV32IZFINX-NEXT: fle.s a2, zero, s0
+; RV32IZFINX-NEXT: lui a3, %hi(.LCPI3_0)
+; RV32IZFINX-NEXT: lw a3, %lo(.LCPI3_0)(a3)
+; RV32IZFINX-NEXT: xori a2, a2, 1
+; RV32IZFINX-NEXT: addi a2, a2, -1
+; RV32IZFINX-NEXT: and a0, a2, a0
+; RV32IZFINX-NEXT: flt.s a3, a3, s0
+; RV32IZFINX-NEXT: neg a3, a3
+; RV32IZFINX-NEXT: or a0, a3, a0
+; RV32IZFINX-NEXT: and a1, a2, a1
+; RV32IZFINX-NEXT: or a1, a3, a1
; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32IZFINX-NEXT: addi sp, sp, 16
; RV32IZFINX-NEXT: ret
;
@@ -297,7 +299,8 @@ define i64 @test_ceil_si64(float %x) nounwind {
; RV32IF-NEXT: addi sp, sp, -16
; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IF-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32IF-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill
; RV32IF-NEXT: fmv.s fs0, fa0
; RV32IF-NEXT: lui a0, 307200
; RV32IF-NEXT: fmv.w.x fa5, a0
@@ -312,32 +315,34 @@ define i64 @test_ceil_si64(float %x) nounwind {
; RV32IF-NEXT: lui a0, 913408
; RV32IF-NEXT: fmv.w.x fa5, a0
; RV32IF-NEXT: fle.s s0, fa5, fs0
+; RV32IF-NEXT: neg s1, s0
; RV32IF-NEXT: fmv.s fa0, fs0
; RV32IF-NEXT: call __fixsfdi
+; RV32IF-NEXT: lui a2, %hi(.LCPI5_0)
+; RV32IF-NEXT: flw fa5, %lo(.LCPI5_0)(a2)
+; RV32IF-NEXT: and a0, s1, a0
+; RV32IF-NEXT: flt.s a3, fa5, fs0
+; RV32IF-NEXT: neg a2, a3
+; RV32IF-NEXT: or a0, a2, a0
+; RV32IF-NEXT: feq.s a2, fs0, fs0
+; RV32IF-NEXT: neg a2, a2
+; RV32IF-NEXT: lui a5, 524288
+; RV32IF-NEXT: li a6, 1
; RV32IF-NEXT: lui a4, 524288
-; RV32IF-NEXT: lui a2, 524288
-; RV32IF-NEXT: beqz s0, .LBB5_4
+; RV32IF-NEXT: bne s0, a6, .LBB5_4
; RV32IF-NEXT: # %bb.3:
-; RV32IF-NEXT: mv a2, a1
+; RV32IF-NEXT: mv a4, a1
; RV32IF-NEXT: .LBB5_4:
-; RV32IF-NEXT: lui a1, %hi(.LCPI5_0)
-; RV32IF-NEXT: flw fa5, %lo(.LCPI5_0)(a1)
-; RV32IF-NEXT: flt.s a3, fa5, fs0
+; RV32IF-NEXT: and a0, a2, a0
; RV32IF-NEXT: beqz a3, .LBB5_6
; RV32IF-NEXT: # %bb.5:
-; RV32IF-NEXT: addi a2, a4, -1
+; RV32IF-NEXT: addi a4, a5, -1
; RV32IF-NEXT: .LBB5_6:
-; RV32IF-NEXT: feq.s a1, fs0, fs0
-; RV32IF-NEXT: neg a4, a1
-; RV32IF-NEXT: and a1, a4, a2
-; RV32IF-NEXT: neg a2, s0
-; RV32IF-NEXT: and a0, a2, a0
-; RV32IF-NEXT: neg a2, a3
-; RV32IF-NEXT: or a0, a2, a0
-; RV32IF-NEXT: and a0, a4, a0
+; RV32IF-NEXT: and a1, a2, a4
; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IF-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32IF-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload
; RV32IF-NEXT: addi sp, sp, 16
; RV32IF-NEXT: ret
;
@@ -375,23 +380,24 @@ define i64 @test_ceil_si64(float %x) nounwind {
; RV32IZFINX-NEXT: lui a2, %hi(.LCPI5_0)
; RV32IZFINX-NEXT: lw a2, %lo(.LCPI5_0)(a2)
; RV32IZFINX-NEXT: and a0, s2, a0
-; RV32IZFINX-NEXT: flt.s a4, a2, s0
-; RV32IZFINX-NEXT: neg a2, a4
+; RV32IZFINX-NEXT: flt.s a3, a2, s0
+; RV32IZFINX-NEXT: neg a2, a3
; RV32IZFINX-NEXT: or a0, a2, a0
; RV32IZFINX-NEXT: feq.s a2, s0, s0
; RV32IZFINX-NEXT: neg a2, a2
; RV32IZFINX-NEXT: lui a5, 524288
-; RV32IZFINX-NEXT: lui a3, 524288
-; RV32IZFINX-NEXT: beqz s1, .LBB5_4
+; RV32IZFINX-NEXT: li a6, 1
+; RV32IZFINX-NEXT: lui a4, 524288
+; RV32IZFINX-NEXT: bne s1, a6, .LBB5_4
; RV32IZFINX-NEXT: # %bb.3:
-; RV32IZFINX-NEXT: mv a3, a1
+; RV32IZFINX-NEXT: mv a4, a1
; RV32IZFINX-NEXT: .LBB5_4:
; RV32IZFINX-NEXT: and a0, a2, a0
-; RV32IZFINX-NEXT: beqz a4, .LBB5_6
+; RV32IZFINX-NEXT: beqz a3, .LBB5_6
; RV32IZFINX-NEXT: # %bb.5:
-; RV32IZFINX-NEXT: addi a3, a5, -1
+; RV32IZFINX-NEXT: addi a4, a5, -1
; RV32IZFINX-NEXT: .LBB5_6:
-; RV32IZFINX-NEXT: and a1, a2, a3
+; RV32IZFINX-NEXT: and a1, a2, a4
; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
@@ -440,8 +446,7 @@ define i64 @test_ceil_ui64(float %x) nounwind {
; RV32IF: # %bb.0:
; RV32IF-NEXT: addi sp, sp, -16
; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IF-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill
; RV32IF-NEXT: fmv.s fs0, fa0
; RV32IF-NEXT: lui a0, 307200
; RV32IF-NEXT: fmv.w.x fa5, a0
@@ -453,22 +458,22 @@ define i64 @test_ceil_ui64(float %x) nounwind {
; RV32IF-NEXT: fcvt.s.w fa5, a0, rup
; RV32IF-NEXT: fsgnj.s fs0, fa5, fs0
; RV32IF-NEXT: .LBB7_2:
-; RV32IF-NEXT: fmv.w.x fa5, zero
-; RV32IF-NEXT: fle.s a0, fa5, fs0
-; RV32IF-NEXT: neg s0, a0
; RV32IF-NEXT: fmv.s fa0, fs0
; RV32IF-NEXT: call __fixunssfdi
-; RV32IF-NEXT: lui a2, %hi(.LCPI7_0)
-; RV32IF-NEXT: flw fa5, %lo(.LCPI7_0)(a2)
-; RV32IF-NEXT: and a0, s0, a0
-; RV32IF-NEXT: flt.s a2, fa5, fs0
-; RV32IF-NEXT: neg a2, a2
-; RV32IF-NEXT: or a0, a2, a0
-; RV32IF-NEXT: and a1, s0, a1
-; RV32IF-NEXT: or a1, a2, a1
+; RV32IF-NEXT: fmv.w.x fa5, zero
+; RV32IF-NEXT: fle.s a2, fa5, fs0
+; RV32IF-NEXT: lui a3, %hi(.LCPI7_0)
+; RV32IF-NEXT: flw fa5, %lo(.LCPI7_0)(a3)
+; RV32IF-NEXT: xori a2, a2, 1
+; RV32IF-NEXT: addi a2, a2, -1
+; RV32IF-NEXT: and a0, a2, a0
+; RV32IF-NEXT: flt.s a3, fa5, fs0
+; RV32IF-NEXT: neg a3, a3
+; RV32IF-NEXT: or a0, a3, a0
+; RV32IF-NEXT: and a1, a2, a1
+; RV32IF-NEXT: or a1, a3, a1
; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IF-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload
; RV32IF-NEXT: addi sp, sp, 16
; RV32IF-NEXT: ret
;
@@ -486,7 +491,6 @@ define i64 @test_ceil_ui64(float %x) nounwind {
; RV32IZFINX-NEXT: addi sp, sp, -16
; RV32IZFINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZFINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32IZFINX-NEXT: mv s0, a0
; RV32IZFINX-NEXT: lui a0, 307200
; RV32IZFINX-NEXT: fabs.s a1, s0
@@ -497,21 +501,21 @@ define i64 @test_ceil_ui64(float %x) nounwind {
; RV32IZFINX-NEXT: fcvt.s.w a0, a0, rup
; RV32IZFINX-NEXT: fsgnj.s s0, a0, s0
; RV32IZFINX-NEXT: .LBB7_2:
-; RV32IZFINX-NEXT: fle.s a0, zero, s0
-; RV32IZFINX-NEXT: neg s1, a0
; RV32IZFINX-NEXT: mv a0, s0
; RV32IZFINX-NEXT: call __fixunssfdi
-; RV32IZFINX-NEXT: lui a2, %hi(.LCPI7_0)
-; RV32IZFINX-NEXT: lw a2, %lo(.LCPI7_0)(a2)
-; RV32IZFINX-NEXT: and a0, s1, a0
-; RV32IZFINX-NEXT: flt.s a2, a2, s0
-; RV32IZFINX-NEXT: neg a2, a2
-; RV32IZFINX-NEXT: or a0, a2, a0
-; RV32IZFINX-NEXT: and a1, s1, a1
-; RV32IZFINX-NEXT: or a1, a2, a1
+; RV32IZFINX-NEXT: fle.s a2, zero, s0
+; RV32IZFINX-NEXT: lui a3, %hi(.LCPI7_0)
+; RV32IZFINX-NEXT: lw a3, %lo(.LCPI7_0)(a3)
+; RV32IZFINX-NEXT: xori a2, a2, 1
+; RV32IZFINX-NEXT: addi a2, a2, -1
+; RV32IZFINX-NEXT: and a0, a2, a0
+; RV32IZFINX-NEXT: flt.s a3, a3, s0
+; RV32IZFINX-NEXT: neg a3, a3
+; RV32IZFINX-NEXT: or a0, a3, a0
+; RV32IZFINX-NEXT: and a1, a2, a1
+; RV32IZFINX-NEXT: or a1, a3, a1
; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32IZFINX-NEXT: addi sp, sp, 16
; RV32IZFINX-NEXT: ret
;
@@ -557,7 +561,8 @@ define i64 @test_trunc_si64(float %x) nounwind {
; RV32IF-NEXT: addi sp, sp, -16
; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IF-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32IF-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill
; RV32IF-NEXT: fmv.s fs0, fa0
; RV32IF-NEXT: lui a0, 307200
; RV32IF-NEXT: fmv.w.x fa5, a0
@@ -572,32 +577,34 @@ define i64 @test_trunc_si64(float %x) nounwind {
; RV32IF-NEXT: lui a0, 913408
; RV32IF-NEXT: fmv.w.x fa5, a0
; RV32IF-NEXT: fle.s s0, fa5, fs0
+; RV32IF-NEXT: neg s1, s0
; RV32IF-NEXT: fmv.s fa0, fs0
; RV32IF-NEXT: call __fixsfdi
+; RV32IF-NEXT: lui a2, %hi(.LCPI9_0)
+; RV32IF-NEXT: flw fa5, %lo(.LCPI9_0)(a2)
+; RV32IF-NEXT: and a0, s1, a0
+; RV32IF-NEXT: flt.s a3, fa5, fs0
+; RV32IF-NEXT: neg a2, a3
+; RV32IF-NEXT: or a0, a2, a0
+; RV32IF-NEXT: feq.s a2, fs0, fs0
+; RV32IF-NEXT: neg a2, a2
+; RV32IF-NEXT: lui a5, 524288
+; RV32IF-NEXT: li a6, 1
; RV32IF-NEXT: lui a4, 524288
-; RV32IF-NEXT: lui a2, 524288
-; RV32IF-NEXT: beqz s0, .LBB9_4
+; RV32IF-NEXT: bne s0, a6, .LBB9_4
; RV32IF-NEXT: # %bb.3:
-; RV32IF-NEXT: mv a2, a1
+; RV32IF-NEXT: mv a4, a1
; RV32IF-NEXT: .LBB9_4:
-; RV32IF-NEXT: lui a1, %hi(.LCPI9_0)
-; RV32IF-NEXT: flw fa5, %lo(.LCPI9_0)(a1)
-; RV32IF-NEXT: flt.s a3, fa5, fs0
+; RV32IF-NEXT: and a0, a2, a0
; RV32IF-NEXT: beqz a3, .LBB9_6
; RV32IF-NEXT: # %bb.5:
-; RV32IF-NEXT: addi a2, a4, -1
+; RV32IF-NEXT: addi a4, a5, -1
; RV32IF-NEXT: .LBB9_6:
-; RV32IF-NEXT: feq.s a1, fs0, fs0
-; RV32IF-NEXT: neg a4, a1
-; RV32IF-NEXT: and a1, a4, a2
-; RV32IF-NEXT: neg a2, s0
-; RV32IF-NEXT: and a0, a2, a0
-; RV32IF-NEXT: neg a2, a3
-; RV32IF-NEXT: or a0, a2, a0
-; RV32IF-NEXT: and a0, a4, a0
+; RV32IF-NEXT: and a1, a2, a4
; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IF-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32IF-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload
; RV32IF-NEXT: addi sp, sp, 16
; RV32IF-NEXT: ret
;
@@ -635,23 +642,24 @@ define i64 @test_trunc_si64(float %x) nounwind {
; RV32IZFINX-NEXT: lui a2, %hi(.LCPI9_0)
; RV32IZFINX-NEXT: lw a2, %lo(.LCPI9_0)(a2)
; RV32IZFINX-NEXT: and a0, s2, a0
-; RV32IZFINX-NEXT: flt.s a4, a2, s0
-; RV32IZFINX-NEXT: neg a2, a4
+; RV32IZFINX-NEXT: flt.s a3, a2, s0
+; RV32IZFINX-NEXT: neg a2, a3
; RV32IZFINX-NEXT: or a0, a2, a0
; RV32IZFINX-NEXT: feq.s a2, s0, s0
; RV32IZFINX-NEXT: neg a2, a2
; RV32IZFINX-NEXT: lui a5, 524288
-; RV32IZFINX-NEXT: lui a3, 524288
-; RV32IZFINX-NEXT: beqz s1, .LBB9_4
+; RV32IZFINX-NEXT: li a6, 1
+; RV32IZFINX-NEXT: lui a4, 524288
+; RV32IZFINX-NEXT: bne s1, a6, .LBB9_4
; RV32IZFINX-NEXT: # %bb.3:
-; RV32IZFINX-NEXT: mv a3, a1
+; RV32IZFINX-NEXT: mv a4, a1
; RV32IZFINX-NEXT: .LBB9_4:
; RV32IZFINX-NEXT: and a0, a2, a0
-; RV32IZFINX-NEXT: beqz a4, .LBB9_6
+; RV32IZFINX-NEXT: beqz a3, .LBB9_6
; RV32IZFINX-NEXT: # %bb.5:
-; RV32IZFINX-NEXT: addi a3, a5, -1
+; RV32IZFINX-NEXT: addi a4, a5, -1
; RV32IZFINX-NEXT: .LBB9_6:
-; RV32IZFINX-NEXT: and a1, a2, a3
+; RV32IZFINX-NEXT: and a1, a2, a4
; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
@@ -700,8 +708,7 @@ define i64 @test_trunc_ui64(float %x) nounwind {
; RV32IF: # %bb.0:
; RV32IF-NEXT: addi sp, sp, -16
; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IF-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill
; RV32IF-NEXT: fmv.s fs0, fa0
; RV32IF-NEXT: lui a0, 307200
; RV32IF-NEXT: fmv.w.x fa5, a0
@@ -713,22 +720,22 @@ define i64 @test_trunc_ui64(float %x) nounwind {
; RV32IF-NEXT: fcvt.s.w fa5, a0, rtz
; RV32IF-NEXT: fsgnj.s fs0, fa5, fs0
; RV32IF-NEXT: .LBB11_2:
-; RV32IF-NEXT: fmv.w.x fa5, zero
-; RV32IF-NEXT: fle.s a0, fa5, fs0
-; RV32IF-NEXT: neg s0, a0
; RV32IF-NEXT: fmv.s fa0, fs0
; RV32IF-NEXT: call __fixunssfdi
-; RV32IF-NEXT: lui a2, %hi(.LCPI11_0)
-; RV32IF-NEXT: flw fa5, %lo(.LCPI11_0)(a2)
-; RV32IF-NEXT: and a0, s0, a0
-; RV32IF-NEXT: flt.s a2, fa5, fs0
-; RV32IF-NEXT: neg a2, a2
-; RV32IF-NEXT: or a0, a2, a0
-; RV32IF-NEXT: and a1, s0, a1
-; RV32IF-NEXT: or a1, a2, a1
+; RV32IF-NEXT: fmv.w.x fa5, zero
+; RV32IF-NEXT: fle.s a2, fa5, fs0
+; RV32IF-NEXT: lui a3, %hi(.LCPI11_0)
+; RV32IF-NEXT: flw fa5, %lo(.LCPI11_0)(a3)
+; RV32IF-NEXT: xori a2, a2, 1
+; RV32IF-NEXT: addi a2, a2, -1
+; RV32IF-NEXT: and a0, a2, a0
+; RV32IF-NEXT: flt.s a3, fa5, fs0
+; RV32IF-NEXT: neg a3, a3
+; RV32IF-NEXT: or a0, a3, a0
+; RV32IF-NEXT: and a1, a2, a1
+; RV32IF-NEXT: or a1, a3, a1
; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IF-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload
; RV32IF-NEXT: addi sp, sp, 16
; RV32IF-NEXT: ret
;
@@ -746,7 +753,6 @@ define i64 @test_trunc_ui64(float %x) nounwind {
; RV32IZFINX-NEXT: addi sp, sp, -16
; RV32IZFINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZFINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32IZFINX-NEXT: mv s0, a0
; RV32IZFINX-NEXT: lui a0, 307200
; RV32IZFINX-NEXT: fabs.s a1, s0
@@ -757,21 +763,21 @@ define i64 @test_trunc_ui64(float %x) nounwind {
; RV32IZFINX-NEXT: fcvt.s.w a0, a0, rtz
; RV32IZFINX-NEXT: fsgnj.s s0, a0, s0
; RV32IZFINX-NEXT: .LBB11_2:
-; RV32IZFINX-NEXT: fle.s a0, zero, s0
-; RV32IZFINX-NEXT: neg s1, a0
; RV32IZFINX-NEXT: mv a0, s0
; RV32IZFINX-NEXT: call __fixunssfdi
-; RV32IZFINX-NEXT: lui a2, %hi(.LCPI11_0)
-; RV32IZFINX-NEXT: lw a2, %lo(.LCPI11_0)(a2)
-; RV32IZFINX-NEXT: and a0, s1, a0
-; RV32IZFINX-NEXT: flt.s a2, a2, s0
-; RV32IZFINX-NEXT: neg a2, a2
-; RV32IZFINX-NEXT: or a0, a2, a0
-; RV32IZFINX-NEXT: and a1, s1, a1
-; RV32IZFINX-NEXT: or a1, a2, a1
+; RV32IZFINX-NEXT: fle.s a2, zero, s0
+; RV32IZFINX-NEXT: lui a3, %hi(.LCPI11_0)
+; RV32IZFINX-NEXT: lw a3, %lo(.LCPI11_0)(a3)
+; RV32IZFINX-NEXT: xori a2, a2, 1
+; RV32IZFINX-NEXT: addi a2, a2, -1
+; RV32IZFINX-NEXT: and a0, a2, a0
+; RV32IZFINX-NEXT: flt.s a3, a3, s0
+; RV32IZFINX-NEXT: neg a3, a3
+; RV32IZFINX-NEXT: or a0, a3, a0
+; RV32IZFINX-NEXT: and a1, a2, a1
+; RV32IZFINX-NEXT: or a1, a3, a1
; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32IZFINX-NEXT: addi sp, sp, 16
; RV32IZFINX-NEXT: ret
;
@@ -817,7 +823,8 @@ define i64 @test_round_si64(float %x) nounwind {
; RV32IF-NEXT: addi sp, sp, -16
; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IF-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32IF-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill
; RV32IF-NEXT: fmv.s fs0, fa0
; RV32IF-NEXT: lui a0, 307200
; RV32IF-NEXT: fmv.w.x fa5, a0
@@ -832,32 +839,34 @@ define i64 @test_round_si64(float %x) nounwind {
; RV32IF-NEXT: lui a0, 913408
; RV32IF-NEXT: fmv.w.x fa5, a0
; RV32IF-NEXT: fle.s s0, fa5, fs0
+; RV32IF-NEXT: neg s1, s0
; RV32IF-NEXT: fmv.s fa0, fs0
; RV32IF-NEXT: call __fixsfdi
+; RV32IF-NEXT: lui a2, %hi(.LCPI13_0)
+; RV32IF-NEXT: flw fa5, %lo(.LCPI13_0)(a2)
+; RV32IF-NEXT: and a0, s1, a0
+; RV32IF-NEXT: flt.s a3, fa5, fs0
+; RV32IF-NEXT: neg a2, a3
+; RV32IF-NEXT: or a0, a2, a0
+; RV32IF-NEXT: feq.s a2, fs0, fs0
+; RV32IF-NEXT: neg a2, a2
+; RV32IF-NEXT: lui a5, 524288
+; RV32IF-NEXT: li a6, 1
; RV32IF-NEXT: lui a4, 524288
-; RV32IF-NEXT: lui a2, 524288
-; RV32IF-NEXT: beqz s0, .LBB13_4
+; RV32IF-NEXT: bne s0, a6, .LBB13_4
; RV32IF-NEXT: # %bb.3:
-; RV32IF-NEXT: mv a2, a1
+; RV32IF-NEXT: mv a4, a1
; RV32IF-NEXT: .LBB13_4:
-; RV32IF-NEXT: lui a1, %hi(.LCPI13_0)
-; RV32IF-NEXT: flw fa5, %lo(.LCPI13_0)(a1)
-; RV32IF-NEXT: flt.s a3, fa5, fs0
+; RV32IF-NEXT: and a0, a2, a0
; RV32IF-NEXT: beqz a3, .LBB13_6
; RV32IF-NEXT: # %bb.5:
-; RV32IF-NEXT: addi a2, a4, -1
+; RV32IF-NEXT: addi a4, a5, -1
; RV32IF-NEXT: .LBB13_6:
-; RV32IF-NEXT: feq.s a1, fs0, fs0
-; RV32IF-NEXT: neg a4, a1
-; RV32IF-NEXT: and a1, a4, a2
-; RV32IF-NEXT: neg a2, s0
-; RV32IF-NEXT: and a0, a2, a0
-; RV32IF-NEXT: neg a2, a3
-; RV32IF-NEXT: or a0, a2, a0
-; RV32IF-NEXT: and a0, a4, a0
+; RV32IF-NEXT: and a1, a2, a4
; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IF-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32IF-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload
; RV32IF-NEXT: addi sp, sp, 16
; RV32IF-NEXT: ret
;
@@ -895,23 +904,24 @@ define i64 @test_round_si64(float %x) nounwind {
; RV32IZFINX-NEXT: lui a2, %hi(.LCPI13_0)
; RV32IZFINX-NEXT: lw a2, %lo(.LCPI13_0)(a2)
; RV32IZFINX-NEXT: and a0, s2, a0
-; RV32IZFINX-NEXT: flt.s a4, a2, s0
-; RV32IZFINX-NEXT: neg a2, a4
+; RV32IZFINX-NEXT: flt.s a3, a2, s0
+; RV32IZFINX-NEXT: neg a2, a3
; RV32IZFINX-NEXT: or a0, a2, a0
; RV32IZFINX-NEXT: feq.s a2, s0, s0
; RV32IZFINX-NEXT: neg a2, a2
; RV32IZFINX-NEXT: lui a5, 524288
-; RV32IZFINX-NEXT: lui a3, 524288
-; RV32IZFINX-NEXT: beqz s1, .LBB13_4
+; RV32IZFINX-NEXT: li a6, 1
+; RV32IZFINX-NEXT: lui a4, 524288
+; RV32IZFINX-NEXT: bne s1, a6, .LBB13_4
; RV32IZFINX-NEXT: # %bb.3:
-; RV32IZFINX-NEXT: mv a3, a1
+; RV32IZFINX-NEXT: mv a4, a1
; RV32IZFINX-NEXT: .LBB13_4:
; RV32IZFINX-NEXT: and a0, a2, a0
-; RV32IZFINX-NEXT: beqz a4, .LBB13_6
+; RV32IZFINX-NEXT: beqz a3, .LBB13_6
; RV32IZFINX-NEXT: # %bb.5:
-; RV32IZFINX-NEXT: addi a3, a5, -1
+; RV32IZFINX-NEXT: addi a4, a5, -1
; RV32IZFINX-NEXT: .LBB13_6:
-; RV32IZFINX-NEXT: and a1, a2, a3
+; RV32IZFINX-NEXT: and a1, a2, a4
; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
@@ -960,8 +970,7 @@ define i64 @test_round_ui64(float %x) nounwind {
; RV32IF: # %bb.0:
; RV32IF-NEXT: addi sp, sp, -16
; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IF-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill
; RV32IF-NEXT: fmv.s fs0, fa0
; RV32IF-NEXT: lui a0, 307200
; RV32IF-NEXT: fmv.w.x fa5, a0
@@ -973,22 +982,22 @@ define i64 @test_round_ui64(float %x) nounwind {
; RV32IF-NEXT: fcvt.s.w fa5, a0, rmm
; RV32IF-NEXT: fsgnj.s fs0, fa5, fs0
; RV32IF-NEXT: .LBB15_2:
-; RV32IF-NEXT: fmv.w.x fa5, zero
-; RV32IF-NEXT: fle.s a0, fa5, fs0
-; RV32IF-NEXT: neg s0, a0
; RV32IF-NEXT: fmv.s fa0, fs0
; RV32IF-NEXT: call __fixunssfdi
-; RV32IF-NEXT: lui a2, %hi(.LCPI15_0)
-; RV32IF-NEXT: flw fa5, %lo(.LCPI15_0)(a2)
-; RV32IF-NEXT: and a0, s0, a0
-; RV32IF-NEXT: flt.s a2, fa5, fs0
-; RV32IF-NEXT: neg a2, a2
-; RV32IF-NEXT: or a0, a2, a0
-; RV32IF-NEXT: and a1, s0, a1
-; RV32IF-NEXT: or a1, a2, a1
+; RV32IF-NEXT: fmv.w.x fa5, zero
+; RV32IF-NEXT: fle.s a2, fa5, fs0
+; RV32IF-NEXT: lui a3, %hi(.LCPI15_0)
+; RV32IF-NEXT: flw fa5, %lo(.LCPI15_0)(a3)
+; RV32IF-NEXT: xori a2, a2, 1
+; RV32IF-NEXT: addi a2, a2, -1
+; RV32IF-NEXT: and a0, a2, a0
+; RV32IF-NEXT: flt.s a3, fa5, fs0
+; RV32IF-NEXT: neg a3, a3
+; RV32IF-NEXT: or a0, a3, a0
+; RV32IF-NEXT: and a1, a2, a1
+; RV32IF-NEXT: or a1, a3, a1
; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IF-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload
; RV32IF-NEXT: addi sp, sp, 16
; RV32IF-NEXT: ret
;
@@ -1006,7 +1015,6 @@ define i64 @test_round_ui64(float %x) nounwind {
; RV32IZFINX-NEXT: addi sp, sp, -16
; RV32IZFINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZFINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32IZFINX-NEXT: mv s0, a0
; RV32IZFINX-NEXT: lui a0, 307200
; RV32IZFINX-NEXT: fabs.s a1, s0
@@ -1017,21 +1025,21 @@ define i64 @test_round_ui64(float %x) nounwind {
; RV32IZFINX-NEXT: fcvt.s.w a0, a0, rmm
; RV32IZFINX-NEXT: fsgnj.s s0, a0, s0
; RV32IZFINX-NEXT: .LBB15_2:
-; RV32IZFINX-NEXT: fle.s a0, zero, s0
-; RV32IZFINX-NEXT: neg s1, a0
; RV32IZFINX-NEXT: mv a0, s0
; RV32IZFINX-NEXT: call __fixunssfdi
-; RV32IZFINX-NEXT: lui a2, %hi(.LCPI15_0)
-; RV32IZFINX-NEXT: lw a2, %lo(.LCPI15_0)(a2)
-; RV32IZFINX-NEXT: and a0, s1, a0
-; RV32IZFINX-NEXT: flt.s a2, a2, s0
-; RV32IZFINX-NEXT: neg a2, a2
-; RV32IZFINX-NEXT: or a0, a2, a0
-; RV32IZFINX-NEXT: and a1, s1, a1
-; RV32IZFINX-NEXT: or a1, a2, a1
+; RV32IZFINX-NEXT: fle.s a2, zero, s0
+; RV32IZFINX-NEXT: lui a3, %hi(.LCPI15_0)
+; RV32IZFINX-NEXT: lw a3, %lo(.LCPI15_0)(a3)
+; RV32IZFINX-NEXT: xori a2, a2, 1
+; RV32IZFINX-NEXT: addi a2, a2, -1
+; RV32IZFINX-NEXT: and a0, a2, a0
+; RV32IZFINX-NEXT: flt.s a3, a3, s0
+; RV32IZFINX-NEXT: neg a3, a3
+; RV32IZFINX-NEXT: or a0, a3, a0
+; RV32IZFINX-NEXT: and a1, a2, a1
+; RV32IZFINX-NEXT: or a1, a3, a1
; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32IZFINX-NEXT: addi sp, sp, 16
; RV32IZFINX-NEXT: ret
;
@@ -1077,7 +1085,8 @@ define i64 @test_roundeven_si64(float %x) nounwind {
; RV32IF-NEXT: addi sp, sp, -16
; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IF-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32IF-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill
; RV32IF-NEXT: fmv.s fs0, fa0
; RV32IF-NEXT: lui a0, 307200
; RV32IF-NEXT: fmv.w.x fa5, a0
@@ -1092,32 +1101,34 @@ define i64 @test_roundeven_si64(float %x) nounwind {
; RV32IF-NEXT: lui a0, 913408
; RV32IF-NEXT: fmv.w.x fa5, a0
; RV32IF-NEXT: fle.s s0, fa5, fs0
+; RV32IF-NEXT: neg s1, s0
; RV32IF-NEXT: fmv.s fa0, fs0
; RV32IF-NEXT: call __fixsfdi
+; RV32IF-NEXT: lui a2, %hi(.LCPI17_0)
+; RV32IF-NEXT: flw fa5, %lo(.LCPI17_0)(a2)
+; RV32IF-NEXT: and a0, s1, a0
+; RV32IF-NEXT: flt.s a3, fa5, fs0
+; RV32IF-NEXT: neg a2, a3
+; RV32IF-NEXT: or a0, a2, a0
+; RV32IF-NEXT: feq.s a2, fs0, fs0
+; RV32IF-NEXT: neg a2, a2
+; RV32IF-NEXT: lui a5, 524288
+; RV32IF-NEXT: li a6, 1
; RV32IF-NEXT: lui a4, 524288
-; RV32IF-NEXT: lui a2, 524288
-; RV32IF-NEXT: beqz s0, .LBB17_4
+; RV32IF-NEXT: bne s0, a6, .LBB17_4
; RV32IF-NEXT: # %bb.3:
-; RV32IF-NEXT: mv a2, a1
+; RV32IF-NEXT: mv a4, a1
; RV32IF-NEXT: .LBB17_4:
-; RV32IF-NEXT: lui a1, %hi(.LCPI17_0)
-; RV32IF-NEXT: flw fa5, %lo(.LCPI17_0)(a1)
-; RV32IF-NEXT: flt.s a3, fa5, fs0
+; RV32IF-NEXT: and a0, a2, a0
; RV32IF-NEXT: beqz a3, .LBB17_6
; RV32IF-NEXT: # %bb.5:
-; RV32IF-NEXT: addi a2, a4, -1
+; RV32IF-NEXT: addi a4, a5, -1
; RV32IF-NEXT: .LBB17_6:
-; RV32IF-NEXT: feq.s a1, fs0, fs0
-; RV32IF-NEXT: neg a4, a1
-; RV32IF-NEXT: and a1, a4, a2
-; RV32IF-NEXT: neg a2, s0
-; RV32IF-NEXT: and a0, a2, a0
-; RV32IF-NEXT: neg a2, a3
-; RV32IF-NEXT: or a0, a2, a0
-; RV32IF-NEXT: and a0, a4, a0
+; RV32IF-NEXT: and a1, a2, a4
; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IF-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32IF-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload
; RV32IF-NEXT: addi sp, sp, 16
; RV32IF-NEXT: ret
;
@@ -1155,23 +1166,24 @@ define i64 @test_roundeven_si64(float %x) nounwind {
; RV32IZFINX-NEXT: lui a2, %hi(.LCPI17_0)
; RV32IZFINX-NEXT: lw a2, %lo(.LCPI17_0)(a2)
; RV32IZFINX-NEXT: and a0, s2, a0
-; RV32IZFINX-NEXT: flt.s a4, a2, s0
-; RV32IZFINX-NEXT: neg a2, a4
+; RV32IZFINX-NEXT: flt.s a3, a2, s0
+; RV32IZFINX-NEXT: neg a2, a3
; RV32IZFINX-NEXT: or a0, a2, a0
; RV32IZFINX-NEXT: feq.s a2, s0, s0
; RV32IZFINX-NEXT: neg a2, a2
; RV32IZFINX-NEXT: lui a5, 524288
-; RV32IZFINX-NEXT: lui a3, 524288
-; RV32IZFINX-NEXT: beqz s1, .LBB17_4
+; RV32IZFINX-NEXT: li a6, 1
+; RV32IZFINX-NEXT: lui a4, 524288
+; RV32IZFINX-NEXT: bne s1, a6, .LBB17_4
; RV32IZFINX-NEXT: # %bb.3:
-; RV32IZFINX-NEXT: mv a3, a1
+; RV32IZFINX-NEXT: mv a4, a1
; RV32IZFINX-NEXT: .LBB17_4:
; RV32IZFINX-NEXT: and a0, a2, a0
-; RV32IZFINX-NEXT: beqz a4, .LBB17_6
+; RV32IZFINX-NEXT: beqz a3, .LBB17_6
; RV32IZFINX-NEXT: # %bb.5:
-; RV32IZFINX-NEXT: addi a3, a5, -1
+; RV32IZFINX-NEXT: addi a4, a5, -1
; RV32IZFINX-NEXT: .LBB17_6:
-; RV32IZFINX-NEXT: and a1, a2, a3
+; RV32IZFINX-NEXT: and a1, a2, a4
; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
@@ -1220,8 +1232,7 @@ define i64 @test_roundeven_ui64(float %x) nounwind {
; RV32IF: # %bb.0:
; RV32IF-NEXT: addi sp, sp, -16
; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IF-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill
; RV32IF-NEXT: fmv.s fs0, fa0
; RV32IF-NEXT: lui a0, 307200
; RV32IF-NEXT: fmv.w.x fa5, a0
@@ -1233,22 +1244,22 @@ define i64 @test_roundeven_ui64(float %x) nounwind {
; RV32IF-NEXT: fcvt.s.w fa5, a0, rne
; RV32IF-NEXT: fsgnj.s fs0, fa5, fs0
; RV32IF-NEXT: .LBB19_2:
-; RV32IF-NEXT: fmv.w.x fa5, zero
-; RV32IF-NEXT: fle.s a0, fa5, fs0
-; RV32IF-NEXT: neg s0, a0
; RV32IF-NEXT: fmv.s fa0, fs0
; RV32IF-NEXT: call __fixunssfdi
-; RV32IF-NEXT: lui a2, %hi(.LCPI19_0)
-; RV32IF-NEXT: flw fa5, %lo(.LCPI19_0)(a2)
-; RV32IF-NEXT: and a0, s0, a0
-; RV32IF-NEXT: flt.s a2, fa5, fs0
-; RV32IF-NEXT: neg a2, a2
-; RV32IF-NEXT: or a0, a2, a0
-; RV32IF-NEXT: and a1, s0, a1
-; RV32IF-NEXT: or a1, a2, a1
+; RV32IF-NEXT: fmv.w.x fa5, zero
+; RV32IF-NEXT: fle.s a2, fa5, fs0
+; RV32IF-NEXT: lui a3, %hi(.LCPI19_0)
+; RV32IF-NEXT: flw fa5, %lo(.LCPI19_0)(a3)
+; RV32IF-NEXT: xori a2, a2, 1
+; RV32IF-NEXT: addi a2, a2, -1
+; RV32IF-NEXT: and a0, a2, a0
+; RV32IF-NEXT: flt.s a3, fa5, fs0
+; RV32IF-NEXT: neg a3, a3
+; RV32IF-NEXT: or a0, a3, a0
+; RV32IF-NEXT: and a1, a2, a1
+; RV32IF-NEXT: or a1, a3, a1
; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IF-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload
; RV32IF-NEXT: addi sp, sp, 16
; RV32IF-NEXT: ret
;
@@ -1266,7 +1277,6 @@ define i64 @test_roundeven_ui64(float %x) nounwind {
; RV32IZFINX-NEXT: addi sp, sp, -16
; RV32IZFINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZFINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32IZFINX-NEXT: mv s0, a0
; RV32IZFINX-NEXT: lui a0, 307200
; RV32IZFINX-NEXT: fabs.s a1, s0
@@ -1277,21 +1287,21 @@ define i64 @test_roundeven_ui64(float %x) nounwind {
; RV32IZFINX-NEXT: fcvt.s.w a0, a0, rne
; RV32IZFINX-NEXT: fsgnj.s s0, a0, s0
; RV32IZFINX-NEXT: .LBB19_2:
-; RV32IZFINX-NEXT: fle.s a0, zero, s0
-; RV32IZFINX-NEXT: neg s1, a0
; RV32IZFINX-NEXT: mv a0, s0
; RV32IZFINX-NEXT: call __fixunssfdi
-; RV32IZFINX-NEXT: lui a2, %hi(.LCPI19_0)
-; RV32IZFINX-NEXT: lw a2, %lo(.LCPI19_0)(a2)
-; RV32IZFINX-NEXT: and a0, s1, a0
-; RV32IZFINX-NEXT: flt.s a2, a2, s0
-; RV32IZFINX-NEXT: neg a2, a2
-; RV32IZFINX-NEXT: or a0, a2, a0
-; RV32IZFINX-NEXT: and a1, s1, a1
-; RV32IZFINX-NEXT: or a1, a2, a1
+; RV32IZFINX-NEXT: fle.s a2, zero, s0
+; RV32IZFINX-NEXT: lui a3, %hi(.LCPI19_0)
+; RV32IZFINX-NEXT: lw a3, %lo(.LCPI19_0)(a3)
+; RV32IZFINX-NEXT: xori a2, a2, 1
+; RV32IZFINX-NEXT: addi a2, a2, -1
+; RV32IZFINX-NEXT: and a0, a2, a0
+; RV32IZFINX-NEXT: flt.s a3, a3, s0
+; RV32IZFINX-NEXT: neg a3, a3
+; RV32IZFINX-NEXT: or a0, a3, a0
+; RV32IZFINX-NEXT: and a1, a2, a1
+; RV32IZFINX-NEXT: or a1, a3, a1
; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32IZFINX-NEXT: addi sp, sp, 16
; RV32IZFINX-NEXT: ret
;
@@ -1337,7 +1347,8 @@ define i64 @test_rint_si64(float %x) nounwind {
; RV32IF-NEXT: addi sp, sp, -16
; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IF-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32IF-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill
; RV32IF-NEXT: fmv.s fs0, fa0
; RV32IF-NEXT: lui a0, 307200
; RV32IF-NEXT: fmv.w.x fa5, a0
@@ -1352,32 +1363,34 @@ define i64 @test_rint_si64(float %x) nounwind {
; RV32IF-NEXT: lui a0, 913408
; RV32IF-NEXT: fmv.w.x fa5, a0
; RV32IF-NEXT: fle.s s0, fa5, fs0
+; RV32IF-NEXT: neg s1, s0
; RV32IF-NEXT: fmv.s fa0, fs0
; RV32IF-NEXT: call __fixsfdi
+; RV32IF-NEXT: lui a2, %hi(.LCPI21_0)
+; RV32IF-NEXT: flw fa5, %lo(.LCPI21_0)(a2)
+; RV32IF-NEXT: and a0, s1, a0
+; RV32IF-NEXT: flt.s a3, fa5, fs0
+; RV32IF-NEXT: neg a2, a3
+; RV32IF-NEXT: or a0, a2, a0
+; RV32IF-NEXT: feq.s a2, fs0, fs0
+; RV32IF-NEXT: neg a2, a2
+; RV32IF-NEXT: lui a5, 524288
+; RV32IF-NEXT: li a6, 1
; RV32IF-NEXT: lui a4, 524288
-; RV32IF-NEXT: lui a2, 524288
-; RV32IF-NEXT: beqz s0, .LBB21_4
+; RV32IF-NEXT: bne s0, a6, .LBB21_4
; RV32IF-NEXT: # %bb.3:
-; RV32IF-NEXT: mv a2, a1
+; RV32IF-NEXT: mv a4, a1
; RV32IF-NEXT: .LBB21_4:
-; RV32IF-NEXT: lui a1, %hi(.LCPI21_0)
-; RV32IF-NEXT: flw fa5, %lo(.LCPI21_0)(a1)
-; RV32IF-NEXT: flt.s a3, fa5, fs0
+; RV32IF-NEXT: and a0, a2, a0
; RV32IF-NEXT: beqz a3, .LBB21_6
; RV32IF-NEXT: # %bb.5:
-; RV32IF-NEXT: addi a2, a4, -1
+; RV32IF-NEXT: addi a4, a5, -1
; RV32IF-NEXT: .LBB21_6:
-; RV32IF-NEXT: feq.s a1, fs0, fs0
-; RV32IF-NEXT: neg a4, a1
-; RV32IF-NEXT: and a1, a4, a2
-; RV32IF-NEXT: neg a2, s0
-; RV32IF-NEXT: and a0, a2, a0
-; RV32IF-NEXT: neg a2, a3
-; RV32IF-NEXT: or a0, a2, a0
-; RV32IF-NEXT: and a0, a4, a0
+; RV32IF-NEXT: and a1, a2, a4
; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IF-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32IF-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload
; RV32IF-NEXT: addi sp, sp, 16
; RV32IF-NEXT: ret
;
@@ -1415,23 +1428,24 @@ define i64 @test_rint_si64(float %x) nounwind {
; RV32IZFINX-NEXT: lui a2, %hi(.LCPI21_0)
; RV32IZFINX-NEXT: lw a2, %lo(.LCPI21_0)(a2)
; RV32IZFINX-NEXT: and a0, s2, a0
-; RV32IZFINX-NEXT: flt.s a4, a2, s0
-; RV32IZFINX-NEXT: neg a2, a4
+; RV32IZFINX-NEXT: flt.s a3, a2, s0
+; RV32IZFINX-NEXT: neg a2, a3
; RV32IZFINX-NEXT: or a0, a2, a0
; RV32IZFINX-NEXT: feq.s a2, s0, s0
; RV32IZFINX-NEXT: neg a2, a2
; RV32IZFINX-NEXT: lui a5, 524288
-; RV32IZFINX-NEXT: lui a3, 524288
-; RV32IZFINX-NEXT: beqz s1, .LBB21_4
+; RV32IZFINX-NEXT: li a6, 1
+; RV32IZFINX-NEXT: lui a4, 524288
+; RV32IZFINX-NEXT: bne s1, a6, .LBB21_4
; RV32IZFINX-NEXT: # %bb.3:
-; RV32IZFINX-NEXT: mv a3, a1
+; RV32IZFINX-NEXT: mv a4, a1
; RV32IZFINX-NEXT: .LBB21_4:
; RV32IZFINX-NEXT: and a0, a2, a0
-; RV32IZFINX-NEXT: beqz a4, .LBB21_6
+; RV32IZFINX-NEXT: beqz a3, .LBB21_6
; RV32IZFINX-NEXT: # %bb.5:
-; RV32IZFINX-NEXT: addi a3, a5, -1
+; RV32IZFINX-NEXT: addi a4, a5, -1
; RV32IZFINX-NEXT: .LBB21_6:
-; RV32IZFINX-NEXT: and a1, a2, a3
+; RV32IZFINX-NEXT: and a1, a2, a4
; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
@@ -1480,8 +1494,7 @@ define i64 @test_rint_ui64(float %x) nounwind {
; RV32IF: # %bb.0:
; RV32IF-NEXT: addi sp, sp, -16
; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IF-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill
; RV32IF-NEXT: fmv.s fs0, fa0
; RV32IF-NEXT: lui a0, 307200
; RV32IF-NEXT: fmv.w.x fa5, a0
@@ -1493,22 +1506,22 @@ define i64 @test_rint_ui64(float %x) nounwind {
; RV32IF-NEXT: fcvt.s.w fa5, a0
; RV32IF-NEXT: fsgnj.s fs0, fa5, fs0
; RV32IF-NEXT: .LBB23_2:
-; RV32IF-NEXT: fmv.w.x fa5, zero
-; RV32IF-NEXT: fle.s a0, fa5, fs0
-; RV32IF-NEXT: neg s0, a0
; RV32IF-NEXT: fmv.s fa0, fs0
; RV32IF-NEXT: call __fixunssfdi
-; RV32IF-NEXT: lui a2, %hi(.LCPI23_0)
-; RV32IF-NEXT: flw fa5, %lo(.LCPI23_0)(a2)
-; RV32IF-NEXT: and a0, s0, a0
-; RV32IF-NEXT: flt.s a2, fa5, fs0
-; RV32IF-NEXT: neg a2, a2
-; RV32IF-NEXT: or a0, a2, a0
-; RV32IF-NEXT: and a1, s0, a1
-; RV32IF-NEXT: or a1, a2, a1
+; RV32IF-NEXT: fmv.w.x fa5, zero
+; RV32IF-NEXT: fle.s a2, fa5, fs0
+; RV32IF-NEXT: lui a3, %hi(.LCPI23_0)
+; RV32IF-NEXT: flw fa5, %lo(.LCPI23_0)(a3)
+; RV32IF-NEXT: xori a2, a2, 1
+; RV32IF-NEXT: addi a2, a2, -1
+; RV32IF-NEXT: and a0, a2, a0
+; RV32IF-NEXT: flt.s a3, fa5, fs0
+; RV32IF-NEXT: neg a3, a3
+; RV32IF-NEXT: or a0, a3, a0
+; RV32IF-NEXT: and a1, a2, a1
+; RV32IF-NEXT: or a1, a3, a1
; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IF-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload
; RV32IF-NEXT: addi sp, sp, 16
; RV32IF-NEXT: ret
;
@@ -1526,7 +1539,6 @@ define i64 @test_rint_ui64(float %x) nounwind {
; RV32IZFINX-NEXT: addi sp, sp, -16
; RV32IZFINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZFINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32IZFINX-NEXT: mv s0, a0
; RV32IZFINX-NEXT: lui a0, 307200
; RV32IZFINX-NEXT: fabs.s a1, s0
@@ -1537,21 +1549,21 @@ define i64 @test_rint_ui64(float %x) nounwind {
; RV32IZFINX-NEXT: fcvt.s.w a0, a0
; RV32IZFINX-NEXT: fsgnj.s s0, a0, s0
; RV32IZFINX-NEXT: .LBB23_2:
-; RV32IZFINX-NEXT: fle.s a0, zero, s0
-; RV32IZFINX-NEXT: neg s1, a0
; RV32IZFINX-NEXT: mv a0, s0
; RV32IZFINX-NEXT: call __fixunssfdi
-; RV32IZFINX-NEXT: lui a2, %hi(.LCPI23_0)
-; RV32IZFINX-NEXT: lw a2, %lo(.LCPI23_0)(a2)
-; RV32IZFINX-NEXT: and a0, s1, a0
-; RV32IZFINX-NEXT: flt.s a2, a2, s0
-; RV32IZFINX-NEXT: neg a2, a2
-; RV32IZFINX-NEXT: or a0, a2, a0
-; RV32IZFINX-NEXT: and a1, s1, a1
-; RV32IZFINX-NEXT: or a1, a2, a1
+; RV32IZFINX-NEXT: fle.s a2, zero, s0
+; RV32IZFINX-NEXT: lui a3, %hi(.LCPI23_0)
+; RV32IZFINX-NEXT: lw a3, %lo(.LCPI23_0)(a3)
+; RV32IZFINX-NEXT: xori a2, a2, 1
+; RV32IZFINX-NEXT: addi a2, a2, -1
+; RV32IZFINX-NEXT: and a0, a2, a0
+; RV32IZFINX-NEXT: flt.s a3, a3, s0
+; RV32IZFINX-NEXT: neg a3, a3
+; RV32IZFINX-NEXT: or a0, a3, a0
+; RV32IZFINX-NEXT: and a1, a2, a1
+; RV32IZFINX-NEXT: or a1, a3, a1
; RV32IZFINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IZFINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32IZFINX-NEXT: addi sp, sp, 16
; RV32IZFINX-NEXT: ret
;
diff --git a/llvm/test/CodeGen/RISCV/forced-atomics.ll b/llvm/test/CodeGen/RISCV/forced-atomics.ll
index f6a53a9d76dd35..659e0748dd5325 100644
--- a/llvm/test/CodeGen/RISCV/forced-atomics.ll
+++ b/llvm/test/CodeGen/RISCV/forced-atomics.ll
@@ -3672,7 +3672,8 @@ define i64 @rmw64_umin_seq_cst(ptr %p) nounwind {
; RV32-NEXT: .LBB52_2: # %atomicrmw.start
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
; RV32-NEXT: sltiu a0, a4, 2
-; RV32-NEXT: seqz a2, a1
+; RV32-NEXT: snez a2, a1
+; RV32-NEXT: addi a2, a2, -1
; RV32-NEXT: and a0, a2, a0
; RV32-NEXT: mv a2, a4
; RV32-NEXT: bnez a0, .LBB52_1
diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat.ll b/llvm/test/CodeGen/RISCV/fpclamptosat.ll
index 9e93ad0043a7e0..630d16e7c888b9 100644
--- a/llvm/test/CodeGen/RISCV/fpclamptosat.ll
+++ b/llvm/test/CodeGen/RISCV/fpclamptosat.ll
@@ -115,7 +115,8 @@ define i32 @utest_f64i32(double %x) {
; RV32IF-NEXT: .cfi_offset ra, -4
; RV32IF-NEXT: call __fixunsdfdi
; RV32IF-NEXT: sltiu a2, a0, -1
-; RV32IF-NEXT: seqz a1, a1
+; RV32IF-NEXT: snez a1, a1
+; RV32IF-NEXT: addi a1, a1, -1
; RV32IF-NEXT: and a1, a1, a2
; RV32IF-NEXT: addi a1, a1, -1
; RV32IF-NEXT: or a0, a1, a0
@@ -430,7 +431,8 @@ define i32 @utesth_f16i32(half %x) {
; RV32-NEXT: call __extendhfsf2
; RV32-NEXT: call __fixunssfdi
; RV32-NEXT: sltiu a2, a0, -1
-; RV32-NEXT: seqz a1, a1
+; RV32-NEXT: snez a1, a1
+; RV32-NEXT: addi a1, a1, -1
; RV32-NEXT: and a1, a1, a2
; RV32-NEXT: addi a1, a1, -1
; RV32-NEXT: or a0, a1, a0
@@ -1043,8 +1045,8 @@ define i64 @stest_f64i64(double %x) {
; RV32IF-NEXT: mv a1, a0
; RV32IF-NEXT: addi a0, sp, 8
; RV32IF-NEXT: call __fixdfti
-; RV32IF-NEXT: lw a0, 20(sp)
-; RV32IF-NEXT: lw a2, 16(sp)
+; RV32IF-NEXT: lw a0, 16(sp)
+; RV32IF-NEXT: lw a2, 20(sp)
; RV32IF-NEXT: lw a1, 12(sp)
; RV32IF-NEXT: lw a4, 8(sp)
; RV32IF-NEXT: lui a3, 524288
@@ -1052,25 +1054,25 @@ define i64 @stest_f64i64(double %x) {
; RV32IF-NEXT: beq a1, a5, .LBB18_2
; RV32IF-NEXT: # %bb.1: # %entry
; RV32IF-NEXT: sltu a6, a1, a5
-; RV32IF-NEXT: or a7, a2, a0
+; RV32IF-NEXT: or a7, a0, a2
; RV32IF-NEXT: bnez a7, .LBB18_3
; RV32IF-NEXT: j .LBB18_4
; RV32IF-NEXT: .LBB18_2:
; RV32IF-NEXT: sltiu a6, a4, -1
-; RV32IF-NEXT: or a7, a2, a0
+; RV32IF-NEXT: or a7, a0, a2
; RV32IF-NEXT: beqz a7, .LBB18_4
; RV32IF-NEXT: .LBB18_3: # %entry
-; RV32IF-NEXT: slti a6, a0, 0
+; RV32IF-NEXT: slti a6, a2, 0
; RV32IF-NEXT: .LBB18_4: # %entry
-; RV32IF-NEXT: neg a7, a6
-; RV32IF-NEXT: addi t0, a6, -1
+; RV32IF-NEXT: addi a7, a6, -1
+; RV32IF-NEXT: neg t0, a6
; RV32IF-NEXT: bnez a6, .LBB18_6
; RV32IF-NEXT: # %bb.5: # %entry
; RV32IF-NEXT: mv a1, a5
; RV32IF-NEXT: .LBB18_6: # %entry
-; RV32IF-NEXT: or a4, t0, a4
-; RV32IF-NEXT: and a5, a7, a0
-; RV32IF-NEXT: and a2, a7, a2
+; RV32IF-NEXT: or a4, a7, a4
+; RV32IF-NEXT: and a2, t0, a2
+; RV32IF-NEXT: and a5, t0, a0
; RV32IF-NEXT: beq a1, a3, .LBB18_8
; RV32IF-NEXT: # %bb.7: # %entry
; RV32IF-NEXT: sltu a0, a3, a1
@@ -1078,11 +1080,11 @@ define i64 @stest_f64i64(double %x) {
; RV32IF-NEXT: .LBB18_8:
; RV32IF-NEXT: snez a0, a4
; RV32IF-NEXT: .LBB18_9: # %entry
-; RV32IF-NEXT: and a2, a2, a5
+; RV32IF-NEXT: and a5, a5, a2
; RV32IF-NEXT: li a3, -1
-; RV32IF-NEXT: beq a2, a3, .LBB18_11
+; RV32IF-NEXT: beq a5, a3, .LBB18_11
; RV32IF-NEXT: # %bb.10: # %entry
-; RV32IF-NEXT: slti a0, a5, 0
+; RV32IF-NEXT: slti a0, a2, 0
; RV32IF-NEXT: xori a0, a0, 1
; RV32IF-NEXT: .LBB18_11: # %entry
; RV32IF-NEXT: bnez a0, .LBB18_13
@@ -1142,8 +1144,8 @@ define i64 @stest_f64i64(double %x) {
; RV32IFD-NEXT: .cfi_offset ra, -4
; RV32IFD-NEXT: addi a0, sp, 8
; RV32IFD-NEXT: call __fixdfti
-; RV32IFD-NEXT: lw a0, 20(sp)
-; RV32IFD-NEXT: lw a2, 16(sp)
+; RV32IFD-NEXT: lw a0, 16(sp)
+; RV32IFD-NEXT: lw a2, 20(sp)
; RV32IFD-NEXT: lw a1, 12(sp)
; RV32IFD-NEXT: lw a4, 8(sp)
; RV32IFD-NEXT: lui a3, 524288
@@ -1151,25 +1153,25 @@ define i64 @stest_f64i64(double %x) {
; RV32IFD-NEXT: beq a1, a5, .LBB18_2
; RV32IFD-NEXT: # %bb.1: # %entry
; RV32IFD-NEXT: sltu a6, a1, a5
-; RV32IFD-NEXT: or a7, a2, a0
+; RV32IFD-NEXT: or a7, a0, a2
; RV32IFD-NEXT: bnez a7, .LBB18_3
; RV32IFD-NEXT: j .LBB18_4
; RV32IFD-NEXT: .LBB18_2:
; RV32IFD-NEXT: sltiu a6, a4, -1
-; RV32IFD-NEXT: or a7, a2, a0
+; RV32IFD-NEXT: or a7, a0, a2
; RV32IFD-NEXT: beqz a7, .LBB18_4
; RV32IFD-NEXT: .LBB18_3: # %entry
-; RV32IFD-NEXT: slti a6, a0, 0
+; RV32IFD-NEXT: slti a6, a2, 0
; RV32IFD-NEXT: .LBB18_4: # %entry
-; RV32IFD-NEXT: neg a7, a6
-; RV32IFD-NEXT: addi t0, a6, -1
+; RV32IFD-NEXT: addi a7, a6, -1
+; RV32IFD-NEXT: neg t0, a6
; RV32IFD-NEXT: bnez a6, .LBB18_6
; RV32IFD-NEXT: # %bb.5: # %entry
; RV32IFD-NEXT: mv a1, a5
; RV32IFD-NEXT: .LBB18_6: # %entry
-; RV32IFD-NEXT: or a4, t0, a4
-; RV32IFD-NEXT: and a5, a7, a0
-; RV32IFD-NEXT: and a2, a7, a2
+; RV32IFD-NEXT: or a4, a7, a4
+; RV32IFD-NEXT: and a2, t0, a2
+; RV32IFD-NEXT: and a5, t0, a0
; RV32IFD-NEXT: beq a1, a3, .LBB18_8
; RV32IFD-NEXT: # %bb.7: # %entry
; RV32IFD-NEXT: sltu a0, a3, a1
@@ -1177,11 +1179,11 @@ define i64 @stest_f64i64(double %x) {
; RV32IFD-NEXT: .LBB18_8:
; RV32IFD-NEXT: snez a0, a4
; RV32IFD-NEXT: .LBB18_9: # %entry
-; RV32IFD-NEXT: and a2, a2, a5
+; RV32IFD-NEXT: and a5, a5, a2
; RV32IFD-NEXT: li a3, -1
-; RV32IFD-NEXT: beq a2, a3, .LBB18_11
+; RV32IFD-NEXT: beq a5, a3, .LBB18_11
; RV32IFD-NEXT: # %bb.10: # %entry
-; RV32IFD-NEXT: slti a0, a5, 0
+; RV32IFD-NEXT: slti a0, a2, 0
; RV32IFD-NEXT: xori a0, a0, 1
; RV32IFD-NEXT: .LBB18_11: # %entry
; RV32IFD-NEXT: bnez a0, .LBB18_13
@@ -1227,8 +1229,10 @@ define i64 @utest_f64i64(double %x) {
; RV32IF-NEXT: lw a1, 20(sp)
; RV32IF-NEXT: lw a2, 12(sp)
; RV32IF-NEXT: lw a3, 8(sp)
-; RV32IF-NEXT: or a4, a1, a0
-; RV32IF-NEXT: seqz a4, a4
+; RV32IF-NEXT: seqz a4, a0
+; RV32IF-NEXT: snez a5, a1
+; RV32IF-NEXT: addi a5, a5, -1
+; RV32IF-NEXT: and a4, a5, a4
; RV32IF-NEXT: xori a0, a0, 1
; RV32IF-NEXT: or a0, a0, a1
; RV32IF-NEXT: seqz a0, a0
@@ -1267,8 +1271,10 @@ define i64 @utest_f64i64(double %x) {
; RV32IFD-NEXT: lw a1, 20(sp)
; RV32IFD-NEXT: lw a2, 12(sp)
; RV32IFD-NEXT: lw a3, 8(sp)
-; RV32IFD-NEXT: or a4, a1, a0
-; RV32IFD-NEXT: seqz a4, a4
+; RV32IFD-NEXT: seqz a4, a0
+; RV32IFD-NEXT: snez a5, a1
+; RV32IFD-NEXT: addi a5, a5, -1
+; RV32IFD-NEXT: and a4, a5, a4
; RV32IFD-NEXT: xori a0, a0, 1
; RV32IFD-NEXT: or a0, a0, a1
; RV32IFD-NEXT: seqz a0, a0
@@ -1318,8 +1324,8 @@ define i64 @ustest_f64i64(double %x) {
; RV32IF-NEXT: # %bb.4: # %entry
; RV32IF-NEXT: li a0, 1
; RV32IF-NEXT: .LBB20_5: # %entry
-; RV32IF-NEXT: lw a3, 8(sp)
-; RV32IF-NEXT: lw a4, 12(sp)
+; RV32IF-NEXT: lw a4, 8(sp)
+; RV32IF-NEXT: lw a3, 12(sp)
; RV32IF-NEXT: and a5, a2, a1
; RV32IF-NEXT: beqz a5, .LBB20_7
; RV32IF-NEXT: # %bb.6: # %entry
@@ -1328,17 +1334,18 @@ define i64 @ustest_f64i64(double %x) {
; RV32IF-NEXT: .LBB20_7:
; RV32IF-NEXT: snez a1, a0
; RV32IF-NEXT: .LBB20_8: # %entry
-; RV32IF-NEXT: and a4, a2, a4
+; RV32IF-NEXT: and a3, a2, a3
; RV32IF-NEXT: or a0, a0, a5
-; RV32IF-NEXT: and a2, a2, a3
+; RV32IF-NEXT: and a2, a2, a4
; RV32IF-NEXT: bnez a0, .LBB20_10
; RV32IF-NEXT: # %bb.9:
-; RV32IF-NEXT: or a0, a2, a4
-; RV32IF-NEXT: snez a1, a0
+; RV32IF-NEXT: snez a0, a3
+; RV32IF-NEXT: snez a1, a2
+; RV32IF-NEXT: or a1, a1, a0
; RV32IF-NEXT: .LBB20_10: # %entry
; RV32IF-NEXT: neg a1, a1
; RV32IF-NEXT: and a0, a1, a2
-; RV32IF-NEXT: and a1, a1, a4
+; RV32IF-NEXT: and a1, a1, a3
; RV32IF-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IF-NEXT: addi sp, sp, 32
; RV32IF-NEXT: ret
@@ -1397,8 +1404,8 @@ define i64 @ustest_f64i64(double %x) {
; RV32IFD-NEXT: # %bb.4: # %entry
; RV32IFD-NEXT: li a0, 1
; RV32IFD-NEXT: .LBB20_5: # %entry
-; RV32IFD-NEXT: lw a3, 8(sp)
-; RV32IFD-NEXT: lw a4, 12(sp)
+; RV32IFD-NEXT: lw a4, 8(sp)
+; RV32IFD-NEXT: lw a3, 12(sp)
; RV32IFD-NEXT: and a5, a2, a1
; RV32IFD-NEXT: beqz a5, .LBB20_7
; RV32IFD-NEXT: # %bb.6: # %entry
@@ -1407,17 +1414,18 @@ define i64 @ustest_f64i64(double %x) {
; RV32IFD-NEXT: .LBB20_7:
; RV32IFD-NEXT: snez a1, a0
; RV32IFD-NEXT: .LBB20_8: # %entry
-; RV32IFD-NEXT: and a4, a2, a4
+; RV32IFD-NEXT: and a3, a2, a3
; RV32IFD-NEXT: or a0, a0, a5
-; RV32IFD-NEXT: and a2, a2, a3
+; RV32IFD-NEXT: and a2, a2, a4
; RV32IFD-NEXT: bnez a0, .LBB20_10
; RV32IFD-NEXT: # %bb.9:
-; RV32IFD-NEXT: or a0, a2, a4
-; RV32IFD-NEXT: snez a1, a0
+; RV32IFD-NEXT: snez a0, a3
+; RV32IFD-NEXT: snez a1, a2
+; RV32IFD-NEXT: or a1, a1, a0
; RV32IFD-NEXT: .LBB20_10: # %entry
; RV32IFD-NEXT: neg a1, a1
; RV32IFD-NEXT: and a0, a1, a2
-; RV32IFD-NEXT: and a1, a1, a4
+; RV32IFD-NEXT: and a1, a1, a3
; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: addi sp, sp, 32
; RV32IFD-NEXT: ret
@@ -1440,8 +1448,8 @@ define i64 @stest_f32i64(float %x) {
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: call __fixsfti
-; RV32-NEXT: lw a0, 20(sp)
-; RV32-NEXT: lw a2, 16(sp)
+; RV32-NEXT: lw a0, 16(sp)
+; RV32-NEXT: lw a2, 20(sp)
; RV32-NEXT: lw a1, 12(sp)
; RV32-NEXT: lw a4, 8(sp)
; RV32-NEXT: lui a3, 524288
@@ -1449,25 +1457,25 @@ define i64 @stest_f32i64(float %x) {
; RV32-NEXT: beq a1, a5, .LBB21_2
; RV32-NEXT: # %bb.1: # %entry
; RV32-NEXT: sltu a6, a1, a5
-; RV32-NEXT: or a7, a2, a0
+; RV32-NEXT: or a7, a0, a2
; RV32-NEXT: bnez a7, .LBB21_3
; RV32-NEXT: j .LBB21_4
; RV32-NEXT: .LBB21_2:
; RV32-NEXT: sltiu a6, a4, -1
-; RV32-NEXT: or a7, a2, a0
+; RV32-NEXT: or a7, a0, a2
; RV32-NEXT: beqz a7, .LBB21_4
; RV32-NEXT: .LBB21_3: # %entry
-; RV32-NEXT: slti a6, a0, 0
+; RV32-NEXT: slti a6, a2, 0
; RV32-NEXT: .LBB21_4: # %entry
-; RV32-NEXT: neg a7, a6
-; RV32-NEXT: addi t0, a6, -1
+; RV32-NEXT: addi a7, a6, -1
+; RV32-NEXT: neg t0, a6
; RV32-NEXT: bnez a6, .LBB21_6
; RV32-NEXT: # %bb.5: # %entry
; RV32-NEXT: mv a1, a5
; RV32-NEXT: .LBB21_6: # %entry
-; RV32-NEXT: or a4, t0, a4
-; RV32-NEXT: and a5, a7, a0
-; RV32-NEXT: and a2, a7, a2
+; RV32-NEXT: or a4, a7, a4
+; RV32-NEXT: and a2, t0, a2
+; RV32-NEXT: and a5, t0, a0
; RV32-NEXT: beq a1, a3, .LBB21_8
; RV32-NEXT: # %bb.7: # %entry
; RV32-NEXT: sltu a0, a3, a1
@@ -1475,11 +1483,11 @@ define i64 @stest_f32i64(float %x) {
; RV32-NEXT: .LBB21_8:
; RV32-NEXT: snez a0, a4
; RV32-NEXT: .LBB21_9: # %entry
-; RV32-NEXT: and a2, a2, a5
+; RV32-NEXT: and a5, a5, a2
; RV32-NEXT: li a3, -1
-; RV32-NEXT: beq a2, a3, .LBB21_11
+; RV32-NEXT: beq a5, a3, .LBB21_11
; RV32-NEXT: # %bb.10: # %entry
-; RV32-NEXT: slti a0, a5, 0
+; RV32-NEXT: slti a0, a2, 0
; RV32-NEXT: xori a0, a0, 1
; RV32-NEXT: .LBB21_11: # %entry
; RV32-NEXT: bnez a0, .LBB21_13
@@ -1523,8 +1531,10 @@ define i64 @utest_f32i64(float %x) {
; RV32-NEXT: lw a1, 20(sp)
; RV32-NEXT: lw a2, 12(sp)
; RV32-NEXT: lw a3, 8(sp)
-; RV32-NEXT: or a4, a1, a0
-; RV32-NEXT: seqz a4, a4
+; RV32-NEXT: seqz a4, a0
+; RV32-NEXT: snez a5, a1
+; RV32-NEXT: addi a5, a5, -1
+; RV32-NEXT: and a4, a5, a4
; RV32-NEXT: xori a0, a0, 1
; RV32-NEXT: or a0, a0, a1
; RV32-NEXT: seqz a0, a0
@@ -1586,8 +1596,8 @@ define i64 @ustest_f32i64(float %x) {
; RV32-NEXT: # %bb.4: # %entry
; RV32-NEXT: li a0, 1
; RV32-NEXT: .LBB23_5: # %entry
-; RV32-NEXT: lw a3, 8(sp)
-; RV32-NEXT: lw a4, 12(sp)
+; RV32-NEXT: lw a4, 8(sp)
+; RV32-NEXT: lw a3, 12(sp)
; RV32-NEXT: and a5, a2, a1
; RV32-NEXT: beqz a5, .LBB23_7
; RV32-NEXT: # %bb.6: # %entry
@@ -1596,17 +1606,18 @@ define i64 @ustest_f32i64(float %x) {
; RV32-NEXT: .LBB23_7:
; RV32-NEXT: snez a1, a0
; RV32-NEXT: .LBB23_8: # %entry
-; RV32-NEXT: and a4, a2, a4
+; RV32-NEXT: and a3, a2, a3
; RV32-NEXT: or a0, a0, a5
-; RV32-NEXT: and a2, a2, a3
+; RV32-NEXT: and a2, a2, a4
; RV32-NEXT: bnez a0, .LBB23_10
; RV32-NEXT: # %bb.9:
-; RV32-NEXT: or a0, a2, a4
-; RV32-NEXT: snez a1, a0
+; RV32-NEXT: snez a0, a3
+; RV32-NEXT: snez a1, a2
+; RV32-NEXT: or a1, a1, a0
; RV32-NEXT: .LBB23_10: # %entry
; RV32-NEXT: neg a1, a1
; RV32-NEXT: and a0, a1, a2
-; RV32-NEXT: and a1, a1, a4
+; RV32-NEXT: and a1, a1, a3
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
@@ -1657,8 +1668,8 @@ define i64 @stest_f16i64(half %x) {
; RV32-NEXT: call __extendhfsf2
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: call __fixsfti
-; RV32-NEXT: lw a0, 20(sp)
-; RV32-NEXT: lw a2, 16(sp)
+; RV32-NEXT: lw a0, 16(sp)
+; RV32-NEXT: lw a2, 20(sp)
; RV32-NEXT: lw a1, 12(sp)
; RV32-NEXT: lw a4, 8(sp)
; RV32-NEXT: lui a3, 524288
@@ -1666,25 +1677,25 @@ define i64 @stest_f16i64(half %x) {
; RV32-NEXT: beq a1, a5, .LBB24_2
; RV32-NEXT: # %bb.1: # %entry
; RV32-NEXT: sltu a6, a1, a5
-; RV32-NEXT: or a7, a2, a0
+; RV32-NEXT: or a7, a0, a2
; RV32-NEXT: bnez a7, .LBB24_3
; RV32-NEXT: j .LBB24_4
; RV32-NEXT: .LBB24_2:
; RV32-NEXT: sltiu a6, a4, -1
-; RV32-NEXT: or a7, a2, a0
+; RV32-NEXT: or a7, a0, a2
; RV32-NEXT: beqz a7, .LBB24_4
; RV32-NEXT: .LBB24_3: # %entry
-; RV32-NEXT: slti a6, a0, 0
+; RV32-NEXT: slti a6, a2, 0
; RV32-NEXT: .LBB24_4: # %entry
-; RV32-NEXT: neg a7, a6
-; RV32-NEXT: addi t0, a6, -1
+; RV32-NEXT: addi a7, a6, -1
+; RV32-NEXT: neg t0, a6
; RV32-NEXT: bnez a6, .LBB24_6
; RV32-NEXT: # %bb.5: # %entry
; RV32-NEXT: mv a1, a5
; RV32-NEXT: .LBB24_6: # %entry
-; RV32-NEXT: or a4, t0, a4
-; RV32-NEXT: and a5, a7, a0
-; RV32-NEXT: and a2, a7, a2
+; RV32-NEXT: or a4, a7, a4
+; RV32-NEXT: and a2, t0, a2
+; RV32-NEXT: and a5, t0, a0
; RV32-NEXT: beq a1, a3, .LBB24_8
; RV32-NEXT: # %bb.7: # %entry
; RV32-NEXT: sltu a0, a3, a1
@@ -1692,11 +1703,11 @@ define i64 @stest_f16i64(half %x) {
; RV32-NEXT: .LBB24_8:
; RV32-NEXT: snez a0, a4
; RV32-NEXT: .LBB24_9: # %entry
-; RV32-NEXT: and a2, a2, a5
+; RV32-NEXT: and a5, a5, a2
; RV32-NEXT: li a3, -1
-; RV32-NEXT: beq a2, a3, .LBB24_11
+; RV32-NEXT: beq a5, a3, .LBB24_11
; RV32-NEXT: # %bb.10: # %entry
-; RV32-NEXT: slti a0, a5, 0
+; RV32-NEXT: slti a0, a2, 0
; RV32-NEXT: xori a0, a0, 1
; RV32-NEXT: .LBB24_11: # %entry
; RV32-NEXT: bnez a0, .LBB24_13
@@ -1772,8 +1783,10 @@ define i64 @utesth_f16i64(half %x) {
; RV32-NEXT: lw a1, 20(sp)
; RV32-NEXT: lw a2, 12(sp)
; RV32-NEXT: lw a3, 8(sp)
-; RV32-NEXT: or a4, a1, a0
-; RV32-NEXT: seqz a4, a4
+; RV32-NEXT: seqz a4, a0
+; RV32-NEXT: snez a5, a1
+; RV32-NEXT: addi a5, a5, -1
+; RV32-NEXT: and a4, a5, a4
; RV32-NEXT: xori a0, a0, 1
; RV32-NEXT: or a0, a0, a1
; RV32-NEXT: seqz a0, a0
@@ -1837,8 +1850,8 @@ define i64 @ustest_f16i64(half %x) {
; RV32-NEXT: # %bb.4: # %entry
; RV32-NEXT: li a0, 1
; RV32-NEXT: .LBB26_5: # %entry
-; RV32-NEXT: lw a3, 8(sp)
-; RV32-NEXT: lw a4, 12(sp)
+; RV32-NEXT: lw a4, 8(sp)
+; RV32-NEXT: lw a3, 12(sp)
; RV32-NEXT: and a5, a2, a1
; RV32-NEXT: beqz a5, .LBB26_7
; RV32-NEXT: # %bb.6: # %entry
@@ -1847,17 +1860,18 @@ define i64 @ustest_f16i64(half %x) {
; RV32-NEXT: .LBB26_7:
; RV32-NEXT: snez a1, a0
; RV32-NEXT: .LBB26_8: # %entry
-; RV32-NEXT: and a4, a2, a4
+; RV32-NEXT: and a3, a2, a3
; RV32-NEXT: or a0, a0, a5
-; RV32-NEXT: and a2, a2, a3
+; RV32-NEXT: and a2, a2, a4
; RV32-NEXT: bnez a0, .LBB26_10
; RV32-NEXT: # %bb.9:
-; RV32-NEXT: or a0, a2, a4
-; RV32-NEXT: snez a1, a0
+; RV32-NEXT: snez a0, a3
+; RV32-NEXT: snez a1, a2
+; RV32-NEXT: or a1, a1, a0
; RV32-NEXT: .LBB26_10: # %entry
; RV32-NEXT: neg a1, a1
; RV32-NEXT: and a0, a1, a2
-; RV32-NEXT: and a1, a1, a4
+; RV32-NEXT: and a1, a1, a3
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
@@ -2891,8 +2905,8 @@ define i64 @stest_f64i64_mm(double %x) {
; RV32IF-NEXT: mv a1, a0
; RV32IF-NEXT: addi a0, sp, 8
; RV32IF-NEXT: call __fixdfti
-; RV32IF-NEXT: lw a0, 20(sp)
-; RV32IF-NEXT: lw a2, 16(sp)
+; RV32IF-NEXT: lw a0, 16(sp)
+; RV32IF-NEXT: lw a2, 20(sp)
; RV32IF-NEXT: lw a1, 12(sp)
; RV32IF-NEXT: lw a4, 8(sp)
; RV32IF-NEXT: lui a3, 524288
@@ -2900,25 +2914,25 @@ define i64 @stest_f64i64_mm(double %x) {
; RV32IF-NEXT: beq a1, a5, .LBB45_2
; RV32IF-NEXT: # %bb.1: # %entry
; RV32IF-NEXT: sltu a6, a1, a5
-; RV32IF-NEXT: or a7, a2, a0
+; RV32IF-NEXT: or a7, a0, a2
; RV32IF-NEXT: bnez a7, .LBB45_3
; RV32IF-NEXT: j .LBB45_4
; RV32IF-NEXT: .LBB45_2:
; RV32IF-NEXT: sltiu a6, a4, -1
-; RV32IF-NEXT: or a7, a2, a0
+; RV32IF-NEXT: or a7, a0, a2
; RV32IF-NEXT: beqz a7, .LBB45_4
; RV32IF-NEXT: .LBB45_3: # %entry
-; RV32IF-NEXT: slti a6, a0, 0
+; RV32IF-NEXT: slti a6, a2, 0
; RV32IF-NEXT: .LBB45_4: # %entry
-; RV32IF-NEXT: neg a7, a6
-; RV32IF-NEXT: addi t0, a6, -1
+; RV32IF-NEXT: addi a7, a6, -1
+; RV32IF-NEXT: neg t0, a6
; RV32IF-NEXT: bnez a6, .LBB45_6
; RV32IF-NEXT: # %bb.5: # %entry
; RV32IF-NEXT: mv a1, a5
; RV32IF-NEXT: .LBB45_6: # %entry
-; RV32IF-NEXT: or a4, t0, a4
-; RV32IF-NEXT: and a5, a7, a0
-; RV32IF-NEXT: and a2, a7, a2
+; RV32IF-NEXT: or a4, a7, a4
+; RV32IF-NEXT: and a2, t0, a2
+; RV32IF-NEXT: and a5, t0, a0
; RV32IF-NEXT: beq a1, a3, .LBB45_8
; RV32IF-NEXT: # %bb.7: # %entry
; RV32IF-NEXT: sltu a0, a3, a1
@@ -2926,11 +2940,11 @@ define i64 @stest_f64i64_mm(double %x) {
; RV32IF-NEXT: .LBB45_8:
; RV32IF-NEXT: snez a0, a4
; RV32IF-NEXT: .LBB45_9: # %entry
-; RV32IF-NEXT: and a2, a2, a5
+; RV32IF-NEXT: and a5, a5, a2
; RV32IF-NEXT: li a3, -1
-; RV32IF-NEXT: beq a2, a3, .LBB45_11
+; RV32IF-NEXT: beq a5, a3, .LBB45_11
; RV32IF-NEXT: # %bb.10: # %entry
-; RV32IF-NEXT: slti a0, a5, 0
+; RV32IF-NEXT: slti a0, a2, 0
; RV32IF-NEXT: xori a0, a0, 1
; RV32IF-NEXT: .LBB45_11: # %entry
; RV32IF-NEXT: bnez a0, .LBB45_13
@@ -2990,8 +3004,8 @@ define i64 @stest_f64i64_mm(double %x) {
; RV32IFD-NEXT: .cfi_offset ra, -4
; RV32IFD-NEXT: addi a0, sp, 8
; RV32IFD-NEXT: call __fixdfti
-; RV32IFD-NEXT: lw a0, 20(sp)
-; RV32IFD-NEXT: lw a2, 16(sp)
+; RV32IFD-NEXT: lw a0, 16(sp)
+; RV32IFD-NEXT: lw a2, 20(sp)
; RV32IFD-NEXT: lw a1, 12(sp)
; RV32IFD-NEXT: lw a4, 8(sp)
; RV32IFD-NEXT: lui a3, 524288
@@ -2999,25 +3013,25 @@ define i64 @stest_f64i64_mm(double %x) {
; RV32IFD-NEXT: beq a1, a5, .LBB45_2
; RV32IFD-NEXT: # %bb.1: # %entry
; RV32IFD-NEXT: sltu a6, a1, a5
-; RV32IFD-NEXT: or a7, a2, a0
+; RV32IFD-NEXT: or a7, a0, a2
; RV32IFD-NEXT: bnez a7, .LBB45_3
; RV32IFD-NEXT: j .LBB45_4
; RV32IFD-NEXT: .LBB45_2:
; RV32IFD-NEXT: sltiu a6, a4, -1
-; RV32IFD-NEXT: or a7, a2, a0
+; RV32IFD-NEXT: or a7, a0, a2
; RV32IFD-NEXT: beqz a7, .LBB45_4
; RV32IFD-NEXT: .LBB45_3: # %entry
-; RV32IFD-NEXT: slti a6, a0, 0
+; RV32IFD-NEXT: slti a6, a2, 0
; RV32IFD-NEXT: .LBB45_4: # %entry
-; RV32IFD-NEXT: neg a7, a6
-; RV32IFD-NEXT: addi t0, a6, -1
+; RV32IFD-NEXT: addi a7, a6, -1
+; RV32IFD-NEXT: neg t0, a6
; RV32IFD-NEXT: bnez a6, .LBB45_6
; RV32IFD-NEXT: # %bb.5: # %entry
; RV32IFD-NEXT: mv a1, a5
; RV32IFD-NEXT: .LBB45_6: # %entry
-; RV32IFD-NEXT: or a4, t0, a4
-; RV32IFD-NEXT: and a5, a7, a0
-; RV32IFD-NEXT: and a2, a7, a2
+; RV32IFD-NEXT: or a4, a7, a4
+; RV32IFD-NEXT: and a2, t0, a2
+; RV32IFD-NEXT: and a5, t0, a0
; RV32IFD-NEXT: beq a1, a3, .LBB45_8
; RV32IFD-NEXT: # %bb.7: # %entry
; RV32IFD-NEXT: sltu a0, a3, a1
@@ -3025,11 +3039,11 @@ define i64 @stest_f64i64_mm(double %x) {
; RV32IFD-NEXT: .LBB45_8:
; RV32IFD-NEXT: snez a0, a4
; RV32IFD-NEXT: .LBB45_9: # %entry
-; RV32IFD-NEXT: and a2, a2, a5
+; RV32IFD-NEXT: and a5, a5, a2
; RV32IFD-NEXT: li a3, -1
-; RV32IFD-NEXT: beq a2, a3, .LBB45_11
+; RV32IFD-NEXT: beq a5, a3, .LBB45_11
; RV32IFD-NEXT: # %bb.10: # %entry
-; RV32IFD-NEXT: slti a0, a5, 0
+; RV32IFD-NEXT: slti a0, a2, 0
; RV32IFD-NEXT: xori a0, a0, 1
; RV32IFD-NEXT: .LBB45_11: # %entry
; RV32IFD-NEXT: bnez a0, .LBB45_13
@@ -3073,8 +3087,10 @@ define i64 @utest_f64i64_mm(double %x) {
; RV32IF-NEXT: lw a1, 20(sp)
; RV32IF-NEXT: lw a2, 12(sp)
; RV32IF-NEXT: lw a3, 8(sp)
-; RV32IF-NEXT: or a4, a1, a0
-; RV32IF-NEXT: seqz a4, a4
+; RV32IF-NEXT: seqz a4, a0
+; RV32IF-NEXT: snez a5, a1
+; RV32IF-NEXT: addi a5, a5, -1
+; RV32IF-NEXT: and a4, a5, a4
; RV32IF-NEXT: xori a0, a0, 1
; RV32IF-NEXT: or a0, a0, a1
; RV32IF-NEXT: seqz a0, a0
@@ -3113,8 +3129,10 @@ define i64 @utest_f64i64_mm(double %x) {
; RV32IFD-NEXT: lw a1, 20(sp)
; RV32IFD-NEXT: lw a2, 12(sp)
; RV32IFD-NEXT: lw a3, 8(sp)
-; RV32IFD-NEXT: or a4, a1, a0
-; RV32IFD-NEXT: seqz a4, a4
+; RV32IFD-NEXT: seqz a4, a0
+; RV32IFD-NEXT: snez a5, a1
+; RV32IFD-NEXT: addi a5, a5, -1
+; RV32IFD-NEXT: and a4, a5, a4
; RV32IFD-NEXT: xori a0, a0, 1
; RV32IFD-NEXT: or a0, a0, a1
; RV32IFD-NEXT: seqz a0, a0
@@ -3144,30 +3162,30 @@ define i64 @ustest_f64i64_mm(double %x) {
; RV32IF-NEXT: mv a1, a0
; RV32IF-NEXT: addi a0, sp, 8
; RV32IF-NEXT: call __fixdfti
-; RV32IF-NEXT: lw a0, 8(sp)
-; RV32IF-NEXT: lw a1, 12(sp)
-; RV32IF-NEXT: lw a2, 20(sp)
+; RV32IF-NEXT: lw a0, 20(sp)
+; RV32IF-NEXT: lw a1, 8(sp)
+; RV32IF-NEXT: lw a2, 12(sp)
; RV32IF-NEXT: lw a3, 16(sp)
-; RV32IF-NEXT: beqz a2, .LBB47_2
+; RV32IF-NEXT: beqz a0, .LBB47_2
; RV32IF-NEXT: # %bb.1: # %entry
-; RV32IF-NEXT: slti a4, a2, 0
+; RV32IF-NEXT: slti a4, a0, 0
; RV32IF-NEXT: j .LBB47_3
; RV32IF-NEXT: .LBB47_2:
; RV32IF-NEXT: seqz a4, a3
; RV32IF-NEXT: .LBB47_3: # %entry
; RV32IF-NEXT: xori a3, a3, 1
-; RV32IF-NEXT: or a3, a3, a2
+; RV32IF-NEXT: or a3, a3, a0
; RV32IF-NEXT: seqz a3, a3
; RV32IF-NEXT: addi a3, a3, -1
; RV32IF-NEXT: and a3, a3, a4
; RV32IF-NEXT: neg a3, a3
+; RV32IF-NEXT: and a2, a3, a2
; RV32IF-NEXT: and a1, a3, a1
; RV32IF-NEXT: and a0, a3, a0
-; RV32IF-NEXT: and a2, a3, a2
-; RV32IF-NEXT: slti a2, a2, 0
-; RV32IF-NEXT: addi a2, a2, -1
-; RV32IF-NEXT: and a0, a2, a0
-; RV32IF-NEXT: and a1, a2, a1
+; RV32IF-NEXT: slti a0, a0, 0
+; RV32IF-NEXT: addi a3, a0, -1
+; RV32IF-NEXT: and a0, a3, a1
+; RV32IF-NEXT: and a1, a3, a2
; RV32IF-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IF-NEXT: addi sp, sp, 32
; RV32IF-NEXT: ret
@@ -3202,30 +3220,30 @@ define i64 @ustest_f64i64_mm(double %x) {
; RV32IFD-NEXT: .cfi_offset ra, -4
; RV32IFD-NEXT: addi a0, sp, 8
; RV32IFD-NEXT: call __fixdfti
-; RV32IFD-NEXT: lw a0, 8(sp)
-; RV32IFD-NEXT: lw a1, 12(sp)
-; RV32IFD-NEXT: lw a2, 20(sp)
+; RV32IFD-NEXT: lw a0, 20(sp)
+; RV32IFD-NEXT: lw a1, 8(sp)
+; RV32IFD-NEXT: lw a2, 12(sp)
; RV32IFD-NEXT: lw a3, 16(sp)
-; RV32IFD-NEXT: beqz a2, .LBB47_2
+; RV32IFD-NEXT: beqz a0, .LBB47_2
; RV32IFD-NEXT: # %bb.1: # %entry
-; RV32IFD-NEXT: slti a4, a2, 0
+; RV32IFD-NEXT: slti a4, a0, 0
; RV32IFD-NEXT: j .LBB47_3
; RV32IFD-NEXT: .LBB47_2:
; RV32IFD-NEXT: seqz a4, a3
; RV32IFD-NEXT: .LBB47_3: # %entry
; RV32IFD-NEXT: xori a3, a3, 1
-; RV32IFD-NEXT: or a3, a3, a2
+; RV32IFD-NEXT: or a3, a3, a0
; RV32IFD-NEXT: seqz a3, a3
; RV32IFD-NEXT: addi a3, a3, -1
; RV32IFD-NEXT: and a3, a3, a4
; RV32IFD-NEXT: neg a3, a3
+; RV32IFD-NEXT: and a2, a3, a2
; RV32IFD-NEXT: and a1, a3, a1
; RV32IFD-NEXT: and a0, a3, a0
-; RV32IFD-NEXT: and a2, a3, a2
-; RV32IFD-NEXT: slti a2, a2, 0
-; RV32IFD-NEXT: addi a2, a2, -1
-; RV32IFD-NEXT: and a0, a2, a0
-; RV32IFD-NEXT: and a1, a2, a1
+; RV32IFD-NEXT: slti a0, a0, 0
+; RV32IFD-NEXT: addi a3, a0, -1
+; RV32IFD-NEXT: and a0, a3, a1
+; RV32IFD-NEXT: and a1, a3, a2
; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: addi sp, sp, 32
; RV32IFD-NEXT: ret
@@ -3246,8 +3264,8 @@ define i64 @stest_f32i64_mm(float %x) {
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: call __fixsfti
-; RV32-NEXT: lw a0, 20(sp)
-; RV32-NEXT: lw a2, 16(sp)
+; RV32-NEXT: lw a0, 16(sp)
+; RV32-NEXT: lw a2, 20(sp)
; RV32-NEXT: lw a1, 12(sp)
; RV32-NEXT: lw a4, 8(sp)
; RV32-NEXT: lui a3, 524288
@@ -3255,25 +3273,25 @@ define i64 @stest_f32i64_mm(float %x) {
; RV32-NEXT: beq a1, a5, .LBB48_2
; RV32-NEXT: # %bb.1: # %entry
; RV32-NEXT: sltu a6, a1, a5
-; RV32-NEXT: or a7, a2, a0
+; RV32-NEXT: or a7, a0, a2
; RV32-NEXT: bnez a7, .LBB48_3
; RV32-NEXT: j .LBB48_4
; RV32-NEXT: .LBB48_2:
; RV32-NEXT: sltiu a6, a4, -1
-; RV32-NEXT: or a7, a2, a0
+; RV32-NEXT: or a7, a0, a2
; RV32-NEXT: beqz a7, .LBB48_4
; RV32-NEXT: .LBB48_3: # %entry
-; RV32-NEXT: slti a6, a0, 0
+; RV32-NEXT: slti a6, a2, 0
; RV32-NEXT: .LBB48_4: # %entry
-; RV32-NEXT: neg a7, a6
-; RV32-NEXT: addi t0, a6, -1
+; RV32-NEXT: addi a7, a6, -1
+; RV32-NEXT: neg t0, a6
; RV32-NEXT: bnez a6, .LBB48_6
; RV32-NEXT: # %bb.5: # %entry
; RV32-NEXT: mv a1, a5
; RV32-NEXT: .LBB48_6: # %entry
-; RV32-NEXT: or a4, t0, a4
-; RV32-NEXT: and a5, a7, a0
-; RV32-NEXT: and a2, a7, a2
+; RV32-NEXT: or a4, a7, a4
+; RV32-NEXT: and a2, t0, a2
+; RV32-NEXT: and a5, t0, a0
; RV32-NEXT: beq a1, a3, .LBB48_8
; RV32-NEXT: # %bb.7: # %entry
; RV32-NEXT: sltu a0, a3, a1
@@ -3281,11 +3299,11 @@ define i64 @stest_f32i64_mm(float %x) {
; RV32-NEXT: .LBB48_8:
; RV32-NEXT: snez a0, a4
; RV32-NEXT: .LBB48_9: # %entry
-; RV32-NEXT: and a2, a2, a5
+; RV32-NEXT: and a5, a5, a2
; RV32-NEXT: li a3, -1
-; RV32-NEXT: beq a2, a3, .LBB48_11
+; RV32-NEXT: beq a5, a3, .LBB48_11
; RV32-NEXT: # %bb.10: # %entry
-; RV32-NEXT: slti a0, a5, 0
+; RV32-NEXT: slti a0, a2, 0
; RV32-NEXT: xori a0, a0, 1
; RV32-NEXT: .LBB48_11: # %entry
; RV32-NEXT: bnez a0, .LBB48_13
@@ -3327,8 +3345,10 @@ define i64 @utest_f32i64_mm(float %x) {
; RV32-NEXT: lw a1, 20(sp)
; RV32-NEXT: lw a2, 12(sp)
; RV32-NEXT: lw a3, 8(sp)
-; RV32-NEXT: or a4, a1, a0
-; RV32-NEXT: seqz a4, a4
+; RV32-NEXT: seqz a4, a0
+; RV32-NEXT: snez a5, a1
+; RV32-NEXT: addi a5, a5, -1
+; RV32-NEXT: and a4, a5, a4
; RV32-NEXT: xori a0, a0, 1
; RV32-NEXT: or a0, a0, a1
; RV32-NEXT: seqz a0, a0
@@ -3370,30 +3390,30 @@ define i64 @ustest_f32i64_mm(float %x) {
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: call __fixsfti
-; RV32-NEXT: lw a0, 8(sp)
-; RV32-NEXT: lw a1, 12(sp)
-; RV32-NEXT: lw a2, 20(sp)
+; RV32-NEXT: lw a0, 20(sp)
+; RV32-NEXT: lw a1, 8(sp)
+; RV32-NEXT: lw a2, 12(sp)
; RV32-NEXT: lw a3, 16(sp)
-; RV32-NEXT: beqz a2, .LBB50_2
+; RV32-NEXT: beqz a0, .LBB50_2
; RV32-NEXT: # %bb.1: # %entry
-; RV32-NEXT: slti a4, a2, 0
+; RV32-NEXT: slti a4, a0, 0
; RV32-NEXT: j .LBB50_3
; RV32-NEXT: .LBB50_2:
; RV32-NEXT: seqz a4, a3
; RV32-NEXT: .LBB50_3: # %entry
; RV32-NEXT: xori a3, a3, 1
-; RV32-NEXT: or a3, a3, a2
+; RV32-NEXT: or a3, a3, a0
; RV32-NEXT: seqz a3, a3
; RV32-NEXT: addi a3, a3, -1
; RV32-NEXT: and a3, a3, a4
; RV32-NEXT: neg a3, a3
+; RV32-NEXT: and a2, a3, a2
; RV32-NEXT: and a1, a3, a1
; RV32-NEXT: and a0, a3, a0
-; RV32-NEXT: and a2, a3, a2
-; RV32-NEXT: slti a2, a2, 0
-; RV32-NEXT: addi a2, a2, -1
-; RV32-NEXT: and a0, a2, a0
-; RV32-NEXT: and a1, a2, a1
+; RV32-NEXT: slti a0, a0, 0
+; RV32-NEXT: addi a3, a0, -1
+; RV32-NEXT: and a0, a3, a1
+; RV32-NEXT: and a1, a3, a2
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
@@ -3437,8 +3457,8 @@ define i64 @stest_f16i64_mm(half %x) {
; RV32-NEXT: call __extendhfsf2
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: call __fixsfti
-; RV32-NEXT: lw a0, 20(sp)
-; RV32-NEXT: lw a2, 16(sp)
+; RV32-NEXT: lw a0, 16(sp)
+; RV32-NEXT: lw a2, 20(sp)
; RV32-NEXT: lw a1, 12(sp)
; RV32-NEXT: lw a4, 8(sp)
; RV32-NEXT: lui a3, 524288
@@ -3446,25 +3466,25 @@ define i64 @stest_f16i64_mm(half %x) {
; RV32-NEXT: beq a1, a5, .LBB51_2
; RV32-NEXT: # %bb.1: # %entry
; RV32-NEXT: sltu a6, a1, a5
-; RV32-NEXT: or a7, a2, a0
+; RV32-NEXT: or a7, a0, a2
; RV32-NEXT: bnez a7, .LBB51_3
; RV32-NEXT: j .LBB51_4
; RV32-NEXT: .LBB51_2:
; RV32-NEXT: sltiu a6, a4, -1
-; RV32-NEXT: or a7, a2, a0
+; RV32-NEXT: or a7, a0, a2
; RV32-NEXT: beqz a7, .LBB51_4
; RV32-NEXT: .LBB51_3: # %entry
-; RV32-NEXT: slti a6, a0, 0
+; RV32-NEXT: slti a6, a2, 0
; RV32-NEXT: .LBB51_4: # %entry
-; RV32-NEXT: neg a7, a6
-; RV32-NEXT: addi t0, a6, -1
+; RV32-NEXT: addi a7, a6, -1
+; RV32-NEXT: neg t0, a6
; RV32-NEXT: bnez a6, .LBB51_6
; RV32-NEXT: # %bb.5: # %entry
; RV32-NEXT: mv a1, a5
; RV32-NEXT: .LBB51_6: # %entry
-; RV32-NEXT: or a4, t0, a4
-; RV32-NEXT: and a5, a7, a0
-; RV32-NEXT: and a2, a7, a2
+; RV32-NEXT: or a4, a7, a4
+; RV32-NEXT: and a2, t0, a2
+; RV32-NEXT: and a5, t0, a0
; RV32-NEXT: beq a1, a3, .LBB51_8
; RV32-NEXT: # %bb.7: # %entry
; RV32-NEXT: sltu a0, a3, a1
@@ -3472,11 +3492,11 @@ define i64 @stest_f16i64_mm(half %x) {
; RV32-NEXT: .LBB51_8:
; RV32-NEXT: snez a0, a4
; RV32-NEXT: .LBB51_9: # %entry
-; RV32-NEXT: and a2, a2, a5
+; RV32-NEXT: and a5, a5, a2
; RV32-NEXT: li a3, -1
-; RV32-NEXT: beq a2, a3, .LBB51_11
+; RV32-NEXT: beq a5, a3, .LBB51_11
; RV32-NEXT: # %bb.10: # %entry
-; RV32-NEXT: slti a0, a5, 0
+; RV32-NEXT: slti a0, a2, 0
; RV32-NEXT: xori a0, a0, 1
; RV32-NEXT: .LBB51_11: # %entry
; RV32-NEXT: bnez a0, .LBB51_13
@@ -3550,8 +3570,10 @@ define i64 @utesth_f16i64_mm(half %x) {
; RV32-NEXT: lw a1, 20(sp)
; RV32-NEXT: lw a2, 12(sp)
; RV32-NEXT: lw a3, 8(sp)
-; RV32-NEXT: or a4, a1, a0
-; RV32-NEXT: seqz a4, a4
+; RV32-NEXT: seqz a4, a0
+; RV32-NEXT: snez a5, a1
+; RV32-NEXT: addi a5, a5, -1
+; RV32-NEXT: and a4, a5, a4
; RV32-NEXT: xori a0, a0, 1
; RV32-NEXT: or a0, a0, a1
; RV32-NEXT: seqz a0, a0
@@ -3595,30 +3617,30 @@ define i64 @ustest_f16i64_mm(half %x) {
; RV32-NEXT: call __extendhfsf2
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: call __fixsfti
-; RV32-NEXT: lw a0, 8(sp)
-; RV32-NEXT: lw a1, 12(sp)
-; RV32-NEXT: lw a2, 20(sp)
+; RV32-NEXT: lw a0, 20(sp)
+; RV32-NEXT: lw a1, 8(sp)
+; RV32-NEXT: lw a2, 12(sp)
; RV32-NEXT: lw a3, 16(sp)
-; RV32-NEXT: beqz a2, .LBB53_2
+; RV32-NEXT: beqz a0, .LBB53_2
; RV32-NEXT: # %bb.1: # %entry
-; RV32-NEXT: slti a4, a2, 0
+; RV32-NEXT: slti a4, a0, 0
; RV32-NEXT: j .LBB53_3
; RV32-NEXT: .LBB53_2:
; RV32-NEXT: seqz a4, a3
; RV32-NEXT: .LBB53_3: # %entry
; RV32-NEXT: xori a3, a3, 1
-; RV32-NEXT: or a3, a3, a2
+; RV32-NEXT: or a3, a3, a0
; RV32-NEXT: seqz a3, a3
; RV32-NEXT: addi a3, a3, -1
; RV32-NEXT: and a3, a3, a4
; RV32-NEXT: neg a3, a3
+; RV32-NEXT: and a2, a3, a2
; RV32-NEXT: and a1, a3, a1
; RV32-NEXT: and a0, a3, a0
-; RV32-NEXT: and a2, a3, a2
-; RV32-NEXT: slti a2, a2, 0
-; RV32-NEXT: addi a2, a2, -1
-; RV32-NEXT: and a0, a2, a0
-; RV32-NEXT: and a1, a2, a1
+; RV32-NEXT: slti a0, a0, 0
+; RV32-NEXT: addi a3, a0, -1
+; RV32-NEXT: and a0, a3, a1
+; RV32-NEXT: and a1, a3, a2
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/half-convert.ll b/llvm/test/CodeGen/RISCV/half-convert.ll
index daaceed3941c53..518cd7da2ab771 100644
--- a/llvm/test/CodeGen/RISCV/half-convert.ll
+++ b/llvm/test/CodeGen/RISCV/half-convert.ll
@@ -2145,41 +2145,48 @@ define i64 @fcvt_l_h(half %a) nounwind {
define i64 @fcvt_l_h_sat(half %a) nounwind {
; RV32IZFH-LABEL: fcvt_l_h_sat:
; RV32IZFH: # %bb.0: # %start
-; RV32IZFH-NEXT: addi sp, sp, -16
-; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: addi sp, sp, -32
+; RV32IZFH-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: lui a0, %hi(.LCPI10_0)
+; RV32IZFH-NEXT: flw fa5, %lo(.LCPI10_0)(a0)
; RV32IZFH-NEXT: fcvt.s.h fs0, fa0
+; RV32IZFH-NEXT: flt.s s0, fa5, fs0
+; RV32IZFH-NEXT: neg s1, s0
; RV32IZFH-NEXT: lui a0, 913408
; RV32IZFH-NEXT: fmv.w.x fa5, a0
-; RV32IZFH-NEXT: fle.s s0, fa5, fs0
+; RV32IZFH-NEXT: fle.s s2, fa5, fs0
+; RV32IZFH-NEXT: neg s3, s2
; RV32IZFH-NEXT: fmv.s fa0, fs0
; RV32IZFH-NEXT: call __fixsfdi
+; RV32IZFH-NEXT: and a0, s3, a0
+; RV32IZFH-NEXT: or a0, s1, a0
+; RV32IZFH-NEXT: feq.s a2, fs0, fs0
+; RV32IZFH-NEXT: neg a2, a2
; RV32IZFH-NEXT: lui a4, 524288
-; RV32IZFH-NEXT: lui a2, 524288
-; RV32IZFH-NEXT: beqz s0, .LBB10_2
+; RV32IZFH-NEXT: li a5, 1
+; RV32IZFH-NEXT: lui a3, 524288
+; RV32IZFH-NEXT: bne s2, a5, .LBB10_2
; RV32IZFH-NEXT: # %bb.1: # %start
-; RV32IZFH-NEXT: mv a2, a1
+; RV32IZFH-NEXT: mv a3, a1
; RV32IZFH-NEXT: .LBB10_2: # %start
-; RV32IZFH-NEXT: lui a1, %hi(.LCPI10_0)
-; RV32IZFH-NEXT: flw fa5, %lo(.LCPI10_0)(a1)
-; RV32IZFH-NEXT: flt.s a3, fa5, fs0
-; RV32IZFH-NEXT: beqz a3, .LBB10_4
+; RV32IZFH-NEXT: and a0, a2, a0
+; RV32IZFH-NEXT: beqz s0, .LBB10_4
; RV32IZFH-NEXT: # %bb.3:
-; RV32IZFH-NEXT: addi a2, a4, -1
+; RV32IZFH-NEXT: addi a3, a4, -1
; RV32IZFH-NEXT: .LBB10_4: # %start
-; RV32IZFH-NEXT: feq.s a1, fs0, fs0
-; RV32IZFH-NEXT: neg a4, a1
-; RV32IZFH-NEXT: and a1, a4, a2
-; RV32IZFH-NEXT: neg a2, a3
-; RV32IZFH-NEXT: neg a3, s0
-; RV32IZFH-NEXT: and a0, a3, a0
-; RV32IZFH-NEXT: or a0, a2, a0
-; RV32IZFH-NEXT: and a0, a4, a0
-; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
-; RV32IZFH-NEXT: addi sp, sp, 16
+; RV32IZFH-NEXT: and a1, a2, a3
+; RV32IZFH-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT: addi sp, sp, 32
; RV32IZFH-NEXT: ret
;
; RV64IZFH-LABEL: fcvt_l_h_sat:
@@ -2193,41 +2200,48 @@ define i64 @fcvt_l_h_sat(half %a) nounwind {
;
; RV32IDZFH-LABEL: fcvt_l_h_sat:
; RV32IDZFH: # %bb.0: # %start
-; RV32IDZFH-NEXT: addi sp, sp, -16
-; RV32IDZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IDZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IDZFH-NEXT: addi sp, sp, -32
+; RV32IDZFH-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IDZFH-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32IDZFH-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32IDZFH-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IDZFH-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32IDZFH-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill
+; RV32IDZFH-NEXT: lui a0, %hi(.LCPI10_0)
+; RV32IDZFH-NEXT: flw fa5, %lo(.LCPI10_0)(a0)
; RV32IDZFH-NEXT: fcvt.s.h fs0, fa0
+; RV32IDZFH-NEXT: flt.s s0, fa5, fs0
+; RV32IDZFH-NEXT: neg s1, s0
; RV32IDZFH-NEXT: lui a0, 913408
; RV32IDZFH-NEXT: fmv.w.x fa5, a0
-; RV32IDZFH-NEXT: fle.s s0, fa5, fs0
+; RV32IDZFH-NEXT: fle.s s2, fa5, fs0
+; RV32IDZFH-NEXT: neg s3, s2
; RV32IDZFH-NEXT: fmv.s fa0, fs0
; RV32IDZFH-NEXT: call __fixsfdi
+; RV32IDZFH-NEXT: and a0, s3, a0
+; RV32IDZFH-NEXT: or a0, s1, a0
+; RV32IDZFH-NEXT: feq.s a2, fs0, fs0
+; RV32IDZFH-NEXT: neg a2, a2
; RV32IDZFH-NEXT: lui a4, 524288
-; RV32IDZFH-NEXT: lui a2, 524288
-; RV32IDZFH-NEXT: beqz s0, .LBB10_2
+; RV32IDZFH-NEXT: li a5, 1
+; RV32IDZFH-NEXT: lui a3, 524288
+; RV32IDZFH-NEXT: bne s2, a5, .LBB10_2
; RV32IDZFH-NEXT: # %bb.1: # %start
-; RV32IDZFH-NEXT: mv a2, a1
+; RV32IDZFH-NEXT: mv a3, a1
; RV32IDZFH-NEXT: .LBB10_2: # %start
-; RV32IDZFH-NEXT: lui a1, %hi(.LCPI10_0)
-; RV32IDZFH-NEXT: flw fa5, %lo(.LCPI10_0)(a1)
-; RV32IDZFH-NEXT: flt.s a3, fa5, fs0
-; RV32IDZFH-NEXT: beqz a3, .LBB10_4
+; RV32IDZFH-NEXT: and a0, a2, a0
+; RV32IDZFH-NEXT: beqz s0, .LBB10_4
; RV32IDZFH-NEXT: # %bb.3:
-; RV32IDZFH-NEXT: addi a2, a4, -1
+; RV32IDZFH-NEXT: addi a3, a4, -1
; RV32IDZFH-NEXT: .LBB10_4: # %start
-; RV32IDZFH-NEXT: feq.s a1, fs0, fs0
-; RV32IDZFH-NEXT: neg a4, a1
-; RV32IDZFH-NEXT: and a1, a4, a2
-; RV32IDZFH-NEXT: neg a2, a3
-; RV32IDZFH-NEXT: neg a3, s0
-; RV32IDZFH-NEXT: and a0, a3, a0
-; RV32IDZFH-NEXT: or a0, a2, a0
-; RV32IDZFH-NEXT: and a0, a4, a0
-; RV32IDZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IDZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IDZFH-NEXT: and a1, a2, a3
+; RV32IDZFH-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IDZFH-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IDZFH-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IDZFH-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32IDZFH-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32IDZFH-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
-; RV32IDZFH-NEXT: addi sp, sp, 16
+; RV32IDZFH-NEXT: addi sp, sp, 32
; RV32IDZFH-NEXT: ret
;
; RV64IDZFH-LABEL: fcvt_l_h_sat:
@@ -2263,8 +2277,9 @@ define i64 @fcvt_l_h_sat(half %a) nounwind {
; RV32IZHINX-NEXT: feq.s a2, s0, s0
; RV32IZHINX-NEXT: neg a2, a2
; RV32IZHINX-NEXT: lui a4, 524288
+; RV32IZHINX-NEXT: li a5, 1
; RV32IZHINX-NEXT: lui a3, 524288
-; RV32IZHINX-NEXT: beqz s3, .LBB10_2
+; RV32IZHINX-NEXT: bne s3, a5, .LBB10_2
; RV32IZHINX-NEXT: # %bb.1: # %start
; RV32IZHINX-NEXT: mv a3, a1
; RV32IZHINX-NEXT: .LBB10_2: # %start
@@ -2316,8 +2331,9 @@ define i64 @fcvt_l_h_sat(half %a) nounwind {
; RV32IZDINXZHINX-NEXT: feq.s a2, s0, s0
; RV32IZDINXZHINX-NEXT: neg a2, a2
; RV32IZDINXZHINX-NEXT: lui a4, 524288
+; RV32IZDINXZHINX-NEXT: li a5, 1
; RV32IZDINXZHINX-NEXT: lui a3, 524288
-; RV32IZDINXZHINX-NEXT: beqz s3, .LBB10_2
+; RV32IZDINXZHINX-NEXT: bne s3, a5, .LBB10_2
; RV32IZDINXZHINX-NEXT: # %bb.1: # %start
; RV32IZDINXZHINX-NEXT: mv a3, a1
; RV32IZDINXZHINX-NEXT: .LBB10_2: # %start
@@ -2448,42 +2464,48 @@ define i64 @fcvt_l_h_sat(half %a) nounwind {
;
; RV32ID-ILP32-LABEL: fcvt_l_h_sat:
; RV32ID-ILP32: # %bb.0: # %start
-; RV32ID-ILP32-NEXT: addi sp, sp, -16
-; RV32ID-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32ID-ILP32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32ID-ILP32-NEXT: addi sp, sp, -32
+; RV32ID-ILP32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32ID-ILP32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32ID-ILP32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32ID-ILP32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32ID-ILP32-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32ID-ILP32-NEXT: call __extendhfsf2
+; RV32ID-ILP32-NEXT: lui a1, %hi(.LCPI10_0)
+; RV32ID-ILP32-NEXT: flw fa5, %lo(.LCPI10_0)(a1)
; RV32ID-ILP32-NEXT: fmv.w.x fa4, a0
+; RV32ID-ILP32-NEXT: fsw fa4, 8(sp) # 4-byte Folded Spill
+; RV32ID-ILP32-NEXT: flt.s s0, fa5, fa4
+; RV32ID-ILP32-NEXT: neg s1, s0
; RV32ID-ILP32-NEXT: lui a1, 913408
; RV32ID-ILP32-NEXT: fmv.w.x fa5, a1
-; RV32ID-ILP32-NEXT: fsw fa4, 4(sp) # 4-byte Folded Spill
-; RV32ID-ILP32-NEXT: fle.s s0, fa5, fa4
+; RV32ID-ILP32-NEXT: fle.s s2, fa5, fa4
+; RV32ID-ILP32-NEXT: neg s3, s2
; RV32ID-ILP32-NEXT: call __fixsfdi
+; RV32ID-ILP32-NEXT: and a0, s3, a0
+; RV32ID-ILP32-NEXT: or a0, s1, a0
+; RV32ID-ILP32-NEXT: flw fa5, 8(sp) # 4-byte Folded Reload
+; RV32ID-ILP32-NEXT: feq.s a2, fa5, fa5
+; RV32ID-ILP32-NEXT: neg a2, a2
; RV32ID-ILP32-NEXT: lui a4, 524288
-; RV32ID-ILP32-NEXT: lui a2, 524288
-; RV32ID-ILP32-NEXT: beqz s0, .LBB10_2
+; RV32ID-ILP32-NEXT: li a5, 1
+; RV32ID-ILP32-NEXT: lui a3, 524288
+; RV32ID-ILP32-NEXT: bne s2, a5, .LBB10_2
; RV32ID-ILP32-NEXT: # %bb.1: # %start
-; RV32ID-ILP32-NEXT: mv a2, a1
+; RV32ID-ILP32-NEXT: mv a3, a1
; RV32ID-ILP32-NEXT: .LBB10_2: # %start
-; RV32ID-ILP32-NEXT: lui a1, %hi(.LCPI10_0)
-; RV32ID-ILP32-NEXT: flw fa5, %lo(.LCPI10_0)(a1)
-; RV32ID-ILP32-NEXT: flw fa4, 4(sp) # 4-byte Folded Reload
-; RV32ID-ILP32-NEXT: flt.s a3, fa5, fa4
-; RV32ID-ILP32-NEXT: fmv.s fa5, fa4
-; RV32ID-ILP32-NEXT: beqz a3, .LBB10_4
+; RV32ID-ILP32-NEXT: and a0, a2, a0
+; RV32ID-ILP32-NEXT: beqz s0, .LBB10_4
; RV32ID-ILP32-NEXT: # %bb.3:
-; RV32ID-ILP32-NEXT: addi a2, a4, -1
+; RV32ID-ILP32-NEXT: addi a3, a4, -1
; RV32ID-ILP32-NEXT: .LBB10_4: # %start
-; RV32ID-ILP32-NEXT: feq.s a1, fa5, fa5
-; RV32ID-ILP32-NEXT: neg a4, a1
-; RV32ID-ILP32-NEXT: and a1, a4, a2
-; RV32ID-ILP32-NEXT: neg a2, a3
-; RV32ID-ILP32-NEXT: neg a3, s0
-; RV32ID-ILP32-NEXT: and a0, a3, a0
-; RV32ID-ILP32-NEXT: or a0, a2, a0
-; RV32ID-ILP32-NEXT: and a0, a4, a0
-; RV32ID-ILP32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32ID-ILP32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32ID-ILP32-NEXT: addi sp, sp, 16
+; RV32ID-ILP32-NEXT: and a1, a2, a3
+; RV32ID-ILP32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32ID-ILP32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32ID-ILP32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32ID-ILP32-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32ID-ILP32-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32ID-ILP32-NEXT: addi sp, sp, 32
; RV32ID-ILP32-NEXT: ret
;
; RV64ID-LP64-LABEL: fcvt_l_h_sat:
@@ -2503,41 +2525,48 @@ define i64 @fcvt_l_h_sat(half %a) nounwind {
;
; RV32ID-LABEL: fcvt_l_h_sat:
; RV32ID: # %bb.0: # %start
-; RV32ID-NEXT: addi sp, sp, -16
-; RV32ID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32ID-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32ID-NEXT: addi sp, sp, -32
+; RV32ID-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32ID-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32ID-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32ID-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32ID-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32ID-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill
; RV32ID-NEXT: call __extendhfsf2
+; RV32ID-NEXT: lui a0, %hi(.LCPI10_0)
+; RV32ID-NEXT: flw fa5, %lo(.LCPI10_0)(a0)
; RV32ID-NEXT: fmv.s fs0, fa0
+; RV32ID-NEXT: flt.s s0, fa5, fa0
+; RV32ID-NEXT: neg s1, s0
; RV32ID-NEXT: lui a0, 913408
; RV32ID-NEXT: fmv.w.x fa5, a0
-; RV32ID-NEXT: fle.s s0, fa5, fa0
+; RV32ID-NEXT: fle.s s2, fa5, fa0
+; RV32ID-NEXT: neg s3, s2
; RV32ID-NEXT: call __fixsfdi
+; RV32ID-NEXT: and a0, s3, a0
+; RV32ID-NEXT: or a0, s1, a0
+; RV32ID-NEXT: feq.s a2, fs0, fs0
+; RV32ID-NEXT: neg a2, a2
; RV32ID-NEXT: lui a4, 524288
-; RV32ID-NEXT: lui a2, 524288
-; RV32ID-NEXT: beqz s0, .LBB10_2
+; RV32ID-NEXT: li a5, 1
+; RV32ID-NEXT: lui a3, 524288
+; RV32ID-NEXT: bne s2, a5, .LBB10_2
; RV32ID-NEXT: # %bb.1: # %start
-; RV32ID-NEXT: mv a2, a1
+; RV32ID-NEXT: mv a3, a1
; RV32ID-NEXT: .LBB10_2: # %start
-; RV32ID-NEXT: lui a1, %hi(.LCPI10_0)
-; RV32ID-NEXT: flw fa5, %lo(.LCPI10_0)(a1)
-; RV32ID-NEXT: flt.s a3, fa5, fs0
-; RV32ID-NEXT: beqz a3, .LBB10_4
+; RV32ID-NEXT: and a0, a2, a0
+; RV32ID-NEXT: beqz s0, .LBB10_4
; RV32ID-NEXT: # %bb.3:
-; RV32ID-NEXT: addi a2, a4, -1
+; RV32ID-NEXT: addi a3, a4, -1
; RV32ID-NEXT: .LBB10_4: # %start
-; RV32ID-NEXT: feq.s a1, fs0, fs0
-; RV32ID-NEXT: neg a4, a1
-; RV32ID-NEXT: and a1, a4, a2
-; RV32ID-NEXT: neg a2, a3
-; RV32ID-NEXT: neg a3, s0
-; RV32ID-NEXT: and a0, a3, a0
-; RV32ID-NEXT: or a0, a2, a0
-; RV32ID-NEXT: and a0, a4, a0
-; RV32ID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32ID-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32ID-NEXT: and a1, a2, a3
+; RV32ID-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32ID-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32ID-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32ID-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32ID-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32ID-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
-; RV32ID-NEXT: addi sp, sp, 16
+; RV32ID-NEXT: addi sp, sp, 32
; RV32ID-NEXT: ret
;
; RV64ID-LABEL: fcvt_l_h_sat:
@@ -2556,41 +2585,48 @@ define i64 @fcvt_l_h_sat(half %a) nounwind {
;
; RV32IFZFHMIN-LABEL: fcvt_l_h_sat:
; RV32IFZFHMIN: # %bb.0: # %start
-; RV32IFZFHMIN-NEXT: addi sp, sp, -16
-; RV32IFZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IFZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IFZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IFZFHMIN-NEXT: addi sp, sp, -32
+; RV32IFZFHMIN-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IFZFHMIN-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32IFZFHMIN-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32IFZFHMIN-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IFZFHMIN-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32IFZFHMIN-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill
+; RV32IFZFHMIN-NEXT: lui a0, %hi(.LCPI10_0)
+; RV32IFZFHMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a0)
; RV32IFZFHMIN-NEXT: fcvt.s.h fs0, fa0
+; RV32IFZFHMIN-NEXT: flt.s s0, fa5, fs0
+; RV32IFZFHMIN-NEXT: neg s1, s0
; RV32IFZFHMIN-NEXT: lui a0, 913408
; RV32IFZFHMIN-NEXT: fmv.w.x fa5, a0
-; RV32IFZFHMIN-NEXT: fle.s s0, fa5, fs0
+; RV32IFZFHMIN-NEXT: fle.s s2, fa5, fs0
+; RV32IFZFHMIN-NEXT: neg s3, s2
; RV32IFZFHMIN-NEXT: fmv.s fa0, fs0
; RV32IFZFHMIN-NEXT: call __fixsfdi
+; RV32IFZFHMIN-NEXT: and a0, s3, a0
+; RV32IFZFHMIN-NEXT: or a0, s1, a0
+; RV32IFZFHMIN-NEXT: feq.s a2, fs0, fs0
+; RV32IFZFHMIN-NEXT: neg a2, a2
; RV32IFZFHMIN-NEXT: lui a4, 524288
-; RV32IFZFHMIN-NEXT: lui a2, 524288
-; RV32IFZFHMIN-NEXT: beqz s0, .LBB10_2
+; RV32IFZFHMIN-NEXT: li a5, 1
+; RV32IFZFHMIN-NEXT: lui a3, 524288
+; RV32IFZFHMIN-NEXT: bne s2, a5, .LBB10_2
; RV32IFZFHMIN-NEXT: # %bb.1: # %start
-; RV32IFZFHMIN-NEXT: mv a2, a1
+; RV32IFZFHMIN-NEXT: mv a3, a1
; RV32IFZFHMIN-NEXT: .LBB10_2: # %start
-; RV32IFZFHMIN-NEXT: lui a1, %hi(.LCPI10_0)
-; RV32IFZFHMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a1)
-; RV32IFZFHMIN-NEXT: flt.s a3, fa5, fs0
-; RV32IFZFHMIN-NEXT: beqz a3, .LBB10_4
+; RV32IFZFHMIN-NEXT: and a0, a2, a0
+; RV32IFZFHMIN-NEXT: beqz s0, .LBB10_4
; RV32IFZFHMIN-NEXT: # %bb.3:
-; RV32IFZFHMIN-NEXT: addi a2, a4, -1
+; RV32IFZFHMIN-NEXT: addi a3, a4, -1
; RV32IFZFHMIN-NEXT: .LBB10_4: # %start
-; RV32IFZFHMIN-NEXT: feq.s a1, fs0, fs0
-; RV32IFZFHMIN-NEXT: neg a4, a1
-; RV32IFZFHMIN-NEXT: and a1, a4, a2
-; RV32IFZFHMIN-NEXT: neg a2, a3
-; RV32IFZFHMIN-NEXT: neg a3, s0
-; RV32IFZFHMIN-NEXT: and a0, a3, a0
-; RV32IFZFHMIN-NEXT: or a0, a2, a0
-; RV32IFZFHMIN-NEXT: and a0, a4, a0
-; RV32IFZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IFZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IFZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
-; RV32IFZFHMIN-NEXT: addi sp, sp, 16
+; RV32IFZFHMIN-NEXT: and a1, a2, a3
+; RV32IFZFHMIN-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IFZFHMIN-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IFZFHMIN-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IFZFHMIN-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32IFZFHMIN-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32IFZFHMIN-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload
+; RV32IFZFHMIN-NEXT: addi sp, sp, 32
; RV32IFZFHMIN-NEXT: ret
;
; CHECK64-IZFHMIN-LABEL: fcvt_l_h_sat:
@@ -2605,41 +2641,48 @@ define i64 @fcvt_l_h_sat(half %a) nounwind {
;
; RV32IDZFHMIN-LABEL: fcvt_l_h_sat:
; RV32IDZFHMIN: # %bb.0: # %start
-; RV32IDZFHMIN-NEXT: addi sp, sp, -16
-; RV32IDZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IDZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IDZFHMIN-NEXT: addi sp, sp, -32
+; RV32IDZFHMIN-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IDZFHMIN-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32IDZFHMIN-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32IDZFHMIN-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32IDZFHMIN-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32IDZFHMIN-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill
+; RV32IDZFHMIN-NEXT: lui a0, %hi(.LCPI10_0)
+; RV32IDZFHMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a0)
; RV32IDZFHMIN-NEXT: fcvt.s.h fs0, fa0
+; RV32IDZFHMIN-NEXT: flt.s s0, fa5, fs0
+; RV32IDZFHMIN-NEXT: neg s1, s0
; RV32IDZFHMIN-NEXT: lui a0, 913408
; RV32IDZFHMIN-NEXT: fmv.w.x fa5, a0
-; RV32IDZFHMIN-NEXT: fle.s s0, fa5, fs0
+; RV32IDZFHMIN-NEXT: fle.s s2, fa5, fs0
+; RV32IDZFHMIN-NEXT: neg s3, s2
; RV32IDZFHMIN-NEXT: fmv.s fa0, fs0
; RV32IDZFHMIN-NEXT: call __fixsfdi
+; RV32IDZFHMIN-NEXT: and a0, s3, a0
+; RV32IDZFHMIN-NEXT: or a0, s1, a0
+; RV32IDZFHMIN-NEXT: feq.s a2, fs0, fs0
+; RV32IDZFHMIN-NEXT: neg a2, a2
; RV32IDZFHMIN-NEXT: lui a4, 524288
-; RV32IDZFHMIN-NEXT: lui a2, 524288
-; RV32IDZFHMIN-NEXT: beqz s0, .LBB10_2
+; RV32IDZFHMIN-NEXT: li a5, 1
+; RV32IDZFHMIN-NEXT: lui a3, 524288
+; RV32IDZFHMIN-NEXT: bne s2, a5, .LBB10_2
; RV32IDZFHMIN-NEXT: # %bb.1: # %start
-; RV32IDZFHMIN-NEXT: mv a2, a1
+; RV32IDZFHMIN-NEXT: mv a3, a1
; RV32IDZFHMIN-NEXT: .LBB10_2: # %start
-; RV32IDZFHMIN-NEXT: lui a1, %hi(.LCPI10_0)
-; RV32IDZFHMIN-NEXT: flw fa5, %lo(.LCPI10_0)(a1)
-; RV32IDZFHMIN-NEXT: flt.s a3, fa5, fs0
-; RV32IDZFHMIN-NEXT: beqz a3, .LBB10_4
+; RV32IDZFHMIN-NEXT: and a0, a2, a0
+; RV32IDZFHMIN-NEXT: beqz s0, .LBB10_4
; RV32IDZFHMIN-NEXT: # %bb.3:
-; RV32IDZFHMIN-NEXT: addi a2, a4, -1
+; RV32IDZFHMIN-NEXT: addi a3, a4, -1
; RV32IDZFHMIN-NEXT: .LBB10_4: # %start
-; RV32IDZFHMIN-NEXT: feq.s a1, fs0, fs0
-; RV32IDZFHMIN-NEXT: neg a4, a1
-; RV32IDZFHMIN-NEXT: and a1, a4, a2
-; RV32IDZFHMIN-NEXT: neg a2, a3
-; RV32IDZFHMIN-NEXT: neg a3, s0
-; RV32IDZFHMIN-NEXT: and a0, a3, a0
-; RV32IDZFHMIN-NEXT: or a0, a2, a0
-; RV32IDZFHMIN-NEXT: and a0, a4, a0
-; RV32IDZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IDZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IDZFHMIN-NEXT: and a1, a2, a3
+; RV32IDZFHMIN-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IDZFHMIN-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IDZFHMIN-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IDZFHMIN-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32IDZFHMIN-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32IDZFHMIN-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
-; RV32IDZFHMIN-NEXT: addi sp, sp, 16
+; RV32IDZFHMIN-NEXT: addi sp, sp, 32
; RV32IDZFHMIN-NEXT: ret
;
; CHECK32-IZHINXMIN-LABEL: fcvt_l_h_sat:
@@ -2666,8 +2709,9 @@ define i64 @fcvt_l_h_sat(half %a) nounwind {
; CHECK32-IZHINXMIN-NEXT: feq.s a2, s0, s0
; CHECK32-IZHINXMIN-NEXT: neg a2, a2
; CHECK32-IZHINXMIN-NEXT: lui a4, 524288
+; CHECK32-IZHINXMIN-NEXT: li a5, 1
; CHECK32-IZHINXMIN-NEXT: lui a3, 524288
-; CHECK32-IZHINXMIN-NEXT: beqz s3, .LBB10_2
+; CHECK32-IZHINXMIN-NEXT: bne s3, a5, .LBB10_2
; CHECK32-IZHINXMIN-NEXT: # %bb.1: # %start
; CHECK32-IZHINXMIN-NEXT: mv a3, a1
; CHECK32-IZHINXMIN-NEXT: .LBB10_2: # %start
@@ -2720,8 +2764,9 @@ define i64 @fcvt_l_h_sat(half %a) nounwind {
; CHECK32-IZDINXZHINXMIN-NEXT: feq.s a2, s0, s0
; CHECK32-IZDINXZHINXMIN-NEXT: neg a2, a2
; CHECK32-IZDINXZHINXMIN-NEXT: lui a4, 524288
+; CHECK32-IZDINXZHINXMIN-NEXT: li a5, 1
; CHECK32-IZDINXZHINXMIN-NEXT: lui a3, 524288
-; CHECK32-IZDINXZHINXMIN-NEXT: beqz s3, .LBB10_2
+; CHECK32-IZDINXZHINXMIN-NEXT: bne s3, a5, .LBB10_2
; CHECK32-IZDINXZHINXMIN-NEXT: # %bb.1: # %start
; CHECK32-IZDINXZHINXMIN-NEXT: mv a3, a1
; CHECK32-IZDINXZHINXMIN-NEXT: .LBB10_2: # %start
@@ -2939,7 +2984,8 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind {
; RV32IZFH-NEXT: neg s0, a0
; RV32IZFH-NEXT: fmv.w.x fa5, zero
; RV32IZFH-NEXT: fle.s a0, fa5, fa0
-; RV32IZFH-NEXT: neg s1, a0
+; RV32IZFH-NEXT: xori a0, a0, 1
+; RV32IZFH-NEXT: addi s1, a0, -1
; RV32IZFH-NEXT: call __fixunssfdi
; RV32IZFH-NEXT: and a0, s1, a0
; RV32IZFH-NEXT: or a0, s0, a0
@@ -2973,7 +3019,8 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind {
; RV32IDZFH-NEXT: neg s0, a0
; RV32IDZFH-NEXT: fmv.w.x fa5, zero
; RV32IDZFH-NEXT: fle.s a0, fa5, fa0
-; RV32IDZFH-NEXT: neg s1, a0
+; RV32IDZFH-NEXT: xori a0, a0, 1
+; RV32IDZFH-NEXT: addi s1, a0, -1
; RV32IDZFH-NEXT: call __fixunssfdi
; RV32IDZFH-NEXT: and a0, s1, a0
; RV32IDZFH-NEXT: or a0, s0, a0
@@ -3006,7 +3053,8 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind {
; RV32IZHINX-NEXT: flt.s a1, a1, a0
; RV32IZHINX-NEXT: neg s0, a1
; RV32IZHINX-NEXT: fle.s a1, zero, a0
-; RV32IZHINX-NEXT: neg s1, a1
+; RV32IZHINX-NEXT: xori a1, a1, 1
+; RV32IZHINX-NEXT: addi s1, a1, -1
; RV32IZHINX-NEXT: call __fixunssfdi
; RV32IZHINX-NEXT: and a0, s1, a0
; RV32IZHINX-NEXT: or a0, s0, a0
@@ -3039,7 +3087,8 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind {
; RV32IZDINXZHINX-NEXT: flt.s a1, a1, a0
; RV32IZDINXZHINX-NEXT: neg s0, a1
; RV32IZDINXZHINX-NEXT: fle.s a1, zero, a0
-; RV32IZDINXZHINX-NEXT: neg s1, a1
+; RV32IZDINXZHINX-NEXT: xori a1, a1, 1
+; RV32IZDINXZHINX-NEXT: addi s1, a1, -1
; RV32IZDINXZHINX-NEXT: call __fixunssfdi
; RV32IZDINXZHINX-NEXT: and a0, s1, a0
; RV32IZDINXZHINX-NEXT: or a0, s0, a0
@@ -3138,7 +3187,8 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind {
; RV32ID-ILP32-NEXT: neg s0, a1
; RV32ID-ILP32-NEXT: fmv.w.x fa5, zero
; RV32ID-ILP32-NEXT: fle.s a1, fa5, fa4
-; RV32ID-ILP32-NEXT: neg s1, a1
+; RV32ID-ILP32-NEXT: xori a1, a1, 1
+; RV32ID-ILP32-NEXT: addi s1, a1, -1
; RV32ID-ILP32-NEXT: call __fixunssfdi
; RV32ID-ILP32-NEXT: and a0, s1, a0
; RV32ID-ILP32-NEXT: or a0, s0, a0
@@ -3178,7 +3228,8 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind {
; RV32ID-NEXT: neg s0, a0
; RV32ID-NEXT: fmv.w.x fa5, zero
; RV32ID-NEXT: fle.s a0, fa5, fa0
-; RV32ID-NEXT: neg s1, a0
+; RV32ID-NEXT: xori a0, a0, 1
+; RV32ID-NEXT: addi s1, a0, -1
; RV32ID-NEXT: call __fixunssfdi
; RV32ID-NEXT: and a0, s1, a0
; RV32ID-NEXT: or a0, s0, a0
@@ -3217,7 +3268,8 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind {
; CHECK32-IZFHMIN-NEXT: neg s0, a0
; CHECK32-IZFHMIN-NEXT: fmv.w.x fa5, zero
; CHECK32-IZFHMIN-NEXT: fle.s a0, fa5, fa0
-; CHECK32-IZFHMIN-NEXT: neg s1, a0
+; CHECK32-IZFHMIN-NEXT: xori a0, a0, 1
+; CHECK32-IZFHMIN-NEXT: addi s1, a0, -1
; CHECK32-IZFHMIN-NEXT: call __fixunssfdi
; CHECK32-IZFHMIN-NEXT: and a0, s1, a0
; CHECK32-IZFHMIN-NEXT: or a0, s0, a0
@@ -3251,7 +3303,8 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind {
; CHECK32-IZHINXMIN-NEXT: flt.s a1, a1, a0
; CHECK32-IZHINXMIN-NEXT: neg s0, a1
; CHECK32-IZHINXMIN-NEXT: fle.s a1, zero, a0
-; CHECK32-IZHINXMIN-NEXT: neg s1, a1
+; CHECK32-IZHINXMIN-NEXT: xori a1, a1, 1
+; CHECK32-IZHINXMIN-NEXT: addi s1, a1, -1
; CHECK32-IZHINXMIN-NEXT: call __fixunssfdi
; CHECK32-IZHINXMIN-NEXT: and a0, s1, a0
; CHECK32-IZHINXMIN-NEXT: or a0, s0, a0
@@ -3285,7 +3338,8 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind {
; CHECK32-IZDINXZHINXMIN-NEXT: flt.s a1, a1, a0
; CHECK32-IZDINXZHINXMIN-NEXT: neg s0, a1
; CHECK32-IZDINXZHINXMIN-NEXT: fle.s a1, zero, a0
-; CHECK32-IZDINXZHINXMIN-NEXT: neg s1, a1
+; CHECK32-IZDINXZHINXMIN-NEXT: xori a1, a1, 1
+; CHECK32-IZDINXZHINXMIN-NEXT: addi s1, a1, -1
; CHECK32-IZDINXZHINXMIN-NEXT: call __fixunssfdi
; CHECK32-IZDINXZHINXMIN-NEXT: and a0, s1, a0
; CHECK32-IZDINXZHINXMIN-NEXT: or a0, s0, a0
diff --git a/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll b/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll
index 3f385909b0b510..647af5f5b87438 100644
--- a/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll
+++ b/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll
@@ -108,38 +108,41 @@ define i64 @test_floor_si64(half %x) nounwind {
; RV32IZFH-NEXT: addi sp, sp, -16
; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill
; RV32IZFH-NEXT: fcvt.s.h fs0, fa0
; RV32IZFH-NEXT: lui a0, 913408
; RV32IZFH-NEXT: fmv.w.x fa5, a0
; RV32IZFH-NEXT: fle.s s0, fa5, fs0
+; RV32IZFH-NEXT: neg s1, s0
; RV32IZFH-NEXT: fmv.s fa0, fs0
; RV32IZFH-NEXT: call __fixsfdi
+; RV32IZFH-NEXT: lui a2, %hi(.LCPI1_1)
+; RV32IZFH-NEXT: flw fa5, %lo(.LCPI1_1)(a2)
+; RV32IZFH-NEXT: and a0, s1, a0
+; RV32IZFH-NEXT: flt.s a3, fa5, fs0
+; RV32IZFH-NEXT: neg a2, a3
+; RV32IZFH-NEXT: or a0, a2, a0
+; RV32IZFH-NEXT: feq.s a2, fs0, fs0
+; RV32IZFH-NEXT: neg a2, a2
+; RV32IZFH-NEXT: lui a5, 524288
+; RV32IZFH-NEXT: li a6, 1
; RV32IZFH-NEXT: lui a4, 524288
-; RV32IZFH-NEXT: lui a2, 524288
-; RV32IZFH-NEXT: beqz s0, .LBB1_4
+; RV32IZFH-NEXT: bne s0, a6, .LBB1_4
; RV32IZFH-NEXT: # %bb.3:
-; RV32IZFH-NEXT: mv a2, a1
+; RV32IZFH-NEXT: mv a4, a1
; RV32IZFH-NEXT: .LBB1_4:
-; RV32IZFH-NEXT: lui a1, %hi(.LCPI1_1)
-; RV32IZFH-NEXT: flw fa5, %lo(.LCPI1_1)(a1)
-; RV32IZFH-NEXT: flt.s a3, fa5, fs0
-; RV32IZFH-NEXT: beqz a3, .LBB1_6
-; RV32IZFH-NEXT: # %bb.5:
-; RV32IZFH-NEXT: addi a2, a4, -1
-; RV32IZFH-NEXT: .LBB1_6:
-; RV32IZFH-NEXT: feq.s a1, fs0, fs0
-; RV32IZFH-NEXT: neg a4, a1
-; RV32IZFH-NEXT: and a1, a4, a2
-; RV32IZFH-NEXT: neg a2, s0
; RV32IZFH-NEXT: and a0, a2, a0
-; RV32IZFH-NEXT: neg a2, a3
-; RV32IZFH-NEXT: or a0, a2, a0
-; RV32IZFH-NEXT: and a0, a4, a0
; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: addi sp, sp, 16
+; RV32IZFH-NEXT: beqz a3, .LBB1_6
+; RV32IZFH-NEXT: # %bb.5:
+; RV32IZFH-NEXT: addi a4, a5, -1
+; RV32IZFH-NEXT: .LBB1_6:
+; RV32IZFH-NEXT: and a1, a2, a4
; RV32IZFH-NEXT: ret
;
; RV64IZFH-LABEL: test_floor_si64:
@@ -177,16 +180,17 @@ define i64 @test_floor_si64(half %x) nounwind {
; RV32IZHINX-NEXT: lui a2, %hi(.LCPI1_1)
; RV32IZHINX-NEXT: lw a2, %lo(.LCPI1_1)(a2)
; RV32IZHINX-NEXT: and a0, s2, a0
-; RV32IZHINX-NEXT: flt.s a4, a2, s0
-; RV32IZHINX-NEXT: neg a2, a4
+; RV32IZHINX-NEXT: flt.s a3, a2, s0
+; RV32IZHINX-NEXT: neg a2, a3
; RV32IZHINX-NEXT: or a0, a2, a0
; RV32IZHINX-NEXT: feq.s a2, s0, s0
; RV32IZHINX-NEXT: neg a2, a2
; RV32IZHINX-NEXT: lui a5, 524288
-; RV32IZHINX-NEXT: lui a3, 524288
-; RV32IZHINX-NEXT: beqz s1, .LBB1_4
+; RV32IZHINX-NEXT: li a6, 1
+; RV32IZHINX-NEXT: lui a4, 524288
+; RV32IZHINX-NEXT: bne s1, a6, .LBB1_4
; RV32IZHINX-NEXT: # %bb.3:
-; RV32IZHINX-NEXT: mv a3, a1
+; RV32IZHINX-NEXT: mv a4, a1
; RV32IZHINX-NEXT: .LBB1_4:
; RV32IZHINX-NEXT: and a0, a2, a0
; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
@@ -194,11 +198,11 @@ define i64 @test_floor_si64(half %x) nounwind {
; RV32IZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32IZHINX-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
; RV32IZHINX-NEXT: addi sp, sp, 16
-; RV32IZHINX-NEXT: beqz a4, .LBB1_6
+; RV32IZHINX-NEXT: beqz a3, .LBB1_6
; RV32IZHINX-NEXT: # %bb.5:
-; RV32IZHINX-NEXT: addi a3, a5, -1
+; RV32IZHINX-NEXT: addi a4, a5, -1
; RV32IZHINX-NEXT: .LBB1_6:
-; RV32IZHINX-NEXT: and a1, a2, a3
+; RV32IZHINX-NEXT: and a1, a2, a4
; RV32IZHINX-NEXT: ret
;
; RV64IZHINX-LABEL: test_floor_si64:
@@ -236,39 +240,42 @@ define i64 @test_floor_si64(half %x) nounwind {
; RV32IZFHMIN-NEXT: addi sp, sp, -16
; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFHMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32IZFHMIN-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill
; RV32IZFHMIN-NEXT: fcvt.h.s fa5, fa5
; RV32IZFHMIN-NEXT: fcvt.s.h fs0, fa5
; RV32IZFHMIN-NEXT: lui a0, 913408
; RV32IZFHMIN-NEXT: fmv.w.x fa5, a0
; RV32IZFHMIN-NEXT: fle.s s0, fa5, fs0
+; RV32IZFHMIN-NEXT: neg s1, s0
; RV32IZFHMIN-NEXT: fmv.s fa0, fs0
; RV32IZFHMIN-NEXT: call __fixsfdi
+; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI1_0)
+; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI1_0)(a2)
+; RV32IZFHMIN-NEXT: and a0, s1, a0
+; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0
+; RV32IZFHMIN-NEXT: neg a2, a3
+; RV32IZFHMIN-NEXT: or a0, a2, a0
+; RV32IZFHMIN-NEXT: feq.s a2, fs0, fs0
+; RV32IZFHMIN-NEXT: neg a2, a2
+; RV32IZFHMIN-NEXT: lui a5, 524288
+; RV32IZFHMIN-NEXT: li a6, 1
; RV32IZFHMIN-NEXT: lui a4, 524288
-; RV32IZFHMIN-NEXT: lui a2, 524288
-; RV32IZFHMIN-NEXT: beqz s0, .LBB1_4
+; RV32IZFHMIN-NEXT: bne s0, a6, .LBB1_4
; RV32IZFHMIN-NEXT: # %bb.3:
-; RV32IZFHMIN-NEXT: mv a2, a1
+; RV32IZFHMIN-NEXT: mv a4, a1
; RV32IZFHMIN-NEXT: .LBB1_4:
-; RV32IZFHMIN-NEXT: lui a1, %hi(.LCPI1_0)
-; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI1_0)(a1)
-; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0
-; RV32IZFHMIN-NEXT: beqz a3, .LBB1_6
-; RV32IZFHMIN-NEXT: # %bb.5:
-; RV32IZFHMIN-NEXT: addi a2, a4, -1
-; RV32IZFHMIN-NEXT: .LBB1_6:
-; RV32IZFHMIN-NEXT: feq.s a1, fs0, fs0
-; RV32IZFHMIN-NEXT: neg a4, a1
-; RV32IZFHMIN-NEXT: and a1, a4, a2
-; RV32IZFHMIN-NEXT: neg a2, s0
; RV32IZFHMIN-NEXT: and a0, a2, a0
-; RV32IZFHMIN-NEXT: neg a2, a3
-; RV32IZFHMIN-NEXT: or a0, a2, a0
-; RV32IZFHMIN-NEXT: and a0, a4, a0
; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IZFHMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32IZFHMIN-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload
; RV32IZFHMIN-NEXT: addi sp, sp, 16
+; RV32IZFHMIN-NEXT: beqz a3, .LBB1_6
+; RV32IZFHMIN-NEXT: # %bb.5:
+; RV32IZFHMIN-NEXT: addi a4, a5, -1
+; RV32IZFHMIN-NEXT: .LBB1_6:
+; RV32IZFHMIN-NEXT: and a1, a2, a4
; RV32IZFHMIN-NEXT: ret
;
; RV64IZFHMIN-LABEL: test_floor_si64:
@@ -320,16 +327,17 @@ define i64 @test_floor_si64(half %x) nounwind {
; RV32IZHINXMIN-NEXT: lui a2, %hi(.LCPI1_0)
; RV32IZHINXMIN-NEXT: lw a2, %lo(.LCPI1_0)(a2)
; RV32IZHINXMIN-NEXT: and a0, s2, a0
-; RV32IZHINXMIN-NEXT: flt.s a4, a2, s0
-; RV32IZHINXMIN-NEXT: neg a2, a4
+; RV32IZHINXMIN-NEXT: flt.s a3, a2, s0
+; RV32IZHINXMIN-NEXT: neg a2, a3
; RV32IZHINXMIN-NEXT: or a0, a2, a0
; RV32IZHINXMIN-NEXT: feq.s a2, s0, s0
; RV32IZHINXMIN-NEXT: neg a2, a2
; RV32IZHINXMIN-NEXT: lui a5, 524288
-; RV32IZHINXMIN-NEXT: lui a3, 524288
-; RV32IZHINXMIN-NEXT: beqz s1, .LBB1_4
+; RV32IZHINXMIN-NEXT: li a6, 1
+; RV32IZHINXMIN-NEXT: lui a4, 524288
+; RV32IZHINXMIN-NEXT: bne s1, a6, .LBB1_4
; RV32IZHINXMIN-NEXT: # %bb.3:
-; RV32IZHINXMIN-NEXT: mv a3, a1
+; RV32IZHINXMIN-NEXT: mv a4, a1
; RV32IZHINXMIN-NEXT: .LBB1_4:
; RV32IZHINXMIN-NEXT: and a0, a2, a0
; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
@@ -337,11 +345,11 @@ define i64 @test_floor_si64(half %x) nounwind {
; RV32IZHINXMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32IZHINXMIN-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
; RV32IZHINXMIN-NEXT: addi sp, sp, 16
-; RV32IZHINXMIN-NEXT: beqz a4, .LBB1_6
+; RV32IZHINXMIN-NEXT: beqz a3, .LBB1_6
; RV32IZHINXMIN-NEXT: # %bb.5:
-; RV32IZHINXMIN-NEXT: addi a3, a5, -1
+; RV32IZHINXMIN-NEXT: addi a4, a5, -1
; RV32IZHINXMIN-NEXT: .LBB1_6:
-; RV32IZHINXMIN-NEXT: and a1, a2, a3
+; RV32IZHINXMIN-NEXT: and a1, a2, a4
; RV32IZHINXMIN-NEXT: ret
;
; RV64IZHINXMIN-LABEL: test_floor_si64:
@@ -413,7 +421,7 @@ define signext i32 @test_floor_ui32(half %x) {
; RV64IZHINX-NEXT: fcvt.wu.h a1, a0, rtz
; RV64IZHINX-NEXT: feq.h a0, a0, a0
; RV64IZHINX-NEXT: seqz a0, a0
-; RV64IZHINX-NEXT: addi a0, a0, -1
+; RV64IZHINX-NEXT: addiw a0, a0, -1
; RV64IZHINX-NEXT: and a0, a1, a0
; RV64IZHINX-NEXT: ret
;
@@ -457,7 +465,7 @@ define signext i32 @test_floor_ui32(half %x) {
; RV64IZFHMIN-NEXT: fcvt.wu.s a0, fa5, rtz
; RV64IZFHMIN-NEXT: feq.s a1, fa5, fa5
; RV64IZFHMIN-NEXT: seqz a1, a1
-; RV64IZFHMIN-NEXT: addi a1, a1, -1
+; RV64IZFHMIN-NEXT: addiw a1, a1, -1
; RV64IZFHMIN-NEXT: and a0, a0, a1
; RV64IZFHMIN-NEXT: ret
;
@@ -499,7 +507,7 @@ define signext i32 @test_floor_ui32(half %x) {
; RV64IZHINXMIN-NEXT: fcvt.wu.s a1, a0, rtz
; RV64IZHINXMIN-NEXT: feq.s a0, a0, a0
; RV64IZHINXMIN-NEXT: seqz a0, a0
-; RV64IZHINXMIN-NEXT: addi a0, a0, -1
+; RV64IZHINXMIN-NEXT: addiw a0, a0, -1
; RV64IZHINXMIN-NEXT: and a0, a1, a0
; RV64IZHINXMIN-NEXT: ret
%a = call half @llvm.floor.f16(half %x)
@@ -522,25 +530,24 @@ define i64 @test_floor_ui64(half %x) nounwind {
; RV32IZFH-NEXT: .LBB3_2:
; RV32IZFH-NEXT: addi sp, sp, -16
; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill
; RV32IZFH-NEXT: fcvt.s.h fs0, fa0
-; RV32IZFH-NEXT: fmv.w.x fa5, zero
-; RV32IZFH-NEXT: fle.s a0, fa5, fs0
-; RV32IZFH-NEXT: neg s0, a0
; RV32IZFH-NEXT: fmv.s fa0, fs0
; RV32IZFH-NEXT: call __fixunssfdi
-; RV32IZFH-NEXT: lui a2, %hi(.LCPI3_1)
-; RV32IZFH-NEXT: flw fa5, %lo(.LCPI3_1)(a2)
-; RV32IZFH-NEXT: and a0, s0, a0
-; RV32IZFH-NEXT: flt.s a2, fa5, fs0
-; RV32IZFH-NEXT: neg a2, a2
-; RV32IZFH-NEXT: or a0, a2, a0
-; RV32IZFH-NEXT: and a1, s0, a1
-; RV32IZFH-NEXT: or a1, a2, a1
+; RV32IZFH-NEXT: fmv.w.x fa5, zero
+; RV32IZFH-NEXT: fle.s a2, fa5, fs0
+; RV32IZFH-NEXT: lui a3, %hi(.LCPI3_1)
+; RV32IZFH-NEXT: flw fa5, %lo(.LCPI3_1)(a3)
+; RV32IZFH-NEXT: xori a2, a2, 1
+; RV32IZFH-NEXT: addi a2, a2, -1
+; RV32IZFH-NEXT: and a0, a2, a0
+; RV32IZFH-NEXT: flt.s a3, fa5, fs0
+; RV32IZFH-NEXT: neg a3, a3
+; RV32IZFH-NEXT: or a0, a3, a0
+; RV32IZFH-NEXT: and a1, a2, a1
+; RV32IZFH-NEXT: or a1, a3, a1
; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: addi sp, sp, 16
; RV32IZFH-NEXT: ret
;
@@ -568,23 +575,22 @@ define i64 @test_floor_ui64(half %x) nounwind {
; RV32IZHINX-NEXT: addi sp, sp, -16
; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZHINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZHINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32IZHINX-NEXT: fcvt.s.h s0, a0
-; RV32IZHINX-NEXT: fle.s a0, zero, s0
-; RV32IZHINX-NEXT: neg s1, a0
; RV32IZHINX-NEXT: mv a0, s0
; RV32IZHINX-NEXT: call __fixunssfdi
-; RV32IZHINX-NEXT: lui a2, %hi(.LCPI3_1)
-; RV32IZHINX-NEXT: lw a2, %lo(.LCPI3_1)(a2)
-; RV32IZHINX-NEXT: and a0, s1, a0
-; RV32IZHINX-NEXT: flt.s a2, a2, s0
-; RV32IZHINX-NEXT: neg a2, a2
-; RV32IZHINX-NEXT: or a0, a2, a0
-; RV32IZHINX-NEXT: and a1, s1, a1
-; RV32IZHINX-NEXT: or a1, a2, a1
+; RV32IZHINX-NEXT: fle.s a2, zero, s0
+; RV32IZHINX-NEXT: lui a3, %hi(.LCPI3_1)
+; RV32IZHINX-NEXT: lw a3, %lo(.LCPI3_1)(a3)
+; RV32IZHINX-NEXT: xori a2, a2, 1
+; RV32IZHINX-NEXT: addi a2, a2, -1
+; RV32IZHINX-NEXT: and a0, a2, a0
+; RV32IZHINX-NEXT: flt.s a3, a3, s0
+; RV32IZHINX-NEXT: neg a3, a3
+; RV32IZHINX-NEXT: or a0, a3, a0
+; RV32IZHINX-NEXT: and a1, a2, a1
+; RV32IZHINX-NEXT: or a1, a3, a1
; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZHINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32IZHINX-NEXT: addi sp, sp, 16
; RV32IZHINX-NEXT: ret
;
@@ -622,26 +628,25 @@ define i64 @test_floor_ui64(half %x) nounwind {
; RV32IZFHMIN-NEXT: .LBB3_2:
; RV32IZFHMIN-NEXT: addi sp, sp, -16
; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFHMIN-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill
; RV32IZFHMIN-NEXT: fcvt.h.s fa5, fa5
; RV32IZFHMIN-NEXT: fcvt.s.h fs0, fa5
-; RV32IZFHMIN-NEXT: fmv.w.x fa5, zero
-; RV32IZFHMIN-NEXT: fle.s a0, fa5, fs0
-; RV32IZFHMIN-NEXT: neg s0, a0
; RV32IZFHMIN-NEXT: fmv.s fa0, fs0
; RV32IZFHMIN-NEXT: call __fixunssfdi
-; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI3_0)
-; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI3_0)(a2)
-; RV32IZFHMIN-NEXT: and a0, s0, a0
-; RV32IZFHMIN-NEXT: flt.s a2, fa5, fs0
-; RV32IZFHMIN-NEXT: neg a2, a2
-; RV32IZFHMIN-NEXT: or a0, a2, a0
-; RV32IZFHMIN-NEXT: and a1, s0, a1
-; RV32IZFHMIN-NEXT: or a1, a2, a1
+; RV32IZFHMIN-NEXT: fmv.w.x fa5, zero
+; RV32IZFHMIN-NEXT: fle.s a2, fa5, fs0
+; RV32IZFHMIN-NEXT: lui a3, %hi(.LCPI3_0)
+; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI3_0)(a3)
+; RV32IZFHMIN-NEXT: xori a2, a2, 1
+; RV32IZFHMIN-NEXT: addi a2, a2, -1
+; RV32IZFHMIN-NEXT: and a0, a2, a0
+; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0
+; RV32IZFHMIN-NEXT: neg a3, a3
+; RV32IZFHMIN-NEXT: or a0, a3, a0
+; RV32IZFHMIN-NEXT: and a1, a2, a1
+; RV32IZFHMIN-NEXT: or a1, a3, a1
; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IZFHMIN-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload
; RV32IZFHMIN-NEXT: addi sp, sp, 16
; RV32IZFHMIN-NEXT: ret
;
@@ -682,24 +687,23 @@ define i64 @test_floor_ui64(half %x) nounwind {
; RV32IZHINXMIN-NEXT: addi sp, sp, -16
; RV32IZHINXMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZHINXMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZHINXMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32IZHINXMIN-NEXT: fcvt.h.s a0, a0
; RV32IZHINXMIN-NEXT: fcvt.s.h s0, a0
-; RV32IZHINXMIN-NEXT: fle.s a0, zero, s0
-; RV32IZHINXMIN-NEXT: neg s1, a0
; RV32IZHINXMIN-NEXT: mv a0, s0
; RV32IZHINXMIN-NEXT: call __fixunssfdi
-; RV32IZHINXMIN-NEXT: lui a2, %hi(.LCPI3_0)
-; RV32IZHINXMIN-NEXT: lw a2, %lo(.LCPI3_0)(a2)
-; RV32IZHINXMIN-NEXT: and a0, s1, a0
-; RV32IZHINXMIN-NEXT: flt.s a2, a2, s0
-; RV32IZHINXMIN-NEXT: neg a2, a2
-; RV32IZHINXMIN-NEXT: or a0, a2, a0
-; RV32IZHINXMIN-NEXT: and a1, s1, a1
-; RV32IZHINXMIN-NEXT: or a1, a2, a1
+; RV32IZHINXMIN-NEXT: fle.s a2, zero, s0
+; RV32IZHINXMIN-NEXT: lui a3, %hi(.LCPI3_0)
+; RV32IZHINXMIN-NEXT: lw a3, %lo(.LCPI3_0)(a3)
+; RV32IZHINXMIN-NEXT: xori a2, a2, 1
+; RV32IZHINXMIN-NEXT: addi a2, a2, -1
+; RV32IZHINXMIN-NEXT: and a0, a2, a0
+; RV32IZHINXMIN-NEXT: flt.s a3, a3, s0
+; RV32IZHINXMIN-NEXT: neg a3, a3
+; RV32IZHINXMIN-NEXT: or a0, a3, a0
+; RV32IZHINXMIN-NEXT: and a1, a2, a1
+; RV32IZHINXMIN-NEXT: or a1, a3, a1
; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZHINXMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IZHINXMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32IZHINXMIN-NEXT: addi sp, sp, 16
; RV32IZHINXMIN-NEXT: ret
;
@@ -820,38 +824,41 @@ define i64 @test_ceil_si64(half %x) nounwind {
; RV32IZFH-NEXT: addi sp, sp, -16
; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill
; RV32IZFH-NEXT: fcvt.s.h fs0, fa0
; RV32IZFH-NEXT: lui a0, 913408
; RV32IZFH-NEXT: fmv.w.x fa5, a0
; RV32IZFH-NEXT: fle.s s0, fa5, fs0
+; RV32IZFH-NEXT: neg s1, s0
; RV32IZFH-NEXT: fmv.s fa0, fs0
; RV32IZFH-NEXT: call __fixsfdi
+; RV32IZFH-NEXT: lui a2, %hi(.LCPI5_1)
+; RV32IZFH-NEXT: flw fa5, %lo(.LCPI5_1)(a2)
+; RV32IZFH-NEXT: and a0, s1, a0
+; RV32IZFH-NEXT: flt.s a3, fa5, fs0
+; RV32IZFH-NEXT: neg a2, a3
+; RV32IZFH-NEXT: or a0, a2, a0
+; RV32IZFH-NEXT: feq.s a2, fs0, fs0
+; RV32IZFH-NEXT: neg a2, a2
+; RV32IZFH-NEXT: lui a5, 524288
+; RV32IZFH-NEXT: li a6, 1
; RV32IZFH-NEXT: lui a4, 524288
-; RV32IZFH-NEXT: lui a2, 524288
-; RV32IZFH-NEXT: beqz s0, .LBB5_4
+; RV32IZFH-NEXT: bne s0, a6, .LBB5_4
; RV32IZFH-NEXT: # %bb.3:
-; RV32IZFH-NEXT: mv a2, a1
+; RV32IZFH-NEXT: mv a4, a1
; RV32IZFH-NEXT: .LBB5_4:
-; RV32IZFH-NEXT: lui a1, %hi(.LCPI5_1)
-; RV32IZFH-NEXT: flw fa5, %lo(.LCPI5_1)(a1)
-; RV32IZFH-NEXT: flt.s a3, fa5, fs0
-; RV32IZFH-NEXT: beqz a3, .LBB5_6
-; RV32IZFH-NEXT: # %bb.5:
-; RV32IZFH-NEXT: addi a2, a4, -1
-; RV32IZFH-NEXT: .LBB5_6:
-; RV32IZFH-NEXT: feq.s a1, fs0, fs0
-; RV32IZFH-NEXT: neg a4, a1
-; RV32IZFH-NEXT: and a1, a4, a2
-; RV32IZFH-NEXT: neg a2, s0
; RV32IZFH-NEXT: and a0, a2, a0
-; RV32IZFH-NEXT: neg a2, a3
-; RV32IZFH-NEXT: or a0, a2, a0
-; RV32IZFH-NEXT: and a0, a4, a0
; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: addi sp, sp, 16
+; RV32IZFH-NEXT: beqz a3, .LBB5_6
+; RV32IZFH-NEXT: # %bb.5:
+; RV32IZFH-NEXT: addi a4, a5, -1
+; RV32IZFH-NEXT: .LBB5_6:
+; RV32IZFH-NEXT: and a1, a2, a4
; RV32IZFH-NEXT: ret
;
; RV64IZFH-LABEL: test_ceil_si64:
@@ -889,16 +896,17 @@ define i64 @test_ceil_si64(half %x) nounwind {
; RV32IZHINX-NEXT: lui a2, %hi(.LCPI5_1)
; RV32IZHINX-NEXT: lw a2, %lo(.LCPI5_1)(a2)
; RV32IZHINX-NEXT: and a0, s2, a0
-; RV32IZHINX-NEXT: flt.s a4, a2, s0
-; RV32IZHINX-NEXT: neg a2, a4
+; RV32IZHINX-NEXT: flt.s a3, a2, s0
+; RV32IZHINX-NEXT: neg a2, a3
; RV32IZHINX-NEXT: or a0, a2, a0
; RV32IZHINX-NEXT: feq.s a2, s0, s0
; RV32IZHINX-NEXT: neg a2, a2
; RV32IZHINX-NEXT: lui a5, 524288
-; RV32IZHINX-NEXT: lui a3, 524288
-; RV32IZHINX-NEXT: beqz s1, .LBB5_4
+; RV32IZHINX-NEXT: li a6, 1
+; RV32IZHINX-NEXT: lui a4, 524288
+; RV32IZHINX-NEXT: bne s1, a6, .LBB5_4
; RV32IZHINX-NEXT: # %bb.3:
-; RV32IZHINX-NEXT: mv a3, a1
+; RV32IZHINX-NEXT: mv a4, a1
; RV32IZHINX-NEXT: .LBB5_4:
; RV32IZHINX-NEXT: and a0, a2, a0
; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
@@ -906,11 +914,11 @@ define i64 @test_ceil_si64(half %x) nounwind {
; RV32IZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32IZHINX-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
; RV32IZHINX-NEXT: addi sp, sp, 16
-; RV32IZHINX-NEXT: beqz a4, .LBB5_6
+; RV32IZHINX-NEXT: beqz a3, .LBB5_6
; RV32IZHINX-NEXT: # %bb.5:
-; RV32IZHINX-NEXT: addi a3, a5, -1
+; RV32IZHINX-NEXT: addi a4, a5, -1
; RV32IZHINX-NEXT: .LBB5_6:
-; RV32IZHINX-NEXT: and a1, a2, a3
+; RV32IZHINX-NEXT: and a1, a2, a4
; RV32IZHINX-NEXT: ret
;
; RV64IZHINX-LABEL: test_ceil_si64:
@@ -948,39 +956,42 @@ define i64 @test_ceil_si64(half %x) nounwind {
; RV32IZFHMIN-NEXT: addi sp, sp, -16
; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFHMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32IZFHMIN-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill
; RV32IZFHMIN-NEXT: fcvt.h.s fa5, fa5
; RV32IZFHMIN-NEXT: fcvt.s.h fs0, fa5
; RV32IZFHMIN-NEXT: lui a0, 913408
; RV32IZFHMIN-NEXT: fmv.w.x fa5, a0
; RV32IZFHMIN-NEXT: fle.s s0, fa5, fs0
+; RV32IZFHMIN-NEXT: neg s1, s0
; RV32IZFHMIN-NEXT: fmv.s fa0, fs0
; RV32IZFHMIN-NEXT: call __fixsfdi
+; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI5_0)
+; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI5_0)(a2)
+; RV32IZFHMIN-NEXT: and a0, s1, a0
+; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0
+; RV32IZFHMIN-NEXT: neg a2, a3
+; RV32IZFHMIN-NEXT: or a0, a2, a0
+; RV32IZFHMIN-NEXT: feq.s a2, fs0, fs0
+; RV32IZFHMIN-NEXT: neg a2, a2
+; RV32IZFHMIN-NEXT: lui a5, 524288
+; RV32IZFHMIN-NEXT: li a6, 1
; RV32IZFHMIN-NEXT: lui a4, 524288
-; RV32IZFHMIN-NEXT: lui a2, 524288
-; RV32IZFHMIN-NEXT: beqz s0, .LBB5_4
+; RV32IZFHMIN-NEXT: bne s0, a6, .LBB5_4
; RV32IZFHMIN-NEXT: # %bb.3:
-; RV32IZFHMIN-NEXT: mv a2, a1
+; RV32IZFHMIN-NEXT: mv a4, a1
; RV32IZFHMIN-NEXT: .LBB5_4:
-; RV32IZFHMIN-NEXT: lui a1, %hi(.LCPI5_0)
-; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI5_0)(a1)
-; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0
-; RV32IZFHMIN-NEXT: beqz a3, .LBB5_6
-; RV32IZFHMIN-NEXT: # %bb.5:
-; RV32IZFHMIN-NEXT: addi a2, a4, -1
-; RV32IZFHMIN-NEXT: .LBB5_6:
-; RV32IZFHMIN-NEXT: feq.s a1, fs0, fs0
-; RV32IZFHMIN-NEXT: neg a4, a1
-; RV32IZFHMIN-NEXT: and a1, a4, a2
-; RV32IZFHMIN-NEXT: neg a2, s0
; RV32IZFHMIN-NEXT: and a0, a2, a0
-; RV32IZFHMIN-NEXT: neg a2, a3
-; RV32IZFHMIN-NEXT: or a0, a2, a0
-; RV32IZFHMIN-NEXT: and a0, a4, a0
; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IZFHMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32IZFHMIN-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload
; RV32IZFHMIN-NEXT: addi sp, sp, 16
+; RV32IZFHMIN-NEXT: beqz a3, .LBB5_6
+; RV32IZFHMIN-NEXT: # %bb.5:
+; RV32IZFHMIN-NEXT: addi a4, a5, -1
+; RV32IZFHMIN-NEXT: .LBB5_6:
+; RV32IZFHMIN-NEXT: and a1, a2, a4
; RV32IZFHMIN-NEXT: ret
;
; RV64IZFHMIN-LABEL: test_ceil_si64:
@@ -1032,16 +1043,17 @@ define i64 @test_ceil_si64(half %x) nounwind {
; RV32IZHINXMIN-NEXT: lui a2, %hi(.LCPI5_0)
; RV32IZHINXMIN-NEXT: lw a2, %lo(.LCPI5_0)(a2)
; RV32IZHINXMIN-NEXT: and a0, s2, a0
-; RV32IZHINXMIN-NEXT: flt.s a4, a2, s0
-; RV32IZHINXMIN-NEXT: neg a2, a4
+; RV32IZHINXMIN-NEXT: flt.s a3, a2, s0
+; RV32IZHINXMIN-NEXT: neg a2, a3
; RV32IZHINXMIN-NEXT: or a0, a2, a0
; RV32IZHINXMIN-NEXT: feq.s a2, s0, s0
; RV32IZHINXMIN-NEXT: neg a2, a2
; RV32IZHINXMIN-NEXT: lui a5, 524288
-; RV32IZHINXMIN-NEXT: lui a3, 524288
-; RV32IZHINXMIN-NEXT: beqz s1, .LBB5_4
+; RV32IZHINXMIN-NEXT: li a6, 1
+; RV32IZHINXMIN-NEXT: lui a4, 524288
+; RV32IZHINXMIN-NEXT: bne s1, a6, .LBB5_4
; RV32IZHINXMIN-NEXT: # %bb.3:
-; RV32IZHINXMIN-NEXT: mv a3, a1
+; RV32IZHINXMIN-NEXT: mv a4, a1
; RV32IZHINXMIN-NEXT: .LBB5_4:
; RV32IZHINXMIN-NEXT: and a0, a2, a0
; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
@@ -1049,11 +1061,11 @@ define i64 @test_ceil_si64(half %x) nounwind {
; RV32IZHINXMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32IZHINXMIN-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
; RV32IZHINXMIN-NEXT: addi sp, sp, 16
-; RV32IZHINXMIN-NEXT: beqz a4, .LBB5_6
+; RV32IZHINXMIN-NEXT: beqz a3, .LBB5_6
; RV32IZHINXMIN-NEXT: # %bb.5:
-; RV32IZHINXMIN-NEXT: addi a3, a5, -1
+; RV32IZHINXMIN-NEXT: addi a4, a5, -1
; RV32IZHINXMIN-NEXT: .LBB5_6:
-; RV32IZHINXMIN-NEXT: and a1, a2, a3
+; RV32IZHINXMIN-NEXT: and a1, a2, a4
; RV32IZHINXMIN-NEXT: ret
;
; RV64IZHINXMIN-LABEL: test_ceil_si64:
@@ -1125,7 +1137,7 @@ define signext i32 @test_ceil_ui32(half %x) {
; RV64IZHINX-NEXT: fcvt.wu.h a1, a0, rtz
; RV64IZHINX-NEXT: feq.h a0, a0, a0
; RV64IZHINX-NEXT: seqz a0, a0
-; RV64IZHINX-NEXT: addi a0, a0, -1
+; RV64IZHINX-NEXT: addiw a0, a0, -1
; RV64IZHINX-NEXT: and a0, a1, a0
; RV64IZHINX-NEXT: ret
;
@@ -1169,7 +1181,7 @@ define signext i32 @test_ceil_ui32(half %x) {
; RV64IZFHMIN-NEXT: fcvt.wu.s a0, fa5, rtz
; RV64IZFHMIN-NEXT: feq.s a1, fa5, fa5
; RV64IZFHMIN-NEXT: seqz a1, a1
-; RV64IZFHMIN-NEXT: addi a1, a1, -1
+; RV64IZFHMIN-NEXT: addiw a1, a1, -1
; RV64IZFHMIN-NEXT: and a0, a0, a1
; RV64IZFHMIN-NEXT: ret
;
@@ -1211,7 +1223,7 @@ define signext i32 @test_ceil_ui32(half %x) {
; RV64IZHINXMIN-NEXT: fcvt.wu.s a1, a0, rtz
; RV64IZHINXMIN-NEXT: feq.s a0, a0, a0
; RV64IZHINXMIN-NEXT: seqz a0, a0
-; RV64IZHINXMIN-NEXT: addi a0, a0, -1
+; RV64IZHINXMIN-NEXT: addiw a0, a0, -1
; RV64IZHINXMIN-NEXT: and a0, a1, a0
; RV64IZHINXMIN-NEXT: ret
%a = call half @llvm.ceil.f16(half %x)
@@ -1234,25 +1246,24 @@ define i64 @test_ceil_ui64(half %x) nounwind {
; RV32IZFH-NEXT: .LBB7_2:
; RV32IZFH-NEXT: addi sp, sp, -16
; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill
; RV32IZFH-NEXT: fcvt.s.h fs0, fa0
-; RV32IZFH-NEXT: fmv.w.x fa5, zero
-; RV32IZFH-NEXT: fle.s a0, fa5, fs0
-; RV32IZFH-NEXT: neg s0, a0
; RV32IZFH-NEXT: fmv.s fa0, fs0
; RV32IZFH-NEXT: call __fixunssfdi
-; RV32IZFH-NEXT: lui a2, %hi(.LCPI7_1)
-; RV32IZFH-NEXT: flw fa5, %lo(.LCPI7_1)(a2)
-; RV32IZFH-NEXT: and a0, s0, a0
-; RV32IZFH-NEXT: flt.s a2, fa5, fs0
-; RV32IZFH-NEXT: neg a2, a2
-; RV32IZFH-NEXT: or a0, a2, a0
-; RV32IZFH-NEXT: and a1, s0, a1
-; RV32IZFH-NEXT: or a1, a2, a1
+; RV32IZFH-NEXT: fmv.w.x fa5, zero
+; RV32IZFH-NEXT: fle.s a2, fa5, fs0
+; RV32IZFH-NEXT: lui a3, %hi(.LCPI7_1)
+; RV32IZFH-NEXT: flw fa5, %lo(.LCPI7_1)(a3)
+; RV32IZFH-NEXT: xori a2, a2, 1
+; RV32IZFH-NEXT: addi a2, a2, -1
+; RV32IZFH-NEXT: and a0, a2, a0
+; RV32IZFH-NEXT: flt.s a3, fa5, fs0
+; RV32IZFH-NEXT: neg a3, a3
+; RV32IZFH-NEXT: or a0, a3, a0
+; RV32IZFH-NEXT: and a1, a2, a1
+; RV32IZFH-NEXT: or a1, a3, a1
; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: addi sp, sp, 16
; RV32IZFH-NEXT: ret
;
@@ -1280,23 +1291,22 @@ define i64 @test_ceil_ui64(half %x) nounwind {
; RV32IZHINX-NEXT: addi sp, sp, -16
; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZHINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZHINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32IZHINX-NEXT: fcvt.s.h s0, a0
-; RV32IZHINX-NEXT: fle.s a0, zero, s0
-; RV32IZHINX-NEXT: neg s1, a0
; RV32IZHINX-NEXT: mv a0, s0
; RV32IZHINX-NEXT: call __fixunssfdi
-; RV32IZHINX-NEXT: lui a2, %hi(.LCPI7_1)
-; RV32IZHINX-NEXT: lw a2, %lo(.LCPI7_1)(a2)
-; RV32IZHINX-NEXT: and a0, s1, a0
-; RV32IZHINX-NEXT: flt.s a2, a2, s0
-; RV32IZHINX-NEXT: neg a2, a2
-; RV32IZHINX-NEXT: or a0, a2, a0
-; RV32IZHINX-NEXT: and a1, s1, a1
-; RV32IZHINX-NEXT: or a1, a2, a1
+; RV32IZHINX-NEXT: fle.s a2, zero, s0
+; RV32IZHINX-NEXT: lui a3, %hi(.LCPI7_1)
+; RV32IZHINX-NEXT: lw a3, %lo(.LCPI7_1)(a3)
+; RV32IZHINX-NEXT: xori a2, a2, 1
+; RV32IZHINX-NEXT: addi a2, a2, -1
+; RV32IZHINX-NEXT: and a0, a2, a0
+; RV32IZHINX-NEXT: flt.s a3, a3, s0
+; RV32IZHINX-NEXT: neg a3, a3
+; RV32IZHINX-NEXT: or a0, a3, a0
+; RV32IZHINX-NEXT: and a1, a2, a1
+; RV32IZHINX-NEXT: or a1, a3, a1
; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZHINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32IZHINX-NEXT: addi sp, sp, 16
; RV32IZHINX-NEXT: ret
;
@@ -1334,26 +1344,25 @@ define i64 @test_ceil_ui64(half %x) nounwind {
; RV32IZFHMIN-NEXT: .LBB7_2:
; RV32IZFHMIN-NEXT: addi sp, sp, -16
; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFHMIN-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill
; RV32IZFHMIN-NEXT: fcvt.h.s fa5, fa5
; RV32IZFHMIN-NEXT: fcvt.s.h fs0, fa5
-; RV32IZFHMIN-NEXT: fmv.w.x fa5, zero
-; RV32IZFHMIN-NEXT: fle.s a0, fa5, fs0
-; RV32IZFHMIN-NEXT: neg s0, a0
; RV32IZFHMIN-NEXT: fmv.s fa0, fs0
; RV32IZFHMIN-NEXT: call __fixunssfdi
-; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI7_0)
-; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI7_0)(a2)
-; RV32IZFHMIN-NEXT: and a0, s0, a0
-; RV32IZFHMIN-NEXT: flt.s a2, fa5, fs0
-; RV32IZFHMIN-NEXT: neg a2, a2
-; RV32IZFHMIN-NEXT: or a0, a2, a0
-; RV32IZFHMIN-NEXT: and a1, s0, a1
-; RV32IZFHMIN-NEXT: or a1, a2, a1
+; RV32IZFHMIN-NEXT: fmv.w.x fa5, zero
+; RV32IZFHMIN-NEXT: fle.s a2, fa5, fs0
+; RV32IZFHMIN-NEXT: lui a3, %hi(.LCPI7_0)
+; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI7_0)(a3)
+; RV32IZFHMIN-NEXT: xori a2, a2, 1
+; RV32IZFHMIN-NEXT: addi a2, a2, -1
+; RV32IZFHMIN-NEXT: and a0, a2, a0
+; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0
+; RV32IZFHMIN-NEXT: neg a3, a3
+; RV32IZFHMIN-NEXT: or a0, a3, a0
+; RV32IZFHMIN-NEXT: and a1, a2, a1
+; RV32IZFHMIN-NEXT: or a1, a3, a1
; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IZFHMIN-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload
; RV32IZFHMIN-NEXT: addi sp, sp, 16
; RV32IZFHMIN-NEXT: ret
;
@@ -1394,24 +1403,23 @@ define i64 @test_ceil_ui64(half %x) nounwind {
; RV32IZHINXMIN-NEXT: addi sp, sp, -16
; RV32IZHINXMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZHINXMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZHINXMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32IZHINXMIN-NEXT: fcvt.h.s a0, a0
; RV32IZHINXMIN-NEXT: fcvt.s.h s0, a0
-; RV32IZHINXMIN-NEXT: fle.s a0, zero, s0
-; RV32IZHINXMIN-NEXT: neg s1, a0
; RV32IZHINXMIN-NEXT: mv a0, s0
; RV32IZHINXMIN-NEXT: call __fixunssfdi
-; RV32IZHINXMIN-NEXT: lui a2, %hi(.LCPI7_0)
-; RV32IZHINXMIN-NEXT: lw a2, %lo(.LCPI7_0)(a2)
-; RV32IZHINXMIN-NEXT: and a0, s1, a0
-; RV32IZHINXMIN-NEXT: flt.s a2, a2, s0
-; RV32IZHINXMIN-NEXT: neg a2, a2
-; RV32IZHINXMIN-NEXT: or a0, a2, a0
-; RV32IZHINXMIN-NEXT: and a1, s1, a1
-; RV32IZHINXMIN-NEXT: or a1, a2, a1
+; RV32IZHINXMIN-NEXT: fle.s a2, zero, s0
+; RV32IZHINXMIN-NEXT: lui a3, %hi(.LCPI7_0)
+; RV32IZHINXMIN-NEXT: lw a3, %lo(.LCPI7_0)(a3)
+; RV32IZHINXMIN-NEXT: xori a2, a2, 1
+; RV32IZHINXMIN-NEXT: addi a2, a2, -1
+; RV32IZHINXMIN-NEXT: and a0, a2, a0
+; RV32IZHINXMIN-NEXT: flt.s a3, a3, s0
+; RV32IZHINXMIN-NEXT: neg a3, a3
+; RV32IZHINXMIN-NEXT: or a0, a3, a0
+; RV32IZHINXMIN-NEXT: and a1, a2, a1
+; RV32IZHINXMIN-NEXT: or a1, a3, a1
; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZHINXMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IZHINXMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32IZHINXMIN-NEXT: addi sp, sp, 16
; RV32IZHINXMIN-NEXT: ret
;
@@ -1532,38 +1540,41 @@ define i64 @test_trunc_si64(half %x) nounwind {
; RV32IZFH-NEXT: addi sp, sp, -16
; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill
; RV32IZFH-NEXT: fcvt.s.h fs0, fa0
; RV32IZFH-NEXT: lui a0, 913408
; RV32IZFH-NEXT: fmv.w.x fa5, a0
; RV32IZFH-NEXT: fle.s s0, fa5, fs0
+; RV32IZFH-NEXT: neg s1, s0
; RV32IZFH-NEXT: fmv.s fa0, fs0
; RV32IZFH-NEXT: call __fixsfdi
+; RV32IZFH-NEXT: lui a2, %hi(.LCPI9_1)
+; RV32IZFH-NEXT: flw fa5, %lo(.LCPI9_1)(a2)
+; RV32IZFH-NEXT: and a0, s1, a0
+; RV32IZFH-NEXT: flt.s a3, fa5, fs0
+; RV32IZFH-NEXT: neg a2, a3
+; RV32IZFH-NEXT: or a0, a2, a0
+; RV32IZFH-NEXT: feq.s a2, fs0, fs0
+; RV32IZFH-NEXT: neg a2, a2
+; RV32IZFH-NEXT: lui a5, 524288
+; RV32IZFH-NEXT: li a6, 1
; RV32IZFH-NEXT: lui a4, 524288
-; RV32IZFH-NEXT: lui a2, 524288
-; RV32IZFH-NEXT: beqz s0, .LBB9_4
+; RV32IZFH-NEXT: bne s0, a6, .LBB9_4
; RV32IZFH-NEXT: # %bb.3:
-; RV32IZFH-NEXT: mv a2, a1
+; RV32IZFH-NEXT: mv a4, a1
; RV32IZFH-NEXT: .LBB9_4:
-; RV32IZFH-NEXT: lui a1, %hi(.LCPI9_1)
-; RV32IZFH-NEXT: flw fa5, %lo(.LCPI9_1)(a1)
-; RV32IZFH-NEXT: flt.s a3, fa5, fs0
-; RV32IZFH-NEXT: beqz a3, .LBB9_6
-; RV32IZFH-NEXT: # %bb.5:
-; RV32IZFH-NEXT: addi a2, a4, -1
-; RV32IZFH-NEXT: .LBB9_6:
-; RV32IZFH-NEXT: feq.s a1, fs0, fs0
-; RV32IZFH-NEXT: neg a4, a1
-; RV32IZFH-NEXT: and a1, a4, a2
-; RV32IZFH-NEXT: neg a2, s0
; RV32IZFH-NEXT: and a0, a2, a0
-; RV32IZFH-NEXT: neg a2, a3
-; RV32IZFH-NEXT: or a0, a2, a0
-; RV32IZFH-NEXT: and a0, a4, a0
; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: addi sp, sp, 16
+; RV32IZFH-NEXT: beqz a3, .LBB9_6
+; RV32IZFH-NEXT: # %bb.5:
+; RV32IZFH-NEXT: addi a4, a5, -1
+; RV32IZFH-NEXT: .LBB9_6:
+; RV32IZFH-NEXT: and a1, a2, a4
; RV32IZFH-NEXT: ret
;
; RV64IZFH-LABEL: test_trunc_si64:
@@ -1601,16 +1612,17 @@ define i64 @test_trunc_si64(half %x) nounwind {
; RV32IZHINX-NEXT: lui a2, %hi(.LCPI9_1)
; RV32IZHINX-NEXT: lw a2, %lo(.LCPI9_1)(a2)
; RV32IZHINX-NEXT: and a0, s2, a0
-; RV32IZHINX-NEXT: flt.s a4, a2, s0
-; RV32IZHINX-NEXT: neg a2, a4
+; RV32IZHINX-NEXT: flt.s a3, a2, s0
+; RV32IZHINX-NEXT: neg a2, a3
; RV32IZHINX-NEXT: or a0, a2, a0
; RV32IZHINX-NEXT: feq.s a2, s0, s0
; RV32IZHINX-NEXT: neg a2, a2
; RV32IZHINX-NEXT: lui a5, 524288
-; RV32IZHINX-NEXT: lui a3, 524288
-; RV32IZHINX-NEXT: beqz s1, .LBB9_4
+; RV32IZHINX-NEXT: li a6, 1
+; RV32IZHINX-NEXT: lui a4, 524288
+; RV32IZHINX-NEXT: bne s1, a6, .LBB9_4
; RV32IZHINX-NEXT: # %bb.3:
-; RV32IZHINX-NEXT: mv a3, a1
+; RV32IZHINX-NEXT: mv a4, a1
; RV32IZHINX-NEXT: .LBB9_4:
; RV32IZHINX-NEXT: and a0, a2, a0
; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
@@ -1618,11 +1630,11 @@ define i64 @test_trunc_si64(half %x) nounwind {
; RV32IZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32IZHINX-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
; RV32IZHINX-NEXT: addi sp, sp, 16
-; RV32IZHINX-NEXT: beqz a4, .LBB9_6
+; RV32IZHINX-NEXT: beqz a3, .LBB9_6
; RV32IZHINX-NEXT: # %bb.5:
-; RV32IZHINX-NEXT: addi a3, a5, -1
+; RV32IZHINX-NEXT: addi a4, a5, -1
; RV32IZHINX-NEXT: .LBB9_6:
-; RV32IZHINX-NEXT: and a1, a2, a3
+; RV32IZHINX-NEXT: and a1, a2, a4
; RV32IZHINX-NEXT: ret
;
; RV64IZHINX-LABEL: test_trunc_si64:
@@ -1660,39 +1672,42 @@ define i64 @test_trunc_si64(half %x) nounwind {
; RV32IZFHMIN-NEXT: addi sp, sp, -16
; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFHMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32IZFHMIN-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill
; RV32IZFHMIN-NEXT: fcvt.h.s fa5, fa5
; RV32IZFHMIN-NEXT: fcvt.s.h fs0, fa5
; RV32IZFHMIN-NEXT: lui a0, 913408
; RV32IZFHMIN-NEXT: fmv.w.x fa5, a0
; RV32IZFHMIN-NEXT: fle.s s0, fa5, fs0
+; RV32IZFHMIN-NEXT: neg s1, s0
; RV32IZFHMIN-NEXT: fmv.s fa0, fs0
; RV32IZFHMIN-NEXT: call __fixsfdi
+; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI9_0)
+; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI9_0)(a2)
+; RV32IZFHMIN-NEXT: and a0, s1, a0
+; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0
+; RV32IZFHMIN-NEXT: neg a2, a3
+; RV32IZFHMIN-NEXT: or a0, a2, a0
+; RV32IZFHMIN-NEXT: feq.s a2, fs0, fs0
+; RV32IZFHMIN-NEXT: neg a2, a2
+; RV32IZFHMIN-NEXT: lui a5, 524288
+; RV32IZFHMIN-NEXT: li a6, 1
; RV32IZFHMIN-NEXT: lui a4, 524288
-; RV32IZFHMIN-NEXT: lui a2, 524288
-; RV32IZFHMIN-NEXT: beqz s0, .LBB9_4
+; RV32IZFHMIN-NEXT: bne s0, a6, .LBB9_4
; RV32IZFHMIN-NEXT: # %bb.3:
-; RV32IZFHMIN-NEXT: mv a2, a1
+; RV32IZFHMIN-NEXT: mv a4, a1
; RV32IZFHMIN-NEXT: .LBB9_4:
-; RV32IZFHMIN-NEXT: lui a1, %hi(.LCPI9_0)
-; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI9_0)(a1)
-; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0
-; RV32IZFHMIN-NEXT: beqz a3, .LBB9_6
-; RV32IZFHMIN-NEXT: # %bb.5:
-; RV32IZFHMIN-NEXT: addi a2, a4, -1
-; RV32IZFHMIN-NEXT: .LBB9_6:
-; RV32IZFHMIN-NEXT: feq.s a1, fs0, fs0
-; RV32IZFHMIN-NEXT: neg a4, a1
-; RV32IZFHMIN-NEXT: and a1, a4, a2
-; RV32IZFHMIN-NEXT: neg a2, s0
; RV32IZFHMIN-NEXT: and a0, a2, a0
-; RV32IZFHMIN-NEXT: neg a2, a3
-; RV32IZFHMIN-NEXT: or a0, a2, a0
-; RV32IZFHMIN-NEXT: and a0, a4, a0
; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IZFHMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32IZFHMIN-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload
; RV32IZFHMIN-NEXT: addi sp, sp, 16
+; RV32IZFHMIN-NEXT: beqz a3, .LBB9_6
+; RV32IZFHMIN-NEXT: # %bb.5:
+; RV32IZFHMIN-NEXT: addi a4, a5, -1
+; RV32IZFHMIN-NEXT: .LBB9_6:
+; RV32IZFHMIN-NEXT: and a1, a2, a4
; RV32IZFHMIN-NEXT: ret
;
; RV64IZFHMIN-LABEL: test_trunc_si64:
@@ -1744,16 +1759,17 @@ define i64 @test_trunc_si64(half %x) nounwind {
; RV32IZHINXMIN-NEXT: lui a2, %hi(.LCPI9_0)
; RV32IZHINXMIN-NEXT: lw a2, %lo(.LCPI9_0)(a2)
; RV32IZHINXMIN-NEXT: and a0, s2, a0
-; RV32IZHINXMIN-NEXT: flt.s a4, a2, s0
-; RV32IZHINXMIN-NEXT: neg a2, a4
+; RV32IZHINXMIN-NEXT: flt.s a3, a2, s0
+; RV32IZHINXMIN-NEXT: neg a2, a3
; RV32IZHINXMIN-NEXT: or a0, a2, a0
; RV32IZHINXMIN-NEXT: feq.s a2, s0, s0
; RV32IZHINXMIN-NEXT: neg a2, a2
; RV32IZHINXMIN-NEXT: lui a5, 524288
-; RV32IZHINXMIN-NEXT: lui a3, 524288
-; RV32IZHINXMIN-NEXT: beqz s1, .LBB9_4
+; RV32IZHINXMIN-NEXT: li a6, 1
+; RV32IZHINXMIN-NEXT: lui a4, 524288
+; RV32IZHINXMIN-NEXT: bne s1, a6, .LBB9_4
; RV32IZHINXMIN-NEXT: # %bb.3:
-; RV32IZHINXMIN-NEXT: mv a3, a1
+; RV32IZHINXMIN-NEXT: mv a4, a1
; RV32IZHINXMIN-NEXT: .LBB9_4:
; RV32IZHINXMIN-NEXT: and a0, a2, a0
; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
@@ -1761,11 +1777,11 @@ define i64 @test_trunc_si64(half %x) nounwind {
; RV32IZHINXMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32IZHINXMIN-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
; RV32IZHINXMIN-NEXT: addi sp, sp, 16
-; RV32IZHINXMIN-NEXT: beqz a4, .LBB9_6
+; RV32IZHINXMIN-NEXT: beqz a3, .LBB9_6
; RV32IZHINXMIN-NEXT: # %bb.5:
-; RV32IZHINXMIN-NEXT: addi a3, a5, -1
+; RV32IZHINXMIN-NEXT: addi a4, a5, -1
; RV32IZHINXMIN-NEXT: .LBB9_6:
-; RV32IZHINXMIN-NEXT: and a1, a2, a3
+; RV32IZHINXMIN-NEXT: and a1, a2, a4
; RV32IZHINXMIN-NEXT: ret
;
; RV64IZHINXMIN-LABEL: test_trunc_si64:
@@ -1837,7 +1853,7 @@ define signext i32 @test_trunc_ui32(half %x) {
; RV64IZHINX-NEXT: fcvt.wu.h a1, a0, rtz
; RV64IZHINX-NEXT: feq.h a0, a0, a0
; RV64IZHINX-NEXT: seqz a0, a0
-; RV64IZHINX-NEXT: addi a0, a0, -1
+; RV64IZHINX-NEXT: addiw a0, a0, -1
; RV64IZHINX-NEXT: and a0, a1, a0
; RV64IZHINX-NEXT: ret
;
@@ -1881,7 +1897,7 @@ define signext i32 @test_trunc_ui32(half %x) {
; RV64IZFHMIN-NEXT: fcvt.wu.s a0, fa5, rtz
; RV64IZFHMIN-NEXT: feq.s a1, fa5, fa5
; RV64IZFHMIN-NEXT: seqz a1, a1
-; RV64IZFHMIN-NEXT: addi a1, a1, -1
+; RV64IZFHMIN-NEXT: addiw a1, a1, -1
; RV64IZFHMIN-NEXT: and a0, a0, a1
; RV64IZFHMIN-NEXT: ret
;
@@ -1923,7 +1939,7 @@ define signext i32 @test_trunc_ui32(half %x) {
; RV64IZHINXMIN-NEXT: fcvt.wu.s a1, a0, rtz
; RV64IZHINXMIN-NEXT: feq.s a0, a0, a0
; RV64IZHINXMIN-NEXT: seqz a0, a0
-; RV64IZHINXMIN-NEXT: addi a0, a0, -1
+; RV64IZHINXMIN-NEXT: addiw a0, a0, -1
; RV64IZHINXMIN-NEXT: and a0, a1, a0
; RV64IZHINXMIN-NEXT: ret
%a = call half @llvm.trunc.f16(half %x)
@@ -1946,25 +1962,24 @@ define i64 @test_trunc_ui64(half %x) nounwind {
; RV32IZFH-NEXT: .LBB11_2:
; RV32IZFH-NEXT: addi sp, sp, -16
; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill
; RV32IZFH-NEXT: fcvt.s.h fs0, fa0
-; RV32IZFH-NEXT: fmv.w.x fa5, zero
-; RV32IZFH-NEXT: fle.s a0, fa5, fs0
-; RV32IZFH-NEXT: neg s0, a0
; RV32IZFH-NEXT: fmv.s fa0, fs0
; RV32IZFH-NEXT: call __fixunssfdi
-; RV32IZFH-NEXT: lui a2, %hi(.LCPI11_1)
-; RV32IZFH-NEXT: flw fa5, %lo(.LCPI11_1)(a2)
-; RV32IZFH-NEXT: and a0, s0, a0
-; RV32IZFH-NEXT: flt.s a2, fa5, fs0
-; RV32IZFH-NEXT: neg a2, a2
-; RV32IZFH-NEXT: or a0, a2, a0
-; RV32IZFH-NEXT: and a1, s0, a1
-; RV32IZFH-NEXT: or a1, a2, a1
+; RV32IZFH-NEXT: fmv.w.x fa5, zero
+; RV32IZFH-NEXT: fle.s a2, fa5, fs0
+; RV32IZFH-NEXT: lui a3, %hi(.LCPI11_1)
+; RV32IZFH-NEXT: flw fa5, %lo(.LCPI11_1)(a3)
+; RV32IZFH-NEXT: xori a2, a2, 1
+; RV32IZFH-NEXT: addi a2, a2, -1
+; RV32IZFH-NEXT: and a0, a2, a0
+; RV32IZFH-NEXT: flt.s a3, fa5, fs0
+; RV32IZFH-NEXT: neg a3, a3
+; RV32IZFH-NEXT: or a0, a3, a0
+; RV32IZFH-NEXT: and a1, a2, a1
+; RV32IZFH-NEXT: or a1, a3, a1
; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: addi sp, sp, 16
; RV32IZFH-NEXT: ret
;
@@ -1992,23 +2007,22 @@ define i64 @test_trunc_ui64(half %x) nounwind {
; RV32IZHINX-NEXT: addi sp, sp, -16
; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZHINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZHINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32IZHINX-NEXT: fcvt.s.h s0, a0
-; RV32IZHINX-NEXT: fle.s a0, zero, s0
-; RV32IZHINX-NEXT: neg s1, a0
; RV32IZHINX-NEXT: mv a0, s0
; RV32IZHINX-NEXT: call __fixunssfdi
-; RV32IZHINX-NEXT: lui a2, %hi(.LCPI11_1)
-; RV32IZHINX-NEXT: lw a2, %lo(.LCPI11_1)(a2)
-; RV32IZHINX-NEXT: and a0, s1, a0
-; RV32IZHINX-NEXT: flt.s a2, a2, s0
-; RV32IZHINX-NEXT: neg a2, a2
-; RV32IZHINX-NEXT: or a0, a2, a0
-; RV32IZHINX-NEXT: and a1, s1, a1
-; RV32IZHINX-NEXT: or a1, a2, a1
+; RV32IZHINX-NEXT: fle.s a2, zero, s0
+; RV32IZHINX-NEXT: lui a3, %hi(.LCPI11_1)
+; RV32IZHINX-NEXT: lw a3, %lo(.LCPI11_1)(a3)
+; RV32IZHINX-NEXT: xori a2, a2, 1
+; RV32IZHINX-NEXT: addi a2, a2, -1
+; RV32IZHINX-NEXT: and a0, a2, a0
+; RV32IZHINX-NEXT: flt.s a3, a3, s0
+; RV32IZHINX-NEXT: neg a3, a3
+; RV32IZHINX-NEXT: or a0, a3, a0
+; RV32IZHINX-NEXT: and a1, a2, a1
+; RV32IZHINX-NEXT: or a1, a3, a1
; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZHINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32IZHINX-NEXT: addi sp, sp, 16
; RV32IZHINX-NEXT: ret
;
@@ -2046,26 +2060,25 @@ define i64 @test_trunc_ui64(half %x) nounwind {
; RV32IZFHMIN-NEXT: .LBB11_2:
; RV32IZFHMIN-NEXT: addi sp, sp, -16
; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFHMIN-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill
; RV32IZFHMIN-NEXT: fcvt.h.s fa5, fa5
; RV32IZFHMIN-NEXT: fcvt.s.h fs0, fa5
-; RV32IZFHMIN-NEXT: fmv.w.x fa5, zero
-; RV32IZFHMIN-NEXT: fle.s a0, fa5, fs0
-; RV32IZFHMIN-NEXT: neg s0, a0
; RV32IZFHMIN-NEXT: fmv.s fa0, fs0
; RV32IZFHMIN-NEXT: call __fixunssfdi
-; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI11_0)
-; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI11_0)(a2)
-; RV32IZFHMIN-NEXT: and a0, s0, a0
-; RV32IZFHMIN-NEXT: flt.s a2, fa5, fs0
-; RV32IZFHMIN-NEXT: neg a2, a2
-; RV32IZFHMIN-NEXT: or a0, a2, a0
-; RV32IZFHMIN-NEXT: and a1, s0, a1
-; RV32IZFHMIN-NEXT: or a1, a2, a1
+; RV32IZFHMIN-NEXT: fmv.w.x fa5, zero
+; RV32IZFHMIN-NEXT: fle.s a2, fa5, fs0
+; RV32IZFHMIN-NEXT: lui a3, %hi(.LCPI11_0)
+; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI11_0)(a3)
+; RV32IZFHMIN-NEXT: xori a2, a2, 1
+; RV32IZFHMIN-NEXT: addi a2, a2, -1
+; RV32IZFHMIN-NEXT: and a0, a2, a0
+; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0
+; RV32IZFHMIN-NEXT: neg a3, a3
+; RV32IZFHMIN-NEXT: or a0, a3, a0
+; RV32IZFHMIN-NEXT: and a1, a2, a1
+; RV32IZFHMIN-NEXT: or a1, a3, a1
; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IZFHMIN-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload
; RV32IZFHMIN-NEXT: addi sp, sp, 16
; RV32IZFHMIN-NEXT: ret
;
@@ -2106,24 +2119,23 @@ define i64 @test_trunc_ui64(half %x) nounwind {
; RV32IZHINXMIN-NEXT: addi sp, sp, -16
; RV32IZHINXMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZHINXMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZHINXMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32IZHINXMIN-NEXT: fcvt.h.s a0, a0
; RV32IZHINXMIN-NEXT: fcvt.s.h s0, a0
-; RV32IZHINXMIN-NEXT: fle.s a0, zero, s0
-; RV32IZHINXMIN-NEXT: neg s1, a0
; RV32IZHINXMIN-NEXT: mv a0, s0
; RV32IZHINXMIN-NEXT: call __fixunssfdi
-; RV32IZHINXMIN-NEXT: lui a2, %hi(.LCPI11_0)
-; RV32IZHINXMIN-NEXT: lw a2, %lo(.LCPI11_0)(a2)
-; RV32IZHINXMIN-NEXT: and a0, s1, a0
-; RV32IZHINXMIN-NEXT: flt.s a2, a2, s0
-; RV32IZHINXMIN-NEXT: neg a2, a2
-; RV32IZHINXMIN-NEXT: or a0, a2, a0
-; RV32IZHINXMIN-NEXT: and a1, s1, a1
-; RV32IZHINXMIN-NEXT: or a1, a2, a1
+; RV32IZHINXMIN-NEXT: fle.s a2, zero, s0
+; RV32IZHINXMIN-NEXT: lui a3, %hi(.LCPI11_0)
+; RV32IZHINXMIN-NEXT: lw a3, %lo(.LCPI11_0)(a3)
+; RV32IZHINXMIN-NEXT: xori a2, a2, 1
+; RV32IZHINXMIN-NEXT: addi a2, a2, -1
+; RV32IZHINXMIN-NEXT: and a0, a2, a0
+; RV32IZHINXMIN-NEXT: flt.s a3, a3, s0
+; RV32IZHINXMIN-NEXT: neg a3, a3
+; RV32IZHINXMIN-NEXT: or a0, a3, a0
+; RV32IZHINXMIN-NEXT: and a1, a2, a1
+; RV32IZHINXMIN-NEXT: or a1, a3, a1
; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZHINXMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IZHINXMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32IZHINXMIN-NEXT: addi sp, sp, 16
; RV32IZHINXMIN-NEXT: ret
;
@@ -2244,38 +2256,41 @@ define i64 @test_round_si64(half %x) nounwind {
; RV32IZFH-NEXT: addi sp, sp, -16
; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill
; RV32IZFH-NEXT: fcvt.s.h fs0, fa0
; RV32IZFH-NEXT: lui a0, 913408
; RV32IZFH-NEXT: fmv.w.x fa5, a0
; RV32IZFH-NEXT: fle.s s0, fa5, fs0
+; RV32IZFH-NEXT: neg s1, s0
; RV32IZFH-NEXT: fmv.s fa0, fs0
; RV32IZFH-NEXT: call __fixsfdi
+; RV32IZFH-NEXT: lui a2, %hi(.LCPI13_1)
+; RV32IZFH-NEXT: flw fa5, %lo(.LCPI13_1)(a2)
+; RV32IZFH-NEXT: and a0, s1, a0
+; RV32IZFH-NEXT: flt.s a3, fa5, fs0
+; RV32IZFH-NEXT: neg a2, a3
+; RV32IZFH-NEXT: or a0, a2, a0
+; RV32IZFH-NEXT: feq.s a2, fs0, fs0
+; RV32IZFH-NEXT: neg a2, a2
+; RV32IZFH-NEXT: lui a5, 524288
+; RV32IZFH-NEXT: li a6, 1
; RV32IZFH-NEXT: lui a4, 524288
-; RV32IZFH-NEXT: lui a2, 524288
-; RV32IZFH-NEXT: beqz s0, .LBB13_4
+; RV32IZFH-NEXT: bne s0, a6, .LBB13_4
; RV32IZFH-NEXT: # %bb.3:
-; RV32IZFH-NEXT: mv a2, a1
+; RV32IZFH-NEXT: mv a4, a1
; RV32IZFH-NEXT: .LBB13_4:
-; RV32IZFH-NEXT: lui a1, %hi(.LCPI13_1)
-; RV32IZFH-NEXT: flw fa5, %lo(.LCPI13_1)(a1)
-; RV32IZFH-NEXT: flt.s a3, fa5, fs0
-; RV32IZFH-NEXT: beqz a3, .LBB13_6
-; RV32IZFH-NEXT: # %bb.5:
-; RV32IZFH-NEXT: addi a2, a4, -1
-; RV32IZFH-NEXT: .LBB13_6:
-; RV32IZFH-NEXT: feq.s a1, fs0, fs0
-; RV32IZFH-NEXT: neg a4, a1
-; RV32IZFH-NEXT: and a1, a4, a2
-; RV32IZFH-NEXT: neg a2, s0
; RV32IZFH-NEXT: and a0, a2, a0
-; RV32IZFH-NEXT: neg a2, a3
-; RV32IZFH-NEXT: or a0, a2, a0
-; RV32IZFH-NEXT: and a0, a4, a0
; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: addi sp, sp, 16
+; RV32IZFH-NEXT: beqz a3, .LBB13_6
+; RV32IZFH-NEXT: # %bb.5:
+; RV32IZFH-NEXT: addi a4, a5, -1
+; RV32IZFH-NEXT: .LBB13_6:
+; RV32IZFH-NEXT: and a1, a2, a4
; RV32IZFH-NEXT: ret
;
; RV64IZFH-LABEL: test_round_si64:
@@ -2313,16 +2328,17 @@ define i64 @test_round_si64(half %x) nounwind {
; RV32IZHINX-NEXT: lui a2, %hi(.LCPI13_1)
; RV32IZHINX-NEXT: lw a2, %lo(.LCPI13_1)(a2)
; RV32IZHINX-NEXT: and a0, s2, a0
-; RV32IZHINX-NEXT: flt.s a4, a2, s0
-; RV32IZHINX-NEXT: neg a2, a4
+; RV32IZHINX-NEXT: flt.s a3, a2, s0
+; RV32IZHINX-NEXT: neg a2, a3
; RV32IZHINX-NEXT: or a0, a2, a0
; RV32IZHINX-NEXT: feq.s a2, s0, s0
; RV32IZHINX-NEXT: neg a2, a2
; RV32IZHINX-NEXT: lui a5, 524288
-; RV32IZHINX-NEXT: lui a3, 524288
-; RV32IZHINX-NEXT: beqz s1, .LBB13_4
+; RV32IZHINX-NEXT: li a6, 1
+; RV32IZHINX-NEXT: lui a4, 524288
+; RV32IZHINX-NEXT: bne s1, a6, .LBB13_4
; RV32IZHINX-NEXT: # %bb.3:
-; RV32IZHINX-NEXT: mv a3, a1
+; RV32IZHINX-NEXT: mv a4, a1
; RV32IZHINX-NEXT: .LBB13_4:
; RV32IZHINX-NEXT: and a0, a2, a0
; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
@@ -2330,11 +2346,11 @@ define i64 @test_round_si64(half %x) nounwind {
; RV32IZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32IZHINX-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
; RV32IZHINX-NEXT: addi sp, sp, 16
-; RV32IZHINX-NEXT: beqz a4, .LBB13_6
+; RV32IZHINX-NEXT: beqz a3, .LBB13_6
; RV32IZHINX-NEXT: # %bb.5:
-; RV32IZHINX-NEXT: addi a3, a5, -1
+; RV32IZHINX-NEXT: addi a4, a5, -1
; RV32IZHINX-NEXT: .LBB13_6:
-; RV32IZHINX-NEXT: and a1, a2, a3
+; RV32IZHINX-NEXT: and a1, a2, a4
; RV32IZHINX-NEXT: ret
;
; RV64IZHINX-LABEL: test_round_si64:
@@ -2372,39 +2388,42 @@ define i64 @test_round_si64(half %x) nounwind {
; RV32IZFHMIN-NEXT: addi sp, sp, -16
; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFHMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32IZFHMIN-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill
; RV32IZFHMIN-NEXT: fcvt.h.s fa5, fa5
; RV32IZFHMIN-NEXT: fcvt.s.h fs0, fa5
; RV32IZFHMIN-NEXT: lui a0, 913408
; RV32IZFHMIN-NEXT: fmv.w.x fa5, a0
; RV32IZFHMIN-NEXT: fle.s s0, fa5, fs0
+; RV32IZFHMIN-NEXT: neg s1, s0
; RV32IZFHMIN-NEXT: fmv.s fa0, fs0
; RV32IZFHMIN-NEXT: call __fixsfdi
+; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI13_0)
+; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI13_0)(a2)
+; RV32IZFHMIN-NEXT: and a0, s1, a0
+; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0
+; RV32IZFHMIN-NEXT: neg a2, a3
+; RV32IZFHMIN-NEXT: or a0, a2, a0
+; RV32IZFHMIN-NEXT: feq.s a2, fs0, fs0
+; RV32IZFHMIN-NEXT: neg a2, a2
+; RV32IZFHMIN-NEXT: lui a5, 524288
+; RV32IZFHMIN-NEXT: li a6, 1
; RV32IZFHMIN-NEXT: lui a4, 524288
-; RV32IZFHMIN-NEXT: lui a2, 524288
-; RV32IZFHMIN-NEXT: beqz s0, .LBB13_4
+; RV32IZFHMIN-NEXT: bne s0, a6, .LBB13_4
; RV32IZFHMIN-NEXT: # %bb.3:
-; RV32IZFHMIN-NEXT: mv a2, a1
+; RV32IZFHMIN-NEXT: mv a4, a1
; RV32IZFHMIN-NEXT: .LBB13_4:
-; RV32IZFHMIN-NEXT: lui a1, %hi(.LCPI13_0)
-; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI13_0)(a1)
-; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0
-; RV32IZFHMIN-NEXT: beqz a3, .LBB13_6
-; RV32IZFHMIN-NEXT: # %bb.5:
-; RV32IZFHMIN-NEXT: addi a2, a4, -1
-; RV32IZFHMIN-NEXT: .LBB13_6:
-; RV32IZFHMIN-NEXT: feq.s a1, fs0, fs0
-; RV32IZFHMIN-NEXT: neg a4, a1
-; RV32IZFHMIN-NEXT: and a1, a4, a2
-; RV32IZFHMIN-NEXT: neg a2, s0
; RV32IZFHMIN-NEXT: and a0, a2, a0
-; RV32IZFHMIN-NEXT: neg a2, a3
-; RV32IZFHMIN-NEXT: or a0, a2, a0
-; RV32IZFHMIN-NEXT: and a0, a4, a0
; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IZFHMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32IZFHMIN-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload
; RV32IZFHMIN-NEXT: addi sp, sp, 16
+; RV32IZFHMIN-NEXT: beqz a3, .LBB13_6
+; RV32IZFHMIN-NEXT: # %bb.5:
+; RV32IZFHMIN-NEXT: addi a4, a5, -1
+; RV32IZFHMIN-NEXT: .LBB13_6:
+; RV32IZFHMIN-NEXT: and a1, a2, a4
; RV32IZFHMIN-NEXT: ret
;
; RV64IZFHMIN-LABEL: test_round_si64:
@@ -2456,16 +2475,17 @@ define i64 @test_round_si64(half %x) nounwind {
; RV32IZHINXMIN-NEXT: lui a2, %hi(.LCPI13_0)
; RV32IZHINXMIN-NEXT: lw a2, %lo(.LCPI13_0)(a2)
; RV32IZHINXMIN-NEXT: and a0, s2, a0
-; RV32IZHINXMIN-NEXT: flt.s a4, a2, s0
-; RV32IZHINXMIN-NEXT: neg a2, a4
+; RV32IZHINXMIN-NEXT: flt.s a3, a2, s0
+; RV32IZHINXMIN-NEXT: neg a2, a3
; RV32IZHINXMIN-NEXT: or a0, a2, a0
; RV32IZHINXMIN-NEXT: feq.s a2, s0, s0
; RV32IZHINXMIN-NEXT: neg a2, a2
; RV32IZHINXMIN-NEXT: lui a5, 524288
-; RV32IZHINXMIN-NEXT: lui a3, 524288
-; RV32IZHINXMIN-NEXT: beqz s1, .LBB13_4
+; RV32IZHINXMIN-NEXT: li a6, 1
+; RV32IZHINXMIN-NEXT: lui a4, 524288
+; RV32IZHINXMIN-NEXT: bne s1, a6, .LBB13_4
; RV32IZHINXMIN-NEXT: # %bb.3:
-; RV32IZHINXMIN-NEXT: mv a3, a1
+; RV32IZHINXMIN-NEXT: mv a4, a1
; RV32IZHINXMIN-NEXT: .LBB13_4:
; RV32IZHINXMIN-NEXT: and a0, a2, a0
; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
@@ -2473,11 +2493,11 @@ define i64 @test_round_si64(half %x) nounwind {
; RV32IZHINXMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32IZHINXMIN-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
; RV32IZHINXMIN-NEXT: addi sp, sp, 16
-; RV32IZHINXMIN-NEXT: beqz a4, .LBB13_6
+; RV32IZHINXMIN-NEXT: beqz a3, .LBB13_6
; RV32IZHINXMIN-NEXT: # %bb.5:
-; RV32IZHINXMIN-NEXT: addi a3, a5, -1
+; RV32IZHINXMIN-NEXT: addi a4, a5, -1
; RV32IZHINXMIN-NEXT: .LBB13_6:
-; RV32IZHINXMIN-NEXT: and a1, a2, a3
+; RV32IZHINXMIN-NEXT: and a1, a2, a4
; RV32IZHINXMIN-NEXT: ret
;
; RV64IZHINXMIN-LABEL: test_round_si64:
@@ -2549,7 +2569,7 @@ define signext i32 @test_round_ui32(half %x) {
; RV64IZHINX-NEXT: fcvt.wu.h a1, a0, rtz
; RV64IZHINX-NEXT: feq.h a0, a0, a0
; RV64IZHINX-NEXT: seqz a0, a0
-; RV64IZHINX-NEXT: addi a0, a0, -1
+; RV64IZHINX-NEXT: addiw a0, a0, -1
; RV64IZHINX-NEXT: and a0, a1, a0
; RV64IZHINX-NEXT: ret
;
@@ -2593,7 +2613,7 @@ define signext i32 @test_round_ui32(half %x) {
; RV64IZFHMIN-NEXT: fcvt.wu.s a0, fa5, rtz
; RV64IZFHMIN-NEXT: feq.s a1, fa5, fa5
; RV64IZFHMIN-NEXT: seqz a1, a1
-; RV64IZFHMIN-NEXT: addi a1, a1, -1
+; RV64IZFHMIN-NEXT: addiw a1, a1, -1
; RV64IZFHMIN-NEXT: and a0, a0, a1
; RV64IZFHMIN-NEXT: ret
;
@@ -2635,7 +2655,7 @@ define signext i32 @test_round_ui32(half %x) {
; RV64IZHINXMIN-NEXT: fcvt.wu.s a1, a0, rtz
; RV64IZHINXMIN-NEXT: feq.s a0, a0, a0
; RV64IZHINXMIN-NEXT: seqz a0, a0
-; RV64IZHINXMIN-NEXT: addi a0, a0, -1
+; RV64IZHINXMIN-NEXT: addiw a0, a0, -1
; RV64IZHINXMIN-NEXT: and a0, a1, a0
; RV64IZHINXMIN-NEXT: ret
%a = call half @llvm.round.f16(half %x)
@@ -2658,25 +2678,24 @@ define i64 @test_round_ui64(half %x) nounwind {
; RV32IZFH-NEXT: .LBB15_2:
; RV32IZFH-NEXT: addi sp, sp, -16
; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill
; RV32IZFH-NEXT: fcvt.s.h fs0, fa0
-; RV32IZFH-NEXT: fmv.w.x fa5, zero
-; RV32IZFH-NEXT: fle.s a0, fa5, fs0
-; RV32IZFH-NEXT: neg s0, a0
; RV32IZFH-NEXT: fmv.s fa0, fs0
; RV32IZFH-NEXT: call __fixunssfdi
-; RV32IZFH-NEXT: lui a2, %hi(.LCPI15_1)
-; RV32IZFH-NEXT: flw fa5, %lo(.LCPI15_1)(a2)
-; RV32IZFH-NEXT: and a0, s0, a0
-; RV32IZFH-NEXT: flt.s a2, fa5, fs0
-; RV32IZFH-NEXT: neg a2, a2
-; RV32IZFH-NEXT: or a0, a2, a0
-; RV32IZFH-NEXT: and a1, s0, a1
-; RV32IZFH-NEXT: or a1, a2, a1
+; RV32IZFH-NEXT: fmv.w.x fa5, zero
+; RV32IZFH-NEXT: fle.s a2, fa5, fs0
+; RV32IZFH-NEXT: lui a3, %hi(.LCPI15_1)
+; RV32IZFH-NEXT: flw fa5, %lo(.LCPI15_1)(a3)
+; RV32IZFH-NEXT: xori a2, a2, 1
+; RV32IZFH-NEXT: addi a2, a2, -1
+; RV32IZFH-NEXT: and a0, a2, a0
+; RV32IZFH-NEXT: flt.s a3, fa5, fs0
+; RV32IZFH-NEXT: neg a3, a3
+; RV32IZFH-NEXT: or a0, a3, a0
+; RV32IZFH-NEXT: and a1, a2, a1
+; RV32IZFH-NEXT: or a1, a3, a1
; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: addi sp, sp, 16
; RV32IZFH-NEXT: ret
;
@@ -2704,23 +2723,22 @@ define i64 @test_round_ui64(half %x) nounwind {
; RV32IZHINX-NEXT: addi sp, sp, -16
; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZHINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZHINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32IZHINX-NEXT: fcvt.s.h s0, a0
-; RV32IZHINX-NEXT: fle.s a0, zero, s0
-; RV32IZHINX-NEXT: neg s1, a0
; RV32IZHINX-NEXT: mv a0, s0
; RV32IZHINX-NEXT: call __fixunssfdi
-; RV32IZHINX-NEXT: lui a2, %hi(.LCPI15_1)
-; RV32IZHINX-NEXT: lw a2, %lo(.LCPI15_1)(a2)
-; RV32IZHINX-NEXT: and a0, s1, a0
-; RV32IZHINX-NEXT: flt.s a2, a2, s0
-; RV32IZHINX-NEXT: neg a2, a2
-; RV32IZHINX-NEXT: or a0, a2, a0
-; RV32IZHINX-NEXT: and a1, s1, a1
-; RV32IZHINX-NEXT: or a1, a2, a1
+; RV32IZHINX-NEXT: fle.s a2, zero, s0
+; RV32IZHINX-NEXT: lui a3, %hi(.LCPI15_1)
+; RV32IZHINX-NEXT: lw a3, %lo(.LCPI15_1)(a3)
+; RV32IZHINX-NEXT: xori a2, a2, 1
+; RV32IZHINX-NEXT: addi a2, a2, -1
+; RV32IZHINX-NEXT: and a0, a2, a0
+; RV32IZHINX-NEXT: flt.s a3, a3, s0
+; RV32IZHINX-NEXT: neg a3, a3
+; RV32IZHINX-NEXT: or a0, a3, a0
+; RV32IZHINX-NEXT: and a1, a2, a1
+; RV32IZHINX-NEXT: or a1, a3, a1
; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZHINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32IZHINX-NEXT: addi sp, sp, 16
; RV32IZHINX-NEXT: ret
;
@@ -2758,26 +2776,25 @@ define i64 @test_round_ui64(half %x) nounwind {
; RV32IZFHMIN-NEXT: .LBB15_2:
; RV32IZFHMIN-NEXT: addi sp, sp, -16
; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFHMIN-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill
; RV32IZFHMIN-NEXT: fcvt.h.s fa5, fa5
; RV32IZFHMIN-NEXT: fcvt.s.h fs0, fa5
-; RV32IZFHMIN-NEXT: fmv.w.x fa5, zero
-; RV32IZFHMIN-NEXT: fle.s a0, fa5, fs0
-; RV32IZFHMIN-NEXT: neg s0, a0
; RV32IZFHMIN-NEXT: fmv.s fa0, fs0
; RV32IZFHMIN-NEXT: call __fixunssfdi
-; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI15_0)
-; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI15_0)(a2)
-; RV32IZFHMIN-NEXT: and a0, s0, a0
-; RV32IZFHMIN-NEXT: flt.s a2, fa5, fs0
-; RV32IZFHMIN-NEXT: neg a2, a2
-; RV32IZFHMIN-NEXT: or a0, a2, a0
-; RV32IZFHMIN-NEXT: and a1, s0, a1
-; RV32IZFHMIN-NEXT: or a1, a2, a1
+; RV32IZFHMIN-NEXT: fmv.w.x fa5, zero
+; RV32IZFHMIN-NEXT: fle.s a2, fa5, fs0
+; RV32IZFHMIN-NEXT: lui a3, %hi(.LCPI15_0)
+; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI15_0)(a3)
+; RV32IZFHMIN-NEXT: xori a2, a2, 1
+; RV32IZFHMIN-NEXT: addi a2, a2, -1
+; RV32IZFHMIN-NEXT: and a0, a2, a0
+; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0
+; RV32IZFHMIN-NEXT: neg a3, a3
+; RV32IZFHMIN-NEXT: or a0, a3, a0
+; RV32IZFHMIN-NEXT: and a1, a2, a1
+; RV32IZFHMIN-NEXT: or a1, a3, a1
; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IZFHMIN-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload
; RV32IZFHMIN-NEXT: addi sp, sp, 16
; RV32IZFHMIN-NEXT: ret
;
@@ -2818,24 +2835,23 @@ define i64 @test_round_ui64(half %x) nounwind {
; RV32IZHINXMIN-NEXT: addi sp, sp, -16
; RV32IZHINXMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZHINXMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZHINXMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32IZHINXMIN-NEXT: fcvt.h.s a0, a0
; RV32IZHINXMIN-NEXT: fcvt.s.h s0, a0
-; RV32IZHINXMIN-NEXT: fle.s a0, zero, s0
-; RV32IZHINXMIN-NEXT: neg s1, a0
; RV32IZHINXMIN-NEXT: mv a0, s0
; RV32IZHINXMIN-NEXT: call __fixunssfdi
-; RV32IZHINXMIN-NEXT: lui a2, %hi(.LCPI15_0)
-; RV32IZHINXMIN-NEXT: lw a2, %lo(.LCPI15_0)(a2)
-; RV32IZHINXMIN-NEXT: and a0, s1, a0
-; RV32IZHINXMIN-NEXT: flt.s a2, a2, s0
-; RV32IZHINXMIN-NEXT: neg a2, a2
-; RV32IZHINXMIN-NEXT: or a0, a2, a0
-; RV32IZHINXMIN-NEXT: and a1, s1, a1
-; RV32IZHINXMIN-NEXT: or a1, a2, a1
+; RV32IZHINXMIN-NEXT: fle.s a2, zero, s0
+; RV32IZHINXMIN-NEXT: lui a3, %hi(.LCPI15_0)
+; RV32IZHINXMIN-NEXT: lw a3, %lo(.LCPI15_0)(a3)
+; RV32IZHINXMIN-NEXT: xori a2, a2, 1
+; RV32IZHINXMIN-NEXT: addi a2, a2, -1
+; RV32IZHINXMIN-NEXT: and a0, a2, a0
+; RV32IZHINXMIN-NEXT: flt.s a3, a3, s0
+; RV32IZHINXMIN-NEXT: neg a3, a3
+; RV32IZHINXMIN-NEXT: or a0, a3, a0
+; RV32IZHINXMIN-NEXT: and a1, a2, a1
+; RV32IZHINXMIN-NEXT: or a1, a3, a1
; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZHINXMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IZHINXMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32IZHINXMIN-NEXT: addi sp, sp, 16
; RV32IZHINXMIN-NEXT: ret
;
@@ -2956,38 +2972,41 @@ define i64 @test_roundeven_si64(half %x) nounwind {
; RV32IZFH-NEXT: addi sp, sp, -16
; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill
; RV32IZFH-NEXT: fcvt.s.h fs0, fa0
; RV32IZFH-NEXT: lui a0, 913408
; RV32IZFH-NEXT: fmv.w.x fa5, a0
; RV32IZFH-NEXT: fle.s s0, fa5, fs0
+; RV32IZFH-NEXT: neg s1, s0
; RV32IZFH-NEXT: fmv.s fa0, fs0
; RV32IZFH-NEXT: call __fixsfdi
+; RV32IZFH-NEXT: lui a2, %hi(.LCPI17_1)
+; RV32IZFH-NEXT: flw fa5, %lo(.LCPI17_1)(a2)
+; RV32IZFH-NEXT: and a0, s1, a0
+; RV32IZFH-NEXT: flt.s a3, fa5, fs0
+; RV32IZFH-NEXT: neg a2, a3
+; RV32IZFH-NEXT: or a0, a2, a0
+; RV32IZFH-NEXT: feq.s a2, fs0, fs0
+; RV32IZFH-NEXT: neg a2, a2
+; RV32IZFH-NEXT: lui a5, 524288
+; RV32IZFH-NEXT: li a6, 1
; RV32IZFH-NEXT: lui a4, 524288
-; RV32IZFH-NEXT: lui a2, 524288
-; RV32IZFH-NEXT: beqz s0, .LBB17_4
+; RV32IZFH-NEXT: bne s0, a6, .LBB17_4
; RV32IZFH-NEXT: # %bb.3:
-; RV32IZFH-NEXT: mv a2, a1
+; RV32IZFH-NEXT: mv a4, a1
; RV32IZFH-NEXT: .LBB17_4:
-; RV32IZFH-NEXT: lui a1, %hi(.LCPI17_1)
-; RV32IZFH-NEXT: flw fa5, %lo(.LCPI17_1)(a1)
-; RV32IZFH-NEXT: flt.s a3, fa5, fs0
-; RV32IZFH-NEXT: beqz a3, .LBB17_6
-; RV32IZFH-NEXT: # %bb.5:
-; RV32IZFH-NEXT: addi a2, a4, -1
-; RV32IZFH-NEXT: .LBB17_6:
-; RV32IZFH-NEXT: feq.s a1, fs0, fs0
-; RV32IZFH-NEXT: neg a4, a1
-; RV32IZFH-NEXT: and a1, a4, a2
-; RV32IZFH-NEXT: neg a2, s0
; RV32IZFH-NEXT: and a0, a2, a0
-; RV32IZFH-NEXT: neg a2, a3
-; RV32IZFH-NEXT: or a0, a2, a0
-; RV32IZFH-NEXT: and a0, a4, a0
; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: addi sp, sp, 16
+; RV32IZFH-NEXT: beqz a3, .LBB17_6
+; RV32IZFH-NEXT: # %bb.5:
+; RV32IZFH-NEXT: addi a4, a5, -1
+; RV32IZFH-NEXT: .LBB17_6:
+; RV32IZFH-NEXT: and a1, a2, a4
; RV32IZFH-NEXT: ret
;
; RV64IZFH-LABEL: test_roundeven_si64:
@@ -3025,16 +3044,17 @@ define i64 @test_roundeven_si64(half %x) nounwind {
; RV32IZHINX-NEXT: lui a2, %hi(.LCPI17_1)
; RV32IZHINX-NEXT: lw a2, %lo(.LCPI17_1)(a2)
; RV32IZHINX-NEXT: and a0, s2, a0
-; RV32IZHINX-NEXT: flt.s a4, a2, s0
-; RV32IZHINX-NEXT: neg a2, a4
+; RV32IZHINX-NEXT: flt.s a3, a2, s0
+; RV32IZHINX-NEXT: neg a2, a3
; RV32IZHINX-NEXT: or a0, a2, a0
; RV32IZHINX-NEXT: feq.s a2, s0, s0
; RV32IZHINX-NEXT: neg a2, a2
; RV32IZHINX-NEXT: lui a5, 524288
-; RV32IZHINX-NEXT: lui a3, 524288
-; RV32IZHINX-NEXT: beqz s1, .LBB17_4
+; RV32IZHINX-NEXT: li a6, 1
+; RV32IZHINX-NEXT: lui a4, 524288
+; RV32IZHINX-NEXT: bne s1, a6, .LBB17_4
; RV32IZHINX-NEXT: # %bb.3:
-; RV32IZHINX-NEXT: mv a3, a1
+; RV32IZHINX-NEXT: mv a4, a1
; RV32IZHINX-NEXT: .LBB17_4:
; RV32IZHINX-NEXT: and a0, a2, a0
; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
@@ -3042,11 +3062,11 @@ define i64 @test_roundeven_si64(half %x) nounwind {
; RV32IZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32IZHINX-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
; RV32IZHINX-NEXT: addi sp, sp, 16
-; RV32IZHINX-NEXT: beqz a4, .LBB17_6
+; RV32IZHINX-NEXT: beqz a3, .LBB17_6
; RV32IZHINX-NEXT: # %bb.5:
-; RV32IZHINX-NEXT: addi a3, a5, -1
+; RV32IZHINX-NEXT: addi a4, a5, -1
; RV32IZHINX-NEXT: .LBB17_6:
-; RV32IZHINX-NEXT: and a1, a2, a3
+; RV32IZHINX-NEXT: and a1, a2, a4
; RV32IZHINX-NEXT: ret
;
; RV64IZHINX-LABEL: test_roundeven_si64:
@@ -3084,39 +3104,42 @@ define i64 @test_roundeven_si64(half %x) nounwind {
; RV32IZFHMIN-NEXT: addi sp, sp, -16
; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFHMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32IZFHMIN-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill
; RV32IZFHMIN-NEXT: fcvt.h.s fa5, fa5
; RV32IZFHMIN-NEXT: fcvt.s.h fs0, fa5
; RV32IZFHMIN-NEXT: lui a0, 913408
; RV32IZFHMIN-NEXT: fmv.w.x fa5, a0
; RV32IZFHMIN-NEXT: fle.s s0, fa5, fs0
+; RV32IZFHMIN-NEXT: neg s1, s0
; RV32IZFHMIN-NEXT: fmv.s fa0, fs0
; RV32IZFHMIN-NEXT: call __fixsfdi
+; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI17_0)
+; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI17_0)(a2)
+; RV32IZFHMIN-NEXT: and a0, s1, a0
+; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0
+; RV32IZFHMIN-NEXT: neg a2, a3
+; RV32IZFHMIN-NEXT: or a0, a2, a0
+; RV32IZFHMIN-NEXT: feq.s a2, fs0, fs0
+; RV32IZFHMIN-NEXT: neg a2, a2
+; RV32IZFHMIN-NEXT: lui a5, 524288
+; RV32IZFHMIN-NEXT: li a6, 1
; RV32IZFHMIN-NEXT: lui a4, 524288
-; RV32IZFHMIN-NEXT: lui a2, 524288
-; RV32IZFHMIN-NEXT: beqz s0, .LBB17_4
+; RV32IZFHMIN-NEXT: bne s0, a6, .LBB17_4
; RV32IZFHMIN-NEXT: # %bb.3:
-; RV32IZFHMIN-NEXT: mv a2, a1
+; RV32IZFHMIN-NEXT: mv a4, a1
; RV32IZFHMIN-NEXT: .LBB17_4:
-; RV32IZFHMIN-NEXT: lui a1, %hi(.LCPI17_0)
-; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI17_0)(a1)
-; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0
-; RV32IZFHMIN-NEXT: beqz a3, .LBB17_6
-; RV32IZFHMIN-NEXT: # %bb.5:
-; RV32IZFHMIN-NEXT: addi a2, a4, -1
-; RV32IZFHMIN-NEXT: .LBB17_6:
-; RV32IZFHMIN-NEXT: feq.s a1, fs0, fs0
-; RV32IZFHMIN-NEXT: neg a4, a1
-; RV32IZFHMIN-NEXT: and a1, a4, a2
-; RV32IZFHMIN-NEXT: neg a2, s0
; RV32IZFHMIN-NEXT: and a0, a2, a0
-; RV32IZFHMIN-NEXT: neg a2, a3
-; RV32IZFHMIN-NEXT: or a0, a2, a0
-; RV32IZFHMIN-NEXT: and a0, a4, a0
; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IZFHMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32IZFHMIN-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload
; RV32IZFHMIN-NEXT: addi sp, sp, 16
+; RV32IZFHMIN-NEXT: beqz a3, .LBB17_6
+; RV32IZFHMIN-NEXT: # %bb.5:
+; RV32IZFHMIN-NEXT: addi a4, a5, -1
+; RV32IZFHMIN-NEXT: .LBB17_6:
+; RV32IZFHMIN-NEXT: and a1, a2, a4
; RV32IZFHMIN-NEXT: ret
;
; RV64IZFHMIN-LABEL: test_roundeven_si64:
@@ -3168,16 +3191,17 @@ define i64 @test_roundeven_si64(half %x) nounwind {
; RV32IZHINXMIN-NEXT: lui a2, %hi(.LCPI17_0)
; RV32IZHINXMIN-NEXT: lw a2, %lo(.LCPI17_0)(a2)
; RV32IZHINXMIN-NEXT: and a0, s2, a0
-; RV32IZHINXMIN-NEXT: flt.s a4, a2, s0
-; RV32IZHINXMIN-NEXT: neg a2, a4
+; RV32IZHINXMIN-NEXT: flt.s a3, a2, s0
+; RV32IZHINXMIN-NEXT: neg a2, a3
; RV32IZHINXMIN-NEXT: or a0, a2, a0
; RV32IZHINXMIN-NEXT: feq.s a2, s0, s0
; RV32IZHINXMIN-NEXT: neg a2, a2
; RV32IZHINXMIN-NEXT: lui a5, 524288
-; RV32IZHINXMIN-NEXT: lui a3, 524288
-; RV32IZHINXMIN-NEXT: beqz s1, .LBB17_4
+; RV32IZHINXMIN-NEXT: li a6, 1
+; RV32IZHINXMIN-NEXT: lui a4, 524288
+; RV32IZHINXMIN-NEXT: bne s1, a6, .LBB17_4
; RV32IZHINXMIN-NEXT: # %bb.3:
-; RV32IZHINXMIN-NEXT: mv a3, a1
+; RV32IZHINXMIN-NEXT: mv a4, a1
; RV32IZHINXMIN-NEXT: .LBB17_4:
; RV32IZHINXMIN-NEXT: and a0, a2, a0
; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
@@ -3185,11 +3209,11 @@ define i64 @test_roundeven_si64(half %x) nounwind {
; RV32IZHINXMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32IZHINXMIN-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
; RV32IZHINXMIN-NEXT: addi sp, sp, 16
-; RV32IZHINXMIN-NEXT: beqz a4, .LBB17_6
+; RV32IZHINXMIN-NEXT: beqz a3, .LBB17_6
; RV32IZHINXMIN-NEXT: # %bb.5:
-; RV32IZHINXMIN-NEXT: addi a3, a5, -1
+; RV32IZHINXMIN-NEXT: addi a4, a5, -1
; RV32IZHINXMIN-NEXT: .LBB17_6:
-; RV32IZHINXMIN-NEXT: and a1, a2, a3
+; RV32IZHINXMIN-NEXT: and a1, a2, a4
; RV32IZHINXMIN-NEXT: ret
;
; RV64IZHINXMIN-LABEL: test_roundeven_si64:
@@ -3261,7 +3285,7 @@ define signext i32 @test_roundeven_ui32(half %x) {
; RV64IZHINX-NEXT: fcvt.wu.h a1, a0, rtz
; RV64IZHINX-NEXT: feq.h a0, a0, a0
; RV64IZHINX-NEXT: seqz a0, a0
-; RV64IZHINX-NEXT: addi a0, a0, -1
+; RV64IZHINX-NEXT: addiw a0, a0, -1
; RV64IZHINX-NEXT: and a0, a1, a0
; RV64IZHINX-NEXT: ret
;
@@ -3305,7 +3329,7 @@ define signext i32 @test_roundeven_ui32(half %x) {
; RV64IZFHMIN-NEXT: fcvt.wu.s a0, fa5, rtz
; RV64IZFHMIN-NEXT: feq.s a1, fa5, fa5
; RV64IZFHMIN-NEXT: seqz a1, a1
-; RV64IZFHMIN-NEXT: addi a1, a1, -1
+; RV64IZFHMIN-NEXT: addiw a1, a1, -1
; RV64IZFHMIN-NEXT: and a0, a0, a1
; RV64IZFHMIN-NEXT: ret
;
@@ -3347,7 +3371,7 @@ define signext i32 @test_roundeven_ui32(half %x) {
; RV64IZHINXMIN-NEXT: fcvt.wu.s a1, a0, rtz
; RV64IZHINXMIN-NEXT: feq.s a0, a0, a0
; RV64IZHINXMIN-NEXT: seqz a0, a0
-; RV64IZHINXMIN-NEXT: addi a0, a0, -1
+; RV64IZHINXMIN-NEXT: addiw a0, a0, -1
; RV64IZHINXMIN-NEXT: and a0, a1, a0
; RV64IZHINXMIN-NEXT: ret
%a = call half @llvm.roundeven.f16(half %x)
@@ -3370,25 +3394,24 @@ define i64 @test_roundeven_ui64(half %x) nounwind {
; RV32IZFH-NEXT: .LBB19_2:
; RV32IZFH-NEXT: addi sp, sp, -16
; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill
; RV32IZFH-NEXT: fcvt.s.h fs0, fa0
-; RV32IZFH-NEXT: fmv.w.x fa5, zero
-; RV32IZFH-NEXT: fle.s a0, fa5, fs0
-; RV32IZFH-NEXT: neg s0, a0
; RV32IZFH-NEXT: fmv.s fa0, fs0
; RV32IZFH-NEXT: call __fixunssfdi
-; RV32IZFH-NEXT: lui a2, %hi(.LCPI19_1)
-; RV32IZFH-NEXT: flw fa5, %lo(.LCPI19_1)(a2)
-; RV32IZFH-NEXT: and a0, s0, a0
-; RV32IZFH-NEXT: flt.s a2, fa5, fs0
-; RV32IZFH-NEXT: neg a2, a2
-; RV32IZFH-NEXT: or a0, a2, a0
-; RV32IZFH-NEXT: and a1, s0, a1
-; RV32IZFH-NEXT: or a1, a2, a1
+; RV32IZFH-NEXT: fmv.w.x fa5, zero
+; RV32IZFH-NEXT: fle.s a2, fa5, fs0
+; RV32IZFH-NEXT: lui a3, %hi(.LCPI19_1)
+; RV32IZFH-NEXT: flw fa5, %lo(.LCPI19_1)(a3)
+; RV32IZFH-NEXT: xori a2, a2, 1
+; RV32IZFH-NEXT: addi a2, a2, -1
+; RV32IZFH-NEXT: and a0, a2, a0
+; RV32IZFH-NEXT: flt.s a3, fa5, fs0
+; RV32IZFH-NEXT: neg a3, a3
+; RV32IZFH-NEXT: or a0, a3, a0
+; RV32IZFH-NEXT: and a1, a2, a1
+; RV32IZFH-NEXT: or a1, a3, a1
; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: addi sp, sp, 16
; RV32IZFH-NEXT: ret
;
@@ -3416,23 +3439,22 @@ define i64 @test_roundeven_ui64(half %x) nounwind {
; RV32IZHINX-NEXT: addi sp, sp, -16
; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZHINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZHINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32IZHINX-NEXT: fcvt.s.h s0, a0
-; RV32IZHINX-NEXT: fle.s a0, zero, s0
-; RV32IZHINX-NEXT: neg s1, a0
; RV32IZHINX-NEXT: mv a0, s0
; RV32IZHINX-NEXT: call __fixunssfdi
-; RV32IZHINX-NEXT: lui a2, %hi(.LCPI19_1)
-; RV32IZHINX-NEXT: lw a2, %lo(.LCPI19_1)(a2)
-; RV32IZHINX-NEXT: and a0, s1, a0
-; RV32IZHINX-NEXT: flt.s a2, a2, s0
-; RV32IZHINX-NEXT: neg a2, a2
-; RV32IZHINX-NEXT: or a0, a2, a0
-; RV32IZHINX-NEXT: and a1, s1, a1
-; RV32IZHINX-NEXT: or a1, a2, a1
+; RV32IZHINX-NEXT: fle.s a2, zero, s0
+; RV32IZHINX-NEXT: lui a3, %hi(.LCPI19_1)
+; RV32IZHINX-NEXT: lw a3, %lo(.LCPI19_1)(a3)
+; RV32IZHINX-NEXT: xori a2, a2, 1
+; RV32IZHINX-NEXT: addi a2, a2, -1
+; RV32IZHINX-NEXT: and a0, a2, a0
+; RV32IZHINX-NEXT: flt.s a3, a3, s0
+; RV32IZHINX-NEXT: neg a3, a3
+; RV32IZHINX-NEXT: or a0, a3, a0
+; RV32IZHINX-NEXT: and a1, a2, a1
+; RV32IZHINX-NEXT: or a1, a3, a1
; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZHINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32IZHINX-NEXT: addi sp, sp, 16
; RV32IZHINX-NEXT: ret
;
@@ -3470,26 +3492,25 @@ define i64 @test_roundeven_ui64(half %x) nounwind {
; RV32IZFHMIN-NEXT: .LBB19_2:
; RV32IZFHMIN-NEXT: addi sp, sp, -16
; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFHMIN-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill
; RV32IZFHMIN-NEXT: fcvt.h.s fa5, fa5
; RV32IZFHMIN-NEXT: fcvt.s.h fs0, fa5
-; RV32IZFHMIN-NEXT: fmv.w.x fa5, zero
-; RV32IZFHMIN-NEXT: fle.s a0, fa5, fs0
-; RV32IZFHMIN-NEXT: neg s0, a0
; RV32IZFHMIN-NEXT: fmv.s fa0, fs0
; RV32IZFHMIN-NEXT: call __fixunssfdi
-; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI19_0)
-; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI19_0)(a2)
-; RV32IZFHMIN-NEXT: and a0, s0, a0
-; RV32IZFHMIN-NEXT: flt.s a2, fa5, fs0
-; RV32IZFHMIN-NEXT: neg a2, a2
-; RV32IZFHMIN-NEXT: or a0, a2, a0
-; RV32IZFHMIN-NEXT: and a1, s0, a1
-; RV32IZFHMIN-NEXT: or a1, a2, a1
+; RV32IZFHMIN-NEXT: fmv.w.x fa5, zero
+; RV32IZFHMIN-NEXT: fle.s a2, fa5, fs0
+; RV32IZFHMIN-NEXT: lui a3, %hi(.LCPI19_0)
+; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI19_0)(a3)
+; RV32IZFHMIN-NEXT: xori a2, a2, 1
+; RV32IZFHMIN-NEXT: addi a2, a2, -1
+; RV32IZFHMIN-NEXT: and a0, a2, a0
+; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0
+; RV32IZFHMIN-NEXT: neg a3, a3
+; RV32IZFHMIN-NEXT: or a0, a3, a0
+; RV32IZFHMIN-NEXT: and a1, a2, a1
+; RV32IZFHMIN-NEXT: or a1, a3, a1
; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IZFHMIN-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload
; RV32IZFHMIN-NEXT: addi sp, sp, 16
; RV32IZFHMIN-NEXT: ret
;
@@ -3530,24 +3551,23 @@ define i64 @test_roundeven_ui64(half %x) nounwind {
; RV32IZHINXMIN-NEXT: addi sp, sp, -16
; RV32IZHINXMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZHINXMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZHINXMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32IZHINXMIN-NEXT: fcvt.h.s a0, a0
; RV32IZHINXMIN-NEXT: fcvt.s.h s0, a0
-; RV32IZHINXMIN-NEXT: fle.s a0, zero, s0
-; RV32IZHINXMIN-NEXT: neg s1, a0
; RV32IZHINXMIN-NEXT: mv a0, s0
; RV32IZHINXMIN-NEXT: call __fixunssfdi
-; RV32IZHINXMIN-NEXT: lui a2, %hi(.LCPI19_0)
-; RV32IZHINXMIN-NEXT: lw a2, %lo(.LCPI19_0)(a2)
-; RV32IZHINXMIN-NEXT: and a0, s1, a0
-; RV32IZHINXMIN-NEXT: flt.s a2, a2, s0
-; RV32IZHINXMIN-NEXT: neg a2, a2
-; RV32IZHINXMIN-NEXT: or a0, a2, a0
-; RV32IZHINXMIN-NEXT: and a1, s1, a1
-; RV32IZHINXMIN-NEXT: or a1, a2, a1
+; RV32IZHINXMIN-NEXT: fle.s a2, zero, s0
+; RV32IZHINXMIN-NEXT: lui a3, %hi(.LCPI19_0)
+; RV32IZHINXMIN-NEXT: lw a3, %lo(.LCPI19_0)(a3)
+; RV32IZHINXMIN-NEXT: xori a2, a2, 1
+; RV32IZHINXMIN-NEXT: addi a2, a2, -1
+; RV32IZHINXMIN-NEXT: and a0, a2, a0
+; RV32IZHINXMIN-NEXT: flt.s a3, a3, s0
+; RV32IZHINXMIN-NEXT: neg a3, a3
+; RV32IZHINXMIN-NEXT: or a0, a3, a0
+; RV32IZHINXMIN-NEXT: and a1, a2, a1
+; RV32IZHINXMIN-NEXT: or a1, a3, a1
; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZHINXMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IZHINXMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32IZHINXMIN-NEXT: addi sp, sp, 16
; RV32IZHINXMIN-NEXT: ret
;
@@ -3668,38 +3688,41 @@ define i64 @test_rint_si64(half %x) nounwind {
; RV32IZFH-NEXT: addi sp, sp, -16
; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill
; RV32IZFH-NEXT: fcvt.s.h fs0, fa0
; RV32IZFH-NEXT: lui a0, 913408
; RV32IZFH-NEXT: fmv.w.x fa5, a0
; RV32IZFH-NEXT: fle.s s0, fa5, fs0
+; RV32IZFH-NEXT: neg s1, s0
; RV32IZFH-NEXT: fmv.s fa0, fs0
; RV32IZFH-NEXT: call __fixsfdi
+; RV32IZFH-NEXT: lui a2, %hi(.LCPI21_1)
+; RV32IZFH-NEXT: flw fa5, %lo(.LCPI21_1)(a2)
+; RV32IZFH-NEXT: and a0, s1, a0
+; RV32IZFH-NEXT: flt.s a3, fa5, fs0
+; RV32IZFH-NEXT: neg a2, a3
+; RV32IZFH-NEXT: or a0, a2, a0
+; RV32IZFH-NEXT: feq.s a2, fs0, fs0
+; RV32IZFH-NEXT: neg a2, a2
+; RV32IZFH-NEXT: lui a5, 524288
+; RV32IZFH-NEXT: li a6, 1
; RV32IZFH-NEXT: lui a4, 524288
-; RV32IZFH-NEXT: lui a2, 524288
-; RV32IZFH-NEXT: beqz s0, .LBB21_4
+; RV32IZFH-NEXT: bne s0, a6, .LBB21_4
; RV32IZFH-NEXT: # %bb.3:
-; RV32IZFH-NEXT: mv a2, a1
+; RV32IZFH-NEXT: mv a4, a1
; RV32IZFH-NEXT: .LBB21_4:
-; RV32IZFH-NEXT: lui a1, %hi(.LCPI21_1)
-; RV32IZFH-NEXT: flw fa5, %lo(.LCPI21_1)(a1)
-; RV32IZFH-NEXT: flt.s a3, fa5, fs0
-; RV32IZFH-NEXT: beqz a3, .LBB21_6
-; RV32IZFH-NEXT: # %bb.5:
-; RV32IZFH-NEXT: addi a2, a4, -1
-; RV32IZFH-NEXT: .LBB21_6:
-; RV32IZFH-NEXT: feq.s a1, fs0, fs0
-; RV32IZFH-NEXT: neg a4, a1
-; RV32IZFH-NEXT: and a1, a4, a2
-; RV32IZFH-NEXT: neg a2, s0
; RV32IZFH-NEXT: and a0, a2, a0
-; RV32IZFH-NEXT: neg a2, a3
-; RV32IZFH-NEXT: or a0, a2, a0
-; RV32IZFH-NEXT: and a0, a4, a0
; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: addi sp, sp, 16
+; RV32IZFH-NEXT: beqz a3, .LBB21_6
+; RV32IZFH-NEXT: # %bb.5:
+; RV32IZFH-NEXT: addi a4, a5, -1
+; RV32IZFH-NEXT: .LBB21_6:
+; RV32IZFH-NEXT: and a1, a2, a4
; RV32IZFH-NEXT: ret
;
; RV64IZFH-LABEL: test_rint_si64:
@@ -3737,16 +3760,17 @@ define i64 @test_rint_si64(half %x) nounwind {
; RV32IZHINX-NEXT: lui a2, %hi(.LCPI21_1)
; RV32IZHINX-NEXT: lw a2, %lo(.LCPI21_1)(a2)
; RV32IZHINX-NEXT: and a0, s2, a0
-; RV32IZHINX-NEXT: flt.s a4, a2, s0
-; RV32IZHINX-NEXT: neg a2, a4
+; RV32IZHINX-NEXT: flt.s a3, a2, s0
+; RV32IZHINX-NEXT: neg a2, a3
; RV32IZHINX-NEXT: or a0, a2, a0
; RV32IZHINX-NEXT: feq.s a2, s0, s0
; RV32IZHINX-NEXT: neg a2, a2
; RV32IZHINX-NEXT: lui a5, 524288
-; RV32IZHINX-NEXT: lui a3, 524288
-; RV32IZHINX-NEXT: beqz s1, .LBB21_4
+; RV32IZHINX-NEXT: li a6, 1
+; RV32IZHINX-NEXT: lui a4, 524288
+; RV32IZHINX-NEXT: bne s1, a6, .LBB21_4
; RV32IZHINX-NEXT: # %bb.3:
-; RV32IZHINX-NEXT: mv a3, a1
+; RV32IZHINX-NEXT: mv a4, a1
; RV32IZHINX-NEXT: .LBB21_4:
; RV32IZHINX-NEXT: and a0, a2, a0
; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
@@ -3754,11 +3778,11 @@ define i64 @test_rint_si64(half %x) nounwind {
; RV32IZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32IZHINX-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
; RV32IZHINX-NEXT: addi sp, sp, 16
-; RV32IZHINX-NEXT: beqz a4, .LBB21_6
+; RV32IZHINX-NEXT: beqz a3, .LBB21_6
; RV32IZHINX-NEXT: # %bb.5:
-; RV32IZHINX-NEXT: addi a3, a5, -1
+; RV32IZHINX-NEXT: addi a4, a5, -1
; RV32IZHINX-NEXT: .LBB21_6:
-; RV32IZHINX-NEXT: and a1, a2, a3
+; RV32IZHINX-NEXT: and a1, a2, a4
; RV32IZHINX-NEXT: ret
;
; RV64IZHINX-LABEL: test_rint_si64:
@@ -3796,39 +3820,42 @@ define i64 @test_rint_si64(half %x) nounwind {
; RV32IZFHMIN-NEXT: addi sp, sp, -16
; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFHMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32IZFHMIN-NEXT: fsw fs0, 0(sp) # 4-byte Folded Spill
; RV32IZFHMIN-NEXT: fcvt.h.s fa5, fa5
; RV32IZFHMIN-NEXT: fcvt.s.h fs0, fa5
; RV32IZFHMIN-NEXT: lui a0, 913408
; RV32IZFHMIN-NEXT: fmv.w.x fa5, a0
; RV32IZFHMIN-NEXT: fle.s s0, fa5, fs0
+; RV32IZFHMIN-NEXT: neg s1, s0
; RV32IZFHMIN-NEXT: fmv.s fa0, fs0
; RV32IZFHMIN-NEXT: call __fixsfdi
+; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI21_0)
+; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI21_0)(a2)
+; RV32IZFHMIN-NEXT: and a0, s1, a0
+; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0
+; RV32IZFHMIN-NEXT: neg a2, a3
+; RV32IZFHMIN-NEXT: or a0, a2, a0
+; RV32IZFHMIN-NEXT: feq.s a2, fs0, fs0
+; RV32IZFHMIN-NEXT: neg a2, a2
+; RV32IZFHMIN-NEXT: lui a5, 524288
+; RV32IZFHMIN-NEXT: li a6, 1
; RV32IZFHMIN-NEXT: lui a4, 524288
-; RV32IZFHMIN-NEXT: lui a2, 524288
-; RV32IZFHMIN-NEXT: beqz s0, .LBB21_4
+; RV32IZFHMIN-NEXT: bne s0, a6, .LBB21_4
; RV32IZFHMIN-NEXT: # %bb.3:
-; RV32IZFHMIN-NEXT: mv a2, a1
+; RV32IZFHMIN-NEXT: mv a4, a1
; RV32IZFHMIN-NEXT: .LBB21_4:
-; RV32IZFHMIN-NEXT: lui a1, %hi(.LCPI21_0)
-; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI21_0)(a1)
-; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0
-; RV32IZFHMIN-NEXT: beqz a3, .LBB21_6
-; RV32IZFHMIN-NEXT: # %bb.5:
-; RV32IZFHMIN-NEXT: addi a2, a4, -1
-; RV32IZFHMIN-NEXT: .LBB21_6:
-; RV32IZFHMIN-NEXT: feq.s a1, fs0, fs0
-; RV32IZFHMIN-NEXT: neg a4, a1
-; RV32IZFHMIN-NEXT: and a1, a4, a2
-; RV32IZFHMIN-NEXT: neg a2, s0
; RV32IZFHMIN-NEXT: and a0, a2, a0
-; RV32IZFHMIN-NEXT: neg a2, a3
-; RV32IZFHMIN-NEXT: or a0, a2, a0
-; RV32IZFHMIN-NEXT: and a0, a4, a0
; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IZFHMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32IZFHMIN-NEXT: flw fs0, 0(sp) # 4-byte Folded Reload
; RV32IZFHMIN-NEXT: addi sp, sp, 16
+; RV32IZFHMIN-NEXT: beqz a3, .LBB21_6
+; RV32IZFHMIN-NEXT: # %bb.5:
+; RV32IZFHMIN-NEXT: addi a4, a5, -1
+; RV32IZFHMIN-NEXT: .LBB21_6:
+; RV32IZFHMIN-NEXT: and a1, a2, a4
; RV32IZFHMIN-NEXT: ret
;
; RV64IZFHMIN-LABEL: test_rint_si64:
@@ -3880,16 +3907,17 @@ define i64 @test_rint_si64(half %x) nounwind {
; RV32IZHINXMIN-NEXT: lui a2, %hi(.LCPI21_0)
; RV32IZHINXMIN-NEXT: lw a2, %lo(.LCPI21_0)(a2)
; RV32IZHINXMIN-NEXT: and a0, s2, a0
-; RV32IZHINXMIN-NEXT: flt.s a4, a2, s0
-; RV32IZHINXMIN-NEXT: neg a2, a4
+; RV32IZHINXMIN-NEXT: flt.s a3, a2, s0
+; RV32IZHINXMIN-NEXT: neg a2, a3
; RV32IZHINXMIN-NEXT: or a0, a2, a0
; RV32IZHINXMIN-NEXT: feq.s a2, s0, s0
; RV32IZHINXMIN-NEXT: neg a2, a2
; RV32IZHINXMIN-NEXT: lui a5, 524288
-; RV32IZHINXMIN-NEXT: lui a3, 524288
-; RV32IZHINXMIN-NEXT: beqz s1, .LBB21_4
+; RV32IZHINXMIN-NEXT: li a6, 1
+; RV32IZHINXMIN-NEXT: lui a4, 524288
+; RV32IZHINXMIN-NEXT: bne s1, a6, .LBB21_4
; RV32IZHINXMIN-NEXT: # %bb.3:
-; RV32IZHINXMIN-NEXT: mv a3, a1
+; RV32IZHINXMIN-NEXT: mv a4, a1
; RV32IZHINXMIN-NEXT: .LBB21_4:
; RV32IZHINXMIN-NEXT: and a0, a2, a0
; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
@@ -3897,11 +3925,11 @@ define i64 @test_rint_si64(half %x) nounwind {
; RV32IZHINXMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32IZHINXMIN-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
; RV32IZHINXMIN-NEXT: addi sp, sp, 16
-; RV32IZHINXMIN-NEXT: beqz a4, .LBB21_6
+; RV32IZHINXMIN-NEXT: beqz a3, .LBB21_6
; RV32IZHINXMIN-NEXT: # %bb.5:
-; RV32IZHINXMIN-NEXT: addi a3, a5, -1
+; RV32IZHINXMIN-NEXT: addi a4, a5, -1
; RV32IZHINXMIN-NEXT: .LBB21_6:
-; RV32IZHINXMIN-NEXT: and a1, a2, a3
+; RV32IZHINXMIN-NEXT: and a1, a2, a4
; RV32IZHINXMIN-NEXT: ret
;
; RV64IZHINXMIN-LABEL: test_rint_si64:
@@ -3973,7 +4001,7 @@ define signext i32 @test_rint_ui32(half %x) {
; RV64IZHINX-NEXT: fcvt.wu.h a1, a0, rtz
; RV64IZHINX-NEXT: feq.h a0, a0, a0
; RV64IZHINX-NEXT: seqz a0, a0
-; RV64IZHINX-NEXT: addi a0, a0, -1
+; RV64IZHINX-NEXT: addiw a0, a0, -1
; RV64IZHINX-NEXT: and a0, a1, a0
; RV64IZHINX-NEXT: ret
;
@@ -4017,7 +4045,7 @@ define signext i32 @test_rint_ui32(half %x) {
; RV64IZFHMIN-NEXT: fcvt.wu.s a0, fa5, rtz
; RV64IZFHMIN-NEXT: feq.s a1, fa5, fa5
; RV64IZFHMIN-NEXT: seqz a1, a1
-; RV64IZFHMIN-NEXT: addi a1, a1, -1
+; RV64IZFHMIN-NEXT: addiw a1, a1, -1
; RV64IZFHMIN-NEXT: and a0, a0, a1
; RV64IZFHMIN-NEXT: ret
;
@@ -4059,7 +4087,7 @@ define signext i32 @test_rint_ui32(half %x) {
; RV64IZHINXMIN-NEXT: fcvt.wu.s a1, a0, rtz
; RV64IZHINXMIN-NEXT: feq.s a0, a0, a0
; RV64IZHINXMIN-NEXT: seqz a0, a0
-; RV64IZHINXMIN-NEXT: addi a0, a0, -1
+; RV64IZHINXMIN-NEXT: addiw a0, a0, -1
; RV64IZHINXMIN-NEXT: and a0, a1, a0
; RV64IZHINXMIN-NEXT: ret
%a = call half @llvm.rint.f16(half %x)
@@ -4082,25 +4110,24 @@ define i64 @test_rint_ui64(half %x) nounwind {
; RV32IZFH-NEXT: .LBB23_2:
; RV32IZFH-NEXT: addi sp, sp, -16
; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZFH-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill
; RV32IZFH-NEXT: fcvt.s.h fs0, fa0
-; RV32IZFH-NEXT: fmv.w.x fa5, zero
-; RV32IZFH-NEXT: fle.s a0, fa5, fs0
-; RV32IZFH-NEXT: neg s0, a0
; RV32IZFH-NEXT: fmv.s fa0, fs0
; RV32IZFH-NEXT: call __fixunssfdi
-; RV32IZFH-NEXT: lui a2, %hi(.LCPI23_1)
-; RV32IZFH-NEXT: flw fa5, %lo(.LCPI23_1)(a2)
-; RV32IZFH-NEXT: and a0, s0, a0
-; RV32IZFH-NEXT: flt.s a2, fa5, fs0
-; RV32IZFH-NEXT: neg a2, a2
-; RV32IZFH-NEXT: or a0, a2, a0
-; RV32IZFH-NEXT: and a1, s0, a1
-; RV32IZFH-NEXT: or a1, a2, a1
+; RV32IZFH-NEXT: fmv.w.x fa5, zero
+; RV32IZFH-NEXT: fle.s a2, fa5, fs0
+; RV32IZFH-NEXT: lui a3, %hi(.LCPI23_1)
+; RV32IZFH-NEXT: flw fa5, %lo(.LCPI23_1)(a3)
+; RV32IZFH-NEXT: xori a2, a2, 1
+; RV32IZFH-NEXT: addi a2, a2, -1
+; RV32IZFH-NEXT: and a0, a2, a0
+; RV32IZFH-NEXT: flt.s a3, fa5, fs0
+; RV32IZFH-NEXT: neg a3, a3
+; RV32IZFH-NEXT: or a0, a3, a0
+; RV32IZFH-NEXT: and a1, a2, a1
+; RV32IZFH-NEXT: or a1, a3, a1
; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IZFH-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IZFH-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload
; RV32IZFH-NEXT: addi sp, sp, 16
; RV32IZFH-NEXT: ret
;
@@ -4128,23 +4155,22 @@ define i64 @test_rint_ui64(half %x) nounwind {
; RV32IZHINX-NEXT: addi sp, sp, -16
; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZHINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZHINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32IZHINX-NEXT: fcvt.s.h s0, a0
-; RV32IZHINX-NEXT: fle.s a0, zero, s0
-; RV32IZHINX-NEXT: neg s1, a0
; RV32IZHINX-NEXT: mv a0, s0
; RV32IZHINX-NEXT: call __fixunssfdi
-; RV32IZHINX-NEXT: lui a2, %hi(.LCPI23_1)
-; RV32IZHINX-NEXT: lw a2, %lo(.LCPI23_1)(a2)
-; RV32IZHINX-NEXT: and a0, s1, a0
-; RV32IZHINX-NEXT: flt.s a2, a2, s0
-; RV32IZHINX-NEXT: neg a2, a2
-; RV32IZHINX-NEXT: or a0, a2, a0
-; RV32IZHINX-NEXT: and a1, s1, a1
-; RV32IZHINX-NEXT: or a1, a2, a1
+; RV32IZHINX-NEXT: fle.s a2, zero, s0
+; RV32IZHINX-NEXT: lui a3, %hi(.LCPI23_1)
+; RV32IZHINX-NEXT: lw a3, %lo(.LCPI23_1)(a3)
+; RV32IZHINX-NEXT: xori a2, a2, 1
+; RV32IZHINX-NEXT: addi a2, a2, -1
+; RV32IZHINX-NEXT: and a0, a2, a0
+; RV32IZHINX-NEXT: flt.s a3, a3, s0
+; RV32IZHINX-NEXT: neg a3, a3
+; RV32IZHINX-NEXT: or a0, a3, a0
+; RV32IZHINX-NEXT: and a1, a2, a1
+; RV32IZHINX-NEXT: or a1, a3, a1
; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZHINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32IZHINX-NEXT: addi sp, sp, 16
; RV32IZHINX-NEXT: ret
;
@@ -4182,26 +4208,25 @@ define i64 @test_rint_ui64(half %x) nounwind {
; RV32IZFHMIN-NEXT: .LBB23_2:
; RV32IZFHMIN-NEXT: addi sp, sp, -16
; RV32IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZFHMIN-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFHMIN-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill
; RV32IZFHMIN-NEXT: fcvt.h.s fa5, fa5
; RV32IZFHMIN-NEXT: fcvt.s.h fs0, fa5
-; RV32IZFHMIN-NEXT: fmv.w.x fa5, zero
-; RV32IZFHMIN-NEXT: fle.s a0, fa5, fs0
-; RV32IZFHMIN-NEXT: neg s0, a0
; RV32IZFHMIN-NEXT: fmv.s fa0, fs0
; RV32IZFHMIN-NEXT: call __fixunssfdi
-; RV32IZFHMIN-NEXT: lui a2, %hi(.LCPI23_0)
-; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI23_0)(a2)
-; RV32IZFHMIN-NEXT: and a0, s0, a0
-; RV32IZFHMIN-NEXT: flt.s a2, fa5, fs0
-; RV32IZFHMIN-NEXT: neg a2, a2
-; RV32IZFHMIN-NEXT: or a0, a2, a0
-; RV32IZFHMIN-NEXT: and a1, s0, a1
-; RV32IZFHMIN-NEXT: or a1, a2, a1
+; RV32IZFHMIN-NEXT: fmv.w.x fa5, zero
+; RV32IZFHMIN-NEXT: fle.s a2, fa5, fs0
+; RV32IZFHMIN-NEXT: lui a3, %hi(.LCPI23_0)
+; RV32IZFHMIN-NEXT: flw fa5, %lo(.LCPI23_0)(a3)
+; RV32IZFHMIN-NEXT: xori a2, a2, 1
+; RV32IZFHMIN-NEXT: addi a2, a2, -1
+; RV32IZFHMIN-NEXT: and a0, a2, a0
+; RV32IZFHMIN-NEXT: flt.s a3, fa5, fs0
+; RV32IZFHMIN-NEXT: neg a3, a3
+; RV32IZFHMIN-NEXT: or a0, a3, a0
+; RV32IZFHMIN-NEXT: and a1, a2, a1
+; RV32IZFHMIN-NEXT: or a1, a3, a1
; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32IZFHMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IZFHMIN-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IZFHMIN-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload
; RV32IZFHMIN-NEXT: addi sp, sp, 16
; RV32IZFHMIN-NEXT: ret
;
@@ -4242,24 +4267,23 @@ define i64 @test_rint_ui64(half %x) nounwind {
; RV32IZHINXMIN-NEXT: addi sp, sp, -16
; RV32IZHINXMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZHINXMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IZHINXMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32IZHINXMIN-NEXT: fcvt.h.s a0, a0
; RV32IZHINXMIN-NEXT: fcvt.s.h s0, a0
-; RV32IZHINXMIN-NEXT: fle.s a0, zero, s0
-; RV32IZHINXMIN-NEXT: neg s1, a0
; RV32IZHINXMIN-NEXT: mv a0, s0
; RV32IZHINXMIN-NEXT: call __fixunssfdi
-; RV32IZHINXMIN-NEXT: lui a2, %hi(.LCPI23_0)
-; RV32IZHINXMIN-NEXT: lw a2, %lo(.LCPI23_0)(a2)
-; RV32IZHINXMIN-NEXT: and a0, s1, a0
-; RV32IZHINXMIN-NEXT: flt.s a2, a2, s0
-; RV32IZHINXMIN-NEXT: neg a2, a2
-; RV32IZHINXMIN-NEXT: or a0, a2, a0
-; RV32IZHINXMIN-NEXT: and a1, s1, a1
-; RV32IZHINXMIN-NEXT: or a1, a2, a1
+; RV32IZHINXMIN-NEXT: fle.s a2, zero, s0
+; RV32IZHINXMIN-NEXT: lui a3, %hi(.LCPI23_0)
+; RV32IZHINXMIN-NEXT: lw a3, %lo(.LCPI23_0)(a3)
+; RV32IZHINXMIN-NEXT: xori a2, a2, 1
+; RV32IZHINXMIN-NEXT: addi a2, a2, -1
+; RV32IZHINXMIN-NEXT: and a0, a2, a0
+; RV32IZHINXMIN-NEXT: flt.s a3, a3, s0
+; RV32IZHINXMIN-NEXT: neg a3, a3
+; RV32IZHINXMIN-NEXT: or a0, a3, a0
+; RV32IZHINXMIN-NEXT: and a1, a2, a1
+; RV32IZHINXMIN-NEXT: or a1, a3, a1
; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZHINXMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32IZHINXMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32IZHINXMIN-NEXT: addi sp, sp, 16
; RV32IZHINXMIN-NEXT: ret
;
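
The pattern repeated through all of these half-round-conv-sat.ll hunks is the new
select lowering: each floating-point range check is frozen, turned into an
all-zeros/all-ones mask, and applied with AND/OR. In the assembly the mask shows
up either as neg of the 0/1 compare bit or as the equivalent xori/addi -1 pair;
for an input known to be 0 or 1 both compute 0 or -1. A minimal sketch of the
shape involved (function name and operands are illustrative, not taken from the
tests):

define i64 @sat_low_clamp(float %x, i64 %conv) {
  %ok = fcmp ole float 0.000000e+00, %x   ; lower-bound range check
  %okf = freeze i1 %ok                    ; block poison before masking
  %mask = sext i1 %okf to i64             ; all-ones iff the check held
  %res = and i64 %mask, %conv             ; i.e. select %okf, i64 %conv, i64 0
  ret i64 %res
}
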
diff --git a/llvm/test/CodeGen/RISCV/iabs.ll b/llvm/test/CodeGen/RISCV/iabs.ll
index cb64e24128b5e3..98c886333d69a0 100644
--- a/llvm/test/CodeGen/RISCV/iabs.ll
+++ b/llvm/test/CodeGen/RISCV/iabs.ll
@@ -302,56 +302,56 @@ define i128 @abs128(i128 %x) {
; RV32I-LABEL: abs128:
; RV32I: # %bb.0:
; RV32I-NEXT: lw a2, 12(a1)
-; RV32I-NEXT: lw a3, 4(a1)
-; RV32I-NEXT: lw a4, 0(a1)
+; RV32I-NEXT: lw a3, 0(a1)
+; RV32I-NEXT: lw a4, 4(a1)
; RV32I-NEXT: lw a1, 8(a1)
; RV32I-NEXT: bgez a2, .LBB8_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: neg a5, a1
-; RV32I-NEXT: or a6, a4, a3
-; RV32I-NEXT: snez a6, a6
-; RV32I-NEXT: sltu a7, a5, a6
+; RV32I-NEXT: snez a6, a4
+; RV32I-NEXT: snez a7, a3
+; RV32I-NEXT: or a6, a7, a6
+; RV32I-NEXT: sltu t0, a5, a6
; RV32I-NEXT: snez a1, a1
; RV32I-NEXT: add a1, a2, a1
; RV32I-NEXT: neg a1, a1
-; RV32I-NEXT: sub a2, a1, a7
+; RV32I-NEXT: sub a2, a1, t0
; RV32I-NEXT: sub a1, a5, a6
-; RV32I-NEXT: snez a5, a4
-; RV32I-NEXT: neg a3, a3
-; RV32I-NEXT: sub a3, a3, a5
; RV32I-NEXT: neg a4, a4
+; RV32I-NEXT: sub a4, a4, a7
+; RV32I-NEXT: neg a3, a3
; RV32I-NEXT: .LBB8_2:
-; RV32I-NEXT: sw a4, 0(a0)
+; RV32I-NEXT: sw a3, 0(a0)
+; RV32I-NEXT: sw a4, 4(a0)
; RV32I-NEXT: sw a1, 8(a0)
-; RV32I-NEXT: sw a3, 4(a0)
; RV32I-NEXT: sw a2, 12(a0)
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: abs128:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: lw a2, 12(a1)
-; RV32ZBB-NEXT: lw a3, 4(a1)
-; RV32ZBB-NEXT: lw a4, 0(a1)
+; RV32ZBB-NEXT: lw a3, 0(a1)
+; RV32ZBB-NEXT: lw a4, 4(a1)
; RV32ZBB-NEXT: lw a1, 8(a1)
; RV32ZBB-NEXT: bgez a2, .LBB8_2
; RV32ZBB-NEXT: # %bb.1:
; RV32ZBB-NEXT: neg a5, a1
-; RV32ZBB-NEXT: or a6, a4, a3
-; RV32ZBB-NEXT: snez a6, a6
-; RV32ZBB-NEXT: sltu a7, a5, a6
+; RV32ZBB-NEXT: snez a6, a4
+; RV32ZBB-NEXT: snez a7, a3
+; RV32ZBB-NEXT: or a6, a7, a6
+; RV32ZBB-NEXT: sltu t0, a5, a6
; RV32ZBB-NEXT: snez a1, a1
; RV32ZBB-NEXT: add a1, a2, a1
; RV32ZBB-NEXT: neg a1, a1
-; RV32ZBB-NEXT: sub a2, a1, a7
+; RV32ZBB-NEXT: sub a2, a1, t0
; RV32ZBB-NEXT: sub a1, a5, a6
-; RV32ZBB-NEXT: snez a5, a4
-; RV32ZBB-NEXT: neg a3, a3
-; RV32ZBB-NEXT: sub a3, a3, a5
; RV32ZBB-NEXT: neg a4, a4
+; RV32ZBB-NEXT: sub a4, a4, a7
+; RV32ZBB-NEXT: neg a3, a3
; RV32ZBB-NEXT: .LBB8_2:
-; RV32ZBB-NEXT: sw a4, 0(a0)
+; RV32ZBB-NEXT: sw a3, 0(a0)
+; RV32ZBB-NEXT: sw a4, 4(a0)
; RV32ZBB-NEXT: sw a1, 8(a0)
-; RV32ZBB-NEXT: sw a3, 4(a0)
; RV32ZBB-NEXT: sw a2, 12(a0)
; RV32ZBB-NEXT: ret
;
@@ -384,56 +384,56 @@ define i128 @select_abs128(i128 %x) {
; RV32I-LABEL: select_abs128:
; RV32I: # %bb.0:
; RV32I-NEXT: lw a2, 12(a1)
-; RV32I-NEXT: lw a3, 4(a1)
-; RV32I-NEXT: lw a4, 0(a1)
+; RV32I-NEXT: lw a3, 0(a1)
+; RV32I-NEXT: lw a4, 4(a1)
; RV32I-NEXT: lw a1, 8(a1)
; RV32I-NEXT: bgez a2, .LBB9_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: neg a5, a1
-; RV32I-NEXT: or a6, a4, a3
-; RV32I-NEXT: snez a6, a6
-; RV32I-NEXT: sltu a7, a5, a6
+; RV32I-NEXT: snez a6, a4
+; RV32I-NEXT: snez a7, a3
+; RV32I-NEXT: or a6, a7, a6
+; RV32I-NEXT: sltu t0, a5, a6
; RV32I-NEXT: snez a1, a1
; RV32I-NEXT: add a1, a2, a1
; RV32I-NEXT: neg a1, a1
-; RV32I-NEXT: sub a2, a1, a7
+; RV32I-NEXT: sub a2, a1, t0
; RV32I-NEXT: sub a1, a5, a6
-; RV32I-NEXT: snez a5, a4
-; RV32I-NEXT: neg a3, a3
-; RV32I-NEXT: sub a3, a3, a5
; RV32I-NEXT: neg a4, a4
+; RV32I-NEXT: sub a4, a4, a7
+; RV32I-NEXT: neg a3, a3
; RV32I-NEXT: .LBB9_2:
-; RV32I-NEXT: sw a4, 0(a0)
+; RV32I-NEXT: sw a3, 0(a0)
+; RV32I-NEXT: sw a4, 4(a0)
; RV32I-NEXT: sw a1, 8(a0)
-; RV32I-NEXT: sw a3, 4(a0)
; RV32I-NEXT: sw a2, 12(a0)
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: select_abs128:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: lw a2, 12(a1)
-; RV32ZBB-NEXT: lw a3, 4(a1)
-; RV32ZBB-NEXT: lw a4, 0(a1)
+; RV32ZBB-NEXT: lw a3, 0(a1)
+; RV32ZBB-NEXT: lw a4, 4(a1)
; RV32ZBB-NEXT: lw a1, 8(a1)
; RV32ZBB-NEXT: bgez a2, .LBB9_2
; RV32ZBB-NEXT: # %bb.1:
; RV32ZBB-NEXT: neg a5, a1
-; RV32ZBB-NEXT: or a6, a4, a3
-; RV32ZBB-NEXT: snez a6, a6
-; RV32ZBB-NEXT: sltu a7, a5, a6
+; RV32ZBB-NEXT: snez a6, a4
+; RV32ZBB-NEXT: snez a7, a3
+; RV32ZBB-NEXT: or a6, a7, a6
+; RV32ZBB-NEXT: sltu t0, a5, a6
; RV32ZBB-NEXT: snez a1, a1
; RV32ZBB-NEXT: add a1, a2, a1
; RV32ZBB-NEXT: neg a1, a1
-; RV32ZBB-NEXT: sub a2, a1, a7
+; RV32ZBB-NEXT: sub a2, a1, t0
; RV32ZBB-NEXT: sub a1, a5, a6
-; RV32ZBB-NEXT: snez a5, a4
-; RV32ZBB-NEXT: neg a3, a3
-; RV32ZBB-NEXT: sub a3, a3, a5
; RV32ZBB-NEXT: neg a4, a4
+; RV32ZBB-NEXT: sub a4, a4, a7
+; RV32ZBB-NEXT: neg a3, a3
; RV32ZBB-NEXT: .LBB9_2:
-; RV32ZBB-NEXT: sw a4, 0(a0)
+; RV32ZBB-NEXT: sw a3, 0(a0)
+; RV32ZBB-NEXT: sw a4, 4(a0)
; RV32ZBB-NEXT: sw a1, 8(a0)
-; RV32ZBB-NEXT: sw a3, 4(a0)
; RV32ZBB-NEXT: sw a2, 12(a0)
; RV32ZBB-NEXT: ret
;
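
The iabs.ll changes are confined to the i128 path: the borrow out of the low
words is now computed as an OR of two snez results rather than snez of an OR,
costing one extra register (t0) but producing the same value, since
(a | b) != 0 is exactly (a != 0) | (b != 0). For reference, a select-based
absolute value of the kind these tests exercise (a sketch, not the exact test
IR):

define i128 @abs_i128(i128 %x) {
  %neg = sub i128 0, %x                      ; negated across four 32-bit words on rv32
  %cmp = icmp slt i128 %x, 0
  %abs = select i1 %cmp, i128 %neg, i128 %x
  ret i128 %abs
}
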
diff --git a/llvm/test/CodeGen/RISCV/pr84200.ll b/llvm/test/CodeGen/RISCV/pr84200.ll
index a971527e9567fd..19a102b84ed062 100644
--- a/llvm/test/CodeGen/RISCV/pr84200.ll
+++ b/llvm/test/CodeGen/RISCV/pr84200.ll
@@ -9,9 +9,8 @@ define i64 @foo(i64 %1) {
; CHECK-NEXT: li a1, 1
; CHECK-NEXT: sub a1, a1, a0
; CHECK-NEXT: sltiu a0, a0, 2
-; CHECK-NEXT: not a0, a0
; CHECK-NEXT: xori a1, a1, 1
-; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: neg a0, a0
; CHECK-NEXT: and a0, a0, a1
; CHECK-NEXT: ret
entry:
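
On the pr84200.ll test itself the fix is also a small improvement: once the
compare result is frozen it is known to be 0 or 1, so the mask can be formed
with a single neg instead of the not/addi sequence above. The general shape,
sketched with a hypothetical value operand %v:

define i64 @frozen_mask(i64 %v, i64 %x) {
  %b = icmp ult i64 %x, 2
  %bf = freeze i1 %b
  %mask = sext i1 %bf to i64   ; materialized as neg of the 0/1 bit
  %res = and i64 %mask, %v     ; i.e. select %bf, i64 %v, i64 0
  ret i64 %res
}
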
diff --git a/llvm/test/CodeGen/RISCV/rv32zbb-zbkb.ll b/llvm/test/CodeGen/RISCV/rv32zbb-zbkb.ll
index 71040bf2646d2c..4e958f5699adbf 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbb-zbkb.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbb-zbkb.ll
@@ -298,14 +298,14 @@ define i32 @not_shl_one_i32(i32 %x) {
define i64 @not_shl_one_i64(i64 %x) {
; CHECK-LABEL: not_shl_one_i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 1
-; CHECK-NEXT: sll a1, a1, a0
-; CHECK-NEXT: addi a0, a0, -32
-; CHECK-NEXT: slti a0, a0, 0
-; CHECK-NEXT: neg a2, a0
-; CHECK-NEXT: and a2, a2, a1
-; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: and a1, a0, a1
+; CHECK-NEXT: addi a1, a0, -32
+; CHECK-NEXT: slti a1, a1, 0
+; CHECK-NEXT: neg a2, a1
+; CHECK-NEXT: li a3, 1
+; CHECK-NEXT: sll a0, a3, a0
+; CHECK-NEXT: and a2, a2, a0
+; CHECK-NEXT: addi a1, a1, -1
+; CHECK-NEXT: and a1, a1, a0
; CHECK-NEXT: not a0, a2
; CHECK-NEXT: not a1, a1
; CHECK-NEXT: ret
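
In rv32zbb-zbkb.ll the not_shl_one_i64 output stays the same length; only the
schedule changes, because the shifted constant is now materialized after the
frozen range check and shared by both result halves. The test presumably covers
IR along these lines (sketch):

define i64 @not_shl_one(i64 %x) {
  %one = shl i64 1, %x    ; split into low/high halves on rv32
  %not = xor i64 %one, -1
  ret i64 %not
}
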
diff --git a/llvm/test/CodeGen/RISCV/rv32zbs.ll b/llvm/test/CodeGen/RISCV/rv32zbs.ll
index ccda8f4e5dd059..30aba61ba47469 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbs.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbs.ll
@@ -48,20 +48,20 @@ define i32 @bclr_i32_no_mask(i32 %a, i32 %b) nounwind {
define i64 @bclr_i64(i64 %a, i64 %b) nounwind {
; RV32I-LABEL: bclr_i64:
; RV32I: # %bb.0:
-; RV32I-NEXT: li a3, 1
-; RV32I-NEXT: sll a4, a3, a2
-; RV32I-NEXT: andi a2, a2, 63
-; RV32I-NEXT: addi a5, a2, -32
-; RV32I-NEXT: slti a5, a5, 0
-; RV32I-NEXT: neg a6, a5
-; RV32I-NEXT: and a4, a6, a4
-; RV32I-NEXT: sll a2, a3, a2
-; RV32I-NEXT: addi a5, a5, -1
+; RV32I-NEXT: andi a3, a2, 63
+; RV32I-NEXT: addi a4, a3, -32
+; RV32I-NEXT: slti a4, a4, 0
+; RV32I-NEXT: neg a5, a4
+; RV32I-NEXT: li a6, 1
+; RV32I-NEXT: sll a2, a6, a2
; RV32I-NEXT: and a2, a5, a2
-; RV32I-NEXT: not a3, a4
+; RV32I-NEXT: sll a3, a6, a3
+; RV32I-NEXT: addi a4, a4, -1
+; RV32I-NEXT: and a3, a4, a3
; RV32I-NEXT: not a2, a2
-; RV32I-NEXT: and a0, a3, a0
-; RV32I-NEXT: and a1, a2, a1
+; RV32I-NEXT: not a3, a3
+; RV32I-NEXT: and a0, a2, a0
+; RV32I-NEXT: and a1, a3, a1
; RV32I-NEXT: ret
;
; RV32ZBSNOZBB-LABEL: bclr_i64:
@@ -186,14 +186,14 @@ define i64 @bset_i64(i64 %a, i64 %b) nounwind {
define signext i64 @bset_i64_zero(i64 signext %a) nounwind {
; RV32I-LABEL: bset_i64_zero:
; RV32I: # %bb.0:
-; RV32I-NEXT: li a1, 1
-; RV32I-NEXT: sll a1, a1, a0
-; RV32I-NEXT: addi a0, a0, -32
-; RV32I-NEXT: slti a2, a0, 0
-; RV32I-NEXT: neg a0, a2
-; RV32I-NEXT: and a0, a0, a1
-; RV32I-NEXT: addi a2, a2, -1
-; RV32I-NEXT: and a1, a2, a1
+; RV32I-NEXT: addi a1, a0, -32
+; RV32I-NEXT: slti a1, a1, 0
+; RV32I-NEXT: neg a2, a1
+; RV32I-NEXT: li a3, 1
+; RV32I-NEXT: sll a3, a3, a0
+; RV32I-NEXT: and a0, a2, a3
+; RV32I-NEXT: addi a1, a1, -1
+; RV32I-NEXT: and a1, a1, a3
; RV32I-NEXT: ret
;
; RV32ZBS-LABEL: bset_i64_zero:
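
The rv32zbs.ll bclr_i64 change is the same kind of reshuffle. The canonical
bit-clear pattern these tests start from, as a sketch:

define i64 @bclr(i64 %a, i64 %b) {
  %bit = shl i64 1, %b         ; shift amount assumed < 64
  %mask = xor i64 %bit, -1
  %res = and i64 %a, %mask     ; clear bit %b of %a
  ret i64 %res
}

On rv32 this decomposes into the two-word sequence checked above, with the
frozen compare on the shift amount selecting which half receives the bit.
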
diff --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64xtheadbb.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64xtheadbb.ll
index 4ec7f2660b2a35..73bfc6480b4d75 100644
--- a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64xtheadbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64xtheadbb.ll
@@ -489,7 +489,7 @@ define signext i32 @findFirstSet_i32(i32 signext %a) nounwind {
; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: lbu a0, 0(a0)
; RV64I-NEXT: snez a1, s0
-; RV64I-NEXT: addi a1, a1, -1
+; RV64I-NEXT: addiw a1, a1, -1
; RV64I-NEXT: or a0, a1, a0
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
@@ -513,7 +513,7 @@ define signext i32 @findFirstSet_i32(i32 signext %a) nounwind {
; RV64XTHEADBB-NEXT: add a0, a1, a0
; RV64XTHEADBB-NEXT: lbu a0, 0(a0)
; RV64XTHEADBB-NEXT: snez a1, s0
-; RV64XTHEADBB-NEXT: addi a1, a1, -1
+; RV64XTHEADBB-NEXT: addiw a1, a1, -1
; RV64XTHEADBB-NEXT: or a0, a1, a0
; RV64XTHEADBB-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64XTHEADBB-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
@@ -542,12 +542,10 @@ define signext i32 @ffs_i32(i32 signext %a) nounwind {
; RV64I-NEXT: addi a1, a1, %lo(.LCPI9_0)
; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: lbu a0, 0(a0)
-; RV64I-NEXT: addi a0, a0, 1
+; RV64I-NEXT: addiw a0, a0, 1
; RV64I-NEXT: seqz a1, s0
-; RV64I-NEXT: addi a1, a1, -1
+; RV64I-NEXT: addiw a1, a1, -1
; RV64I-NEXT: and a0, a1, a0
-; RV64I-NEXT: slli a0, a0, 32
-; RV64I-NEXT: srli a0, a0, 32
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
@@ -569,12 +567,10 @@ define signext i32 @ffs_i32(i32 signext %a) nounwind {
; RV64XTHEADBB-NEXT: addi a1, a1, %lo(.LCPI9_0)
; RV64XTHEADBB-NEXT: add a0, a1, a0
; RV64XTHEADBB-NEXT: lbu a0, 0(a0)
-; RV64XTHEADBB-NEXT: addi a0, a0, 1
+; RV64XTHEADBB-NEXT: addiw a0, a0, 1
; RV64XTHEADBB-NEXT: seqz a1, s0
-; RV64XTHEADBB-NEXT: addi a1, a1, -1
+; RV64XTHEADBB-NEXT: addiw a1, a1, -1
; RV64XTHEADBB-NEXT: and a0, a1, a0
-; RV64XTHEADBB-NEXT: slli a0, a0, 32
-; RV64XTHEADBB-NEXT: srli a0, a0, 32
; RV64XTHEADBB-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64XTHEADBB-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
; RV64XTHEADBB-NEXT: addi sp, sp, 16
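
The addi to addiw flips in the rv64-legal-i32 tests are the visible cost of the
freeze: without poison to exploit, the backend keeps the 32-bit intermediate
results properly sign-extended itself, which appears to be why the separate
slli/srli zero-extension pair and the zext.h can be dropped. A sketch of the
kind of i32 arithmetic involved:

define signext i32 @inc(i32 signext %x) {
  ; on riscv64 this is typically selected as addiw, which sign-extends
  ; the 32-bit result into the full 64-bit register
  %r = add i32 %x, 1
  ret i32 %r
}
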
diff --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb.ll
index 68ce66cbe8537d..7feef4dad4116a 100644
--- a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb.ll
@@ -444,7 +444,7 @@ define signext i32 @findFirstSet_i32(i32 signext %a) nounwind {
; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: lbu a0, 0(a0)
; RV64I-NEXT: snez a1, s0
-; RV64I-NEXT: addi a1, a1, -1
+; RV64I-NEXT: addiw a1, a1, -1
; RV64I-NEXT: or a0, a1, a0
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
@@ -481,12 +481,10 @@ define signext i32 @ffs_i32(i32 signext %a) nounwind {
; RV64I-NEXT: addi a1, a1, %lo(.LCPI9_0)
; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: lbu a0, 0(a0)
-; RV64I-NEXT: addi a0, a0, 1
+; RV64I-NEXT: addiw a0, a0, 1
; RV64I-NEXT: seqz a1, s0
-; RV64I-NEXT: addi a1, a1, -1
+; RV64I-NEXT: addiw a1, a1, -1
; RV64I-NEXT: and a0, a1, a0
-; RV64I-NEXT: slli a0, a0, 32
-; RV64I-NEXT: srli a0, a0, 32
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
@@ -495,11 +493,10 @@ define signext i32 @ffs_i32(i32 signext %a) nounwind {
; RV64ZBB-LABEL: ffs_i32:
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: ctzw a1, a0
-; RV64ZBB-NEXT: addi a1, a1, 1
+; RV64ZBB-NEXT: addiw a1, a1, 1
; RV64ZBB-NEXT: seqz a0, a0
-; RV64ZBB-NEXT: addi a0, a0, -1
+; RV64ZBB-NEXT: addiw a0, a0, -1
; RV64ZBB-NEXT: and a0, a0, a1
-; RV64ZBB-NEXT: zext.h a0, a0
; RV64ZBB-NEXT: ret
%1 = call i32 @llvm.cttz.i32(i32 %a, i1 true)
%2 = add i32 %1, 1
diff --git a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
index 48ce7d623475cb..652a1799ae55c0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
@@ -89,17 +89,17 @@ entry:
define <2 x i32> @ustest_f64i32(<2 x double> %x) {
; CHECK-NOV-LABEL: ustest_f64i32:
; CHECK-NOV: # %bb.0: # %entry
-; CHECK-NOV-NEXT: fcvt.l.d a0, fa0, rtz
+; CHECK-NOV-NEXT: fcvt.l.d a1, fa1, rtz
; CHECK-NOV-NEXT: li a2, -1
; CHECK-NOV-NEXT: srli a2, a2, 32
-; CHECK-NOV-NEXT: fcvt.l.d a1, fa1, rtz
-; CHECK-NOV-NEXT: blt a0, a2, .LBB2_2
+; CHECK-NOV-NEXT: fcvt.l.d a0, fa0, rtz
+; CHECK-NOV-NEXT: blt a1, a2, .LBB2_2
; CHECK-NOV-NEXT: # %bb.1: # %entry
-; CHECK-NOV-NEXT: mv a0, a2
+; CHECK-NOV-NEXT: mv a1, a2
; CHECK-NOV-NEXT: .LBB2_2: # %entry
-; CHECK-NOV-NEXT: blt a1, a2, .LBB2_4
+; CHECK-NOV-NEXT: blt a0, a2, .LBB2_4
; CHECK-NOV-NEXT: # %bb.3: # %entry
-; CHECK-NOV-NEXT: mv a1, a2
+; CHECK-NOV-NEXT: mv a0, a2
; CHECK-NOV-NEXT: .LBB2_4: # %entry
; CHECK-NOV-NEXT: sgtz a2, a1
; CHECK-NOV-NEXT: sgtz a3, a0
@@ -257,46 +257,46 @@ entry:
define <4 x i32> @ustest_f32i32(<4 x float> %x) {
; CHECK-NOV-LABEL: ustest_f32i32:
; CHECK-NOV: # %bb.0: # %entry
-; CHECK-NOV-NEXT: fcvt.l.s a1, fa0, rtz
+; CHECK-NOV-NEXT: fcvt.l.s a1, fa3, rtz
; CHECK-NOV-NEXT: li a4, -1
; CHECK-NOV-NEXT: srli a4, a4, 32
-; CHECK-NOV-NEXT: fcvt.l.s a2, fa1, rtz
+; CHECK-NOV-NEXT: fcvt.l.s a2, fa2, rtz
; CHECK-NOV-NEXT: bge a1, a4, .LBB5_6
; CHECK-NOV-NEXT: # %bb.1: # %entry
-; CHECK-NOV-NEXT: fcvt.l.s a3, fa2, rtz
+; CHECK-NOV-NEXT: fcvt.l.s a3, fa1, rtz
; CHECK-NOV-NEXT: bge a2, a4, .LBB5_7
; CHECK-NOV-NEXT: .LBB5_2: # %entry
-; CHECK-NOV-NEXT: fcvt.l.s a5, fa3, rtz
+; CHECK-NOV-NEXT: fcvt.l.s a5, fa0, rtz
; CHECK-NOV-NEXT: bge a3, a4, .LBB5_8
; CHECK-NOV-NEXT: .LBB5_3: # %entry
; CHECK-NOV-NEXT: blt a5, a4, .LBB5_5
; CHECK-NOV-NEXT: .LBB5_4: # %entry
; CHECK-NOV-NEXT: mv a5, a4
; CHECK-NOV-NEXT: .LBB5_5: # %entry
-; CHECK-NOV-NEXT: sgtz a4, a5
-; CHECK-NOV-NEXT: sgtz a6, a3
-; CHECK-NOV-NEXT: sgtz a7, a2
-; CHECK-NOV-NEXT: sgtz t0, a1
+; CHECK-NOV-NEXT: sgtz a4, a1
+; CHECK-NOV-NEXT: sgtz a6, a2
+; CHECK-NOV-NEXT: sgtz a7, a3
+; CHECK-NOV-NEXT: sgtz t0, a5
; CHECK-NOV-NEXT: negw t0, t0
-; CHECK-NOV-NEXT: and a1, t0, a1
+; CHECK-NOV-NEXT: and a5, t0, a5
; CHECK-NOV-NEXT: negw a7, a7
-; CHECK-NOV-NEXT: and a2, a7, a2
+; CHECK-NOV-NEXT: and a3, a7, a3
; CHECK-NOV-NEXT: negw a6, a6
-; CHECK-NOV-NEXT: and a3, a6, a3
+; CHECK-NOV-NEXT: and a2, a6, a2
; CHECK-NOV-NEXT: negw a4, a4
-; CHECK-NOV-NEXT: and a4, a4, a5
-; CHECK-NOV-NEXT: sw a4, 12(a0)
-; CHECK-NOV-NEXT: sw a3, 8(a0)
-; CHECK-NOV-NEXT: sw a2, 4(a0)
-; CHECK-NOV-NEXT: sw a1, 0(a0)
+; CHECK-NOV-NEXT: and a1, a4, a1
+; CHECK-NOV-NEXT: sw a1, 12(a0)
+; CHECK-NOV-NEXT: sw a2, 8(a0)
+; CHECK-NOV-NEXT: sw a3, 4(a0)
+; CHECK-NOV-NEXT: sw a5, 0(a0)
; CHECK-NOV-NEXT: ret
; CHECK-NOV-NEXT: .LBB5_6: # %entry
; CHECK-NOV-NEXT: mv a1, a4
-; CHECK-NOV-NEXT: fcvt.l.s a3, fa2, rtz
+; CHECK-NOV-NEXT: fcvt.l.s a3, fa1, rtz
; CHECK-NOV-NEXT: blt a2, a4, .LBB5_2
; CHECK-NOV-NEXT: .LBB5_7: # %entry
; CHECK-NOV-NEXT: mv a2, a4
-; CHECK-NOV-NEXT: fcvt.l.s a5, fa3, rtz
+; CHECK-NOV-NEXT: fcvt.l.s a5, fa0, rtz
; CHECK-NOV-NEXT: blt a3, a4, .LBB5_3
; CHECK-NOV-NEXT: .LBB5_8: # %entry
; CHECK-NOV-NEXT: mv a3, a4
@@ -700,10 +700,10 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) {
; CHECK-NOV-NEXT: .cfi_offset fs0, -48
; CHECK-NOV-NEXT: .cfi_offset fs1, -56
; CHECK-NOV-NEXT: .cfi_offset fs2, -64
-; CHECK-NOV-NEXT: lhu s1, 0(a1)
-; CHECK-NOV-NEXT: lhu s2, 24(a1)
-; CHECK-NOV-NEXT: lhu s3, 16(a1)
-; CHECK-NOV-NEXT: lhu a1, 8(a1)
+; CHECK-NOV-NEXT: lhu s1, 24(a1)
+; CHECK-NOV-NEXT: lhu s2, 0(a1)
+; CHECK-NOV-NEXT: lhu s3, 8(a1)
+; CHECK-NOV-NEXT: lhu a1, 16(a1)
; CHECK-NOV-NEXT: mv s0, a0
; CHECK-NOV-NEXT: fmv.w.x fa0, a1
; CHECK-NOV-NEXT: call __extendhfsf2
@@ -732,22 +732,22 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) {
; CHECK-NOV-NEXT: .LBB8_4: # %entry
; CHECK-NOV-NEXT: mv a3, a2
; CHECK-NOV-NEXT: .LBB8_5: # %entry
-; CHECK-NOV-NEXT: sgtz a2, a3
-; CHECK-NOV-NEXT: sgtz a4, a1
-; CHECK-NOV-NEXT: sgtz a5, s1
-; CHECK-NOV-NEXT: sgtz a6, a0
+; CHECK-NOV-NEXT: sgtz a2, a0
+; CHECK-NOV-NEXT: sgtz a4, s1
+; CHECK-NOV-NEXT: sgtz a5, a1
+; CHECK-NOV-NEXT: sgtz a6, a3
; CHECK-NOV-NEXT: negw a6, a6
-; CHECK-NOV-NEXT: and a0, a6, a0
+; CHECK-NOV-NEXT: and a3, a6, a3
; CHECK-NOV-NEXT: negw a5, a5
-; CHECK-NOV-NEXT: and a5, a5, s1
+; CHECK-NOV-NEXT: and a1, a5, a1
; CHECK-NOV-NEXT: negw a4, a4
-; CHECK-NOV-NEXT: and a1, a4, a1
+; CHECK-NOV-NEXT: and a4, a4, s1
; CHECK-NOV-NEXT: negw a2, a2
-; CHECK-NOV-NEXT: and a2, a2, a3
-; CHECK-NOV-NEXT: sw a2, 12(s0)
-; CHECK-NOV-NEXT: sw a1, 8(s0)
-; CHECK-NOV-NEXT: sw a5, 4(s0)
-; CHECK-NOV-NEXT: sw a0, 0(s0)
+; CHECK-NOV-NEXT: and a0, a2, a0
+; CHECK-NOV-NEXT: sw a0, 12(s0)
+; CHECK-NOV-NEXT: sw a4, 8(s0)
+; CHECK-NOV-NEXT: sw a1, 4(s0)
+; CHECK-NOV-NEXT: sw a3, 0(s0)
; CHECK-NOV-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
@@ -950,17 +950,17 @@ entry:
define <2 x i16> @ustest_f64i16(<2 x double> %x) {
; CHECK-NOV-LABEL: ustest_f64i16:
; CHECK-NOV: # %bb.0: # %entry
-; CHECK-NOV-NEXT: fcvt.w.d a0, fa0, rtz
+; CHECK-NOV-NEXT: fcvt.w.d a1, fa1, rtz
; CHECK-NOV-NEXT: lui a2, 16
; CHECK-NOV-NEXT: addiw a2, a2, -1
-; CHECK-NOV-NEXT: fcvt.w.d a1, fa1, rtz
-; CHECK-NOV-NEXT: blt a0, a2, .LBB11_2
+; CHECK-NOV-NEXT: fcvt.w.d a0, fa0, rtz
+; CHECK-NOV-NEXT: blt a1, a2, .LBB11_2
; CHECK-NOV-NEXT: # %bb.1: # %entry
-; CHECK-NOV-NEXT: mv a0, a2
+; CHECK-NOV-NEXT: mv a1, a2
; CHECK-NOV-NEXT: .LBB11_2: # %entry
-; CHECK-NOV-NEXT: blt a1, a2, .LBB11_4
+; CHECK-NOV-NEXT: blt a0, a2, .LBB11_4
; CHECK-NOV-NEXT: # %bb.3: # %entry
-; CHECK-NOV-NEXT: mv a1, a2
+; CHECK-NOV-NEXT: mv a0, a2
; CHECK-NOV-NEXT: .LBB11_4: # %entry
; CHECK-NOV-NEXT: sgtz a2, a1
; CHECK-NOV-NEXT: sgtz a3, a0
@@ -1122,46 +1122,46 @@ entry:
define <4 x i16> @ustest_f32i16(<4 x float> %x) {
; CHECK-NOV-LABEL: ustest_f32i16:
; CHECK-NOV: # %bb.0: # %entry
-; CHECK-NOV-NEXT: fcvt.w.s a1, fa0, rtz
+; CHECK-NOV-NEXT: fcvt.w.s a1, fa3, rtz
; CHECK-NOV-NEXT: lui a4, 16
; CHECK-NOV-NEXT: addiw a4, a4, -1
-; CHECK-NOV-NEXT: fcvt.w.s a2, fa1, rtz
+; CHECK-NOV-NEXT: fcvt.w.s a2, fa2, rtz
; CHECK-NOV-NEXT: bge a1, a4, .LBB14_6
; CHECK-NOV-NEXT: # %bb.1: # %entry
-; CHECK-NOV-NEXT: fcvt.w.s a3, fa2, rtz
+; CHECK-NOV-NEXT: fcvt.w.s a3, fa1, rtz
; CHECK-NOV-NEXT: bge a2, a4, .LBB14_7
; CHECK-NOV-NEXT: .LBB14_2: # %entry
-; CHECK-NOV-NEXT: fcvt.w.s a5, fa3, rtz
+; CHECK-NOV-NEXT: fcvt.w.s a5, fa0, rtz
; CHECK-NOV-NEXT: bge a3, a4, .LBB14_8
; CHECK-NOV-NEXT: .LBB14_3: # %entry
; CHECK-NOV-NEXT: blt a5, a4, .LBB14_5
; CHECK-NOV-NEXT: .LBB14_4: # %entry
; CHECK-NOV-NEXT: mv a5, a4
; CHECK-NOV-NEXT: .LBB14_5: # %entry
-; CHECK-NOV-NEXT: sgtz a4, a5
-; CHECK-NOV-NEXT: sgtz a6, a3
-; CHECK-NOV-NEXT: sgtz a7, a2
-; CHECK-NOV-NEXT: sgtz t0, a1
+; CHECK-NOV-NEXT: sgtz a4, a1
+; CHECK-NOV-NEXT: sgtz a6, a2
+; CHECK-NOV-NEXT: sgtz a7, a3
+; CHECK-NOV-NEXT: sgtz t0, a5
; CHECK-NOV-NEXT: negw t0, t0
-; CHECK-NOV-NEXT: and a1, t0, a1
+; CHECK-NOV-NEXT: and a5, t0, a5
; CHECK-NOV-NEXT: negw a7, a7
-; CHECK-NOV-NEXT: and a2, a7, a2
+; CHECK-NOV-NEXT: and a3, a7, a3
; CHECK-NOV-NEXT: negw a6, a6
-; CHECK-NOV-NEXT: and a3, a6, a3
+; CHECK-NOV-NEXT: and a2, a6, a2
; CHECK-NOV-NEXT: negw a4, a4
-; CHECK-NOV-NEXT: and a4, a4, a5
-; CHECK-NOV-NEXT: sh a4, 6(a0)
-; CHECK-NOV-NEXT: sh a3, 4(a0)
-; CHECK-NOV-NEXT: sh a2, 2(a0)
-; CHECK-NOV-NEXT: sh a1, 0(a0)
+; CHECK-NOV-NEXT: and a1, a4, a1
+; CHECK-NOV-NEXT: sh a1, 6(a0)
+; CHECK-NOV-NEXT: sh a2, 4(a0)
+; CHECK-NOV-NEXT: sh a3, 2(a0)
+; CHECK-NOV-NEXT: sh a5, 0(a0)
; CHECK-NOV-NEXT: ret
; CHECK-NOV-NEXT: .LBB14_6: # %entry
; CHECK-NOV-NEXT: mv a1, a4
-; CHECK-NOV-NEXT: fcvt.w.s a3, fa2, rtz
+; CHECK-NOV-NEXT: fcvt.w.s a3, fa1, rtz
; CHECK-NOV-NEXT: blt a2, a4, .LBB14_2
; CHECK-NOV-NEXT: .LBB14_7: # %entry
; CHECK-NOV-NEXT: mv a2, a4
-; CHECK-NOV-NEXT: fcvt.w.s a5, fa3, rtz
+; CHECK-NOV-NEXT: fcvt.w.s a5, fa0, rtz
; CHECK-NOV-NEXT: blt a3, a4, .LBB14_3
; CHECK-NOV-NEXT: .LBB14_8: # %entry
; CHECK-NOV-NEXT: mv a3, a4
@@ -1822,14 +1822,14 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) {
; CHECK-NOV-NEXT: .cfi_offset fs4, -112
; CHECK-NOV-NEXT: .cfi_offset fs5, -120
; CHECK-NOV-NEXT: .cfi_offset fs6, -128
-; CHECK-NOV-NEXT: lhu s1, 0(a1)
-; CHECK-NOV-NEXT: lhu s2, 56(a1)
-; CHECK-NOV-NEXT: lhu s3, 48(a1)
-; CHECK-NOV-NEXT: lhu s4, 40(a1)
-; CHECK-NOV-NEXT: lhu s5, 32(a1)
-; CHECK-NOV-NEXT: lhu s6, 24(a1)
-; CHECK-NOV-NEXT: lhu s7, 16(a1)
-; CHECK-NOV-NEXT: lhu a1, 8(a1)
+; CHECK-NOV-NEXT: lhu s1, 56(a1)
+; CHECK-NOV-NEXT: lhu s2, 0(a1)
+; CHECK-NOV-NEXT: lhu s3, 8(a1)
+; CHECK-NOV-NEXT: lhu s4, 16(a1)
+; CHECK-NOV-NEXT: lhu s5, 24(a1)
+; CHECK-NOV-NEXT: lhu s6, 32(a1)
+; CHECK-NOV-NEXT: lhu s7, 40(a1)
+; CHECK-NOV-NEXT: lhu a1, 48(a1)
; CHECK-NOV-NEXT: mv s0, a0
; CHECK-NOV-NEXT: fmv.w.x fa0, a1
; CHECK-NOV-NEXT: call __extendhfsf2
@@ -1882,38 +1882,38 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) {
; CHECK-NOV-NEXT: .LBB17_8: # %entry
; CHECK-NOV-NEXT: mv a7, a3
; CHECK-NOV-NEXT: .LBB17_9: # %entry
-; CHECK-NOV-NEXT: sgtz a3, a7
-; CHECK-NOV-NEXT: sgtz t0, a6
-; CHECK-NOV-NEXT: sgtz t1, a5
-; CHECK-NOV-NEXT: sgtz t2, a4
-; CHECK-NOV-NEXT: sgtz t3, a2
-; CHECK-NOV-NEXT: sgtz t4, a1
-; CHECK-NOV-NEXT: sgtz t5, s1
-; CHECK-NOV-NEXT: sgtz t6, a0
+; CHECK-NOV-NEXT: sgtz a3, a0
+; CHECK-NOV-NEXT: sgtz t0, s1
+; CHECK-NOV-NEXT: sgtz t1, a1
+; CHECK-NOV-NEXT: sgtz t2, a2
+; CHECK-NOV-NEXT: sgtz t3, a4
+; CHECK-NOV-NEXT: sgtz t4, a5
+; CHECK-NOV-NEXT: sgtz t5, a6
+; CHECK-NOV-NEXT: sgtz t6, a7
; CHECK-NOV-NEXT: negw t6, t6
-; CHECK-NOV-NEXT: and a0, t6, a0
+; CHECK-NOV-NEXT: and a7, t6, a7
; CHECK-NOV-NEXT: negw t5, t5
-; CHECK-NOV-NEXT: and t5, t5, s1
+; CHECK-NOV-NEXT: and a6, t5, a6
; CHECK-NOV-NEXT: negw t4, t4
-; CHECK-NOV-NEXT: and a1, t4, a1
+; CHECK-NOV-NEXT: and a5, t4, a5
; CHECK-NOV-NEXT: negw t3, t3
-; CHECK-NOV-NEXT: and a2, t3, a2
+; CHECK-NOV-NEXT: and a4, t3, a4
; CHECK-NOV-NEXT: negw t2, t2
-; CHECK-NOV-NEXT: and a4, t2, a4
+; CHECK-NOV-NEXT: and a2, t2, a2
; CHECK-NOV-NEXT: negw t1, t1
-; CHECK-NOV-NEXT: and a5, t1, a5
+; CHECK-NOV-NEXT: and a1, t1, a1
; CHECK-NOV-NEXT: negw t0, t0
-; CHECK-NOV-NEXT: and a6, t0, a6
+; CHECK-NOV-NEXT: and t0, t0, s1
; CHECK-NOV-NEXT: negw a3, a3
-; CHECK-NOV-NEXT: and a3, a3, a7
-; CHECK-NOV-NEXT: sh a3, 14(s0)
-; CHECK-NOV-NEXT: sh a6, 12(s0)
-; CHECK-NOV-NEXT: sh a5, 10(s0)
-; CHECK-NOV-NEXT: sh a4, 8(s0)
-; CHECK-NOV-NEXT: sh a2, 6(s0)
-; CHECK-NOV-NEXT: sh a1, 4(s0)
-; CHECK-NOV-NEXT: sh t5, 2(s0)
-; CHECK-NOV-NEXT: sh a0, 0(s0)
+; CHECK-NOV-NEXT: and a0, a3, a0
+; CHECK-NOV-NEXT: sh a0, 14(s0)
+; CHECK-NOV-NEXT: sh t0, 12(s0)
+; CHECK-NOV-NEXT: sh a1, 10(s0)
+; CHECK-NOV-NEXT: sh a2, 8(s0)
+; CHECK-NOV-NEXT: sh a4, 6(s0)
+; CHECK-NOV-NEXT: sh a5, 4(s0)
+; CHECK-NOV-NEXT: sh a6, 2(s0)
+; CHECK-NOV-NEXT: sh a7, 0(s0)
; CHECK-NOV-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s0, 112(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s1, 104(sp) # 8-byte Folded Reload
@@ -2106,66 +2106,65 @@ define <2 x i64> @stest_f64i64(<2 x double> %x) {
; CHECK-NOV-NEXT: .cfi_offset s0, -16
; CHECK-NOV-NEXT: .cfi_offset s1, -24
; CHECK-NOV-NEXT: .cfi_offset fs0, -32
-; CHECK-NOV-NEXT: fmv.d fs0, fa1
+; CHECK-NOV-NEXT: fmv.d fs0, fa0
+; CHECK-NOV-NEXT: fmv.d fa0, fa1
; CHECK-NOV-NEXT: call __fixdfti
; CHECK-NOV-NEXT: mv s0, a0
; CHECK-NOV-NEXT: mv s1, a1
; CHECK-NOV-NEXT: fmv.d fa0, fs0
; CHECK-NOV-NEXT: call __fixdfti
-; CHECK-NOV-NEXT: mv a2, a0
-; CHECK-NOV-NEXT: li a0, -1
-; CHECK-NOV-NEXT: srli a3, a0, 1
-; CHECK-NOV-NEXT: beqz a1, .LBB18_3
+; CHECK-NOV-NEXT: li a2, -1
+; CHECK-NOV-NEXT: srli a3, a2, 1
+; CHECK-NOV-NEXT: beqz s1, .LBB18_3
; CHECK-NOV-NEXT: # %bb.1: # %entry
-; CHECK-NOV-NEXT: slti a4, a1, 0
-; CHECK-NOV-NEXT: bnez s1, .LBB18_4
+; CHECK-NOV-NEXT: slti a4, s1, 0
+; CHECK-NOV-NEXT: bnez a1, .LBB18_4
; CHECK-NOV-NEXT: .LBB18_2:
-; CHECK-NOV-NEXT: sltu a5, s0, a3
+; CHECK-NOV-NEXT: sltu a5, a0, a3
; CHECK-NOV-NEXT: beqz a5, .LBB18_5
; CHECK-NOV-NEXT: j .LBB18_6
; CHECK-NOV-NEXT: .LBB18_3:
-; CHECK-NOV-NEXT: sltu a4, a2, a3
-; CHECK-NOV-NEXT: beqz s1, .LBB18_2
+; CHECK-NOV-NEXT: sltu a4, s0, a3
+; CHECK-NOV-NEXT: beqz a1, .LBB18_2
; CHECK-NOV-NEXT: .LBB18_4: # %entry
-; CHECK-NOV-NEXT: slti a5, s1, 0
+; CHECK-NOV-NEXT: slti a5, a1, 0
; CHECK-NOV-NEXT: bnez a5, .LBB18_6
; CHECK-NOV-NEXT: .LBB18_5: # %entry
-; CHECK-NOV-NEXT: mv s0, a3
+; CHECK-NOV-NEXT: mv a0, a3
; CHECK-NOV-NEXT: .LBB18_6: # %entry
; CHECK-NOV-NEXT: neg a6, a5
; CHECK-NOV-NEXT: neg a5, a4
-; CHECK-NOV-NEXT: and a5, a5, a1
+; CHECK-NOV-NEXT: and a5, a5, s1
; CHECK-NOV-NEXT: bnez a4, .LBB18_8
; CHECK-NOV-NEXT: # %bb.7: # %entry
-; CHECK-NOV-NEXT: mv a2, a3
+; CHECK-NOV-NEXT: mv s0, a3
; CHECK-NOV-NEXT: .LBB18_8: # %entry
-; CHECK-NOV-NEXT: and a4, a6, s1
-; CHECK-NOV-NEXT: slli a1, a0, 63
-; CHECK-NOV-NEXT: beq a5, a0, .LBB18_11
+; CHECK-NOV-NEXT: and a4, a6, a1
+; CHECK-NOV-NEXT: slli a1, a2, 63
+; CHECK-NOV-NEXT: beq a5, a2, .LBB18_11
; CHECK-NOV-NEXT: # %bb.9: # %entry
; CHECK-NOV-NEXT: slti a3, a5, 0
; CHECK-NOV-NEXT: xori a3, a3, 1
-; CHECK-NOV-NEXT: bne a4, a0, .LBB18_12
+; CHECK-NOV-NEXT: bne a4, a2, .LBB18_12
; CHECK-NOV-NEXT: .LBB18_10:
-; CHECK-NOV-NEXT: sltu a0, a1, s0
-; CHECK-NOV-NEXT: beqz a0, .LBB18_13
+; CHECK-NOV-NEXT: sltu a2, a1, a0
+; CHECK-NOV-NEXT: beqz a2, .LBB18_13
; CHECK-NOV-NEXT: j .LBB18_14
; CHECK-NOV-NEXT: .LBB18_11:
-; CHECK-NOV-NEXT: sltu a3, a1, a2
-; CHECK-NOV-NEXT: beq a4, a0, .LBB18_10
+; CHECK-NOV-NEXT: sltu a3, a1, s0
+; CHECK-NOV-NEXT: beq a4, a2, .LBB18_10
; CHECK-NOV-NEXT: .LBB18_12: # %entry
-; CHECK-NOV-NEXT: slti a0, a4, 0
-; CHECK-NOV-NEXT: xori a0, a0, 1
-; CHECK-NOV-NEXT: bnez a0, .LBB18_14
+; CHECK-NOV-NEXT: slti a2, a4, 0
+; CHECK-NOV-NEXT: xori a2, a2, 1
+; CHECK-NOV-NEXT: bnez a2, .LBB18_14
; CHECK-NOV-NEXT: .LBB18_13: # %entry
-; CHECK-NOV-NEXT: mv s0, a1
+; CHECK-NOV-NEXT: mv a0, a1
; CHECK-NOV-NEXT: .LBB18_14: # %entry
; CHECK-NOV-NEXT: bnez a3, .LBB18_16
; CHECK-NOV-NEXT: # %bb.15: # %entry
-; CHECK-NOV-NEXT: mv a2, a1
+; CHECK-NOV-NEXT: mv s0, a1
; CHECK-NOV-NEXT: .LBB18_16: # %entry
-; CHECK-NOV-NEXT: mv a0, s0
-; CHECK-NOV-NEXT: mv a1, a2
+; CHECK-NOV-NEXT: mv a1, s0
; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
@@ -2190,43 +2189,43 @@ define <2 x i64> @stest_f64i64(<2 x double> %x) {
; CHECK-V-NEXT: addi a0, sp, 32
; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-V-NEXT: vslidedown.vi v9, v8, 1
-; CHECK-V-NEXT: vfmv.f.s fa0, v9
+; CHECK-V-NEXT: vfmv.f.s fa0, v8
; CHECK-V-NEXT: call __fixdfti
; CHECK-V-NEXT: mv s0, a0
; CHECK-V-NEXT: mv s1, a1
; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-V-NEXT: addi a0, sp, 32
; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslidedown.vi v8, v8, 1
; CHECK-V-NEXT: vfmv.f.s fa0, v8
; CHECK-V-NEXT: call __fixdfti
; CHECK-V-NEXT: li a2, -1
; CHECK-V-NEXT: srli a3, a2, 1
-; CHECK-V-NEXT: beqz a1, .LBB18_3
+; CHECK-V-NEXT: beqz s1, .LBB18_3
; CHECK-V-NEXT: # %bb.1: # %entry
-; CHECK-V-NEXT: slti a4, a1, 0
-; CHECK-V-NEXT: bnez s1, .LBB18_4
+; CHECK-V-NEXT: slti a4, s1, 0
+; CHECK-V-NEXT: bnez a1, .LBB18_4
; CHECK-V-NEXT: .LBB18_2:
-; CHECK-V-NEXT: sltu a5, s0, a3
+; CHECK-V-NEXT: sltu a5, a0, a3
; CHECK-V-NEXT: beqz a5, .LBB18_5
; CHECK-V-NEXT: j .LBB18_6
; CHECK-V-NEXT: .LBB18_3:
-; CHECK-V-NEXT: sltu a4, a0, a3
-; CHECK-V-NEXT: beqz s1, .LBB18_2
+; CHECK-V-NEXT: sltu a4, s0, a3
+; CHECK-V-NEXT: beqz a1, .LBB18_2
; CHECK-V-NEXT: .LBB18_4: # %entry
-; CHECK-V-NEXT: slti a5, s1, 0
+; CHECK-V-NEXT: slti a5, a1, 0
; CHECK-V-NEXT: bnez a5, .LBB18_6
; CHECK-V-NEXT: .LBB18_5: # %entry
-; CHECK-V-NEXT: mv s0, a3
+; CHECK-V-NEXT: mv a0, a3
; CHECK-V-NEXT: .LBB18_6: # %entry
; CHECK-V-NEXT: neg a6, a5
; CHECK-V-NEXT: neg a5, a4
-; CHECK-V-NEXT: and a5, a5, a1
+; CHECK-V-NEXT: and a5, a5, s1
; CHECK-V-NEXT: bnez a4, .LBB18_8
; CHECK-V-NEXT: # %bb.7: # %entry
-; CHECK-V-NEXT: mv a0, a3
+; CHECK-V-NEXT: mv s0, a3
; CHECK-V-NEXT: .LBB18_8: # %entry
-; CHECK-V-NEXT: and a4, a6, s1
+; CHECK-V-NEXT: and a4, a6, a1
; CHECK-V-NEXT: slli a1, a2, 63
; CHECK-V-NEXT: beq a5, a2, .LBB18_11
; CHECK-V-NEXT: # %bb.9: # %entry
@@ -2234,26 +2233,26 @@ define <2 x i64> @stest_f64i64(<2 x double> %x) {
; CHECK-V-NEXT: xori a3, a3, 1
; CHECK-V-NEXT: bne a4, a2, .LBB18_12
; CHECK-V-NEXT: .LBB18_10:
-; CHECK-V-NEXT: sltu a2, a1, s0
+; CHECK-V-NEXT: sltu a2, a1, a0
; CHECK-V-NEXT: beqz a2, .LBB18_13
; CHECK-V-NEXT: j .LBB18_14
; CHECK-V-NEXT: .LBB18_11:
-; CHECK-V-NEXT: sltu a3, a1, a0
+; CHECK-V-NEXT: sltu a3, a1, s0
; CHECK-V-NEXT: beq a4, a2, .LBB18_10
; CHECK-V-NEXT: .LBB18_12: # %entry
; CHECK-V-NEXT: slti a2, a4, 0
; CHECK-V-NEXT: xori a2, a2, 1
; CHECK-V-NEXT: bnez a2, .LBB18_14
; CHECK-V-NEXT: .LBB18_13: # %entry
-; CHECK-V-NEXT: mv s0, a1
+; CHECK-V-NEXT: mv a0, a1
; CHECK-V-NEXT: .LBB18_14: # %entry
; CHECK-V-NEXT: bnez a3, .LBB18_16
; CHECK-V-NEXT: # %bb.15: # %entry
-; CHECK-V-NEXT: mv a0, a1
+; CHECK-V-NEXT: mv s0, a1
; CHECK-V-NEXT: .LBB18_16: # %entry
; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-V-NEXT: vmv.s.x v8, a0
-; CHECK-V-NEXT: vmv.s.x v9, s0
+; CHECK-V-NEXT: vmv.s.x v8, s0
+; CHECK-V-NEXT: vmv.s.x v9, a0
; CHECK-V-NEXT: vslideup.vi v8, v9, 1
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: slli a0, a0, 1
@@ -2286,19 +2285,19 @@ define <2 x i64> @utest_f64i64(<2 x double> %x) {
; CHECK-NOV-NEXT: .cfi_offset s0, -16
; CHECK-NOV-NEXT: .cfi_offset s1, -24
; CHECK-NOV-NEXT: .cfi_offset fs0, -32
-; CHECK-NOV-NEXT: fmv.d fs0, fa1
+; CHECK-NOV-NEXT: fmv.d fs0, fa0
+; CHECK-NOV-NEXT: fmv.d fa0, fa1
; CHECK-NOV-NEXT: call __fixunsdfti
; CHECK-NOV-NEXT: mv s0, a0
; CHECK-NOV-NEXT: mv s1, a1
; CHECK-NOV-NEXT: fmv.d fa0, fs0
; CHECK-NOV-NEXT: call __fixunsdfti
-; CHECK-NOV-NEXT: snez a1, a1
; CHECK-NOV-NEXT: snez a2, s1
-; CHECK-NOV-NEXT: addi a2, a2, -1
-; CHECK-NOV-NEXT: and a2, a2, s0
+; CHECK-NOV-NEXT: snez a1, a1
; CHECK-NOV-NEXT: addi a1, a1, -1
-; CHECK-NOV-NEXT: and a1, a1, a0
-; CHECK-NOV-NEXT: mv a0, a2
+; CHECK-NOV-NEXT: and a0, a1, a0
+; CHECK-NOV-NEXT: addi a1, a2, -1
+; CHECK-NOV-NEXT: and a1, a1, s0
; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
@@ -2323,25 +2322,25 @@ define <2 x i64> @utest_f64i64(<2 x double> %x) {
; CHECK-V-NEXT: addi a0, sp, 32
; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-V-NEXT: vslidedown.vi v9, v8, 1
-; CHECK-V-NEXT: vfmv.f.s fa0, v9
+; CHECK-V-NEXT: vfmv.f.s fa0, v8
; CHECK-V-NEXT: call __fixunsdfti
; CHECK-V-NEXT: mv s0, a0
; CHECK-V-NEXT: mv s1, a1
; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-V-NEXT: addi a0, sp, 32
; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslidedown.vi v8, v8, 1
; CHECK-V-NEXT: vfmv.f.s fa0, v8
; CHECK-V-NEXT: call __fixunsdfti
-; CHECK-V-NEXT: snez a1, a1
; CHECK-V-NEXT: snez a2, s1
-; CHECK-V-NEXT: addi a2, a2, -1
-; CHECK-V-NEXT: and a2, a2, s0
+; CHECK-V-NEXT: snez a1, a1
; CHECK-V-NEXT: addi a1, a1, -1
; CHECK-V-NEXT: and a0, a1, a0
+; CHECK-V-NEXT: addi a2, a2, -1
+; CHECK-V-NEXT: and a2, a2, s0
; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-V-NEXT: vmv.s.x v8, a0
-; CHECK-V-NEXT: vmv.s.x v9, a2
+; CHECK-V-NEXT: vmv.s.x v8, a2
+; CHECK-V-NEXT: vmv.s.x v9, a0
; CHECK-V-NEXT: vslideup.vi v8, v9, 1
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: slli a0, a0, 1
@@ -2383,32 +2382,32 @@ define <2 x i64> @ustest_f64i64(<2 x double> %x) {
; CHECK-NOV-NEXT: # %bb.1: # %entry
; CHECK-NOV-NEXT: li a2, 1
; CHECK-NOV-NEXT: .LBB20_2: # %entry
-; CHECK-NOV-NEXT: slti a3, a1, 1
; CHECK-NOV-NEXT: slti a4, s1, 1
+; CHECK-NOV-NEXT: slti a3, a1, 1
; CHECK-NOV-NEXT: blez a1, .LBB20_4
; CHECK-NOV-NEXT: # %bb.3: # %entry
; CHECK-NOV-NEXT: li a1, 1
; CHECK-NOV-NEXT: .LBB20_4: # %entry
-; CHECK-NOV-NEXT: neg a4, a4
; CHECK-NOV-NEXT: neg a3, a3
; CHECK-NOV-NEXT: and a3, a3, a0
+; CHECK-NOV-NEXT: neg a0, a4
; CHECK-NOV-NEXT: beqz a1, .LBB20_7
; CHECK-NOV-NEXT: # %bb.5: # %entry
; CHECK-NOV-NEXT: sgtz a1, a1
-; CHECK-NOV-NEXT: and a4, a4, s0
+; CHECK-NOV-NEXT: and a0, a0, s0
; CHECK-NOV-NEXT: bnez a2, .LBB20_8
; CHECK-NOV-NEXT: .LBB20_6:
-; CHECK-NOV-NEXT: snez a0, a4
+; CHECK-NOV-NEXT: snez a2, a0
; CHECK-NOV-NEXT: j .LBB20_9
; CHECK-NOV-NEXT: .LBB20_7:
; CHECK-NOV-NEXT: snez a1, a3
-; CHECK-NOV-NEXT: and a4, a4, s0
+; CHECK-NOV-NEXT: and a0, a0, s0
; CHECK-NOV-NEXT: beqz a2, .LBB20_6
; CHECK-NOV-NEXT: .LBB20_8: # %entry
-; CHECK-NOV-NEXT: sgtz a0, a2
+; CHECK-NOV-NEXT: sgtz a2, a2
; CHECK-NOV-NEXT: .LBB20_9: # %entry
-; CHECK-NOV-NEXT: neg a0, a0
-; CHECK-NOV-NEXT: and a0, a0, a4
+; CHECK-NOV-NEXT: neg a2, a2
+; CHECK-NOV-NEXT: and a0, a2, a0
; CHECK-NOV-NEXT: neg a1, a1
; CHECK-NOV-NEXT: and a1, a1, a3
; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
@@ -2450,15 +2449,15 @@ define <2 x i64> @ustest_f64i64(<2 x double> %x) {
; CHECK-V-NEXT: # %bb.1: # %entry
; CHECK-V-NEXT: li a2, 1
; CHECK-V-NEXT: .LBB20_2: # %entry
-; CHECK-V-NEXT: slti a4, a1, 1
; CHECK-V-NEXT: slti a3, s1, 1
+; CHECK-V-NEXT: slti a4, a1, 1
; CHECK-V-NEXT: blez a1, .LBB20_4
; CHECK-V-NEXT: # %bb.3: # %entry
; CHECK-V-NEXT: li a1, 1
; CHECK-V-NEXT: .LBB20_4: # %entry
-; CHECK-V-NEXT: neg a3, a3
; CHECK-V-NEXT: neg a4, a4
; CHECK-V-NEXT: and a0, a4, a0
+; CHECK-V-NEXT: neg a3, a3
; CHECK-V-NEXT: beqz a1, .LBB20_7
; CHECK-V-NEXT: # %bb.5: # %entry
; CHECK-V-NEXT: sgtz a1, a1
@@ -2513,66 +2512,65 @@ define <2 x i64> @stest_f32i64(<2 x float> %x) {
; CHECK-NOV-NEXT: .cfi_offset s0, -16
; CHECK-NOV-NEXT: .cfi_offset s1, -24
; CHECK-NOV-NEXT: .cfi_offset fs0, -32
-; CHECK-NOV-NEXT: fmv.s fs0, fa1
+; CHECK-NOV-NEXT: fmv.s fs0, fa0
+; CHECK-NOV-NEXT: fmv.s fa0, fa1
; CHECK-NOV-NEXT: call __fixsfti
; CHECK-NOV-NEXT: mv s0, a0
; CHECK-NOV-NEXT: mv s1, a1
; CHECK-NOV-NEXT: fmv.s fa0, fs0
; CHECK-NOV-NEXT: call __fixsfti
-; CHECK-NOV-NEXT: mv a2, a0
-; CHECK-NOV-NEXT: li a0, -1
-; CHECK-NOV-NEXT: srli a3, a0, 1
-; CHECK-NOV-NEXT: beqz a1, .LBB21_3
+; CHECK-NOV-NEXT: li a2, -1
+; CHECK-NOV-NEXT: srli a3, a2, 1
+; CHECK-NOV-NEXT: beqz s1, .LBB21_3
; CHECK-NOV-NEXT: # %bb.1: # %entry
-; CHECK-NOV-NEXT: slti a4, a1, 0
-; CHECK-NOV-NEXT: bnez s1, .LBB21_4
+; CHECK-NOV-NEXT: slti a4, s1, 0
+; CHECK-NOV-NEXT: bnez a1, .LBB21_4
; CHECK-NOV-NEXT: .LBB21_2:
-; CHECK-NOV-NEXT: sltu a5, s0, a3
+; CHECK-NOV-NEXT: sltu a5, a0, a3
; CHECK-NOV-NEXT: beqz a5, .LBB21_5
; CHECK-NOV-NEXT: j .LBB21_6
; CHECK-NOV-NEXT: .LBB21_3:
-; CHECK-NOV-NEXT: sltu a4, a2, a3
-; CHECK-NOV-NEXT: beqz s1, .LBB21_2
+; CHECK-NOV-NEXT: sltu a4, s0, a3
+; CHECK-NOV-NEXT: beqz a1, .LBB21_2
; CHECK-NOV-NEXT: .LBB21_4: # %entry
-; CHECK-NOV-NEXT: slti a5, s1, 0
+; CHECK-NOV-NEXT: slti a5, a1, 0
; CHECK-NOV-NEXT: bnez a5, .LBB21_6
; CHECK-NOV-NEXT: .LBB21_5: # %entry
-; CHECK-NOV-NEXT: mv s0, a3
+; CHECK-NOV-NEXT: mv a0, a3
; CHECK-NOV-NEXT: .LBB21_6: # %entry
; CHECK-NOV-NEXT: neg a6, a5
; CHECK-NOV-NEXT: neg a5, a4
-; CHECK-NOV-NEXT: and a5, a5, a1
+; CHECK-NOV-NEXT: and a5, a5, s1
; CHECK-NOV-NEXT: bnez a4, .LBB21_8
; CHECK-NOV-NEXT: # %bb.7: # %entry
-; CHECK-NOV-NEXT: mv a2, a3
+; CHECK-NOV-NEXT: mv s0, a3
; CHECK-NOV-NEXT: .LBB21_8: # %entry
-; CHECK-NOV-NEXT: and a4, a6, s1
-; CHECK-NOV-NEXT: slli a1, a0, 63
-; CHECK-NOV-NEXT: beq a5, a0, .LBB21_11
+; CHECK-NOV-NEXT: and a4, a6, a1
+; CHECK-NOV-NEXT: slli a1, a2, 63
+; CHECK-NOV-NEXT: beq a5, a2, .LBB21_11
; CHECK-NOV-NEXT: # %bb.9: # %entry
; CHECK-NOV-NEXT: slti a3, a5, 0
; CHECK-NOV-NEXT: xori a3, a3, 1
-; CHECK-NOV-NEXT: bne a4, a0, .LBB21_12
+; CHECK-NOV-NEXT: bne a4, a2, .LBB21_12
; CHECK-NOV-NEXT: .LBB21_10:
-; CHECK-NOV-NEXT: sltu a0, a1, s0
-; CHECK-NOV-NEXT: beqz a0, .LBB21_13
+; CHECK-NOV-NEXT: sltu a2, a1, a0
+; CHECK-NOV-NEXT: beqz a2, .LBB21_13
; CHECK-NOV-NEXT: j .LBB21_14
; CHECK-NOV-NEXT: .LBB21_11:
-; CHECK-NOV-NEXT: sltu a3, a1, a2
-; CHECK-NOV-NEXT: beq a4, a0, .LBB21_10
+; CHECK-NOV-NEXT: sltu a3, a1, s0
+; CHECK-NOV-NEXT: beq a4, a2, .LBB21_10
; CHECK-NOV-NEXT: .LBB21_12: # %entry
-; CHECK-NOV-NEXT: slti a0, a4, 0
-; CHECK-NOV-NEXT: xori a0, a0, 1
-; CHECK-NOV-NEXT: bnez a0, .LBB21_14
+; CHECK-NOV-NEXT: slti a2, a4, 0
+; CHECK-NOV-NEXT: xori a2, a2, 1
+; CHECK-NOV-NEXT: bnez a2, .LBB21_14
; CHECK-NOV-NEXT: .LBB21_13: # %entry
-; CHECK-NOV-NEXT: mv s0, a1
+; CHECK-NOV-NEXT: mv a0, a1
; CHECK-NOV-NEXT: .LBB21_14: # %entry
; CHECK-NOV-NEXT: bnez a3, .LBB21_16
; CHECK-NOV-NEXT: # %bb.15: # %entry
-; CHECK-NOV-NEXT: mv a2, a1
+; CHECK-NOV-NEXT: mv s0, a1
; CHECK-NOV-NEXT: .LBB21_16: # %entry
-; CHECK-NOV-NEXT: mv a0, s0
-; CHECK-NOV-NEXT: mv a1, a2
+; CHECK-NOV-NEXT: mv a1, s0
; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
@@ -2597,43 +2595,43 @@ define <2 x i64> @stest_f32i64(<2 x float> %x) {
; CHECK-V-NEXT: addi a0, sp, 32
; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-V-NEXT: vslidedown.vi v9, v8, 1
-; CHECK-V-NEXT: vfmv.f.s fa0, v9
+; CHECK-V-NEXT: vfmv.f.s fa0, v8
; CHECK-V-NEXT: call __fixsfti
; CHECK-V-NEXT: mv s0, a0
; CHECK-V-NEXT: mv s1, a1
; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; CHECK-V-NEXT: addi a0, sp, 32
; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslidedown.vi v8, v8, 1
; CHECK-V-NEXT: vfmv.f.s fa0, v8
; CHECK-V-NEXT: call __fixsfti
; CHECK-V-NEXT: li a2, -1
; CHECK-V-NEXT: srli a3, a2, 1
-; CHECK-V-NEXT: beqz a1, .LBB21_3
+; CHECK-V-NEXT: beqz s1, .LBB21_3
; CHECK-V-NEXT: # %bb.1: # %entry
-; CHECK-V-NEXT: slti a4, a1, 0
-; CHECK-V-NEXT: bnez s1, .LBB21_4
+; CHECK-V-NEXT: slti a4, s1, 0
+; CHECK-V-NEXT: bnez a1, .LBB21_4
; CHECK-V-NEXT: .LBB21_2:
-; CHECK-V-NEXT: sltu a5, s0, a3
+; CHECK-V-NEXT: sltu a5, a0, a3
; CHECK-V-NEXT: beqz a5, .LBB21_5
; CHECK-V-NEXT: j .LBB21_6
; CHECK-V-NEXT: .LBB21_3:
-; CHECK-V-NEXT: sltu a4, a0, a3
-; CHECK-V-NEXT: beqz s1, .LBB21_2
+; CHECK-V-NEXT: sltu a4, s0, a3
+; CHECK-V-NEXT: beqz a1, .LBB21_2
; CHECK-V-NEXT: .LBB21_4: # %entry
-; CHECK-V-NEXT: slti a5, s1, 0
+; CHECK-V-NEXT: slti a5, a1, 0
; CHECK-V-NEXT: bnez a5, .LBB21_6
; CHECK-V-NEXT: .LBB21_5: # %entry
-; CHECK-V-NEXT: mv s0, a3
+; CHECK-V-NEXT: mv a0, a3
; CHECK-V-NEXT: .LBB21_6: # %entry
; CHECK-V-NEXT: neg a6, a5
; CHECK-V-NEXT: neg a5, a4
-; CHECK-V-NEXT: and a5, a5, a1
+; CHECK-V-NEXT: and a5, a5, s1
; CHECK-V-NEXT: bnez a4, .LBB21_8
; CHECK-V-NEXT: # %bb.7: # %entry
-; CHECK-V-NEXT: mv a0, a3
+; CHECK-V-NEXT: mv s0, a3
; CHECK-V-NEXT: .LBB21_8: # %entry
-; CHECK-V-NEXT: and a4, a6, s1
+; CHECK-V-NEXT: and a4, a6, a1
; CHECK-V-NEXT: slli a1, a2, 63
; CHECK-V-NEXT: beq a5, a2, .LBB21_11
; CHECK-V-NEXT: # %bb.9: # %entry
@@ -2641,26 +2639,26 @@ define <2 x i64> @stest_f32i64(<2 x float> %x) {
; CHECK-V-NEXT: xori a3, a3, 1
; CHECK-V-NEXT: bne a4, a2, .LBB21_12
; CHECK-V-NEXT: .LBB21_10:
-; CHECK-V-NEXT: sltu a2, a1, s0
+; CHECK-V-NEXT: sltu a2, a1, a0
; CHECK-V-NEXT: beqz a2, .LBB21_13
; CHECK-V-NEXT: j .LBB21_14
; CHECK-V-NEXT: .LBB21_11:
-; CHECK-V-NEXT: sltu a3, a1, a0
+; CHECK-V-NEXT: sltu a3, a1, s0
; CHECK-V-NEXT: beq a4, a2, .LBB21_10
; CHECK-V-NEXT: .LBB21_12: # %entry
; CHECK-V-NEXT: slti a2, a4, 0
; CHECK-V-NEXT: xori a2, a2, 1
; CHECK-V-NEXT: bnez a2, .LBB21_14
; CHECK-V-NEXT: .LBB21_13: # %entry
-; CHECK-V-NEXT: mv s0, a1
+; CHECK-V-NEXT: mv a0, a1
; CHECK-V-NEXT: .LBB21_14: # %entry
; CHECK-V-NEXT: bnez a3, .LBB21_16
; CHECK-V-NEXT: # %bb.15: # %entry
-; CHECK-V-NEXT: mv a0, a1
+; CHECK-V-NEXT: mv s0, a1
; CHECK-V-NEXT: .LBB21_16: # %entry
; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-V-NEXT: vmv.s.x v8, a0
-; CHECK-V-NEXT: vmv.s.x v9, s0
+; CHECK-V-NEXT: vmv.s.x v8, s0
+; CHECK-V-NEXT: vmv.s.x v9, a0
; CHECK-V-NEXT: vslideup.vi v8, v9, 1
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: slli a0, a0, 1
@@ -2693,19 +2691,19 @@ define <2 x i64> @utest_f32i64(<2 x float> %x) {
; CHECK-NOV-NEXT: .cfi_offset s0, -16
; CHECK-NOV-NEXT: .cfi_offset s1, -24
; CHECK-NOV-NEXT: .cfi_offset fs0, -32
-; CHECK-NOV-NEXT: fmv.s fs0, fa1
+; CHECK-NOV-NEXT: fmv.s fs0, fa0
+; CHECK-NOV-NEXT: fmv.s fa0, fa1
; CHECK-NOV-NEXT: call __fixunssfti
; CHECK-NOV-NEXT: mv s0, a0
; CHECK-NOV-NEXT: mv s1, a1
; CHECK-NOV-NEXT: fmv.s fa0, fs0
; CHECK-NOV-NEXT: call __fixunssfti
-; CHECK-NOV-NEXT: snez a1, a1
; CHECK-NOV-NEXT: snez a2, s1
-; CHECK-NOV-NEXT: addi a2, a2, -1
-; CHECK-NOV-NEXT: and a2, a2, s0
+; CHECK-NOV-NEXT: snez a1, a1
; CHECK-NOV-NEXT: addi a1, a1, -1
-; CHECK-NOV-NEXT: and a1, a1, a0
-; CHECK-NOV-NEXT: mv a0, a2
+; CHECK-NOV-NEXT: and a0, a1, a0
+; CHECK-NOV-NEXT: addi a1, a2, -1
+; CHECK-NOV-NEXT: and a1, a1, s0
; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
@@ -2730,25 +2728,25 @@ define <2 x i64> @utest_f32i64(<2 x float> %x) {
; CHECK-V-NEXT: addi a0, sp, 32
; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-V-NEXT: vslidedown.vi v9, v8, 1
-; CHECK-V-NEXT: vfmv.f.s fa0, v9
+; CHECK-V-NEXT: vfmv.f.s fa0, v8
; CHECK-V-NEXT: call __fixunssfti
; CHECK-V-NEXT: mv s0, a0
; CHECK-V-NEXT: mv s1, a1
; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; CHECK-V-NEXT: addi a0, sp, 32
; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: vslidedown.vi v8, v8, 1
; CHECK-V-NEXT: vfmv.f.s fa0, v8
; CHECK-V-NEXT: call __fixunssfti
-; CHECK-V-NEXT: snez a1, a1
; CHECK-V-NEXT: snez a2, s1
-; CHECK-V-NEXT: addi a2, a2, -1
-; CHECK-V-NEXT: and a2, a2, s0
+; CHECK-V-NEXT: snez a1, a1
; CHECK-V-NEXT: addi a1, a1, -1
; CHECK-V-NEXT: and a0, a1, a0
+; CHECK-V-NEXT: addi a2, a2, -1
+; CHECK-V-NEXT: and a2, a2, s0
; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-V-NEXT: vmv.s.x v8, a0
-; CHECK-V-NEXT: vmv.s.x v9, a2
+; CHECK-V-NEXT: vmv.s.x v8, a2
+; CHECK-V-NEXT: vmv.s.x v9, a0
; CHECK-V-NEXT: vslideup.vi v8, v9, 1
; CHECK-V-NEXT: csrr a0, vlenb
; CHECK-V-NEXT: slli a0, a0, 1
@@ -2790,32 +2788,32 @@ define <2 x i64> @ustest_f32i64(<2 x float> %x) {
; CHECK-NOV-NEXT: # %bb.1: # %entry
; CHECK-NOV-NEXT: li a2, 1
; CHECK-NOV-NEXT: .LBB23_2: # %entry
-; CHECK-NOV-NEXT: slti a3, a1, 1
; CHECK-NOV-NEXT: slti a4, s1, 1
+; CHECK-NOV-NEXT: slti a3, a1, 1
; CHECK-NOV-NEXT: blez a1, .LBB23_4
; CHECK-NOV-NEXT: # %bb.3: # %entry
; CHECK-NOV-NEXT: li a1, 1
; CHECK-NOV-NEXT: .LBB23_4: # %entry
-; CHECK-NOV-NEXT: neg a4, a4
; CHECK-NOV-NEXT: neg a3, a3
; CHECK-NOV-NEXT: and a3, a3, a0
+; CHECK-NOV-NEXT: neg a0, a4
; CHECK-NOV-NEXT: beqz a1, .LBB23_7
; CHECK-NOV-NEXT: # %bb.5: # %entry
; CHECK-NOV-NEXT: sgtz a1, a1
-; CHECK-NOV-NEXT: and a4, a4, s0
+; CHECK-NOV-NEXT: and a0, a0, s0
; CHECK-NOV-NEXT: bnez a2, .LBB23_8
; CHECK-NOV-NEXT: .LBB23_6:
-; CHECK-NOV-NEXT: snez a0, a4
+; CHECK-NOV-NEXT: snez a2, a0
; CHECK-NOV-NEXT: j .LBB23_9
; CHECK-NOV-NEXT: .LBB23_7:
; CHECK-NOV-NEXT: snez a1, a3
-; CHECK-NOV-NEXT: and a4, a4, s0
+; CHECK-NOV-NEXT: and a0, a0, s0
; CHECK-NOV-NEXT: beqz a2, .LBB23_6
; CHECK-NOV-NEXT: .LBB23_8: # %entry
-; CHECK-NOV-NEXT: sgtz a0, a2
+; CHECK-NOV-NEXT: sgtz a2, a2
; CHECK-NOV-NEXT: .LBB23_9: # %entry
-; CHECK-NOV-NEXT: neg a0, a0
-; CHECK-NOV-NEXT: and a0, a0, a4
+; CHECK-NOV-NEXT: neg a2, a2
+; CHECK-NOV-NEXT: and a0, a2, a0
; CHECK-NOV-NEXT: neg a1, a1
; CHECK-NOV-NEXT: and a1, a1, a3
; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
@@ -2857,15 +2855,15 @@ define <2 x i64> @ustest_f32i64(<2 x float> %x) {
; CHECK-V-NEXT: # %bb.1: # %entry
; CHECK-V-NEXT: li a2, 1
; CHECK-V-NEXT: .LBB23_2: # %entry
-; CHECK-V-NEXT: slti a4, a1, 1
; CHECK-V-NEXT: slti a3, s1, 1
+; CHECK-V-NEXT: slti a4, a1, 1
; CHECK-V-NEXT: blez a1, .LBB23_4
; CHECK-V-NEXT: # %bb.3: # %entry
; CHECK-V-NEXT: li a1, 1
; CHECK-V-NEXT: .LBB23_4: # %entry
-; CHECK-V-NEXT: neg a3, a3
; CHECK-V-NEXT: neg a4, a4
; CHECK-V-NEXT: and a0, a4, a0
+; CHECK-V-NEXT: neg a3, a3
; CHECK-V-NEXT: beqz a1, .LBB23_7
; CHECK-V-NEXT: # %bb.5: # %entry
; CHECK-V-NEXT: sgtz a1, a1
@@ -2920,8 +2918,8 @@ define <2 x i64> @stest_f16i64(<2 x half> %x) {
; CHECK-NOV-NEXT: .cfi_offset s0, -16
; CHECK-NOV-NEXT: .cfi_offset s1, -24
; CHECK-NOV-NEXT: .cfi_offset s2, -32
-; CHECK-NOV-NEXT: mv s2, a1
-; CHECK-NOV-NEXT: fmv.w.x fa0, a0
+; CHECK-NOV-NEXT: mv s2, a0
+; CHECK-NOV-NEXT: fmv.w.x fa0, a1
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: call __fixsfti
; CHECK-NOV-NEXT: mv s0, a0
@@ -2929,60 +2927,58 @@ define <2 x i64> @stest_f16i64(<2 x half> %x) {
; CHECK-NOV-NEXT: fmv.w.x fa0, s2
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: call __fixsfti
-; CHECK-NOV-NEXT: mv a2, a0
-; CHECK-NOV-NEXT: li a0, -1
-; CHECK-NOV-NEXT: srli a3, a0, 1
-; CHECK-NOV-NEXT: beqz a1, .LBB24_3
+; CHECK-NOV-NEXT: li a2, -1
+; CHECK-NOV-NEXT: srli a3, a2, 1
+; CHECK-NOV-NEXT: beqz s1, .LBB24_3
; CHECK-NOV-NEXT: # %bb.1: # %entry
-; CHECK-NOV-NEXT: slti a4, a1, 0
-; CHECK-NOV-NEXT: bnez s1, .LBB24_4
+; CHECK-NOV-NEXT: slti a4, s1, 0
+; CHECK-NOV-NEXT: bnez a1, .LBB24_4
; CHECK-NOV-NEXT: .LBB24_2:
-; CHECK-NOV-NEXT: sltu a5, s0, a3
+; CHECK-NOV-NEXT: sltu a5, a0, a3
; CHECK-NOV-NEXT: beqz a5, .LBB24_5
; CHECK-NOV-NEXT: j .LBB24_6
; CHECK-NOV-NEXT: .LBB24_3:
-; CHECK-NOV-NEXT: sltu a4, a2, a3
-; CHECK-NOV-NEXT: beqz s1, .LBB24_2
+; CHECK-NOV-NEXT: sltu a4, s0, a3
+; CHECK-NOV-NEXT: beqz a1, .LBB24_2
; CHECK-NOV-NEXT: .LBB24_4: # %entry
-; CHECK-NOV-NEXT: slti a5, s1, 0
+; CHECK-NOV-NEXT: slti a5, a1, 0
; CHECK-NOV-NEXT: bnez a5, .LBB24_6
; CHECK-NOV-NEXT: .LBB24_5: # %entry
-; CHECK-NOV-NEXT: mv s0, a3
+; CHECK-NOV-NEXT: mv a0, a3
; CHECK-NOV-NEXT: .LBB24_6: # %entry
; CHECK-NOV-NEXT: neg a6, a5
; CHECK-NOV-NEXT: neg a5, a4
-; CHECK-NOV-NEXT: and a5, a5, a1
+; CHECK-NOV-NEXT: and a5, a5, s1
; CHECK-NOV-NEXT: bnez a4, .LBB24_8
; CHECK-NOV-NEXT: # %bb.7: # %entry
-; CHECK-NOV-NEXT: mv a2, a3
+; CHECK-NOV-NEXT: mv s0, a3
; CHECK-NOV-NEXT: .LBB24_8: # %entry
-; CHECK-NOV-NEXT: and a4, a6, s1
-; CHECK-NOV-NEXT: slli a1, a0, 63
-; CHECK-NOV-NEXT: beq a5, a0, .LBB24_11
+; CHECK-NOV-NEXT: and a4, a6, a1
+; CHECK-NOV-NEXT: slli a1, a2, 63
+; CHECK-NOV-NEXT: beq a5, a2, .LBB24_11
; CHECK-NOV-NEXT: # %bb.9: # %entry
; CHECK-NOV-NEXT: slti a3, a5, 0
; CHECK-NOV-NEXT: xori a3, a3, 1
-; CHECK-NOV-NEXT: bne a4, a0, .LBB24_12
+; CHECK-NOV-NEXT: bne a4, a2, .LBB24_12
; CHECK-NOV-NEXT: .LBB24_10:
-; CHECK-NOV-NEXT: sltu a0, a1, s0
-; CHECK-NOV-NEXT: beqz a0, .LBB24_13
+; CHECK-NOV-NEXT: sltu a2, a1, a0
+; CHECK-NOV-NEXT: beqz a2, .LBB24_13
; CHECK-NOV-NEXT: j .LBB24_14
; CHECK-NOV-NEXT: .LBB24_11:
-; CHECK-NOV-NEXT: sltu a3, a1, a2
-; CHECK-NOV-NEXT: beq a4, a0, .LBB24_10
+; CHECK-NOV-NEXT: sltu a3, a1, s0
+; CHECK-NOV-NEXT: beq a4, a2, .LBB24_10
; CHECK-NOV-NEXT: .LBB24_12: # %entry
-; CHECK-NOV-NEXT: slti a0, a4, 0
-; CHECK-NOV-NEXT: xori a0, a0, 1
-; CHECK-NOV-NEXT: bnez a0, .LBB24_14
+; CHECK-NOV-NEXT: slti a2, a4, 0
+; CHECK-NOV-NEXT: xori a2, a2, 1
+; CHECK-NOV-NEXT: bnez a2, .LBB24_14
; CHECK-NOV-NEXT: .LBB24_13: # %entry
-; CHECK-NOV-NEXT: mv s0, a1
+; CHECK-NOV-NEXT: mv a0, a1
; CHECK-NOV-NEXT: .LBB24_14: # %entry
; CHECK-NOV-NEXT: bnez a3, .LBB24_16
; CHECK-NOV-NEXT: # %bb.15: # %entry
-; CHECK-NOV-NEXT: mv a2, a1
+; CHECK-NOV-NEXT: mv s0, a1
; CHECK-NOV-NEXT: .LBB24_16: # %entry
-; CHECK-NOV-NEXT: mv a0, s0
-; CHECK-NOV-NEXT: mv a1, a2
+; CHECK-NOV-NEXT: mv a1, s0
; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
@@ -3002,8 +2998,8 @@ define <2 x i64> @stest_f16i64(<2 x half> %x) {
; CHECK-V-NEXT: .cfi_offset s0, -16
; CHECK-V-NEXT: .cfi_offset s1, -24
; CHECK-V-NEXT: .cfi_offset s2, -32
-; CHECK-V-NEXT: mv s2, a1
-; CHECK-V-NEXT: fmv.w.x fa0, a0
+; CHECK-V-NEXT: mv s2, a0
+; CHECK-V-NEXT: fmv.w.x fa0, a1
; CHECK-V-NEXT: call __extendhfsf2
; CHECK-V-NEXT: call __fixsfti
; CHECK-V-NEXT: mv s0, a0
@@ -3013,31 +3009,31 @@ define <2 x i64> @stest_f16i64(<2 x half> %x) {
; CHECK-V-NEXT: call __fixsfti
; CHECK-V-NEXT: li a2, -1
; CHECK-V-NEXT: srli a3, a2, 1
-; CHECK-V-NEXT: beqz a1, .LBB24_3
+; CHECK-V-NEXT: beqz s1, .LBB24_3
; CHECK-V-NEXT: # %bb.1: # %entry
-; CHECK-V-NEXT: slti a4, a1, 0
-; CHECK-V-NEXT: bnez s1, .LBB24_4
+; CHECK-V-NEXT: slti a4, s1, 0
+; CHECK-V-NEXT: bnez a1, .LBB24_4
; CHECK-V-NEXT: .LBB24_2:
-; CHECK-V-NEXT: sltu a5, s0, a3
+; CHECK-V-NEXT: sltu a5, a0, a3
; CHECK-V-NEXT: beqz a5, .LBB24_5
; CHECK-V-NEXT: j .LBB24_6
; CHECK-V-NEXT: .LBB24_3:
-; CHECK-V-NEXT: sltu a4, a0, a3
-; CHECK-V-NEXT: beqz s1, .LBB24_2
+; CHECK-V-NEXT: sltu a4, s0, a3
+; CHECK-V-NEXT: beqz a1, .LBB24_2
; CHECK-V-NEXT: .LBB24_4: # %entry
-; CHECK-V-NEXT: slti a5, s1, 0
+; CHECK-V-NEXT: slti a5, a1, 0
; CHECK-V-NEXT: bnez a5, .LBB24_6
; CHECK-V-NEXT: .LBB24_5: # %entry
-; CHECK-V-NEXT: mv s0, a3
+; CHECK-V-NEXT: mv a0, a3
; CHECK-V-NEXT: .LBB24_6: # %entry
; CHECK-V-NEXT: neg a6, a5
; CHECK-V-NEXT: neg a5, a4
-; CHECK-V-NEXT: and a5, a5, a1
+; CHECK-V-NEXT: and a5, a5, s1
; CHECK-V-NEXT: bnez a4, .LBB24_8
; CHECK-V-NEXT: # %bb.7: # %entry
-; CHECK-V-NEXT: mv a0, a3
+; CHECK-V-NEXT: mv s0, a3
; CHECK-V-NEXT: .LBB24_8: # %entry
-; CHECK-V-NEXT: and a4, a6, s1
+; CHECK-V-NEXT: and a4, a6, a1
; CHECK-V-NEXT: slli a1, a2, 63
; CHECK-V-NEXT: beq a5, a2, .LBB24_11
; CHECK-V-NEXT: # %bb.9: # %entry
@@ -3045,26 +3041,26 @@ define <2 x i64> @stest_f16i64(<2 x half> %x) {
; CHECK-V-NEXT: xori a3, a3, 1
; CHECK-V-NEXT: bne a4, a2, .LBB24_12
; CHECK-V-NEXT: .LBB24_10:
-; CHECK-V-NEXT: sltu a2, a1, s0
+; CHECK-V-NEXT: sltu a2, a1, a0
; CHECK-V-NEXT: beqz a2, .LBB24_13
; CHECK-V-NEXT: j .LBB24_14
; CHECK-V-NEXT: .LBB24_11:
-; CHECK-V-NEXT: sltu a3, a1, a0
+; CHECK-V-NEXT: sltu a3, a1, s0
; CHECK-V-NEXT: beq a4, a2, .LBB24_10
; CHECK-V-NEXT: .LBB24_12: # %entry
; CHECK-V-NEXT: slti a2, a4, 0
; CHECK-V-NEXT: xori a2, a2, 1
; CHECK-V-NEXT: bnez a2, .LBB24_14
; CHECK-V-NEXT: .LBB24_13: # %entry
-; CHECK-V-NEXT: mv s0, a1
+; CHECK-V-NEXT: mv a0, a1
; CHECK-V-NEXT: .LBB24_14: # %entry
; CHECK-V-NEXT: bnez a3, .LBB24_16
; CHECK-V-NEXT: # %bb.15: # %entry
-; CHECK-V-NEXT: mv a0, a1
+; CHECK-V-NEXT: mv s0, a1
; CHECK-V-NEXT: .LBB24_16: # %entry
; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-V-NEXT: vmv.s.x v9, a0
-; CHECK-V-NEXT: vmv.s.x v8, s0
+; CHECK-V-NEXT: vmv.s.x v9, s0
+; CHECK-V-NEXT: vmv.s.x v8, a0
; CHECK-V-NEXT: vslideup.vi v8, v9, 1
; CHECK-V-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; CHECK-V-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
@@ -3095,8 +3091,8 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) {
; CHECK-NOV-NEXT: .cfi_offset s0, -16
; CHECK-NOV-NEXT: .cfi_offset s1, -24
; CHECK-NOV-NEXT: .cfi_offset s2, -32
-; CHECK-NOV-NEXT: mv s0, a1
-; CHECK-NOV-NEXT: fmv.w.x fa0, a0
+; CHECK-NOV-NEXT: mv s0, a0
+; CHECK-NOV-NEXT: fmv.w.x fa0, a1
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: call __fixunssfti
; CHECK-NOV-NEXT: mv s1, a0
@@ -3104,13 +3100,12 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) {
; CHECK-NOV-NEXT: fmv.w.x fa0, s0
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: call __fixunssfti
-; CHECK-NOV-NEXT: snez a1, a1
; CHECK-NOV-NEXT: snez a2, s2
-; CHECK-NOV-NEXT: addi a2, a2, -1
-; CHECK-NOV-NEXT: and a2, a2, s1
+; CHECK-NOV-NEXT: snez a1, a1
; CHECK-NOV-NEXT: addi a1, a1, -1
-; CHECK-NOV-NEXT: and a1, a1, a0
-; CHECK-NOV-NEXT: mv a0, a2
+; CHECK-NOV-NEXT: and a0, a1, a0
+; CHECK-NOV-NEXT: addi a1, a2, -1
+; CHECK-NOV-NEXT: and a1, a1, s1
; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
@@ -3130,8 +3125,8 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) {
; CHECK-V-NEXT: .cfi_offset s0, -16
; CHECK-V-NEXT: .cfi_offset s1, -24
; CHECK-V-NEXT: .cfi_offset s2, -32
-; CHECK-V-NEXT: mv s0, a1
-; CHECK-V-NEXT: fmv.w.x fa0, a0
+; CHECK-V-NEXT: mv s0, a0
+; CHECK-V-NEXT: fmv.w.x fa0, a1
; CHECK-V-NEXT: call __extendhfsf2
; CHECK-V-NEXT: call __fixunssfti
; CHECK-V-NEXT: mv s1, a0
@@ -3139,15 +3134,15 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) {
; CHECK-V-NEXT: fmv.w.x fa0, s0
; CHECK-V-NEXT: call __extendhfsf2
; CHECK-V-NEXT: call __fixunssfti
-; CHECK-V-NEXT: snez a1, a1
; CHECK-V-NEXT: snez a2, s2
-; CHECK-V-NEXT: addi a2, a2, -1
-; CHECK-V-NEXT: and a2, a2, s1
+; CHECK-V-NEXT: snez a1, a1
; CHECK-V-NEXT: addi a1, a1, -1
; CHECK-V-NEXT: and a0, a1, a0
+; CHECK-V-NEXT: addi a2, a2, -1
+; CHECK-V-NEXT: and a2, a2, s1
; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-V-NEXT: vmv.s.x v9, a0
-; CHECK-V-NEXT: vmv.s.x v8, a2
+; CHECK-V-NEXT: vmv.s.x v9, a2
+; CHECK-V-NEXT: vmv.s.x v8, a0
; CHECK-V-NEXT: vslideup.vi v8, v9, 1
; CHECK-V-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; CHECK-V-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
@@ -3190,32 +3185,32 @@ define <2 x i64> @ustest_f16i64(<2 x half> %x) {
; CHECK-NOV-NEXT: # %bb.1: # %entry
; CHECK-NOV-NEXT: li a2, 1
; CHECK-NOV-NEXT: .LBB26_2: # %entry
-; CHECK-NOV-NEXT: slti a3, a1, 1
; CHECK-NOV-NEXT: slti a4, s1, 1
+; CHECK-NOV-NEXT: slti a3, a1, 1
; CHECK-NOV-NEXT: blez a1, .LBB26_4
; CHECK-NOV-NEXT: # %bb.3: # %entry
; CHECK-NOV-NEXT: li a1, 1
; CHECK-NOV-NEXT: .LBB26_4: # %entry
-; CHECK-NOV-NEXT: neg a4, a4
; CHECK-NOV-NEXT: neg a3, a3
; CHECK-NOV-NEXT: and a3, a3, a0
+; CHECK-NOV-NEXT: neg a0, a4
; CHECK-NOV-NEXT: beqz a1, .LBB26_7
; CHECK-NOV-NEXT: # %bb.5: # %entry
; CHECK-NOV-NEXT: sgtz a1, a1
-; CHECK-NOV-NEXT: and a4, a4, s0
+; CHECK-NOV-NEXT: and a0, a0, s0
; CHECK-NOV-NEXT: bnez a2, .LBB26_8
; CHECK-NOV-NEXT: .LBB26_6:
-; CHECK-NOV-NEXT: snez a0, a4
+; CHECK-NOV-NEXT: snez a2, a0
; CHECK-NOV-NEXT: j .LBB26_9
; CHECK-NOV-NEXT: .LBB26_7:
; CHECK-NOV-NEXT: snez a1, a3
-; CHECK-NOV-NEXT: and a4, a4, s0
+; CHECK-NOV-NEXT: and a0, a0, s0
; CHECK-NOV-NEXT: beqz a2, .LBB26_6
; CHECK-NOV-NEXT: .LBB26_8: # %entry
-; CHECK-NOV-NEXT: sgtz a0, a2
+; CHECK-NOV-NEXT: sgtz a2, a2
; CHECK-NOV-NEXT: .LBB26_9: # %entry
-; CHECK-NOV-NEXT: neg a0, a0
-; CHECK-NOV-NEXT: and a0, a0, a4
+; CHECK-NOV-NEXT: neg a2, a2
+; CHECK-NOV-NEXT: and a0, a2, a0
; CHECK-NOV-NEXT: neg a1, a1
; CHECK-NOV-NEXT: and a1, a1, a3
; CHECK-NOV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
@@ -3251,15 +3246,15 @@ define <2 x i64> @ustest_f16i64(<2 x half> %x) {
; CHECK-V-NEXT: # %bb.1: # %entry
; CHECK-V-NEXT: li a2, 1
; CHECK-V-NEXT: .LBB26_2: # %entry
-; CHECK-V-NEXT: slti a4, a1, 1
; CHECK-V-NEXT: slti a3, s1, 1
+; CHECK-V-NEXT: slti a4, a1, 1
; CHECK-V-NEXT: blez a1, .LBB26_4
; CHECK-V-NEXT: # %bb.3: # %entry
; CHECK-V-NEXT: li a1, 1
; CHECK-V-NEXT: .LBB26_4: # %entry
-; CHECK-V-NEXT: neg a3, a3
; CHECK-V-NEXT: neg a4, a4
; CHECK-V-NEXT: and a0, a4, a0
+; CHECK-V-NEXT: neg a3, a3
; CHECK-V-NEXT: beqz a1, .LBB26_7
; CHECK-V-NEXT: # %bb.5: # %entry
; CHECK-V-NEXT: sgtz a1, a1
diff --git a/llvm/test/CodeGen/RISCV/rvv/vec3-setcc-crash.ll b/llvm/test/CodeGen/RISCV/rvv/vec3-setcc-crash.ll
index 81076e41a7cb76..122ac13cb25731 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vec3-setcc-crash.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vec3-setcc-crash.ll
@@ -13,18 +13,18 @@ define void @vec3_setcc_crash(ptr %in, ptr %out) {
; RV32: # %bb.0:
; RV32-NEXT: lw a0, 0(a0)
; RV32-NEXT: srli a2, a0, 16
-; RV32-NEXT: srli a3, a0, 8
-; RV32-NEXT: slli a4, a0, 16
-; RV32-NEXT: srai a4, a4, 24
+; RV32-NEXT: slli a3, a0, 16
+; RV32-NEXT: srli a4, a3, 24
+; RV32-NEXT: srai a3, a3, 24
; RV32-NEXT: slli a5, a0, 24
; RV32-NEXT: srai a5, a5, 24
; RV32-NEXT: slli a6, a0, 8
; RV32-NEXT: srai a6, a6, 24
; RV32-NEXT: sgtz a6, a6
; RV32-NEXT: sgtz a5, a5
-; RV32-NEXT: sgtz a4, a4
-; RV32-NEXT: neg a4, a4
-; RV32-NEXT: and a3, a4, a3
+; RV32-NEXT: sgtz a3, a3
+; RV32-NEXT: neg a3, a3
+; RV32-NEXT: and a3, a3, a4
; RV32-NEXT: slli a3, a3, 8
; RV32-NEXT: neg a4, a5
; RV32-NEXT: and a0, a4, a0
@@ -39,19 +39,19 @@ define void @vec3_setcc_crash(ptr %in, ptr %out) {
; RV64-LABEL: vec3_setcc_crash:
; RV64: # %bb.0:
; RV64-NEXT: lw a0, 0(a0)
-; RV64-NEXT: srli a2, a0, 16
-; RV64-NEXT: srli a3, a0, 8
-; RV64-NEXT: slli a4, a0, 48
-; RV64-NEXT: srai a4, a4, 56
+; RV64-NEXT: srliw a2, a0, 16
+; RV64-NEXT: slli a3, a0, 48
+; RV64-NEXT: srli a4, a3, 56
+; RV64-NEXT: srai a3, a3, 56
; RV64-NEXT: slli a5, a0, 56
; RV64-NEXT: srai a5, a5, 56
; RV64-NEXT: slli a6, a0, 40
; RV64-NEXT: srai a6, a6, 56
; RV64-NEXT: sgtz a6, a6
; RV64-NEXT: sgtz a5, a5
-; RV64-NEXT: sgtz a4, a4
-; RV64-NEXT: negw a4, a4
-; RV64-NEXT: and a3, a4, a3
+; RV64-NEXT: sgtz a3, a3
+; RV64-NEXT: negw a3, a3
+; RV64-NEXT: and a3, a3, a4
; RV64-NEXT: slli a3, a3, 8
; RV64-NEXT: negw a4, a5
; RV64-NEXT: and a0, a4, a0
diff --git a/llvm/test/CodeGen/RISCV/signed-truncation-check.ll b/llvm/test/CodeGen/RISCV/signed-truncation-check.ll
index de36bcdb910609..069b2febc334d2 100644
--- a/llvm/test/CodeGen/RISCV/signed-truncation-check.ll
+++ b/llvm/test/CodeGen/RISCV/signed-truncation-check.ll
@@ -422,7 +422,8 @@ define i1 @add_ugecmp_i64_i16(i64 %x) nounwind {
; RV32I-NEXT: lui a1, 1048560
; RV32I-NEXT: addi a1, a1, -1
; RV32I-NEXT: sltu a1, a1, a2
-; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: snez a0, a0
+; RV32I-NEXT: addi a0, a0, -1
; RV32I-NEXT: and a0, a0, a1
; RV32I-NEXT: ret
;
@@ -462,7 +463,8 @@ define i1 @add_ugecmp_i64_i8(i64 %x) nounwind {
; RV32I-NEXT: addi a2, a0, -128
; RV32I-NEXT: sltu a0, a2, a0
; RV32I-NEXT: add a0, a1, a0
-; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: snez a0, a0
+; RV32I-NEXT: addi a0, a0, -1
; RV32I-NEXT: sltiu a1, a2, -256
; RV32I-NEXT: xori a1, a1, 1
; RV32I-NEXT: and a0, a0, a1
@@ -691,7 +693,8 @@ define i1 @add_ultcmp_i64_i8(i64 %x) nounwind {
; RV32I-NEXT: addi a2, a0, 128
; RV32I-NEXT: sltu a0, a2, a0
; RV32I-NEXT: add a0, a1, a0
-; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: snez a0, a0
+; RV32I-NEXT: addi a0, a0, -1
; RV32I-NEXT: sltiu a1, a2, 256
; RV32I-NEXT: and a0, a0, a1
; RV32I-NEXT: ret
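
A minimal sketch of the transform these regenerated checks are exercising,
written as standalone LLVM IR rather than taken from the patch itself (the
function name and the sext-based mask are my own illustration; the actual
combine operates on SelectionDAG nodes in RISCVISelLowering.cpp):

  ; select blocks poison: if %c is false, a poison %x never reaches %r.
  ;   %r = select i1 %c, i64 %x, i64 0
  ; The AND form evaluates both operands, so a poison %x would poison %r
  ; even when %c is false. Freezing %x first restores select semantics:
  define i64 @select_to_and(i1 %c, i64 %x) {
    %fx = freeze i64 %x          ; stop poison before it reaches the and
    %mask = sext i1 %c to i64    ; all-ones when %c is true, zero otherwise
    %r = and i64 %mask, %fx      ; %fx when %c is true, 0 when %c is false
    ret i64 %r
  }

Read this way, the extra negw/and and snez/addi, -1 sequences that appear
throughout the updated checks above are the mask materialization for the
frozen operands.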