[llvm] [RISCV] Override default sched policy (PR #115445)
Pengcheng Wang via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 8 01:52:15 PST 2024
https://github.com/wangpc-pp updated https://github.com/llvm/llvm-project/pull/115445
>From 6017bfce97203ab397802fc9c5b2abb40d362af2 Mon Sep 17 00:00:00 2001
From: Wang Pengcheng <wangpengcheng.pp at bytedance.com>
Date: Fri, 8 Nov 2024 17:04:46 +0800
Subject: [PATCH] [RISCV] Override default sched policy
This is based on other targets like PPC/AArch64 and on some experiments.
Disclaimer: I haven't tested it on many cores; maybe we should turn some
of these options into tune features. I believe downstreams must have
tried this before, so feedback is welcome.
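As a rough sketch of the "options as features" idea (not part of this patch),
an individual policy choice could be guarded behind a cl::opt flag in
RISCVSubtarget.cpp for experimentation. The flag name below is made up for
illustration only:

  #include "llvm/Support/CommandLine.h"
  using namespace llvm;

  // Hypothetical experimental flag, for illustration only.
  static cl::opt<bool> RISCVDisableLatencySchedHeuristic(
      "riscv-disable-latency-sched-heuristic", cl::Hidden, cl::init(true),
      cl::desc("Disable the latency heuristic of the machine scheduler"));

  void RISCVSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
                                           unsigned NumRegionInstrs) const {
    Policy.OnlyTopDown = false;
    Policy.OnlyBottomUp = false;
    // Consult the flag instead of hard-coding the choice.
    Policy.DisableLatencyHeuristic = RISCVDisableLatencySchedHeuristic;
    Policy.ShouldTrackPressure = true;
    Policy.ShouldTrackLaneMasks = hasVInstructions();
  }

Turning these into proper tune features would be the longer-term option if
different cores end up wanting different settings.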
---
llvm/lib/Target/RISCV/RISCVSubtarget.cpp | 23 +
llvm/lib/Target/RISCV/RISCVSubtarget.h | 3 +
llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb.ll | 8 +-
llvm/test/CodeGen/RISCV/abds-neg.ll | 136 +-
llvm/test/CodeGen/RISCV/atomic-rmw-discard.ll | 48 +-
llvm/test/CodeGen/RISCV/atomic-rmw.ll | 1360 ++++++++---------
llvm/test/CodeGen/RISCV/atomic-signext.ll | 272 ++--
.../CodeGen/RISCV/atomicrmw-cond-sub-clamp.ll | 108 +-
.../CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll | 104 +-
llvm/test/CodeGen/RISCV/bf16-promote.ll | 20 +-
llvm/test/CodeGen/RISCV/bfloat-convert.ll | 16 +-
llvm/test/CodeGen/RISCV/bittest.ll | 230 +--
llvm/test/CodeGen/RISCV/branch-relaxation.ll | 692 +++++----
llvm/test/CodeGen/RISCV/condops.ll | 72 +-
llvm/test/CodeGen/RISCV/double-convert.ll | 48 +-
llvm/test/CodeGen/RISCV/double-mem.ll | 20 +-
llvm/test/CodeGen/RISCV/double-select-fcmp.ll | 4 +-
.../early-clobber-tied-def-subreg-liveness.ll | 45 +-
.../CodeGen/RISCV/fastcc-without-f-reg.ll | 464 +++---
llvm/test/CodeGen/RISCV/float-convert.ll | 4 +-
.../test/CodeGen/RISCV/fold-addi-loadstore.ll | 30 +-
llvm/test/CodeGen/RISCV/half-convert.ll | 40 +-
llvm/test/CodeGen/RISCV/llvm.exp10.ll | 10 +-
llvm/test/CodeGen/RISCV/llvm.frexp.ll | 4 +-
llvm/test/CodeGen/RISCV/machine-combiner.ll | 200 +--
.../RISCV/machinelicm-address-pseudos.ll | 12 +-
.../CodeGen/RISCV/macro-fusion-lui-addi.ll | 4 +-
.../CodeGen/RISCV/misched-mem-clustering.mir | 6 +-
.../test/CodeGen/RISCV/overflow-intrinsics.ll | 30 +-
llvm/test/CodeGen/RISCV/rv32zbb-zbkb.ll | 18 +-
.../CodeGen/RISCV/rvv/calling-conv-fastcc.ll | 38 +-
llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll | 14 +-
.../RISCV/rvv/constant-folding-crash.ll | 18 +-
.../RISCV/rvv/dont-sink-splat-operands.ll | 120 +-
llvm/test/CodeGen/RISCV/rvv/expandload.ll | 298 ++--
.../RISCV/rvv/fixed-vectors-ceil-vp.ll | 12 +-
.../RISCV/rvv/fixed-vectors-ctpop-vp.ll | 55 +-
.../RISCV/rvv/fixed-vectors-floor-vp.ll | 12 +-
.../RISCV/rvv/fixed-vectors-fmaximum-vp.ll | 129 +-
.../RISCV/rvv/fixed-vectors-fminimum-vp.ll | 129 +-
.../RISCV/rvv/fixed-vectors-fshr-fshl-vp.ll | 10 +-
.../rvv/fixed-vectors-insert-subvector.ll | 27 +-
.../RISCV/rvv/fixed-vectors-int-buildvec.ll | 5 +-
.../RISCV/rvv/fixed-vectors-int-shuffles.ll | 18 +-
.../rvv/fixed-vectors-interleave-store.ll | 2 +-
.../RISCV/rvv/fixed-vectors-masked-gather.ll | 88 +-
.../RISCV/rvv/fixed-vectors-nearbyint-vp.ll | 12 +-
.../RISCV/rvv/fixed-vectors-reduction-fp.ll | 8 +-
.../rvv/fixed-vectors-reduction-int-vp.ll | 32 +-
.../RISCV/rvv/fixed-vectors-reduction-int.ll | 24 +-
.../rvv/fixed-vectors-reduction-mask-vp.ll | 8 +-
.../RISCV/rvv/fixed-vectors-rint-vp.ll | 12 +-
.../RISCV/rvv/fixed-vectors-round-vp.ll | 12 +-
.../RISCV/rvv/fixed-vectors-roundeven-vp.ll | 12 +-
.../RISCV/rvv/fixed-vectors-roundtozero-vp.ll | 12 +-
.../RISCV/rvv/fixed-vectors-setcc-fp-vp.ll | 54 +-
.../RISCV/rvv/fixed-vectors-setcc-int-vp.ll | 28 +-
.../RISCV/rvv/fixed-vectors-shuffle-concat.ll | 19 +-
.../rvv/fixed-vectors-shuffle-reverse.ll | 104 +-
.../rvv/fixed-vectors-strided-load-combine.ll | 8 +-
.../RISCV/rvv/fixed-vectors-strided-vpload.ll | 6 +-
.../RISCV/rvv/fixed-vectors-trunc-vp.ll | 116 +-
.../RISCV/rvv/fixed-vectors-vcopysign-vp.ll | 26 +-
.../RISCV/rvv/fixed-vectors-vfma-vp.ll | 94 +-
.../RISCV/rvv/fixed-vectors-vfmax-vp.ll | 26 +-
.../RISCV/rvv/fixed-vectors-vfmin-vp.ll | 26 +-
.../RISCV/rvv/fixed-vectors-vfmuladd-vp.ll | 94 +-
.../CodeGen/RISCV/rvv/fixed-vectors-vpload.ll | 2 +-
.../RISCV/rvv/fixed-vectors-vpmerge.ll | 31 +-
.../RISCV/rvv/fixed-vectors-vpscatter.ll | 83 +-
.../RISCV/rvv/fixed-vectors-vselect-vp.ll | 96 +-
llvm/test/CodeGen/RISCV/rvv/floor-vp.ll | 14 +-
llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll | 156 +-
llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll | 156 +-
.../RISCV/rvv/fold-scalar-load-crash.ll | 12 +-
llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll | 50 +-
.../test/CodeGen/RISCV/rvv/mscatter-sdnode.ll | 43 +-
.../RISCV/rvv/named-vector-shuffle-reverse.ll | 42 +-
llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll | 14 +-
llvm/test/CodeGen/RISCV/rvv/pr88576.ll | 2 +-
llvm/test/CodeGen/RISCV/rvv/rint-vp.ll | 14 +-
llvm/test/CodeGen/RISCV/rvv/round-vp.ll | 14 +-
llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll | 14 +-
llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll | 14 +-
llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll | 264 ++--
.../CodeGen/RISCV/rvv/sink-splat-operands.ll | 428 +++---
llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll | 68 +-
llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll | 4 +-
.../test/CodeGen/RISCV/rvv/strided-vpstore.ll | 46 +-
.../CodeGen/RISCV/rvv/vector-deinterleave.ll | 38 +-
.../RISCV/rvv/vector-interleave-fixed.ll | 4 +-
.../RISCV/rvv/vector-interleave-store.ll | 2 +-
.../CodeGen/RISCV/rvv/vector-interleave.ll | 6 +-
llvm/test/CodeGen/RISCV/rvv/vector-splice.ll | 120 +-
.../RISCV/rvv/vfirst-byte-compare-index.ll | 40 +-
llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll | 764 +++++----
.../RISCV/rvv/vfmadd-constrained-sdnode.ll | 94 +-
llvm/test/CodeGen/RISCV/rvv/vfmadd-sdnode.ll | 143 +-
.../RISCV/rvv/vfnmadd-constrained-sdnode.ll | 46 +-
.../RISCV/rvv/vfnmsub-constrained-sdnode.ll | 51 +-
llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll | 14 +-
.../RISCV/rvv/vp-splice-mask-fixed-vectors.ll | 72 +-
.../RISCV/rvv/vp-splice-mask-vectors.ll | 90 +-
.../test/CodeGen/RISCV/rvv/vpgather-sdnode.ll | 12 +-
llvm/test/CodeGen/RISCV/rvv/vpload.ll | 2 +-
.../CodeGen/RISCV/rvv/vpscatter-sdnode.ll | 130 +-
llvm/test/CodeGen/RISCV/rvv/vpstore.ll | 28 +-
llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll | 29 +-
llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll | 14 +-
llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll | 4 +-
llvm/test/CodeGen/RISCV/shifts.ll | 50 +-
.../CodeGen/RISCV/short-forward-branch-opt.ll | 76 +-
llvm/test/CodeGen/RISCV/srem-vector-lkk.ll | 4 +-
.../CodeGen/RISCV/zdinx-boundary-check.ll | 16 +-
114 files changed, 4476 insertions(+), 4679 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
index e7db1ededf383b..f43c520422f13d 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
@@ -16,6 +16,7 @@
#include "RISCV.h"
#include "RISCVFrameLowering.h"
#include "RISCVTargetMachine.h"
+#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/MacroFusion.h"
#include "llvm/CodeGen/ScheduleDAGMutation.h"
#include "llvm/MC/TargetRegistry.h"
@@ -199,3 +200,25 @@ unsigned RISCVSubtarget::getMinimumJumpTableEntries() const {
? RISCVMinimumJumpTableEntries
: TuneInfo->MinimumJumpTableEntries;
}
+
+void RISCVSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
+ unsigned NumRegionInstrs) const {
+ // Do bidirectional scheduling since it provides more balanced scheduling,
+ // leading to better performance. This will increase compile time.
+ Policy.OnlyTopDown = false;
+ Policy.OnlyBottomUp = false;
+
+ // Enabling or disabling the latency heuristic is a close call: it seems to
+ // help nearly no benchmark on out-of-order architectures; on the other hand,
+ // it regresses register pressure on a few benchmarks.
+ // FIXME: This is from AArch64, but we haven't evaluated it on RISC-V.
+ Policy.DisableLatencyHeuristic = true;
+
+ // Spilling is generally expensive on all RISC-V cores, so always enable
+ // register-pressure tracking. This will increase compile time.
+ Policy.ShouldTrackPressure = true;
+
+ // Enable ShouldTrackLaneMasks when vector instructions are supported.
+ // TODO: Add extensions that need register pairs as well?
+ Policy.ShouldTrackLaneMasks = hasVInstructions();
+}
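(Note: with an assertions-enabled build, the per-region effect of these policy
changes can be inspected by running llc -mtriple=riscv64 -O2
-debug-only=machine-scheduler on a test case, which prints the scheduling
regions and the candidates the scheduler picks.)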
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index f59a3737ae76f9..f2c0a3d85c998a 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -327,6 +327,9 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
unsigned getTailDupAggressiveThreshold() const {
return TuneInfo->TailDupAggressiveThreshold;
}
+
+ void overrideSchedPolicy(MachineSchedPolicy &Policy,
+ unsigned NumRegionInstrs) const override;
};
} // End llvm namespace
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb.ll
index 835b4e32ae3206..4fcfc726d0f98c 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb.ll
@@ -583,13 +583,13 @@ define signext i32 @ffs_i32(i32 signext %a) nounwind {
; RV64I-NEXT: call __muldi3
; RV64I-NEXT: slli s0, s0, 32
; RV64I-NEXT: srli s0, s0, 32
-; RV64I-NEXT: mv a1, a0
-; RV64I-NEXT: li a0, 0
+; RV64I-NEXT: li a1, 0
; RV64I-NEXT: beqz s0, .LBB9_2
; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: srliw a0, a1, 24
-; RV64I-NEXT: addiw a0, a0, 1
+; RV64I-NEXT: srliw a1, a0, 24
+; RV64I-NEXT: addiw a1, a1, 1
; RV64I-NEXT: .LBB9_2:
+; RV64I-NEXT: mv a0, a1
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 16
diff --git a/llvm/test/CodeGen/RISCV/abds-neg.ll b/llvm/test/CodeGen/RISCV/abds-neg.ll
index e7fd87bd783876..fed4a242a695b4 100644
--- a/llvm/test/CodeGen/RISCV/abds-neg.ll
+++ b/llvm/test/CodeGen/RISCV/abds-neg.ll
@@ -625,11 +625,11 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: lw a4, 4(a1)
; RV32I-NEXT: lw a6, 8(a1)
; RV32I-NEXT: lw t1, 12(a1)
-; RV32I-NEXT: lw a7, 8(a2)
+; RV32I-NEXT: lw t0, 8(a2)
; RV32I-NEXT: lw t2, 12(a2)
; RV32I-NEXT: lw a1, 0(a2)
; RV32I-NEXT: lw a2, 4(a2)
-; RV32I-NEXT: sltu t3, a7, a6
+; RV32I-NEXT: sltu t3, t0, a6
; RV32I-NEXT: mv t4, t3
; RV32I-NEXT: beq t1, t2, .LBB11_2
; RV32I-NEXT: # %bb.1:
@@ -637,19 +637,19 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: .LBB11_2:
; RV32I-NEXT: sltu a5, a1, a3
; RV32I-NEXT: sltu t6, a2, a4
-; RV32I-NEXT: mv t0, a5
+; RV32I-NEXT: mv a7, a5
; RV32I-NEXT: beq a4, a2, .LBB11_4
; RV32I-NEXT: # %bb.3:
-; RV32I-NEXT: mv t0, t6
+; RV32I-NEXT: mv a7, t6
; RV32I-NEXT: .LBB11_4:
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: xor t5, t1, t2
-; RV32I-NEXT: xor s0, a6, a7
+; RV32I-NEXT: xor s0, a6, t0
; RV32I-NEXT: or t5, s0, t5
; RV32I-NEXT: beqz t5, .LBB11_6
; RV32I-NEXT: # %bb.5:
-; RV32I-NEXT: mv t0, t4
+; RV32I-NEXT: mv a7, t4
; RV32I-NEXT: .LBB11_6:
; RV32I-NEXT: mv t5, a5
; RV32I-NEXT: beq a2, a4, .LBB11_8
@@ -662,27 +662,27 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: # %bb.9:
; RV32I-NEXT: sltu t6, a4, a2
; RV32I-NEXT: .LBB11_10:
-; RV32I-NEXT: bnez t0, .LBB11_12
+; RV32I-NEXT: bnez a7, .LBB11_12
; RV32I-NEXT: # %bb.11:
; RV32I-NEXT: sub t1, t2, t1
-; RV32I-NEXT: sub a6, a7, a6
-; RV32I-NEXT: sub a7, t1, t3
+; RV32I-NEXT: sub a6, t0, a6
+; RV32I-NEXT: sub t0, t1, t3
; RV32I-NEXT: sltu t1, a6, t5
-; RV32I-NEXT: sub a7, a7, t1
+; RV32I-NEXT: sub t0, t0, t1
; RV32I-NEXT: sub a6, a6, t5
; RV32I-NEXT: j .LBB11_13
; RV32I-NEXT: .LBB11_12:
-; RV32I-NEXT: sltu t3, a6, a7
+; RV32I-NEXT: sltu t3, a6, t0
; RV32I-NEXT: sub t1, t1, t2
; RV32I-NEXT: sub t1, t1, t3
-; RV32I-NEXT: sub a6, a6, a7
-; RV32I-NEXT: sltu a7, a6, t6
-; RV32I-NEXT: sub a7, t1, a7
+; RV32I-NEXT: sub a6, a6, t0
+; RV32I-NEXT: sltu t0, a6, t6
+; RV32I-NEXT: sub t0, t1, t0
; RV32I-NEXT: sub a6, a6, t6
; RV32I-NEXT: .LBB11_13:
; RV32I-NEXT: snez t1, a6
-; RV32I-NEXT: add a7, a7, t1
-; RV32I-NEXT: bnez t0, .LBB11_15
+; RV32I-NEXT: add t0, t0, t1
+; RV32I-NEXT: bnez a7, .LBB11_15
; RV32I-NEXT: # %bb.14:
; RV32I-NEXT: sub a2, a2, a4
; RV32I-NEXT: sub a2, a2, a5
@@ -697,7 +697,7 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: snez a3, a3
; RV32I-NEXT: neg a4, a6
; RV32I-NEXT: sltu a5, a4, a3
-; RV32I-NEXT: neg a6, a7
+; RV32I-NEXT: neg a6, t0
; RV32I-NEXT: sub a5, a6, a5
; RV32I-NEXT: snez a6, a1
; RV32I-NEXT: add a2, a2, a6
@@ -744,11 +744,11 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
; RV32ZBB-NEXT: lw a4, 4(a1)
; RV32ZBB-NEXT: lw a6, 8(a1)
; RV32ZBB-NEXT: lw t1, 12(a1)
-; RV32ZBB-NEXT: lw a7, 8(a2)
+; RV32ZBB-NEXT: lw t0, 8(a2)
; RV32ZBB-NEXT: lw t2, 12(a2)
; RV32ZBB-NEXT: lw a1, 0(a2)
; RV32ZBB-NEXT: lw a2, 4(a2)
-; RV32ZBB-NEXT: sltu t3, a7, a6
+; RV32ZBB-NEXT: sltu t3, t0, a6
; RV32ZBB-NEXT: mv t4, t3
; RV32ZBB-NEXT: beq t1, t2, .LBB11_2
; RV32ZBB-NEXT: # %bb.1:
@@ -756,19 +756,19 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
; RV32ZBB-NEXT: .LBB11_2:
; RV32ZBB-NEXT: sltu a5, a1, a3
; RV32ZBB-NEXT: sltu t6, a2, a4
-; RV32ZBB-NEXT: mv t0, a5
+; RV32ZBB-NEXT: mv a7, a5
; RV32ZBB-NEXT: beq a4, a2, .LBB11_4
; RV32ZBB-NEXT: # %bb.3:
-; RV32ZBB-NEXT: mv t0, t6
+; RV32ZBB-NEXT: mv a7, t6
; RV32ZBB-NEXT: .LBB11_4:
; RV32ZBB-NEXT: addi sp, sp, -16
; RV32ZBB-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32ZBB-NEXT: xor t5, t1, t2
-; RV32ZBB-NEXT: xor s0, a6, a7
+; RV32ZBB-NEXT: xor s0, a6, t0
; RV32ZBB-NEXT: or t5, s0, t5
; RV32ZBB-NEXT: beqz t5, .LBB11_6
; RV32ZBB-NEXT: # %bb.5:
-; RV32ZBB-NEXT: mv t0, t4
+; RV32ZBB-NEXT: mv a7, t4
; RV32ZBB-NEXT: .LBB11_6:
; RV32ZBB-NEXT: mv t5, a5
; RV32ZBB-NEXT: beq a2, a4, .LBB11_8
@@ -781,27 +781,27 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
; RV32ZBB-NEXT: # %bb.9:
; RV32ZBB-NEXT: sltu t6, a4, a2
; RV32ZBB-NEXT: .LBB11_10:
-; RV32ZBB-NEXT: bnez t0, .LBB11_12
+; RV32ZBB-NEXT: bnez a7, .LBB11_12
; RV32ZBB-NEXT: # %bb.11:
; RV32ZBB-NEXT: sub t1, t2, t1
-; RV32ZBB-NEXT: sub a6, a7, a6
-; RV32ZBB-NEXT: sub a7, t1, t3
+; RV32ZBB-NEXT: sub a6, t0, a6
+; RV32ZBB-NEXT: sub t0, t1, t3
; RV32ZBB-NEXT: sltu t1, a6, t5
-; RV32ZBB-NEXT: sub a7, a7, t1
+; RV32ZBB-NEXT: sub t0, t0, t1
; RV32ZBB-NEXT: sub a6, a6, t5
; RV32ZBB-NEXT: j .LBB11_13
; RV32ZBB-NEXT: .LBB11_12:
-; RV32ZBB-NEXT: sltu t3, a6, a7
+; RV32ZBB-NEXT: sltu t3, a6, t0
; RV32ZBB-NEXT: sub t1, t1, t2
; RV32ZBB-NEXT: sub t1, t1, t3
-; RV32ZBB-NEXT: sub a6, a6, a7
-; RV32ZBB-NEXT: sltu a7, a6, t6
-; RV32ZBB-NEXT: sub a7, t1, a7
+; RV32ZBB-NEXT: sub a6, a6, t0
+; RV32ZBB-NEXT: sltu t0, a6, t6
+; RV32ZBB-NEXT: sub t0, t1, t0
; RV32ZBB-NEXT: sub a6, a6, t6
; RV32ZBB-NEXT: .LBB11_13:
; RV32ZBB-NEXT: snez t1, a6
-; RV32ZBB-NEXT: add a7, a7, t1
-; RV32ZBB-NEXT: bnez t0, .LBB11_15
+; RV32ZBB-NEXT: add t0, t0, t1
+; RV32ZBB-NEXT: bnez a7, .LBB11_15
; RV32ZBB-NEXT: # %bb.14:
; RV32ZBB-NEXT: sub a2, a2, a4
; RV32ZBB-NEXT: sub a2, a2, a5
@@ -816,7 +816,7 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
; RV32ZBB-NEXT: snez a3, a3
; RV32ZBB-NEXT: neg a4, a6
; RV32ZBB-NEXT: sltu a5, a4, a3
-; RV32ZBB-NEXT: neg a6, a7
+; RV32ZBB-NEXT: neg a6, t0
; RV32ZBB-NEXT: sub a5, a6, a5
; RV32ZBB-NEXT: snez a6, a1
; RV32ZBB-NEXT: add a2, a2, a6
@@ -872,11 +872,11 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: lw a4, 4(a1)
; RV32I-NEXT: lw a6, 8(a1)
; RV32I-NEXT: lw t1, 12(a1)
-; RV32I-NEXT: lw a7, 8(a2)
+; RV32I-NEXT: lw t0, 8(a2)
; RV32I-NEXT: lw t2, 12(a2)
; RV32I-NEXT: lw a1, 0(a2)
; RV32I-NEXT: lw a2, 4(a2)
-; RV32I-NEXT: sltu t3, a7, a6
+; RV32I-NEXT: sltu t3, t0, a6
; RV32I-NEXT: mv t4, t3
; RV32I-NEXT: beq t1, t2, .LBB12_2
; RV32I-NEXT: # %bb.1:
@@ -884,19 +884,19 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: .LBB12_2:
; RV32I-NEXT: sltu a5, a1, a3
; RV32I-NEXT: sltu t6, a2, a4
-; RV32I-NEXT: mv t0, a5
+; RV32I-NEXT: mv a7, a5
; RV32I-NEXT: beq a4, a2, .LBB12_4
; RV32I-NEXT: # %bb.3:
-; RV32I-NEXT: mv t0, t6
+; RV32I-NEXT: mv a7, t6
; RV32I-NEXT: .LBB12_4:
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: xor t5, t1, t2
-; RV32I-NEXT: xor s0, a6, a7
+; RV32I-NEXT: xor s0, a6, t0
; RV32I-NEXT: or t5, s0, t5
; RV32I-NEXT: beqz t5, .LBB12_6
; RV32I-NEXT: # %bb.5:
-; RV32I-NEXT: mv t0, t4
+; RV32I-NEXT: mv a7, t4
; RV32I-NEXT: .LBB12_6:
; RV32I-NEXT: mv t5, a5
; RV32I-NEXT: beq a2, a4, .LBB12_8
@@ -909,27 +909,27 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: # %bb.9:
; RV32I-NEXT: sltu t6, a4, a2
; RV32I-NEXT: .LBB12_10:
-; RV32I-NEXT: bnez t0, .LBB12_12
+; RV32I-NEXT: bnez a7, .LBB12_12
; RV32I-NEXT: # %bb.11:
; RV32I-NEXT: sub t1, t2, t1
-; RV32I-NEXT: sub a6, a7, a6
-; RV32I-NEXT: sub a7, t1, t3
+; RV32I-NEXT: sub a6, t0, a6
+; RV32I-NEXT: sub t0, t1, t3
; RV32I-NEXT: sltu t1, a6, t5
-; RV32I-NEXT: sub a7, a7, t1
+; RV32I-NEXT: sub t0, t0, t1
; RV32I-NEXT: sub a6, a6, t5
; RV32I-NEXT: j .LBB12_13
; RV32I-NEXT: .LBB12_12:
-; RV32I-NEXT: sltu t3, a6, a7
+; RV32I-NEXT: sltu t3, a6, t0
; RV32I-NEXT: sub t1, t1, t2
; RV32I-NEXT: sub t1, t1, t3
-; RV32I-NEXT: sub a6, a6, a7
-; RV32I-NEXT: sltu a7, a6, t6
-; RV32I-NEXT: sub a7, t1, a7
+; RV32I-NEXT: sub a6, a6, t0
+; RV32I-NEXT: sltu t0, a6, t6
+; RV32I-NEXT: sub t0, t1, t0
; RV32I-NEXT: sub a6, a6, t6
; RV32I-NEXT: .LBB12_13:
; RV32I-NEXT: snez t1, a6
-; RV32I-NEXT: add a7, a7, t1
-; RV32I-NEXT: bnez t0, .LBB12_15
+; RV32I-NEXT: add t0, t0, t1
+; RV32I-NEXT: bnez a7, .LBB12_15
; RV32I-NEXT: # %bb.14:
; RV32I-NEXT: sub a2, a2, a4
; RV32I-NEXT: sub a2, a2, a5
@@ -944,7 +944,7 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: snez a3, a3
; RV32I-NEXT: neg a4, a6
; RV32I-NEXT: sltu a5, a4, a3
-; RV32I-NEXT: neg a6, a7
+; RV32I-NEXT: neg a6, t0
; RV32I-NEXT: sub a5, a6, a5
; RV32I-NEXT: snez a6, a1
; RV32I-NEXT: add a2, a2, a6
@@ -991,11 +991,11 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
; RV32ZBB-NEXT: lw a4, 4(a1)
; RV32ZBB-NEXT: lw a6, 8(a1)
; RV32ZBB-NEXT: lw t1, 12(a1)
-; RV32ZBB-NEXT: lw a7, 8(a2)
+; RV32ZBB-NEXT: lw t0, 8(a2)
; RV32ZBB-NEXT: lw t2, 12(a2)
; RV32ZBB-NEXT: lw a1, 0(a2)
; RV32ZBB-NEXT: lw a2, 4(a2)
-; RV32ZBB-NEXT: sltu t3, a7, a6
+; RV32ZBB-NEXT: sltu t3, t0, a6
; RV32ZBB-NEXT: mv t4, t3
; RV32ZBB-NEXT: beq t1, t2, .LBB12_2
; RV32ZBB-NEXT: # %bb.1:
@@ -1003,19 +1003,19 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
; RV32ZBB-NEXT: .LBB12_2:
; RV32ZBB-NEXT: sltu a5, a1, a3
; RV32ZBB-NEXT: sltu t6, a2, a4
-; RV32ZBB-NEXT: mv t0, a5
+; RV32ZBB-NEXT: mv a7, a5
; RV32ZBB-NEXT: beq a4, a2, .LBB12_4
; RV32ZBB-NEXT: # %bb.3:
-; RV32ZBB-NEXT: mv t0, t6
+; RV32ZBB-NEXT: mv a7, t6
; RV32ZBB-NEXT: .LBB12_4:
; RV32ZBB-NEXT: addi sp, sp, -16
; RV32ZBB-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32ZBB-NEXT: xor t5, t1, t2
-; RV32ZBB-NEXT: xor s0, a6, a7
+; RV32ZBB-NEXT: xor s0, a6, t0
; RV32ZBB-NEXT: or t5, s0, t5
; RV32ZBB-NEXT: beqz t5, .LBB12_6
; RV32ZBB-NEXT: # %bb.5:
-; RV32ZBB-NEXT: mv t0, t4
+; RV32ZBB-NEXT: mv a7, t4
; RV32ZBB-NEXT: .LBB12_6:
; RV32ZBB-NEXT: mv t5, a5
; RV32ZBB-NEXT: beq a2, a4, .LBB12_8
@@ -1028,27 +1028,27 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
; RV32ZBB-NEXT: # %bb.9:
; RV32ZBB-NEXT: sltu t6, a4, a2
; RV32ZBB-NEXT: .LBB12_10:
-; RV32ZBB-NEXT: bnez t0, .LBB12_12
+; RV32ZBB-NEXT: bnez a7, .LBB12_12
; RV32ZBB-NEXT: # %bb.11:
; RV32ZBB-NEXT: sub t1, t2, t1
-; RV32ZBB-NEXT: sub a6, a7, a6
-; RV32ZBB-NEXT: sub a7, t1, t3
+; RV32ZBB-NEXT: sub a6, t0, a6
+; RV32ZBB-NEXT: sub t0, t1, t3
; RV32ZBB-NEXT: sltu t1, a6, t5
-; RV32ZBB-NEXT: sub a7, a7, t1
+; RV32ZBB-NEXT: sub t0, t0, t1
; RV32ZBB-NEXT: sub a6, a6, t5
; RV32ZBB-NEXT: j .LBB12_13
; RV32ZBB-NEXT: .LBB12_12:
-; RV32ZBB-NEXT: sltu t3, a6, a7
+; RV32ZBB-NEXT: sltu t3, a6, t0
; RV32ZBB-NEXT: sub t1, t1, t2
; RV32ZBB-NEXT: sub t1, t1, t3
-; RV32ZBB-NEXT: sub a6, a6, a7
-; RV32ZBB-NEXT: sltu a7, a6, t6
-; RV32ZBB-NEXT: sub a7, t1, a7
+; RV32ZBB-NEXT: sub a6, a6, t0
+; RV32ZBB-NEXT: sltu t0, a6, t6
+; RV32ZBB-NEXT: sub t0, t1, t0
; RV32ZBB-NEXT: sub a6, a6, t6
; RV32ZBB-NEXT: .LBB12_13:
; RV32ZBB-NEXT: snez t1, a6
-; RV32ZBB-NEXT: add a7, a7, t1
-; RV32ZBB-NEXT: bnez t0, .LBB12_15
+; RV32ZBB-NEXT: add t0, t0, t1
+; RV32ZBB-NEXT: bnez a7, .LBB12_15
; RV32ZBB-NEXT: # %bb.14:
; RV32ZBB-NEXT: sub a2, a2, a4
; RV32ZBB-NEXT: sub a2, a2, a5
@@ -1063,7 +1063,7 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
; RV32ZBB-NEXT: snez a3, a3
; RV32ZBB-NEXT: neg a4, a6
; RV32ZBB-NEXT: sltu a5, a4, a3
-; RV32ZBB-NEXT: neg a6, a7
+; RV32ZBB-NEXT: neg a6, t0
; RV32ZBB-NEXT: sub a5, a6, a5
; RV32ZBB-NEXT: snez a6, a1
; RV32ZBB-NEXT: add a2, a2, a6
diff --git a/llvm/test/CodeGen/RISCV/atomic-rmw-discard.ll b/llvm/test/CodeGen/RISCV/atomic-rmw-discard.ll
index 35a1227b86b3a6..8534ad379ebab2 100644
--- a/llvm/test/CodeGen/RISCV/atomic-rmw-discard.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-rmw-discard.ll
@@ -191,10 +191,10 @@ define void @amomax_d_discard(ptr %a, i64 %b) nounwind {
; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32-NEXT: mv s0, a0
+; RV32-NEXT: mv s0, a2
+; RV32-NEXT: mv s1, a0
; RV32-NEXT: lw a4, 0(a0)
; RV32-NEXT: lw a5, 4(a0)
-; RV32-NEXT: mv s1, a2
; RV32-NEXT: mv s2, a1
; RV32-NEXT: j .LBB11_2
; RV32-NEXT: .LBB11_1: # %atomicrmw.start
@@ -204,17 +204,17 @@ define void @amomax_d_discard(ptr %a, i64 %b) nounwind {
; RV32-NEXT: addi a1, sp, 8
; RV32-NEXT: li a4, 5
; RV32-NEXT: li a5, 5
-; RV32-NEXT: mv a0, s0
+; RV32-NEXT: mv a0, s1
; RV32-NEXT: call __atomic_compare_exchange_8
; RV32-NEXT: lw a4, 8(sp)
; RV32-NEXT: lw a5, 12(sp)
; RV32-NEXT: bnez a0, .LBB11_6
; RV32-NEXT: .LBB11_2: # %atomicrmw.start
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32-NEXT: beq a5, s1, .LBB11_4
+; RV32-NEXT: beq a5, s0, .LBB11_4
; RV32-NEXT: # %bb.3: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB11_2 Depth=1
-; RV32-NEXT: slt a0, s1, a5
+; RV32-NEXT: slt a0, s0, a5
; RV32-NEXT: mv a2, a4
; RV32-NEXT: mv a3, a5
; RV32-NEXT: bnez a0, .LBB11_1
@@ -227,7 +227,7 @@ define void @amomax_d_discard(ptr %a, i64 %b) nounwind {
; RV32-NEXT: .LBB11_5: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB11_2 Depth=1
; RV32-NEXT: mv a2, s2
-; RV32-NEXT: mv a3, s1
+; RV32-NEXT: mv a3, s0
; RV32-NEXT: j .LBB11_1
; RV32-NEXT: .LBB11_6: # %atomicrmw.end
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
@@ -267,10 +267,10 @@ define void @amomaxu_d_discard(ptr %a, i64 %b) nounwind {
; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32-NEXT: mv s0, a0
+; RV32-NEXT: mv s0, a2
+; RV32-NEXT: mv s1, a0
; RV32-NEXT: lw a4, 0(a0)
; RV32-NEXT: lw a5, 4(a0)
-; RV32-NEXT: mv s1, a2
; RV32-NEXT: mv s2, a1
; RV32-NEXT: j .LBB13_2
; RV32-NEXT: .LBB13_1: # %atomicrmw.start
@@ -280,17 +280,17 @@ define void @amomaxu_d_discard(ptr %a, i64 %b) nounwind {
; RV32-NEXT: addi a1, sp, 8
; RV32-NEXT: li a4, 5
; RV32-NEXT: li a5, 5
-; RV32-NEXT: mv a0, s0
+; RV32-NEXT: mv a0, s1
; RV32-NEXT: call __atomic_compare_exchange_8
; RV32-NEXT: lw a4, 8(sp)
; RV32-NEXT: lw a5, 12(sp)
; RV32-NEXT: bnez a0, .LBB13_6
; RV32-NEXT: .LBB13_2: # %atomicrmw.start
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32-NEXT: beq a5, s1, .LBB13_4
+; RV32-NEXT: beq a5, s0, .LBB13_4
; RV32-NEXT: # %bb.3: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB13_2 Depth=1
-; RV32-NEXT: sltu a0, s1, a5
+; RV32-NEXT: sltu a0, s0, a5
; RV32-NEXT: mv a2, a4
; RV32-NEXT: mv a3, a5
; RV32-NEXT: bnez a0, .LBB13_1
@@ -303,7 +303,7 @@ define void @amomaxu_d_discard(ptr %a, i64 %b) nounwind {
; RV32-NEXT: .LBB13_5: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB13_2 Depth=1
; RV32-NEXT: mv a2, s2
-; RV32-NEXT: mv a3, s1
+; RV32-NEXT: mv a3, s0
; RV32-NEXT: j .LBB13_1
; RV32-NEXT: .LBB13_6: # %atomicrmw.end
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
@@ -343,10 +343,10 @@ define void @amomin_d_discard(ptr %a, i64 %b) nounwind {
; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32-NEXT: mv s0, a0
+; RV32-NEXT: mv s0, a2
+; RV32-NEXT: mv s1, a0
; RV32-NEXT: lw a4, 0(a0)
; RV32-NEXT: lw a5, 4(a0)
-; RV32-NEXT: mv s1, a2
; RV32-NEXT: mv s2, a1
; RV32-NEXT: j .LBB15_2
; RV32-NEXT: .LBB15_1: # %atomicrmw.start
@@ -356,17 +356,17 @@ define void @amomin_d_discard(ptr %a, i64 %b) nounwind {
; RV32-NEXT: addi a1, sp, 8
; RV32-NEXT: li a4, 5
; RV32-NEXT: li a5, 5
-; RV32-NEXT: mv a0, s0
+; RV32-NEXT: mv a0, s1
; RV32-NEXT: call __atomic_compare_exchange_8
; RV32-NEXT: lw a4, 8(sp)
; RV32-NEXT: lw a5, 12(sp)
; RV32-NEXT: bnez a0, .LBB15_6
; RV32-NEXT: .LBB15_2: # %atomicrmw.start
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32-NEXT: beq a5, s1, .LBB15_4
+; RV32-NEXT: beq a5, s0, .LBB15_4
; RV32-NEXT: # %bb.3: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB15_2 Depth=1
-; RV32-NEXT: slt a0, s1, a5
+; RV32-NEXT: slt a0, s0, a5
; RV32-NEXT: mv a2, a4
; RV32-NEXT: mv a3, a5
; RV32-NEXT: beqz a0, .LBB15_1
@@ -379,7 +379,7 @@ define void @amomin_d_discard(ptr %a, i64 %b) nounwind {
; RV32-NEXT: .LBB15_5: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB15_2 Depth=1
; RV32-NEXT: mv a2, s2
-; RV32-NEXT: mv a3, s1
+; RV32-NEXT: mv a3, s0
; RV32-NEXT: j .LBB15_1
; RV32-NEXT: .LBB15_6: # %atomicrmw.end
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
@@ -419,10 +419,10 @@ define void @amominu_d_discard(ptr %a, i64 %b) nounwind {
; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32-NEXT: mv s0, a0
+; RV32-NEXT: mv s0, a2
+; RV32-NEXT: mv s1, a0
; RV32-NEXT: lw a4, 0(a0)
; RV32-NEXT: lw a5, 4(a0)
-; RV32-NEXT: mv s1, a2
; RV32-NEXT: mv s2, a1
; RV32-NEXT: j .LBB17_2
; RV32-NEXT: .LBB17_1: # %atomicrmw.start
@@ -432,17 +432,17 @@ define void @amominu_d_discard(ptr %a, i64 %b) nounwind {
; RV32-NEXT: addi a1, sp, 8
; RV32-NEXT: li a4, 5
; RV32-NEXT: li a5, 5
-; RV32-NEXT: mv a0, s0
+; RV32-NEXT: mv a0, s1
; RV32-NEXT: call __atomic_compare_exchange_8
; RV32-NEXT: lw a4, 8(sp)
; RV32-NEXT: lw a5, 12(sp)
; RV32-NEXT: bnez a0, .LBB17_6
; RV32-NEXT: .LBB17_2: # %atomicrmw.start
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32-NEXT: beq a5, s1, .LBB17_4
+; RV32-NEXT: beq a5, s0, .LBB17_4
; RV32-NEXT: # %bb.3: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB17_2 Depth=1
-; RV32-NEXT: sltu a0, s1, a5
+; RV32-NEXT: sltu a0, s0, a5
; RV32-NEXT: mv a2, a4
; RV32-NEXT: mv a3, a5
; RV32-NEXT: beqz a0, .LBB17_1
@@ -455,7 +455,7 @@ define void @amominu_d_discard(ptr %a, i64 %b) nounwind {
; RV32-NEXT: .LBB17_5: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB17_2 Depth=1
; RV32-NEXT: mv a2, s2
-; RV32-NEXT: mv a3, s1
+; RV32-NEXT: mv a3, s0
; RV32-NEXT: j .LBB17_1
; RV32-NEXT: .LBB17_6: # %atomicrmw.end
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/atomic-rmw.ll b/llvm/test/CodeGen/RISCV/atomic-rmw.ll
index 469edacb391df6..bc3eac7b556d76 100644
--- a/llvm/test/CodeGen/RISCV/atomic-rmw.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-rmw.ll
@@ -26072,46 +26072,46 @@ define i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lw a5, 0(a0)
-; RV32I-NEXT: lw a4, 4(a0)
-; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: mv s1, a0
+; RV32I-NEXT: lw a4, 0(a0)
+; RV32I-NEXT: lw a5, 4(a0)
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: j .LBB220_2
; RV32I-NEXT: .LBB220_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB220_2 Depth=1
-; RV32I-NEXT: sw a5, 8(sp)
-; RV32I-NEXT: sw a4, 12(sp)
+; RV32I-NEXT: sw a4, 8(sp)
+; RV32I-NEXT: sw a5, 12(sp)
; RV32I-NEXT: addi a1, sp, 8
-; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: li a5, 0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 8(sp)
-; RV32I-NEXT: lw a4, 12(sp)
+; RV32I-NEXT: lw a4, 8(sp)
+; RV32I-NEXT: lw a5, 12(sp)
; RV32I-NEXT: bnez a0, .LBB220_7
; RV32I-NEXT: .LBB220_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT: beq a4, s1, .LBB220_4
+; RV32I-NEXT: beq a5, s0, .LBB220_4
; RV32I-NEXT: # %bb.3: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB220_2 Depth=1
-; RV32I-NEXT: slt a0, s1, a4
+; RV32I-NEXT: slt a0, s0, a5
; RV32I-NEXT: j .LBB220_5
; RV32I-NEXT: .LBB220_4: # in Loop: Header=BB220_2 Depth=1
-; RV32I-NEXT: sltu a0, s2, a5
+; RV32I-NEXT: sltu a0, s2, a4
; RV32I-NEXT: .LBB220_5: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB220_2 Depth=1
-; RV32I-NEXT: mv a2, a5
-; RV32I-NEXT: mv a3, a4
+; RV32I-NEXT: mv a2, a4
+; RV32I-NEXT: mv a3, a5
; RV32I-NEXT: bnez a0, .LBB220_1
; RV32I-NEXT: # %bb.6: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB220_2 Depth=1
; RV32I-NEXT: mv a2, s2
-; RV32I-NEXT: mv a3, s1
+; RV32I-NEXT: mv a3, s0
; RV32I-NEXT: j .LBB220_1
; RV32I-NEXT: .LBB220_7: # %atomicrmw.end
-; RV32I-NEXT: mv a0, a5
-; RV32I-NEXT: mv a1, a4
+; RV32I-NEXT: mv a0, a4
+; RV32I-NEXT: mv a1, a5
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -26126,46 +26126,46 @@ define i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: mv s0, a0
-; RV32IA-NEXT: lw a5, 0(a0)
-; RV32IA-NEXT: lw a4, 4(a0)
-; RV32IA-NEXT: mv s1, a2
+; RV32IA-NEXT: mv s0, a2
+; RV32IA-NEXT: mv s1, a0
+; RV32IA-NEXT: lw a4, 0(a0)
+; RV32IA-NEXT: lw a5, 4(a0)
; RV32IA-NEXT: mv s2, a1
; RV32IA-NEXT: j .LBB220_2
; RV32IA-NEXT: .LBB220_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB220_2 Depth=1
-; RV32IA-NEXT: sw a5, 8(sp)
-; RV32IA-NEXT: sw a4, 12(sp)
+; RV32IA-NEXT: sw a4, 8(sp)
+; RV32IA-NEXT: sw a5, 12(sp)
; RV32IA-NEXT: addi a1, sp, 8
-; RV32IA-NEXT: mv a0, s0
+; RV32IA-NEXT: mv a0, s1
; RV32IA-NEXT: li a4, 0
; RV32IA-NEXT: li a5, 0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 8(sp)
-; RV32IA-NEXT: lw a4, 12(sp)
+; RV32IA-NEXT: lw a4, 8(sp)
+; RV32IA-NEXT: lw a5, 12(sp)
; RV32IA-NEXT: bnez a0, .LBB220_7
; RV32IA-NEXT: .LBB220_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: beq a4, s1, .LBB220_4
+; RV32IA-NEXT: beq a5, s0, .LBB220_4
; RV32IA-NEXT: # %bb.3: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB220_2 Depth=1
-; RV32IA-NEXT: slt a0, s1, a4
+; RV32IA-NEXT: slt a0, s0, a5
; RV32IA-NEXT: j .LBB220_5
; RV32IA-NEXT: .LBB220_4: # in Loop: Header=BB220_2 Depth=1
-; RV32IA-NEXT: sltu a0, s2, a5
+; RV32IA-NEXT: sltu a0, s2, a4
; RV32IA-NEXT: .LBB220_5: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB220_2 Depth=1
-; RV32IA-NEXT: mv a2, a5
-; RV32IA-NEXT: mv a3, a4
+; RV32IA-NEXT: mv a2, a4
+; RV32IA-NEXT: mv a3, a5
; RV32IA-NEXT: bnez a0, .LBB220_1
; RV32IA-NEXT: # %bb.6: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB220_2 Depth=1
; RV32IA-NEXT: mv a2, s2
-; RV32IA-NEXT: mv a3, s1
+; RV32IA-NEXT: mv a3, s0
; RV32IA-NEXT: j .LBB220_1
; RV32IA-NEXT: .LBB220_7: # %atomicrmw.end
-; RV32IA-NEXT: mv a0, a5
-; RV32IA-NEXT: mv a1, a4
+; RV32IA-NEXT: mv a0, a4
+; RV32IA-NEXT: mv a1, a5
; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -26225,46 +26225,46 @@ define i64 @atomicrmw_max_i64_acquire(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lw a5, 0(a0)
-; RV32I-NEXT: lw a4, 4(a0)
-; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: mv s1, a0
+; RV32I-NEXT: lw a4, 0(a0)
+; RV32I-NEXT: lw a5, 4(a0)
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: j .LBB221_2
; RV32I-NEXT: .LBB221_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB221_2 Depth=1
-; RV32I-NEXT: sw a5, 8(sp)
-; RV32I-NEXT: sw a4, 12(sp)
+; RV32I-NEXT: sw a4, 8(sp)
+; RV32I-NEXT: sw a5, 12(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: li a5, 2
-; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 8(sp)
-; RV32I-NEXT: lw a4, 12(sp)
+; RV32I-NEXT: lw a4, 8(sp)
+; RV32I-NEXT: lw a5, 12(sp)
; RV32I-NEXT: bnez a0, .LBB221_7
; RV32I-NEXT: .LBB221_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT: beq a4, s1, .LBB221_4
+; RV32I-NEXT: beq a5, s0, .LBB221_4
; RV32I-NEXT: # %bb.3: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB221_2 Depth=1
-; RV32I-NEXT: slt a0, s1, a4
+; RV32I-NEXT: slt a0, s0, a5
; RV32I-NEXT: j .LBB221_5
; RV32I-NEXT: .LBB221_4: # in Loop: Header=BB221_2 Depth=1
-; RV32I-NEXT: sltu a0, s2, a5
+; RV32I-NEXT: sltu a0, s2, a4
; RV32I-NEXT: .LBB221_5: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB221_2 Depth=1
-; RV32I-NEXT: mv a2, a5
-; RV32I-NEXT: mv a3, a4
+; RV32I-NEXT: mv a2, a4
+; RV32I-NEXT: mv a3, a5
; RV32I-NEXT: bnez a0, .LBB221_1
; RV32I-NEXT: # %bb.6: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB221_2 Depth=1
; RV32I-NEXT: mv a2, s2
-; RV32I-NEXT: mv a3, s1
+; RV32I-NEXT: mv a3, s0
; RV32I-NEXT: j .LBB221_1
; RV32I-NEXT: .LBB221_7: # %atomicrmw.end
-; RV32I-NEXT: mv a0, a5
-; RV32I-NEXT: mv a1, a4
+; RV32I-NEXT: mv a0, a4
+; RV32I-NEXT: mv a1, a5
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -26279,46 +26279,46 @@ define i64 @atomicrmw_max_i64_acquire(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: mv s0, a0
-; RV32IA-NEXT: lw a5, 0(a0)
-; RV32IA-NEXT: lw a4, 4(a0)
-; RV32IA-NEXT: mv s1, a2
+; RV32IA-NEXT: mv s0, a2
+; RV32IA-NEXT: mv s1, a0
+; RV32IA-NEXT: lw a4, 0(a0)
+; RV32IA-NEXT: lw a5, 4(a0)
; RV32IA-NEXT: mv s2, a1
; RV32IA-NEXT: j .LBB221_2
; RV32IA-NEXT: .LBB221_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB221_2 Depth=1
-; RV32IA-NEXT: sw a5, 8(sp)
-; RV32IA-NEXT: sw a4, 12(sp)
+; RV32IA-NEXT: sw a4, 8(sp)
+; RV32IA-NEXT: sw a5, 12(sp)
; RV32IA-NEXT: addi a1, sp, 8
; RV32IA-NEXT: li a4, 2
; RV32IA-NEXT: li a5, 2
-; RV32IA-NEXT: mv a0, s0
+; RV32IA-NEXT: mv a0, s1
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 8(sp)
-; RV32IA-NEXT: lw a4, 12(sp)
+; RV32IA-NEXT: lw a4, 8(sp)
+; RV32IA-NEXT: lw a5, 12(sp)
; RV32IA-NEXT: bnez a0, .LBB221_7
; RV32IA-NEXT: .LBB221_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: beq a4, s1, .LBB221_4
+; RV32IA-NEXT: beq a5, s0, .LBB221_4
; RV32IA-NEXT: # %bb.3: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB221_2 Depth=1
-; RV32IA-NEXT: slt a0, s1, a4
+; RV32IA-NEXT: slt a0, s0, a5
; RV32IA-NEXT: j .LBB221_5
; RV32IA-NEXT: .LBB221_4: # in Loop: Header=BB221_2 Depth=1
-; RV32IA-NEXT: sltu a0, s2, a5
+; RV32IA-NEXT: sltu a0, s2, a4
; RV32IA-NEXT: .LBB221_5: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB221_2 Depth=1
-; RV32IA-NEXT: mv a2, a5
-; RV32IA-NEXT: mv a3, a4
+; RV32IA-NEXT: mv a2, a4
+; RV32IA-NEXT: mv a3, a5
; RV32IA-NEXT: bnez a0, .LBB221_1
; RV32IA-NEXT: # %bb.6: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB221_2 Depth=1
; RV32IA-NEXT: mv a2, s2
-; RV32IA-NEXT: mv a3, s1
+; RV32IA-NEXT: mv a3, s0
; RV32IA-NEXT: j .LBB221_1
; RV32IA-NEXT: .LBB221_7: # %atomicrmw.end
-; RV32IA-NEXT: mv a0, a5
-; RV32IA-NEXT: mv a1, a4
+; RV32IA-NEXT: mv a0, a4
+; RV32IA-NEXT: mv a1, a5
; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -26383,46 +26383,46 @@ define i64 @atomicrmw_max_i64_release(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lw a5, 0(a0)
-; RV32I-NEXT: lw a4, 4(a0)
-; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: mv s1, a0
+; RV32I-NEXT: lw a4, 0(a0)
+; RV32I-NEXT: lw a5, 4(a0)
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: j .LBB222_2
; RV32I-NEXT: .LBB222_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB222_2 Depth=1
-; RV32I-NEXT: sw a5, 8(sp)
-; RV32I-NEXT: sw a4, 12(sp)
+; RV32I-NEXT: sw a4, 8(sp)
+; RV32I-NEXT: sw a5, 12(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a4, 3
-; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: li a5, 0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 8(sp)
-; RV32I-NEXT: lw a4, 12(sp)
+; RV32I-NEXT: lw a4, 8(sp)
+; RV32I-NEXT: lw a5, 12(sp)
; RV32I-NEXT: bnez a0, .LBB222_7
; RV32I-NEXT: .LBB222_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT: beq a4, s1, .LBB222_4
+; RV32I-NEXT: beq a5, s0, .LBB222_4
; RV32I-NEXT: # %bb.3: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB222_2 Depth=1
-; RV32I-NEXT: slt a0, s1, a4
+; RV32I-NEXT: slt a0, s0, a5
; RV32I-NEXT: j .LBB222_5
; RV32I-NEXT: .LBB222_4: # in Loop: Header=BB222_2 Depth=1
-; RV32I-NEXT: sltu a0, s2, a5
+; RV32I-NEXT: sltu a0, s2, a4
; RV32I-NEXT: .LBB222_5: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB222_2 Depth=1
-; RV32I-NEXT: mv a2, a5
-; RV32I-NEXT: mv a3, a4
+; RV32I-NEXT: mv a2, a4
+; RV32I-NEXT: mv a3, a5
; RV32I-NEXT: bnez a0, .LBB222_1
; RV32I-NEXT: # %bb.6: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB222_2 Depth=1
; RV32I-NEXT: mv a2, s2
-; RV32I-NEXT: mv a3, s1
+; RV32I-NEXT: mv a3, s0
; RV32I-NEXT: j .LBB222_1
; RV32I-NEXT: .LBB222_7: # %atomicrmw.end
-; RV32I-NEXT: mv a0, a5
-; RV32I-NEXT: mv a1, a4
+; RV32I-NEXT: mv a0, a4
+; RV32I-NEXT: mv a1, a5
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -26437,46 +26437,46 @@ define i64 @atomicrmw_max_i64_release(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: mv s0, a0
-; RV32IA-NEXT: lw a5, 0(a0)
-; RV32IA-NEXT: lw a4, 4(a0)
-; RV32IA-NEXT: mv s1, a2
+; RV32IA-NEXT: mv s0, a2
+; RV32IA-NEXT: mv s1, a0
+; RV32IA-NEXT: lw a4, 0(a0)
+; RV32IA-NEXT: lw a5, 4(a0)
; RV32IA-NEXT: mv s2, a1
; RV32IA-NEXT: j .LBB222_2
; RV32IA-NEXT: .LBB222_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB222_2 Depth=1
-; RV32IA-NEXT: sw a5, 8(sp)
-; RV32IA-NEXT: sw a4, 12(sp)
+; RV32IA-NEXT: sw a4, 8(sp)
+; RV32IA-NEXT: sw a5, 12(sp)
; RV32IA-NEXT: addi a1, sp, 8
; RV32IA-NEXT: li a4, 3
-; RV32IA-NEXT: mv a0, s0
+; RV32IA-NEXT: mv a0, s1
; RV32IA-NEXT: li a5, 0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 8(sp)
-; RV32IA-NEXT: lw a4, 12(sp)
+; RV32IA-NEXT: lw a4, 8(sp)
+; RV32IA-NEXT: lw a5, 12(sp)
; RV32IA-NEXT: bnez a0, .LBB222_7
; RV32IA-NEXT: .LBB222_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: beq a4, s1, .LBB222_4
+; RV32IA-NEXT: beq a5, s0, .LBB222_4
; RV32IA-NEXT: # %bb.3: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB222_2 Depth=1
-; RV32IA-NEXT: slt a0, s1, a4
+; RV32IA-NEXT: slt a0, s0, a5
; RV32IA-NEXT: j .LBB222_5
; RV32IA-NEXT: .LBB222_4: # in Loop: Header=BB222_2 Depth=1
-; RV32IA-NEXT: sltu a0, s2, a5
+; RV32IA-NEXT: sltu a0, s2, a4
; RV32IA-NEXT: .LBB222_5: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB222_2 Depth=1
-; RV32IA-NEXT: mv a2, a5
-; RV32IA-NEXT: mv a3, a4
+; RV32IA-NEXT: mv a2, a4
+; RV32IA-NEXT: mv a3, a5
; RV32IA-NEXT: bnez a0, .LBB222_1
; RV32IA-NEXT: # %bb.6: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB222_2 Depth=1
; RV32IA-NEXT: mv a2, s2
-; RV32IA-NEXT: mv a3, s1
+; RV32IA-NEXT: mv a3, s0
; RV32IA-NEXT: j .LBB222_1
; RV32IA-NEXT: .LBB222_7: # %atomicrmw.end
-; RV32IA-NEXT: mv a0, a5
-; RV32IA-NEXT: mv a1, a4
+; RV32IA-NEXT: mv a0, a4
+; RV32IA-NEXT: mv a1, a5
; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -26541,46 +26541,46 @@ define i64 @atomicrmw_max_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lw a5, 0(a0)
-; RV32I-NEXT: lw a4, 4(a0)
-; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: mv s1, a0
+; RV32I-NEXT: lw a4, 0(a0)
+; RV32I-NEXT: lw a5, 4(a0)
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: j .LBB223_2
; RV32I-NEXT: .LBB223_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB223_2 Depth=1
-; RV32I-NEXT: sw a5, 8(sp)
-; RV32I-NEXT: sw a4, 12(sp)
+; RV32I-NEXT: sw a4, 8(sp)
+; RV32I-NEXT: sw a5, 12(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a4, 4
; RV32I-NEXT: li a5, 2
-; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 8(sp)
-; RV32I-NEXT: lw a4, 12(sp)
+; RV32I-NEXT: lw a4, 8(sp)
+; RV32I-NEXT: lw a5, 12(sp)
; RV32I-NEXT: bnez a0, .LBB223_7
; RV32I-NEXT: .LBB223_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT: beq a4, s1, .LBB223_4
+; RV32I-NEXT: beq a5, s0, .LBB223_4
; RV32I-NEXT: # %bb.3: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB223_2 Depth=1
-; RV32I-NEXT: slt a0, s1, a4
+; RV32I-NEXT: slt a0, s0, a5
; RV32I-NEXT: j .LBB223_5
; RV32I-NEXT: .LBB223_4: # in Loop: Header=BB223_2 Depth=1
-; RV32I-NEXT: sltu a0, s2, a5
+; RV32I-NEXT: sltu a0, s2, a4
; RV32I-NEXT: .LBB223_5: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB223_2 Depth=1
-; RV32I-NEXT: mv a2, a5
-; RV32I-NEXT: mv a3, a4
+; RV32I-NEXT: mv a2, a4
+; RV32I-NEXT: mv a3, a5
; RV32I-NEXT: bnez a0, .LBB223_1
; RV32I-NEXT: # %bb.6: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB223_2 Depth=1
; RV32I-NEXT: mv a2, s2
-; RV32I-NEXT: mv a3, s1
+; RV32I-NEXT: mv a3, s0
; RV32I-NEXT: j .LBB223_1
; RV32I-NEXT: .LBB223_7: # %atomicrmw.end
-; RV32I-NEXT: mv a0, a5
-; RV32I-NEXT: mv a1, a4
+; RV32I-NEXT: mv a0, a4
+; RV32I-NEXT: mv a1, a5
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -26595,46 +26595,46 @@ define i64 @atomicrmw_max_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: mv s0, a0
-; RV32IA-NEXT: lw a5, 0(a0)
-; RV32IA-NEXT: lw a4, 4(a0)
-; RV32IA-NEXT: mv s1, a2
+; RV32IA-NEXT: mv s0, a2
+; RV32IA-NEXT: mv s1, a0
+; RV32IA-NEXT: lw a4, 0(a0)
+; RV32IA-NEXT: lw a5, 4(a0)
; RV32IA-NEXT: mv s2, a1
; RV32IA-NEXT: j .LBB223_2
; RV32IA-NEXT: .LBB223_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB223_2 Depth=1
-; RV32IA-NEXT: sw a5, 8(sp)
-; RV32IA-NEXT: sw a4, 12(sp)
+; RV32IA-NEXT: sw a4, 8(sp)
+; RV32IA-NEXT: sw a5, 12(sp)
; RV32IA-NEXT: addi a1, sp, 8
; RV32IA-NEXT: li a4, 4
; RV32IA-NEXT: li a5, 2
-; RV32IA-NEXT: mv a0, s0
+; RV32IA-NEXT: mv a0, s1
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 8(sp)
-; RV32IA-NEXT: lw a4, 12(sp)
+; RV32IA-NEXT: lw a4, 8(sp)
+; RV32IA-NEXT: lw a5, 12(sp)
; RV32IA-NEXT: bnez a0, .LBB223_7
; RV32IA-NEXT: .LBB223_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: beq a4, s1, .LBB223_4
+; RV32IA-NEXT: beq a5, s0, .LBB223_4
; RV32IA-NEXT: # %bb.3: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB223_2 Depth=1
-; RV32IA-NEXT: slt a0, s1, a4
+; RV32IA-NEXT: slt a0, s0, a5
; RV32IA-NEXT: j .LBB223_5
; RV32IA-NEXT: .LBB223_4: # in Loop: Header=BB223_2 Depth=1
-; RV32IA-NEXT: sltu a0, s2, a5
+; RV32IA-NEXT: sltu a0, s2, a4
; RV32IA-NEXT: .LBB223_5: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB223_2 Depth=1
-; RV32IA-NEXT: mv a2, a5
-; RV32IA-NEXT: mv a3, a4
+; RV32IA-NEXT: mv a2, a4
+; RV32IA-NEXT: mv a3, a5
; RV32IA-NEXT: bnez a0, .LBB223_1
; RV32IA-NEXT: # %bb.6: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB223_2 Depth=1
; RV32IA-NEXT: mv a2, s2
-; RV32IA-NEXT: mv a3, s1
+; RV32IA-NEXT: mv a3, s0
; RV32IA-NEXT: j .LBB223_1
; RV32IA-NEXT: .LBB223_7: # %atomicrmw.end
-; RV32IA-NEXT: mv a0, a5
-; RV32IA-NEXT: mv a1, a4
+; RV32IA-NEXT: mv a0, a4
+; RV32IA-NEXT: mv a1, a5
; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -26699,46 +26699,46 @@ define i64 @atomicrmw_max_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lw a5, 0(a0)
-; RV32I-NEXT: lw a4, 4(a0)
-; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: mv s1, a0
+; RV32I-NEXT: lw a4, 0(a0)
+; RV32I-NEXT: lw a5, 4(a0)
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: j .LBB224_2
; RV32I-NEXT: .LBB224_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB224_2 Depth=1
-; RV32I-NEXT: sw a5, 8(sp)
-; RV32I-NEXT: sw a4, 12(sp)
+; RV32I-NEXT: sw a4, 8(sp)
+; RV32I-NEXT: sw a5, 12(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: li a5, 5
-; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 8(sp)
-; RV32I-NEXT: lw a4, 12(sp)
+; RV32I-NEXT: lw a4, 8(sp)
+; RV32I-NEXT: lw a5, 12(sp)
; RV32I-NEXT: bnez a0, .LBB224_7
; RV32I-NEXT: .LBB224_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT: beq a4, s1, .LBB224_4
+; RV32I-NEXT: beq a5, s0, .LBB224_4
; RV32I-NEXT: # %bb.3: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB224_2 Depth=1
-; RV32I-NEXT: slt a0, s1, a4
+; RV32I-NEXT: slt a0, s0, a5
; RV32I-NEXT: j .LBB224_5
; RV32I-NEXT: .LBB224_4: # in Loop: Header=BB224_2 Depth=1
-; RV32I-NEXT: sltu a0, s2, a5
+; RV32I-NEXT: sltu a0, s2, a4
; RV32I-NEXT: .LBB224_5: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB224_2 Depth=1
-; RV32I-NEXT: mv a2, a5
-; RV32I-NEXT: mv a3, a4
+; RV32I-NEXT: mv a2, a4
+; RV32I-NEXT: mv a3, a5
; RV32I-NEXT: bnez a0, .LBB224_1
; RV32I-NEXT: # %bb.6: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB224_2 Depth=1
; RV32I-NEXT: mv a2, s2
-; RV32I-NEXT: mv a3, s1
+; RV32I-NEXT: mv a3, s0
; RV32I-NEXT: j .LBB224_1
; RV32I-NEXT: .LBB224_7: # %atomicrmw.end
-; RV32I-NEXT: mv a0, a5
-; RV32I-NEXT: mv a1, a4
+; RV32I-NEXT: mv a0, a4
+; RV32I-NEXT: mv a1, a5
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -26753,46 +26753,46 @@ define i64 @atomicrmw_max_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: mv s0, a0
-; RV32IA-NEXT: lw a5, 0(a0)
-; RV32IA-NEXT: lw a4, 4(a0)
-; RV32IA-NEXT: mv s1, a2
+; RV32IA-NEXT: mv s0, a2
+; RV32IA-NEXT: mv s1, a0
+; RV32IA-NEXT: lw a4, 0(a0)
+; RV32IA-NEXT: lw a5, 4(a0)
; RV32IA-NEXT: mv s2, a1
; RV32IA-NEXT: j .LBB224_2
; RV32IA-NEXT: .LBB224_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB224_2 Depth=1
-; RV32IA-NEXT: sw a5, 8(sp)
-; RV32IA-NEXT: sw a4, 12(sp)
+; RV32IA-NEXT: sw a4, 8(sp)
+; RV32IA-NEXT: sw a5, 12(sp)
; RV32IA-NEXT: addi a1, sp, 8
; RV32IA-NEXT: li a4, 5
; RV32IA-NEXT: li a5, 5
-; RV32IA-NEXT: mv a0, s0
+; RV32IA-NEXT: mv a0, s1
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 8(sp)
-; RV32IA-NEXT: lw a4, 12(sp)
+; RV32IA-NEXT: lw a4, 8(sp)
+; RV32IA-NEXT: lw a5, 12(sp)
; RV32IA-NEXT: bnez a0, .LBB224_7
; RV32IA-NEXT: .LBB224_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: beq a4, s1, .LBB224_4
+; RV32IA-NEXT: beq a5, s0, .LBB224_4
; RV32IA-NEXT: # %bb.3: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB224_2 Depth=1
-; RV32IA-NEXT: slt a0, s1, a4
+; RV32IA-NEXT: slt a0, s0, a5
; RV32IA-NEXT: j .LBB224_5
; RV32IA-NEXT: .LBB224_4: # in Loop: Header=BB224_2 Depth=1
-; RV32IA-NEXT: sltu a0, s2, a5
+; RV32IA-NEXT: sltu a0, s2, a4
; RV32IA-NEXT: .LBB224_5: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB224_2 Depth=1
-; RV32IA-NEXT: mv a2, a5
-; RV32IA-NEXT: mv a3, a4
+; RV32IA-NEXT: mv a2, a4
+; RV32IA-NEXT: mv a3, a5
; RV32IA-NEXT: bnez a0, .LBB224_1
; RV32IA-NEXT: # %bb.6: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB224_2 Depth=1
; RV32IA-NEXT: mv a2, s2
-; RV32IA-NEXT: mv a3, s1
+; RV32IA-NEXT: mv a3, s0
; RV32IA-NEXT: j .LBB224_1
; RV32IA-NEXT: .LBB224_7: # %atomicrmw.end
-; RV32IA-NEXT: mv a0, a5
-; RV32IA-NEXT: mv a1, a4
+; RV32IA-NEXT: mv a0, a4
+; RV32IA-NEXT: mv a1, a5
; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -26857,46 +26857,46 @@ define i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lw a5, 0(a0)
-; RV32I-NEXT: lw a4, 4(a0)
-; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: mv s1, a0
+; RV32I-NEXT: lw a4, 0(a0)
+; RV32I-NEXT: lw a5, 4(a0)
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: j .LBB225_2
; RV32I-NEXT: .LBB225_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB225_2 Depth=1
-; RV32I-NEXT: sw a5, 8(sp)
-; RV32I-NEXT: sw a4, 12(sp)
+; RV32I-NEXT: sw a4, 8(sp)
+; RV32I-NEXT: sw a5, 12(sp)
; RV32I-NEXT: addi a1, sp, 8
-; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: li a5, 0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 8(sp)
-; RV32I-NEXT: lw a4, 12(sp)
+; RV32I-NEXT: lw a4, 8(sp)
+; RV32I-NEXT: lw a5, 12(sp)
; RV32I-NEXT: bnez a0, .LBB225_7
; RV32I-NEXT: .LBB225_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT: beq a4, s1, .LBB225_4
+; RV32I-NEXT: beq a5, s0, .LBB225_4
; RV32I-NEXT: # %bb.3: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB225_2 Depth=1
-; RV32I-NEXT: slt a0, s1, a4
+; RV32I-NEXT: slt a0, s0, a5
; RV32I-NEXT: j .LBB225_5
; RV32I-NEXT: .LBB225_4: # in Loop: Header=BB225_2 Depth=1
-; RV32I-NEXT: sltu a0, s2, a5
+; RV32I-NEXT: sltu a0, s2, a4
; RV32I-NEXT: .LBB225_5: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB225_2 Depth=1
-; RV32I-NEXT: mv a2, a5
-; RV32I-NEXT: mv a3, a4
+; RV32I-NEXT: mv a2, a4
+; RV32I-NEXT: mv a3, a5
; RV32I-NEXT: beqz a0, .LBB225_1
; RV32I-NEXT: # %bb.6: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB225_2 Depth=1
; RV32I-NEXT: mv a2, s2
-; RV32I-NEXT: mv a3, s1
+; RV32I-NEXT: mv a3, s0
; RV32I-NEXT: j .LBB225_1
; RV32I-NEXT: .LBB225_7: # %atomicrmw.end
-; RV32I-NEXT: mv a0, a5
-; RV32I-NEXT: mv a1, a4
+; RV32I-NEXT: mv a0, a4
+; RV32I-NEXT: mv a1, a5
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -26911,46 +26911,46 @@ define i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: mv s0, a0
-; RV32IA-NEXT: lw a5, 0(a0)
-; RV32IA-NEXT: lw a4, 4(a0)
-; RV32IA-NEXT: mv s1, a2
+; RV32IA-NEXT: mv s0, a2
+; RV32IA-NEXT: mv s1, a0
+; RV32IA-NEXT: lw a4, 0(a0)
+; RV32IA-NEXT: lw a5, 4(a0)
; RV32IA-NEXT: mv s2, a1
; RV32IA-NEXT: j .LBB225_2
; RV32IA-NEXT: .LBB225_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB225_2 Depth=1
-; RV32IA-NEXT: sw a5, 8(sp)
-; RV32IA-NEXT: sw a4, 12(sp)
+; RV32IA-NEXT: sw a4, 8(sp)
+; RV32IA-NEXT: sw a5, 12(sp)
; RV32IA-NEXT: addi a1, sp, 8
-; RV32IA-NEXT: mv a0, s0
+; RV32IA-NEXT: mv a0, s1
; RV32IA-NEXT: li a4, 0
; RV32IA-NEXT: li a5, 0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 8(sp)
-; RV32IA-NEXT: lw a4, 12(sp)
+; RV32IA-NEXT: lw a4, 8(sp)
+; RV32IA-NEXT: lw a5, 12(sp)
; RV32IA-NEXT: bnez a0, .LBB225_7
; RV32IA-NEXT: .LBB225_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: beq a4, s1, .LBB225_4
+; RV32IA-NEXT: beq a5, s0, .LBB225_4
; RV32IA-NEXT: # %bb.3: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB225_2 Depth=1
-; RV32IA-NEXT: slt a0, s1, a4
+; RV32IA-NEXT: slt a0, s0, a5
; RV32IA-NEXT: j .LBB225_5
; RV32IA-NEXT: .LBB225_4: # in Loop: Header=BB225_2 Depth=1
-; RV32IA-NEXT: sltu a0, s2, a5
+; RV32IA-NEXT: sltu a0, s2, a4
; RV32IA-NEXT: .LBB225_5: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB225_2 Depth=1
-; RV32IA-NEXT: mv a2, a5
-; RV32IA-NEXT: mv a3, a4
+; RV32IA-NEXT: mv a2, a4
+; RV32IA-NEXT: mv a3, a5
; RV32IA-NEXT: beqz a0, .LBB225_1
; RV32IA-NEXT: # %bb.6: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB225_2 Depth=1
; RV32IA-NEXT: mv a2, s2
-; RV32IA-NEXT: mv a3, s1
+; RV32IA-NEXT: mv a3, s0
; RV32IA-NEXT: j .LBB225_1
; RV32IA-NEXT: .LBB225_7: # %atomicrmw.end
-; RV32IA-NEXT: mv a0, a5
-; RV32IA-NEXT: mv a1, a4
+; RV32IA-NEXT: mv a0, a4
+; RV32IA-NEXT: mv a1, a5
; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -27010,46 +27010,46 @@ define i64 @atomicrmw_min_i64_acquire(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lw a5, 0(a0)
-; RV32I-NEXT: lw a4, 4(a0)
-; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: mv s1, a0
+; RV32I-NEXT: lw a4, 0(a0)
+; RV32I-NEXT: lw a5, 4(a0)
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: j .LBB226_2
; RV32I-NEXT: .LBB226_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB226_2 Depth=1
-; RV32I-NEXT: sw a5, 8(sp)
-; RV32I-NEXT: sw a4, 12(sp)
+; RV32I-NEXT: sw a4, 8(sp)
+; RV32I-NEXT: sw a5, 12(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: li a5, 2
-; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 8(sp)
-; RV32I-NEXT: lw a4, 12(sp)
+; RV32I-NEXT: lw a4, 8(sp)
+; RV32I-NEXT: lw a5, 12(sp)
; RV32I-NEXT: bnez a0, .LBB226_7
; RV32I-NEXT: .LBB226_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT: beq a4, s1, .LBB226_4
+; RV32I-NEXT: beq a5, s0, .LBB226_4
; RV32I-NEXT: # %bb.3: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB226_2 Depth=1
-; RV32I-NEXT: slt a0, s1, a4
+; RV32I-NEXT: slt a0, s0, a5
; RV32I-NEXT: j .LBB226_5
; RV32I-NEXT: .LBB226_4: # in Loop: Header=BB226_2 Depth=1
-; RV32I-NEXT: sltu a0, s2, a5
+; RV32I-NEXT: sltu a0, s2, a4
; RV32I-NEXT: .LBB226_5: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB226_2 Depth=1
-; RV32I-NEXT: mv a2, a5
-; RV32I-NEXT: mv a3, a4
+; RV32I-NEXT: mv a2, a4
+; RV32I-NEXT: mv a3, a5
; RV32I-NEXT: beqz a0, .LBB226_1
; RV32I-NEXT: # %bb.6: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB226_2 Depth=1
; RV32I-NEXT: mv a2, s2
-; RV32I-NEXT: mv a3, s1
+; RV32I-NEXT: mv a3, s0
; RV32I-NEXT: j .LBB226_1
; RV32I-NEXT: .LBB226_7: # %atomicrmw.end
-; RV32I-NEXT: mv a0, a5
-; RV32I-NEXT: mv a1, a4
+; RV32I-NEXT: mv a0, a4
+; RV32I-NEXT: mv a1, a5
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -27064,46 +27064,46 @@ define i64 @atomicrmw_min_i64_acquire(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: mv s0, a0
-; RV32IA-NEXT: lw a5, 0(a0)
-; RV32IA-NEXT: lw a4, 4(a0)
-; RV32IA-NEXT: mv s1, a2
+; RV32IA-NEXT: mv s0, a2
+; RV32IA-NEXT: mv s1, a0
+; RV32IA-NEXT: lw a4, 0(a0)
+; RV32IA-NEXT: lw a5, 4(a0)
; RV32IA-NEXT: mv s2, a1
; RV32IA-NEXT: j .LBB226_2
; RV32IA-NEXT: .LBB226_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB226_2 Depth=1
-; RV32IA-NEXT: sw a5, 8(sp)
-; RV32IA-NEXT: sw a4, 12(sp)
+; RV32IA-NEXT: sw a4, 8(sp)
+; RV32IA-NEXT: sw a5, 12(sp)
; RV32IA-NEXT: addi a1, sp, 8
; RV32IA-NEXT: li a4, 2
; RV32IA-NEXT: li a5, 2
-; RV32IA-NEXT: mv a0, s0
+; RV32IA-NEXT: mv a0, s1
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 8(sp)
-; RV32IA-NEXT: lw a4, 12(sp)
+; RV32IA-NEXT: lw a4, 8(sp)
+; RV32IA-NEXT: lw a5, 12(sp)
; RV32IA-NEXT: bnez a0, .LBB226_7
; RV32IA-NEXT: .LBB226_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: beq a4, s1, .LBB226_4
+; RV32IA-NEXT: beq a5, s0, .LBB226_4
; RV32IA-NEXT: # %bb.3: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB226_2 Depth=1
-; RV32IA-NEXT: slt a0, s1, a4
+; RV32IA-NEXT: slt a0, s0, a5
; RV32IA-NEXT: j .LBB226_5
; RV32IA-NEXT: .LBB226_4: # in Loop: Header=BB226_2 Depth=1
-; RV32IA-NEXT: sltu a0, s2, a5
+; RV32IA-NEXT: sltu a0, s2, a4
; RV32IA-NEXT: .LBB226_5: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB226_2 Depth=1
-; RV32IA-NEXT: mv a2, a5
-; RV32IA-NEXT: mv a3, a4
+; RV32IA-NEXT: mv a2, a4
+; RV32IA-NEXT: mv a3, a5
; RV32IA-NEXT: beqz a0, .LBB226_1
; RV32IA-NEXT: # %bb.6: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB226_2 Depth=1
; RV32IA-NEXT: mv a2, s2
-; RV32IA-NEXT: mv a3, s1
+; RV32IA-NEXT: mv a3, s0
; RV32IA-NEXT: j .LBB226_1
; RV32IA-NEXT: .LBB226_7: # %atomicrmw.end
-; RV32IA-NEXT: mv a0, a5
-; RV32IA-NEXT: mv a1, a4
+; RV32IA-NEXT: mv a0, a4
+; RV32IA-NEXT: mv a1, a5
; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -27168,46 +27168,46 @@ define i64 @atomicrmw_min_i64_release(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lw a5, 0(a0)
-; RV32I-NEXT: lw a4, 4(a0)
-; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: mv s1, a0
+; RV32I-NEXT: lw a4, 0(a0)
+; RV32I-NEXT: lw a5, 4(a0)
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: j .LBB227_2
; RV32I-NEXT: .LBB227_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB227_2 Depth=1
-; RV32I-NEXT: sw a5, 8(sp)
-; RV32I-NEXT: sw a4, 12(sp)
+; RV32I-NEXT: sw a4, 8(sp)
+; RV32I-NEXT: sw a5, 12(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a4, 3
-; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: li a5, 0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 8(sp)
-; RV32I-NEXT: lw a4, 12(sp)
+; RV32I-NEXT: lw a4, 8(sp)
+; RV32I-NEXT: lw a5, 12(sp)
; RV32I-NEXT: bnez a0, .LBB227_7
; RV32I-NEXT: .LBB227_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT: beq a4, s1, .LBB227_4
+; RV32I-NEXT: beq a5, s0, .LBB227_4
; RV32I-NEXT: # %bb.3: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB227_2 Depth=1
-; RV32I-NEXT: slt a0, s1, a4
+; RV32I-NEXT: slt a0, s0, a5
; RV32I-NEXT: j .LBB227_5
; RV32I-NEXT: .LBB227_4: # in Loop: Header=BB227_2 Depth=1
-; RV32I-NEXT: sltu a0, s2, a5
+; RV32I-NEXT: sltu a0, s2, a4
; RV32I-NEXT: .LBB227_5: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB227_2 Depth=1
-; RV32I-NEXT: mv a2, a5
-; RV32I-NEXT: mv a3, a4
+; RV32I-NEXT: mv a2, a4
+; RV32I-NEXT: mv a3, a5
; RV32I-NEXT: beqz a0, .LBB227_1
; RV32I-NEXT: # %bb.6: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB227_2 Depth=1
; RV32I-NEXT: mv a2, s2
-; RV32I-NEXT: mv a3, s1
+; RV32I-NEXT: mv a3, s0
; RV32I-NEXT: j .LBB227_1
; RV32I-NEXT: .LBB227_7: # %atomicrmw.end
-; RV32I-NEXT: mv a0, a5
-; RV32I-NEXT: mv a1, a4
+; RV32I-NEXT: mv a0, a4
+; RV32I-NEXT: mv a1, a5
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -27222,46 +27222,46 @@ define i64 @atomicrmw_min_i64_release(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: mv s0, a0
-; RV32IA-NEXT: lw a5, 0(a0)
-; RV32IA-NEXT: lw a4, 4(a0)
-; RV32IA-NEXT: mv s1, a2
+; RV32IA-NEXT: mv s0, a2
+; RV32IA-NEXT: mv s1, a0
+; RV32IA-NEXT: lw a4, 0(a0)
+; RV32IA-NEXT: lw a5, 4(a0)
; RV32IA-NEXT: mv s2, a1
; RV32IA-NEXT: j .LBB227_2
; RV32IA-NEXT: .LBB227_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB227_2 Depth=1
-; RV32IA-NEXT: sw a5, 8(sp)
-; RV32IA-NEXT: sw a4, 12(sp)
+; RV32IA-NEXT: sw a4, 8(sp)
+; RV32IA-NEXT: sw a5, 12(sp)
; RV32IA-NEXT: addi a1, sp, 8
; RV32IA-NEXT: li a4, 3
-; RV32IA-NEXT: mv a0, s0
+; RV32IA-NEXT: mv a0, s1
; RV32IA-NEXT: li a5, 0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 8(sp)
-; RV32IA-NEXT: lw a4, 12(sp)
+; RV32IA-NEXT: lw a4, 8(sp)
+; RV32IA-NEXT: lw a5, 12(sp)
; RV32IA-NEXT: bnez a0, .LBB227_7
; RV32IA-NEXT: .LBB227_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: beq a4, s1, .LBB227_4
+; RV32IA-NEXT: beq a5, s0, .LBB227_4
; RV32IA-NEXT: # %bb.3: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB227_2 Depth=1
-; RV32IA-NEXT: slt a0, s1, a4
+; RV32IA-NEXT: slt a0, s0, a5
; RV32IA-NEXT: j .LBB227_5
; RV32IA-NEXT: .LBB227_4: # in Loop: Header=BB227_2 Depth=1
-; RV32IA-NEXT: sltu a0, s2, a5
+; RV32IA-NEXT: sltu a0, s2, a4
; RV32IA-NEXT: .LBB227_5: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB227_2 Depth=1
-; RV32IA-NEXT: mv a2, a5
-; RV32IA-NEXT: mv a3, a4
+; RV32IA-NEXT: mv a2, a4
+; RV32IA-NEXT: mv a3, a5
; RV32IA-NEXT: beqz a0, .LBB227_1
; RV32IA-NEXT: # %bb.6: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB227_2 Depth=1
; RV32IA-NEXT: mv a2, s2
-; RV32IA-NEXT: mv a3, s1
+; RV32IA-NEXT: mv a3, s0
; RV32IA-NEXT: j .LBB227_1
; RV32IA-NEXT: .LBB227_7: # %atomicrmw.end
-; RV32IA-NEXT: mv a0, a5
-; RV32IA-NEXT: mv a1, a4
+; RV32IA-NEXT: mv a0, a4
+; RV32IA-NEXT: mv a1, a5
; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -27326,46 +27326,46 @@ define i64 @atomicrmw_min_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lw a5, 0(a0)
-; RV32I-NEXT: lw a4, 4(a0)
-; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: mv s1, a0
+; RV32I-NEXT: lw a4, 0(a0)
+; RV32I-NEXT: lw a5, 4(a0)
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: j .LBB228_2
; RV32I-NEXT: .LBB228_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB228_2 Depth=1
-; RV32I-NEXT: sw a5, 8(sp)
-; RV32I-NEXT: sw a4, 12(sp)
+; RV32I-NEXT: sw a4, 8(sp)
+; RV32I-NEXT: sw a5, 12(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a4, 4
; RV32I-NEXT: li a5, 2
-; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 8(sp)
-; RV32I-NEXT: lw a4, 12(sp)
+; RV32I-NEXT: lw a4, 8(sp)
+; RV32I-NEXT: lw a5, 12(sp)
; RV32I-NEXT: bnez a0, .LBB228_7
; RV32I-NEXT: .LBB228_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT: beq a4, s1, .LBB228_4
+; RV32I-NEXT: beq a5, s0, .LBB228_4
; RV32I-NEXT: # %bb.3: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB228_2 Depth=1
-; RV32I-NEXT: slt a0, s1, a4
+; RV32I-NEXT: slt a0, s0, a5
; RV32I-NEXT: j .LBB228_5
; RV32I-NEXT: .LBB228_4: # in Loop: Header=BB228_2 Depth=1
-; RV32I-NEXT: sltu a0, s2, a5
+; RV32I-NEXT: sltu a0, s2, a4
; RV32I-NEXT: .LBB228_5: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB228_2 Depth=1
-; RV32I-NEXT: mv a2, a5
-; RV32I-NEXT: mv a3, a4
+; RV32I-NEXT: mv a2, a4
+; RV32I-NEXT: mv a3, a5
; RV32I-NEXT: beqz a0, .LBB228_1
; RV32I-NEXT: # %bb.6: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB228_2 Depth=1
; RV32I-NEXT: mv a2, s2
-; RV32I-NEXT: mv a3, s1
+; RV32I-NEXT: mv a3, s0
; RV32I-NEXT: j .LBB228_1
; RV32I-NEXT: .LBB228_7: # %atomicrmw.end
-; RV32I-NEXT: mv a0, a5
-; RV32I-NEXT: mv a1, a4
+; RV32I-NEXT: mv a0, a4
+; RV32I-NEXT: mv a1, a5
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -27380,46 +27380,46 @@ define i64 @atomicrmw_min_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: mv s0, a0
-; RV32IA-NEXT: lw a5, 0(a0)
-; RV32IA-NEXT: lw a4, 4(a0)
-; RV32IA-NEXT: mv s1, a2
+; RV32IA-NEXT: mv s0, a2
+; RV32IA-NEXT: mv s1, a0
+; RV32IA-NEXT: lw a4, 0(a0)
+; RV32IA-NEXT: lw a5, 4(a0)
; RV32IA-NEXT: mv s2, a1
; RV32IA-NEXT: j .LBB228_2
; RV32IA-NEXT: .LBB228_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB228_2 Depth=1
-; RV32IA-NEXT: sw a5, 8(sp)
-; RV32IA-NEXT: sw a4, 12(sp)
+; RV32IA-NEXT: sw a4, 8(sp)
+; RV32IA-NEXT: sw a5, 12(sp)
; RV32IA-NEXT: addi a1, sp, 8
; RV32IA-NEXT: li a4, 4
; RV32IA-NEXT: li a5, 2
-; RV32IA-NEXT: mv a0, s0
+; RV32IA-NEXT: mv a0, s1
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 8(sp)
-; RV32IA-NEXT: lw a4, 12(sp)
+; RV32IA-NEXT: lw a4, 8(sp)
+; RV32IA-NEXT: lw a5, 12(sp)
; RV32IA-NEXT: bnez a0, .LBB228_7
; RV32IA-NEXT: .LBB228_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: beq a4, s1, .LBB228_4
+; RV32IA-NEXT: beq a5, s0, .LBB228_4
; RV32IA-NEXT: # %bb.3: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB228_2 Depth=1
-; RV32IA-NEXT: slt a0, s1, a4
+; RV32IA-NEXT: slt a0, s0, a5
; RV32IA-NEXT: j .LBB228_5
; RV32IA-NEXT: .LBB228_4: # in Loop: Header=BB228_2 Depth=1
-; RV32IA-NEXT: sltu a0, s2, a5
+; RV32IA-NEXT: sltu a0, s2, a4
; RV32IA-NEXT: .LBB228_5: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB228_2 Depth=1
-; RV32IA-NEXT: mv a2, a5
-; RV32IA-NEXT: mv a3, a4
+; RV32IA-NEXT: mv a2, a4
+; RV32IA-NEXT: mv a3, a5
; RV32IA-NEXT: beqz a0, .LBB228_1
; RV32IA-NEXT: # %bb.6: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB228_2 Depth=1
; RV32IA-NEXT: mv a2, s2
-; RV32IA-NEXT: mv a3, s1
+; RV32IA-NEXT: mv a3, s0
; RV32IA-NEXT: j .LBB228_1
; RV32IA-NEXT: .LBB228_7: # %atomicrmw.end
-; RV32IA-NEXT: mv a0, a5
-; RV32IA-NEXT: mv a1, a4
+; RV32IA-NEXT: mv a0, a4
+; RV32IA-NEXT: mv a1, a5
; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -27484,46 +27484,46 @@ define i64 @atomicrmw_min_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lw a5, 0(a0)
-; RV32I-NEXT: lw a4, 4(a0)
-; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: mv s1, a0
+; RV32I-NEXT: lw a4, 0(a0)
+; RV32I-NEXT: lw a5, 4(a0)
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: j .LBB229_2
; RV32I-NEXT: .LBB229_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB229_2 Depth=1
-; RV32I-NEXT: sw a5, 8(sp)
-; RV32I-NEXT: sw a4, 12(sp)
+; RV32I-NEXT: sw a4, 8(sp)
+; RV32I-NEXT: sw a5, 12(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: li a5, 5
-; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 8(sp)
-; RV32I-NEXT: lw a4, 12(sp)
+; RV32I-NEXT: lw a4, 8(sp)
+; RV32I-NEXT: lw a5, 12(sp)
; RV32I-NEXT: bnez a0, .LBB229_7
; RV32I-NEXT: .LBB229_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT: beq a4, s1, .LBB229_4
+; RV32I-NEXT: beq a5, s0, .LBB229_4
; RV32I-NEXT: # %bb.3: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB229_2 Depth=1
-; RV32I-NEXT: slt a0, s1, a4
+; RV32I-NEXT: slt a0, s0, a5
; RV32I-NEXT: j .LBB229_5
; RV32I-NEXT: .LBB229_4: # in Loop: Header=BB229_2 Depth=1
-; RV32I-NEXT: sltu a0, s2, a5
+; RV32I-NEXT: sltu a0, s2, a4
; RV32I-NEXT: .LBB229_5: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB229_2 Depth=1
-; RV32I-NEXT: mv a2, a5
-; RV32I-NEXT: mv a3, a4
+; RV32I-NEXT: mv a2, a4
+; RV32I-NEXT: mv a3, a5
; RV32I-NEXT: beqz a0, .LBB229_1
; RV32I-NEXT: # %bb.6: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB229_2 Depth=1
; RV32I-NEXT: mv a2, s2
-; RV32I-NEXT: mv a3, s1
+; RV32I-NEXT: mv a3, s0
; RV32I-NEXT: j .LBB229_1
; RV32I-NEXT: .LBB229_7: # %atomicrmw.end
-; RV32I-NEXT: mv a0, a5
-; RV32I-NEXT: mv a1, a4
+; RV32I-NEXT: mv a0, a4
+; RV32I-NEXT: mv a1, a5
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -27538,46 +27538,46 @@ define i64 @atomicrmw_min_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: mv s0, a0
-; RV32IA-NEXT: lw a5, 0(a0)
-; RV32IA-NEXT: lw a4, 4(a0)
-; RV32IA-NEXT: mv s1, a2
+; RV32IA-NEXT: mv s0, a2
+; RV32IA-NEXT: mv s1, a0
+; RV32IA-NEXT: lw a4, 0(a0)
+; RV32IA-NEXT: lw a5, 4(a0)
; RV32IA-NEXT: mv s2, a1
; RV32IA-NEXT: j .LBB229_2
; RV32IA-NEXT: .LBB229_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB229_2 Depth=1
-; RV32IA-NEXT: sw a5, 8(sp)
-; RV32IA-NEXT: sw a4, 12(sp)
+; RV32IA-NEXT: sw a4, 8(sp)
+; RV32IA-NEXT: sw a5, 12(sp)
; RV32IA-NEXT: addi a1, sp, 8
; RV32IA-NEXT: li a4, 5
; RV32IA-NEXT: li a5, 5
-; RV32IA-NEXT: mv a0, s0
+; RV32IA-NEXT: mv a0, s1
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 8(sp)
-; RV32IA-NEXT: lw a4, 12(sp)
+; RV32IA-NEXT: lw a4, 8(sp)
+; RV32IA-NEXT: lw a5, 12(sp)
; RV32IA-NEXT: bnez a0, .LBB229_7
; RV32IA-NEXT: .LBB229_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: beq a4, s1, .LBB229_4
+; RV32IA-NEXT: beq a5, s0, .LBB229_4
; RV32IA-NEXT: # %bb.3: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB229_2 Depth=1
-; RV32IA-NEXT: slt a0, s1, a4
+; RV32IA-NEXT: slt a0, s0, a5
; RV32IA-NEXT: j .LBB229_5
; RV32IA-NEXT: .LBB229_4: # in Loop: Header=BB229_2 Depth=1
-; RV32IA-NEXT: sltu a0, s2, a5
+; RV32IA-NEXT: sltu a0, s2, a4
; RV32IA-NEXT: .LBB229_5: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB229_2 Depth=1
-; RV32IA-NEXT: mv a2, a5
-; RV32IA-NEXT: mv a3, a4
+; RV32IA-NEXT: mv a2, a4
+; RV32IA-NEXT: mv a3, a5
; RV32IA-NEXT: beqz a0, .LBB229_1
; RV32IA-NEXT: # %bb.6: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB229_2 Depth=1
; RV32IA-NEXT: mv a2, s2
-; RV32IA-NEXT: mv a3, s1
+; RV32IA-NEXT: mv a3, s0
; RV32IA-NEXT: j .LBB229_1
; RV32IA-NEXT: .LBB229_7: # %atomicrmw.end
-; RV32IA-NEXT: mv a0, a5
-; RV32IA-NEXT: mv a1, a4
+; RV32IA-NEXT: mv a0, a4
+; RV32IA-NEXT: mv a1, a5
; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -27642,46 +27642,46 @@ define i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lw a5, 0(a0)
-; RV32I-NEXT: lw a4, 4(a0)
-; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: mv s1, a0
+; RV32I-NEXT: lw a4, 0(a0)
+; RV32I-NEXT: lw a5, 4(a0)
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: j .LBB230_2
; RV32I-NEXT: .LBB230_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB230_2 Depth=1
-; RV32I-NEXT: sw a5, 8(sp)
-; RV32I-NEXT: sw a4, 12(sp)
+; RV32I-NEXT: sw a4, 8(sp)
+; RV32I-NEXT: sw a5, 12(sp)
; RV32I-NEXT: addi a1, sp, 8
-; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: li a5, 0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 8(sp)
-; RV32I-NEXT: lw a4, 12(sp)
+; RV32I-NEXT: lw a4, 8(sp)
+; RV32I-NEXT: lw a5, 12(sp)
; RV32I-NEXT: bnez a0, .LBB230_7
; RV32I-NEXT: .LBB230_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT: beq a4, s1, .LBB230_4
+; RV32I-NEXT: beq a5, s0, .LBB230_4
; RV32I-NEXT: # %bb.3: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB230_2 Depth=1
-; RV32I-NEXT: sltu a0, s1, a4
+; RV32I-NEXT: sltu a0, s0, a5
; RV32I-NEXT: j .LBB230_5
; RV32I-NEXT: .LBB230_4: # in Loop: Header=BB230_2 Depth=1
-; RV32I-NEXT: sltu a0, s2, a5
+; RV32I-NEXT: sltu a0, s2, a4
; RV32I-NEXT: .LBB230_5: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB230_2 Depth=1
-; RV32I-NEXT: mv a2, a5
-; RV32I-NEXT: mv a3, a4
+; RV32I-NEXT: mv a2, a4
+; RV32I-NEXT: mv a3, a5
; RV32I-NEXT: bnez a0, .LBB230_1
; RV32I-NEXT: # %bb.6: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB230_2 Depth=1
; RV32I-NEXT: mv a2, s2
-; RV32I-NEXT: mv a3, s1
+; RV32I-NEXT: mv a3, s0
; RV32I-NEXT: j .LBB230_1
; RV32I-NEXT: .LBB230_7: # %atomicrmw.end
-; RV32I-NEXT: mv a0, a5
-; RV32I-NEXT: mv a1, a4
+; RV32I-NEXT: mv a0, a4
+; RV32I-NEXT: mv a1, a5
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -27696,46 +27696,46 @@ define i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: mv s0, a0
-; RV32IA-NEXT: lw a5, 0(a0)
-; RV32IA-NEXT: lw a4, 4(a0)
-; RV32IA-NEXT: mv s1, a2
+; RV32IA-NEXT: mv s0, a2
+; RV32IA-NEXT: mv s1, a0
+; RV32IA-NEXT: lw a4, 0(a0)
+; RV32IA-NEXT: lw a5, 4(a0)
; RV32IA-NEXT: mv s2, a1
; RV32IA-NEXT: j .LBB230_2
; RV32IA-NEXT: .LBB230_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB230_2 Depth=1
-; RV32IA-NEXT: sw a5, 8(sp)
-; RV32IA-NEXT: sw a4, 12(sp)
+; RV32IA-NEXT: sw a4, 8(sp)
+; RV32IA-NEXT: sw a5, 12(sp)
; RV32IA-NEXT: addi a1, sp, 8
-; RV32IA-NEXT: mv a0, s0
+; RV32IA-NEXT: mv a0, s1
; RV32IA-NEXT: li a4, 0
; RV32IA-NEXT: li a5, 0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 8(sp)
-; RV32IA-NEXT: lw a4, 12(sp)
+; RV32IA-NEXT: lw a4, 8(sp)
+; RV32IA-NEXT: lw a5, 12(sp)
; RV32IA-NEXT: bnez a0, .LBB230_7
; RV32IA-NEXT: .LBB230_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: beq a4, s1, .LBB230_4
+; RV32IA-NEXT: beq a5, s0, .LBB230_4
; RV32IA-NEXT: # %bb.3: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB230_2 Depth=1
-; RV32IA-NEXT: sltu a0, s1, a4
+; RV32IA-NEXT: sltu a0, s0, a5
; RV32IA-NEXT: j .LBB230_5
; RV32IA-NEXT: .LBB230_4: # in Loop: Header=BB230_2 Depth=1
-; RV32IA-NEXT: sltu a0, s2, a5
+; RV32IA-NEXT: sltu a0, s2, a4
; RV32IA-NEXT: .LBB230_5: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB230_2 Depth=1
-; RV32IA-NEXT: mv a2, a5
-; RV32IA-NEXT: mv a3, a4
+; RV32IA-NEXT: mv a2, a4
+; RV32IA-NEXT: mv a3, a5
; RV32IA-NEXT: bnez a0, .LBB230_1
; RV32IA-NEXT: # %bb.6: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB230_2 Depth=1
; RV32IA-NEXT: mv a2, s2
-; RV32IA-NEXT: mv a3, s1
+; RV32IA-NEXT: mv a3, s0
; RV32IA-NEXT: j .LBB230_1
; RV32IA-NEXT: .LBB230_7: # %atomicrmw.end
-; RV32IA-NEXT: mv a0, a5
-; RV32IA-NEXT: mv a1, a4
+; RV32IA-NEXT: mv a0, a4
+; RV32IA-NEXT: mv a1, a5
; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -27795,46 +27795,46 @@ define i64 @atomicrmw_umax_i64_acquire(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lw a5, 0(a0)
-; RV32I-NEXT: lw a4, 4(a0)
-; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: mv s1, a0
+; RV32I-NEXT: lw a4, 0(a0)
+; RV32I-NEXT: lw a5, 4(a0)
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: j .LBB231_2
; RV32I-NEXT: .LBB231_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB231_2 Depth=1
-; RV32I-NEXT: sw a5, 8(sp)
-; RV32I-NEXT: sw a4, 12(sp)
+; RV32I-NEXT: sw a4, 8(sp)
+; RV32I-NEXT: sw a5, 12(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: li a5, 2
-; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 8(sp)
-; RV32I-NEXT: lw a4, 12(sp)
+; RV32I-NEXT: lw a4, 8(sp)
+; RV32I-NEXT: lw a5, 12(sp)
; RV32I-NEXT: bnez a0, .LBB231_7
; RV32I-NEXT: .LBB231_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT: beq a4, s1, .LBB231_4
+; RV32I-NEXT: beq a5, s0, .LBB231_4
; RV32I-NEXT: # %bb.3: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB231_2 Depth=1
-; RV32I-NEXT: sltu a0, s1, a4
+; RV32I-NEXT: sltu a0, s0, a5
; RV32I-NEXT: j .LBB231_5
; RV32I-NEXT: .LBB231_4: # in Loop: Header=BB231_2 Depth=1
-; RV32I-NEXT: sltu a0, s2, a5
+; RV32I-NEXT: sltu a0, s2, a4
; RV32I-NEXT: .LBB231_5: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB231_2 Depth=1
-; RV32I-NEXT: mv a2, a5
-; RV32I-NEXT: mv a3, a4
+; RV32I-NEXT: mv a2, a4
+; RV32I-NEXT: mv a3, a5
; RV32I-NEXT: bnez a0, .LBB231_1
; RV32I-NEXT: # %bb.6: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB231_2 Depth=1
; RV32I-NEXT: mv a2, s2
-; RV32I-NEXT: mv a3, s1
+; RV32I-NEXT: mv a3, s0
; RV32I-NEXT: j .LBB231_1
; RV32I-NEXT: .LBB231_7: # %atomicrmw.end
-; RV32I-NEXT: mv a0, a5
-; RV32I-NEXT: mv a1, a4
+; RV32I-NEXT: mv a0, a4
+; RV32I-NEXT: mv a1, a5
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -27849,46 +27849,46 @@ define i64 @atomicrmw_umax_i64_acquire(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: mv s0, a0
-; RV32IA-NEXT: lw a5, 0(a0)
-; RV32IA-NEXT: lw a4, 4(a0)
-; RV32IA-NEXT: mv s1, a2
+; RV32IA-NEXT: mv s0, a2
+; RV32IA-NEXT: mv s1, a0
+; RV32IA-NEXT: lw a4, 0(a0)
+; RV32IA-NEXT: lw a5, 4(a0)
; RV32IA-NEXT: mv s2, a1
; RV32IA-NEXT: j .LBB231_2
; RV32IA-NEXT: .LBB231_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB231_2 Depth=1
-; RV32IA-NEXT: sw a5, 8(sp)
-; RV32IA-NEXT: sw a4, 12(sp)
+; RV32IA-NEXT: sw a4, 8(sp)
+; RV32IA-NEXT: sw a5, 12(sp)
; RV32IA-NEXT: addi a1, sp, 8
; RV32IA-NEXT: li a4, 2
; RV32IA-NEXT: li a5, 2
-; RV32IA-NEXT: mv a0, s0
+; RV32IA-NEXT: mv a0, s1
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 8(sp)
-; RV32IA-NEXT: lw a4, 12(sp)
+; RV32IA-NEXT: lw a4, 8(sp)
+; RV32IA-NEXT: lw a5, 12(sp)
; RV32IA-NEXT: bnez a0, .LBB231_7
; RV32IA-NEXT: .LBB231_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: beq a4, s1, .LBB231_4
+; RV32IA-NEXT: beq a5, s0, .LBB231_4
; RV32IA-NEXT: # %bb.3: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB231_2 Depth=1
-; RV32IA-NEXT: sltu a0, s1, a4
+; RV32IA-NEXT: sltu a0, s0, a5
; RV32IA-NEXT: j .LBB231_5
; RV32IA-NEXT: .LBB231_4: # in Loop: Header=BB231_2 Depth=1
-; RV32IA-NEXT: sltu a0, s2, a5
+; RV32IA-NEXT: sltu a0, s2, a4
; RV32IA-NEXT: .LBB231_5: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB231_2 Depth=1
-; RV32IA-NEXT: mv a2, a5
-; RV32IA-NEXT: mv a3, a4
+; RV32IA-NEXT: mv a2, a4
+; RV32IA-NEXT: mv a3, a5
; RV32IA-NEXT: bnez a0, .LBB231_1
; RV32IA-NEXT: # %bb.6: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB231_2 Depth=1
; RV32IA-NEXT: mv a2, s2
-; RV32IA-NEXT: mv a3, s1
+; RV32IA-NEXT: mv a3, s0
; RV32IA-NEXT: j .LBB231_1
; RV32IA-NEXT: .LBB231_7: # %atomicrmw.end
-; RV32IA-NEXT: mv a0, a5
-; RV32IA-NEXT: mv a1, a4
+; RV32IA-NEXT: mv a0, a4
+; RV32IA-NEXT: mv a1, a5
; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -27953,46 +27953,46 @@ define i64 @atomicrmw_umax_i64_release(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lw a5, 0(a0)
-; RV32I-NEXT: lw a4, 4(a0)
-; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: mv s1, a0
+; RV32I-NEXT: lw a4, 0(a0)
+; RV32I-NEXT: lw a5, 4(a0)
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: j .LBB232_2
; RV32I-NEXT: .LBB232_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB232_2 Depth=1
-; RV32I-NEXT: sw a5, 8(sp)
-; RV32I-NEXT: sw a4, 12(sp)
+; RV32I-NEXT: sw a4, 8(sp)
+; RV32I-NEXT: sw a5, 12(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a4, 3
-; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: li a5, 0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 8(sp)
-; RV32I-NEXT: lw a4, 12(sp)
+; RV32I-NEXT: lw a4, 8(sp)
+; RV32I-NEXT: lw a5, 12(sp)
; RV32I-NEXT: bnez a0, .LBB232_7
; RV32I-NEXT: .LBB232_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT: beq a4, s1, .LBB232_4
+; RV32I-NEXT: beq a5, s0, .LBB232_4
; RV32I-NEXT: # %bb.3: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB232_2 Depth=1
-; RV32I-NEXT: sltu a0, s1, a4
+; RV32I-NEXT: sltu a0, s0, a5
; RV32I-NEXT: j .LBB232_5
; RV32I-NEXT: .LBB232_4: # in Loop: Header=BB232_2 Depth=1
-; RV32I-NEXT: sltu a0, s2, a5
+; RV32I-NEXT: sltu a0, s2, a4
; RV32I-NEXT: .LBB232_5: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB232_2 Depth=1
-; RV32I-NEXT: mv a2, a5
-; RV32I-NEXT: mv a3, a4
+; RV32I-NEXT: mv a2, a4
+; RV32I-NEXT: mv a3, a5
; RV32I-NEXT: bnez a0, .LBB232_1
; RV32I-NEXT: # %bb.6: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB232_2 Depth=1
; RV32I-NEXT: mv a2, s2
-; RV32I-NEXT: mv a3, s1
+; RV32I-NEXT: mv a3, s0
; RV32I-NEXT: j .LBB232_1
; RV32I-NEXT: .LBB232_7: # %atomicrmw.end
-; RV32I-NEXT: mv a0, a5
-; RV32I-NEXT: mv a1, a4
+; RV32I-NEXT: mv a0, a4
+; RV32I-NEXT: mv a1, a5
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -28007,46 +28007,46 @@ define i64 @atomicrmw_umax_i64_release(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: mv s0, a0
-; RV32IA-NEXT: lw a5, 0(a0)
-; RV32IA-NEXT: lw a4, 4(a0)
-; RV32IA-NEXT: mv s1, a2
+; RV32IA-NEXT: mv s0, a2
+; RV32IA-NEXT: mv s1, a0
+; RV32IA-NEXT: lw a4, 0(a0)
+; RV32IA-NEXT: lw a5, 4(a0)
; RV32IA-NEXT: mv s2, a1
; RV32IA-NEXT: j .LBB232_2
; RV32IA-NEXT: .LBB232_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB232_2 Depth=1
-; RV32IA-NEXT: sw a5, 8(sp)
-; RV32IA-NEXT: sw a4, 12(sp)
+; RV32IA-NEXT: sw a4, 8(sp)
+; RV32IA-NEXT: sw a5, 12(sp)
; RV32IA-NEXT: addi a1, sp, 8
; RV32IA-NEXT: li a4, 3
-; RV32IA-NEXT: mv a0, s0
+; RV32IA-NEXT: mv a0, s1
; RV32IA-NEXT: li a5, 0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 8(sp)
-; RV32IA-NEXT: lw a4, 12(sp)
+; RV32IA-NEXT: lw a4, 8(sp)
+; RV32IA-NEXT: lw a5, 12(sp)
; RV32IA-NEXT: bnez a0, .LBB232_7
; RV32IA-NEXT: .LBB232_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: beq a4, s1, .LBB232_4
+; RV32IA-NEXT: beq a5, s0, .LBB232_4
; RV32IA-NEXT: # %bb.3: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB232_2 Depth=1
-; RV32IA-NEXT: sltu a0, s1, a4
+; RV32IA-NEXT: sltu a0, s0, a5
; RV32IA-NEXT: j .LBB232_5
; RV32IA-NEXT: .LBB232_4: # in Loop: Header=BB232_2 Depth=1
-; RV32IA-NEXT: sltu a0, s2, a5
+; RV32IA-NEXT: sltu a0, s2, a4
; RV32IA-NEXT: .LBB232_5: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB232_2 Depth=1
-; RV32IA-NEXT: mv a2, a5
-; RV32IA-NEXT: mv a3, a4
+; RV32IA-NEXT: mv a2, a4
+; RV32IA-NEXT: mv a3, a5
; RV32IA-NEXT: bnez a0, .LBB232_1
; RV32IA-NEXT: # %bb.6: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB232_2 Depth=1
; RV32IA-NEXT: mv a2, s2
-; RV32IA-NEXT: mv a3, s1
+; RV32IA-NEXT: mv a3, s0
; RV32IA-NEXT: j .LBB232_1
; RV32IA-NEXT: .LBB232_7: # %atomicrmw.end
-; RV32IA-NEXT: mv a0, a5
-; RV32IA-NEXT: mv a1, a4
+; RV32IA-NEXT: mv a0, a4
+; RV32IA-NEXT: mv a1, a5
; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -28111,46 +28111,46 @@ define i64 @atomicrmw_umax_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lw a5, 0(a0)
-; RV32I-NEXT: lw a4, 4(a0)
-; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: mv s1, a0
+; RV32I-NEXT: lw a4, 0(a0)
+; RV32I-NEXT: lw a5, 4(a0)
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: j .LBB233_2
; RV32I-NEXT: .LBB233_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB233_2 Depth=1
-; RV32I-NEXT: sw a5, 8(sp)
-; RV32I-NEXT: sw a4, 12(sp)
+; RV32I-NEXT: sw a4, 8(sp)
+; RV32I-NEXT: sw a5, 12(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a4, 4
; RV32I-NEXT: li a5, 2
-; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 8(sp)
-; RV32I-NEXT: lw a4, 12(sp)
+; RV32I-NEXT: lw a4, 8(sp)
+; RV32I-NEXT: lw a5, 12(sp)
; RV32I-NEXT: bnez a0, .LBB233_7
; RV32I-NEXT: .LBB233_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT: beq a4, s1, .LBB233_4
+; RV32I-NEXT: beq a5, s0, .LBB233_4
; RV32I-NEXT: # %bb.3: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB233_2 Depth=1
-; RV32I-NEXT: sltu a0, s1, a4
+; RV32I-NEXT: sltu a0, s0, a5
; RV32I-NEXT: j .LBB233_5
; RV32I-NEXT: .LBB233_4: # in Loop: Header=BB233_2 Depth=1
-; RV32I-NEXT: sltu a0, s2, a5
+; RV32I-NEXT: sltu a0, s2, a4
; RV32I-NEXT: .LBB233_5: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB233_2 Depth=1
-; RV32I-NEXT: mv a2, a5
-; RV32I-NEXT: mv a3, a4
+; RV32I-NEXT: mv a2, a4
+; RV32I-NEXT: mv a3, a5
; RV32I-NEXT: bnez a0, .LBB233_1
; RV32I-NEXT: # %bb.6: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB233_2 Depth=1
; RV32I-NEXT: mv a2, s2
-; RV32I-NEXT: mv a3, s1
+; RV32I-NEXT: mv a3, s0
; RV32I-NEXT: j .LBB233_1
; RV32I-NEXT: .LBB233_7: # %atomicrmw.end
-; RV32I-NEXT: mv a0, a5
-; RV32I-NEXT: mv a1, a4
+; RV32I-NEXT: mv a0, a4
+; RV32I-NEXT: mv a1, a5
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -28165,46 +28165,46 @@ define i64 @atomicrmw_umax_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: mv s0, a0
-; RV32IA-NEXT: lw a5, 0(a0)
-; RV32IA-NEXT: lw a4, 4(a0)
-; RV32IA-NEXT: mv s1, a2
+; RV32IA-NEXT: mv s0, a2
+; RV32IA-NEXT: mv s1, a0
+; RV32IA-NEXT: lw a4, 0(a0)
+; RV32IA-NEXT: lw a5, 4(a0)
; RV32IA-NEXT: mv s2, a1
; RV32IA-NEXT: j .LBB233_2
; RV32IA-NEXT: .LBB233_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB233_2 Depth=1
-; RV32IA-NEXT: sw a5, 8(sp)
-; RV32IA-NEXT: sw a4, 12(sp)
+; RV32IA-NEXT: sw a4, 8(sp)
+; RV32IA-NEXT: sw a5, 12(sp)
; RV32IA-NEXT: addi a1, sp, 8
; RV32IA-NEXT: li a4, 4
; RV32IA-NEXT: li a5, 2
-; RV32IA-NEXT: mv a0, s0
+; RV32IA-NEXT: mv a0, s1
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 8(sp)
-; RV32IA-NEXT: lw a4, 12(sp)
+; RV32IA-NEXT: lw a4, 8(sp)
+; RV32IA-NEXT: lw a5, 12(sp)
; RV32IA-NEXT: bnez a0, .LBB233_7
; RV32IA-NEXT: .LBB233_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: beq a4, s1, .LBB233_4
+; RV32IA-NEXT: beq a5, s0, .LBB233_4
; RV32IA-NEXT: # %bb.3: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB233_2 Depth=1
-; RV32IA-NEXT: sltu a0, s1, a4
+; RV32IA-NEXT: sltu a0, s0, a5
; RV32IA-NEXT: j .LBB233_5
; RV32IA-NEXT: .LBB233_4: # in Loop: Header=BB233_2 Depth=1
-; RV32IA-NEXT: sltu a0, s2, a5
+; RV32IA-NEXT: sltu a0, s2, a4
; RV32IA-NEXT: .LBB233_5: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB233_2 Depth=1
-; RV32IA-NEXT: mv a2, a5
-; RV32IA-NEXT: mv a3, a4
+; RV32IA-NEXT: mv a2, a4
+; RV32IA-NEXT: mv a3, a5
; RV32IA-NEXT: bnez a0, .LBB233_1
; RV32IA-NEXT: # %bb.6: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB233_2 Depth=1
; RV32IA-NEXT: mv a2, s2
-; RV32IA-NEXT: mv a3, s1
+; RV32IA-NEXT: mv a3, s0
; RV32IA-NEXT: j .LBB233_1
; RV32IA-NEXT: .LBB233_7: # %atomicrmw.end
-; RV32IA-NEXT: mv a0, a5
-; RV32IA-NEXT: mv a1, a4
+; RV32IA-NEXT: mv a0, a4
+; RV32IA-NEXT: mv a1, a5
; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -28269,46 +28269,46 @@ define i64 @atomicrmw_umax_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lw a5, 0(a0)
-; RV32I-NEXT: lw a4, 4(a0)
-; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: mv s1, a0
+; RV32I-NEXT: lw a4, 0(a0)
+; RV32I-NEXT: lw a5, 4(a0)
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: j .LBB234_2
; RV32I-NEXT: .LBB234_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB234_2 Depth=1
-; RV32I-NEXT: sw a5, 8(sp)
-; RV32I-NEXT: sw a4, 12(sp)
+; RV32I-NEXT: sw a4, 8(sp)
+; RV32I-NEXT: sw a5, 12(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: li a5, 5
-; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 8(sp)
-; RV32I-NEXT: lw a4, 12(sp)
+; RV32I-NEXT: lw a4, 8(sp)
+; RV32I-NEXT: lw a5, 12(sp)
; RV32I-NEXT: bnez a0, .LBB234_7
; RV32I-NEXT: .LBB234_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT: beq a4, s1, .LBB234_4
+; RV32I-NEXT: beq a5, s0, .LBB234_4
; RV32I-NEXT: # %bb.3: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB234_2 Depth=1
-; RV32I-NEXT: sltu a0, s1, a4
+; RV32I-NEXT: sltu a0, s0, a5
; RV32I-NEXT: j .LBB234_5
; RV32I-NEXT: .LBB234_4: # in Loop: Header=BB234_2 Depth=1
-; RV32I-NEXT: sltu a0, s2, a5
+; RV32I-NEXT: sltu a0, s2, a4
; RV32I-NEXT: .LBB234_5: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB234_2 Depth=1
-; RV32I-NEXT: mv a2, a5
-; RV32I-NEXT: mv a3, a4
+; RV32I-NEXT: mv a2, a4
+; RV32I-NEXT: mv a3, a5
; RV32I-NEXT: bnez a0, .LBB234_1
; RV32I-NEXT: # %bb.6: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB234_2 Depth=1
; RV32I-NEXT: mv a2, s2
-; RV32I-NEXT: mv a3, s1
+; RV32I-NEXT: mv a3, s0
; RV32I-NEXT: j .LBB234_1
; RV32I-NEXT: .LBB234_7: # %atomicrmw.end
-; RV32I-NEXT: mv a0, a5
-; RV32I-NEXT: mv a1, a4
+; RV32I-NEXT: mv a0, a4
+; RV32I-NEXT: mv a1, a5
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -28323,46 +28323,46 @@ define i64 @atomicrmw_umax_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: mv s0, a0
-; RV32IA-NEXT: lw a5, 0(a0)
-; RV32IA-NEXT: lw a4, 4(a0)
-; RV32IA-NEXT: mv s1, a2
+; RV32IA-NEXT: mv s0, a2
+; RV32IA-NEXT: mv s1, a0
+; RV32IA-NEXT: lw a4, 0(a0)
+; RV32IA-NEXT: lw a5, 4(a0)
; RV32IA-NEXT: mv s2, a1
; RV32IA-NEXT: j .LBB234_2
; RV32IA-NEXT: .LBB234_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB234_2 Depth=1
-; RV32IA-NEXT: sw a5, 8(sp)
-; RV32IA-NEXT: sw a4, 12(sp)
+; RV32IA-NEXT: sw a4, 8(sp)
+; RV32IA-NEXT: sw a5, 12(sp)
; RV32IA-NEXT: addi a1, sp, 8
; RV32IA-NEXT: li a4, 5
; RV32IA-NEXT: li a5, 5
-; RV32IA-NEXT: mv a0, s0
+; RV32IA-NEXT: mv a0, s1
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 8(sp)
-; RV32IA-NEXT: lw a4, 12(sp)
+; RV32IA-NEXT: lw a4, 8(sp)
+; RV32IA-NEXT: lw a5, 12(sp)
; RV32IA-NEXT: bnez a0, .LBB234_7
; RV32IA-NEXT: .LBB234_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: beq a4, s1, .LBB234_4
+; RV32IA-NEXT: beq a5, s0, .LBB234_4
; RV32IA-NEXT: # %bb.3: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB234_2 Depth=1
-; RV32IA-NEXT: sltu a0, s1, a4
+; RV32IA-NEXT: sltu a0, s0, a5
; RV32IA-NEXT: j .LBB234_5
; RV32IA-NEXT: .LBB234_4: # in Loop: Header=BB234_2 Depth=1
-; RV32IA-NEXT: sltu a0, s2, a5
+; RV32IA-NEXT: sltu a0, s2, a4
; RV32IA-NEXT: .LBB234_5: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB234_2 Depth=1
-; RV32IA-NEXT: mv a2, a5
-; RV32IA-NEXT: mv a3, a4
+; RV32IA-NEXT: mv a2, a4
+; RV32IA-NEXT: mv a3, a5
; RV32IA-NEXT: bnez a0, .LBB234_1
; RV32IA-NEXT: # %bb.6: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB234_2 Depth=1
; RV32IA-NEXT: mv a2, s2
-; RV32IA-NEXT: mv a3, s1
+; RV32IA-NEXT: mv a3, s0
; RV32IA-NEXT: j .LBB234_1
; RV32IA-NEXT: .LBB234_7: # %atomicrmw.end
-; RV32IA-NEXT: mv a0, a5
-; RV32IA-NEXT: mv a1, a4
+; RV32IA-NEXT: mv a0, a4
+; RV32IA-NEXT: mv a1, a5
; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -28427,46 +28427,46 @@ define i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lw a5, 0(a0)
-; RV32I-NEXT: lw a4, 4(a0)
-; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: mv s1, a0
+; RV32I-NEXT: lw a4, 0(a0)
+; RV32I-NEXT: lw a5, 4(a0)
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: j .LBB235_2
; RV32I-NEXT: .LBB235_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB235_2 Depth=1
-; RV32I-NEXT: sw a5, 8(sp)
-; RV32I-NEXT: sw a4, 12(sp)
+; RV32I-NEXT: sw a4, 8(sp)
+; RV32I-NEXT: sw a5, 12(sp)
; RV32I-NEXT: addi a1, sp, 8
-; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: li a5, 0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 8(sp)
-; RV32I-NEXT: lw a4, 12(sp)
+; RV32I-NEXT: lw a4, 8(sp)
+; RV32I-NEXT: lw a5, 12(sp)
; RV32I-NEXT: bnez a0, .LBB235_7
; RV32I-NEXT: .LBB235_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT: beq a4, s1, .LBB235_4
+; RV32I-NEXT: beq a5, s0, .LBB235_4
; RV32I-NEXT: # %bb.3: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB235_2 Depth=1
-; RV32I-NEXT: sltu a0, s1, a4
+; RV32I-NEXT: sltu a0, s0, a5
; RV32I-NEXT: j .LBB235_5
; RV32I-NEXT: .LBB235_4: # in Loop: Header=BB235_2 Depth=1
-; RV32I-NEXT: sltu a0, s2, a5
+; RV32I-NEXT: sltu a0, s2, a4
; RV32I-NEXT: .LBB235_5: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB235_2 Depth=1
-; RV32I-NEXT: mv a2, a5
-; RV32I-NEXT: mv a3, a4
+; RV32I-NEXT: mv a2, a4
+; RV32I-NEXT: mv a3, a5
; RV32I-NEXT: beqz a0, .LBB235_1
; RV32I-NEXT: # %bb.6: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB235_2 Depth=1
; RV32I-NEXT: mv a2, s2
-; RV32I-NEXT: mv a3, s1
+; RV32I-NEXT: mv a3, s0
; RV32I-NEXT: j .LBB235_1
; RV32I-NEXT: .LBB235_7: # %atomicrmw.end
-; RV32I-NEXT: mv a0, a5
-; RV32I-NEXT: mv a1, a4
+; RV32I-NEXT: mv a0, a4
+; RV32I-NEXT: mv a1, a5
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -28481,46 +28481,46 @@ define i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: mv s0, a0
-; RV32IA-NEXT: lw a5, 0(a0)
-; RV32IA-NEXT: lw a4, 4(a0)
-; RV32IA-NEXT: mv s1, a2
+; RV32IA-NEXT: mv s0, a2
+; RV32IA-NEXT: mv s1, a0
+; RV32IA-NEXT: lw a4, 0(a0)
+; RV32IA-NEXT: lw a5, 4(a0)
; RV32IA-NEXT: mv s2, a1
; RV32IA-NEXT: j .LBB235_2
; RV32IA-NEXT: .LBB235_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB235_2 Depth=1
-; RV32IA-NEXT: sw a5, 8(sp)
-; RV32IA-NEXT: sw a4, 12(sp)
+; RV32IA-NEXT: sw a4, 8(sp)
+; RV32IA-NEXT: sw a5, 12(sp)
; RV32IA-NEXT: addi a1, sp, 8
-; RV32IA-NEXT: mv a0, s0
+; RV32IA-NEXT: mv a0, s1
; RV32IA-NEXT: li a4, 0
; RV32IA-NEXT: li a5, 0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 8(sp)
-; RV32IA-NEXT: lw a4, 12(sp)
+; RV32IA-NEXT: lw a4, 8(sp)
+; RV32IA-NEXT: lw a5, 12(sp)
; RV32IA-NEXT: bnez a0, .LBB235_7
; RV32IA-NEXT: .LBB235_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: beq a4, s1, .LBB235_4
+; RV32IA-NEXT: beq a5, s0, .LBB235_4
; RV32IA-NEXT: # %bb.3: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB235_2 Depth=1
-; RV32IA-NEXT: sltu a0, s1, a4
+; RV32IA-NEXT: sltu a0, s0, a5
; RV32IA-NEXT: j .LBB235_5
; RV32IA-NEXT: .LBB235_4: # in Loop: Header=BB235_2 Depth=1
-; RV32IA-NEXT: sltu a0, s2, a5
+; RV32IA-NEXT: sltu a0, s2, a4
; RV32IA-NEXT: .LBB235_5: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB235_2 Depth=1
-; RV32IA-NEXT: mv a2, a5
-; RV32IA-NEXT: mv a3, a4
+; RV32IA-NEXT: mv a2, a4
+; RV32IA-NEXT: mv a3, a5
; RV32IA-NEXT: beqz a0, .LBB235_1
; RV32IA-NEXT: # %bb.6: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB235_2 Depth=1
; RV32IA-NEXT: mv a2, s2
-; RV32IA-NEXT: mv a3, s1
+; RV32IA-NEXT: mv a3, s0
; RV32IA-NEXT: j .LBB235_1
; RV32IA-NEXT: .LBB235_7: # %atomicrmw.end
-; RV32IA-NEXT: mv a0, a5
-; RV32IA-NEXT: mv a1, a4
+; RV32IA-NEXT: mv a0, a4
+; RV32IA-NEXT: mv a1, a5
; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -28580,46 +28580,46 @@ define i64 @atomicrmw_umin_i64_acquire(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lw a5, 0(a0)
-; RV32I-NEXT: lw a4, 4(a0)
-; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: mv s1, a0
+; RV32I-NEXT: lw a4, 0(a0)
+; RV32I-NEXT: lw a5, 4(a0)
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: j .LBB236_2
; RV32I-NEXT: .LBB236_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB236_2 Depth=1
-; RV32I-NEXT: sw a5, 8(sp)
-; RV32I-NEXT: sw a4, 12(sp)
+; RV32I-NEXT: sw a4, 8(sp)
+; RV32I-NEXT: sw a5, 12(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: li a5, 2
-; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 8(sp)
-; RV32I-NEXT: lw a4, 12(sp)
+; RV32I-NEXT: lw a4, 8(sp)
+; RV32I-NEXT: lw a5, 12(sp)
; RV32I-NEXT: bnez a0, .LBB236_7
; RV32I-NEXT: .LBB236_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT: beq a4, s1, .LBB236_4
+; RV32I-NEXT: beq a5, s0, .LBB236_4
; RV32I-NEXT: # %bb.3: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB236_2 Depth=1
-; RV32I-NEXT: sltu a0, s1, a4
+; RV32I-NEXT: sltu a0, s0, a5
; RV32I-NEXT: j .LBB236_5
; RV32I-NEXT: .LBB236_4: # in Loop: Header=BB236_2 Depth=1
-; RV32I-NEXT: sltu a0, s2, a5
+; RV32I-NEXT: sltu a0, s2, a4
; RV32I-NEXT: .LBB236_5: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB236_2 Depth=1
-; RV32I-NEXT: mv a2, a5
-; RV32I-NEXT: mv a3, a4
+; RV32I-NEXT: mv a2, a4
+; RV32I-NEXT: mv a3, a5
; RV32I-NEXT: beqz a0, .LBB236_1
; RV32I-NEXT: # %bb.6: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB236_2 Depth=1
; RV32I-NEXT: mv a2, s2
-; RV32I-NEXT: mv a3, s1
+; RV32I-NEXT: mv a3, s0
; RV32I-NEXT: j .LBB236_1
; RV32I-NEXT: .LBB236_7: # %atomicrmw.end
-; RV32I-NEXT: mv a0, a5
-; RV32I-NEXT: mv a1, a4
+; RV32I-NEXT: mv a0, a4
+; RV32I-NEXT: mv a1, a5
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -28634,46 +28634,46 @@ define i64 @atomicrmw_umin_i64_acquire(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: mv s0, a0
-; RV32IA-NEXT: lw a5, 0(a0)
-; RV32IA-NEXT: lw a4, 4(a0)
-; RV32IA-NEXT: mv s1, a2
+; RV32IA-NEXT: mv s0, a2
+; RV32IA-NEXT: mv s1, a0
+; RV32IA-NEXT: lw a4, 0(a0)
+; RV32IA-NEXT: lw a5, 4(a0)
; RV32IA-NEXT: mv s2, a1
; RV32IA-NEXT: j .LBB236_2
; RV32IA-NEXT: .LBB236_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB236_2 Depth=1
-; RV32IA-NEXT: sw a5, 8(sp)
-; RV32IA-NEXT: sw a4, 12(sp)
+; RV32IA-NEXT: sw a4, 8(sp)
+; RV32IA-NEXT: sw a5, 12(sp)
; RV32IA-NEXT: addi a1, sp, 8
; RV32IA-NEXT: li a4, 2
; RV32IA-NEXT: li a5, 2
-; RV32IA-NEXT: mv a0, s0
+; RV32IA-NEXT: mv a0, s1
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 8(sp)
-; RV32IA-NEXT: lw a4, 12(sp)
+; RV32IA-NEXT: lw a4, 8(sp)
+; RV32IA-NEXT: lw a5, 12(sp)
; RV32IA-NEXT: bnez a0, .LBB236_7
; RV32IA-NEXT: .LBB236_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: beq a4, s1, .LBB236_4
+; RV32IA-NEXT: beq a5, s0, .LBB236_4
; RV32IA-NEXT: # %bb.3: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB236_2 Depth=1
-; RV32IA-NEXT: sltu a0, s1, a4
+; RV32IA-NEXT: sltu a0, s0, a5
; RV32IA-NEXT: j .LBB236_5
; RV32IA-NEXT: .LBB236_4: # in Loop: Header=BB236_2 Depth=1
-; RV32IA-NEXT: sltu a0, s2, a5
+; RV32IA-NEXT: sltu a0, s2, a4
; RV32IA-NEXT: .LBB236_5: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB236_2 Depth=1
-; RV32IA-NEXT: mv a2, a5
-; RV32IA-NEXT: mv a3, a4
+; RV32IA-NEXT: mv a2, a4
+; RV32IA-NEXT: mv a3, a5
; RV32IA-NEXT: beqz a0, .LBB236_1
; RV32IA-NEXT: # %bb.6: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB236_2 Depth=1
; RV32IA-NEXT: mv a2, s2
-; RV32IA-NEXT: mv a3, s1
+; RV32IA-NEXT: mv a3, s0
; RV32IA-NEXT: j .LBB236_1
; RV32IA-NEXT: .LBB236_7: # %atomicrmw.end
-; RV32IA-NEXT: mv a0, a5
-; RV32IA-NEXT: mv a1, a4
+; RV32IA-NEXT: mv a0, a4
+; RV32IA-NEXT: mv a1, a5
; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -28738,46 +28738,46 @@ define i64 @atomicrmw_umin_i64_release(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lw a5, 0(a0)
-; RV32I-NEXT: lw a4, 4(a0)
-; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: mv s1, a0
+; RV32I-NEXT: lw a4, 0(a0)
+; RV32I-NEXT: lw a5, 4(a0)
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: j .LBB237_2
; RV32I-NEXT: .LBB237_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB237_2 Depth=1
-; RV32I-NEXT: sw a5, 8(sp)
-; RV32I-NEXT: sw a4, 12(sp)
+; RV32I-NEXT: sw a4, 8(sp)
+; RV32I-NEXT: sw a5, 12(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a4, 3
-; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: li a5, 0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 8(sp)
-; RV32I-NEXT: lw a4, 12(sp)
+; RV32I-NEXT: lw a4, 8(sp)
+; RV32I-NEXT: lw a5, 12(sp)
; RV32I-NEXT: bnez a0, .LBB237_7
; RV32I-NEXT: .LBB237_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT: beq a4, s1, .LBB237_4
+; RV32I-NEXT: beq a5, s0, .LBB237_4
; RV32I-NEXT: # %bb.3: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB237_2 Depth=1
-; RV32I-NEXT: sltu a0, s1, a4
+; RV32I-NEXT: sltu a0, s0, a5
; RV32I-NEXT: j .LBB237_5
; RV32I-NEXT: .LBB237_4: # in Loop: Header=BB237_2 Depth=1
-; RV32I-NEXT: sltu a0, s2, a5
+; RV32I-NEXT: sltu a0, s2, a4
; RV32I-NEXT: .LBB237_5: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB237_2 Depth=1
-; RV32I-NEXT: mv a2, a5
-; RV32I-NEXT: mv a3, a4
+; RV32I-NEXT: mv a2, a4
+; RV32I-NEXT: mv a3, a5
; RV32I-NEXT: beqz a0, .LBB237_1
; RV32I-NEXT: # %bb.6: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB237_2 Depth=1
; RV32I-NEXT: mv a2, s2
-; RV32I-NEXT: mv a3, s1
+; RV32I-NEXT: mv a3, s0
; RV32I-NEXT: j .LBB237_1
; RV32I-NEXT: .LBB237_7: # %atomicrmw.end
-; RV32I-NEXT: mv a0, a5
-; RV32I-NEXT: mv a1, a4
+; RV32I-NEXT: mv a0, a4
+; RV32I-NEXT: mv a1, a5
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -28792,46 +28792,46 @@ define i64 @atomicrmw_umin_i64_release(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: mv s0, a0
-; RV32IA-NEXT: lw a5, 0(a0)
-; RV32IA-NEXT: lw a4, 4(a0)
-; RV32IA-NEXT: mv s1, a2
+; RV32IA-NEXT: mv s0, a2
+; RV32IA-NEXT: mv s1, a0
+; RV32IA-NEXT: lw a4, 0(a0)
+; RV32IA-NEXT: lw a5, 4(a0)
; RV32IA-NEXT: mv s2, a1
; RV32IA-NEXT: j .LBB237_2
; RV32IA-NEXT: .LBB237_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB237_2 Depth=1
-; RV32IA-NEXT: sw a5, 8(sp)
-; RV32IA-NEXT: sw a4, 12(sp)
+; RV32IA-NEXT: sw a4, 8(sp)
+; RV32IA-NEXT: sw a5, 12(sp)
; RV32IA-NEXT: addi a1, sp, 8
; RV32IA-NEXT: li a4, 3
-; RV32IA-NEXT: mv a0, s0
+; RV32IA-NEXT: mv a0, s1
; RV32IA-NEXT: li a5, 0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 8(sp)
-; RV32IA-NEXT: lw a4, 12(sp)
+; RV32IA-NEXT: lw a4, 8(sp)
+; RV32IA-NEXT: lw a5, 12(sp)
; RV32IA-NEXT: bnez a0, .LBB237_7
; RV32IA-NEXT: .LBB237_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: beq a4, s1, .LBB237_4
+; RV32IA-NEXT: beq a5, s0, .LBB237_4
; RV32IA-NEXT: # %bb.3: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB237_2 Depth=1
-; RV32IA-NEXT: sltu a0, s1, a4
+; RV32IA-NEXT: sltu a0, s0, a5
; RV32IA-NEXT: j .LBB237_5
; RV32IA-NEXT: .LBB237_4: # in Loop: Header=BB237_2 Depth=1
-; RV32IA-NEXT: sltu a0, s2, a5
+; RV32IA-NEXT: sltu a0, s2, a4
; RV32IA-NEXT: .LBB237_5: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB237_2 Depth=1
-; RV32IA-NEXT: mv a2, a5
-; RV32IA-NEXT: mv a3, a4
+; RV32IA-NEXT: mv a2, a4
+; RV32IA-NEXT: mv a3, a5
; RV32IA-NEXT: beqz a0, .LBB237_1
; RV32IA-NEXT: # %bb.6: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB237_2 Depth=1
; RV32IA-NEXT: mv a2, s2
-; RV32IA-NEXT: mv a3, s1
+; RV32IA-NEXT: mv a3, s0
; RV32IA-NEXT: j .LBB237_1
; RV32IA-NEXT: .LBB237_7: # %atomicrmw.end
-; RV32IA-NEXT: mv a0, a5
-; RV32IA-NEXT: mv a1, a4
+; RV32IA-NEXT: mv a0, a4
+; RV32IA-NEXT: mv a1, a5
; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -28896,46 +28896,46 @@ define i64 @atomicrmw_umin_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lw a5, 0(a0)
-; RV32I-NEXT: lw a4, 4(a0)
-; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: mv s1, a0
+; RV32I-NEXT: lw a4, 0(a0)
+; RV32I-NEXT: lw a5, 4(a0)
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: j .LBB238_2
; RV32I-NEXT: .LBB238_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB238_2 Depth=1
-; RV32I-NEXT: sw a5, 8(sp)
-; RV32I-NEXT: sw a4, 12(sp)
+; RV32I-NEXT: sw a4, 8(sp)
+; RV32I-NEXT: sw a5, 12(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a4, 4
; RV32I-NEXT: li a5, 2
-; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 8(sp)
-; RV32I-NEXT: lw a4, 12(sp)
+; RV32I-NEXT: lw a4, 8(sp)
+; RV32I-NEXT: lw a5, 12(sp)
; RV32I-NEXT: bnez a0, .LBB238_7
; RV32I-NEXT: .LBB238_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT: beq a4, s1, .LBB238_4
+; RV32I-NEXT: beq a5, s0, .LBB238_4
; RV32I-NEXT: # %bb.3: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB238_2 Depth=1
-; RV32I-NEXT: sltu a0, s1, a4
+; RV32I-NEXT: sltu a0, s0, a5
; RV32I-NEXT: j .LBB238_5
; RV32I-NEXT: .LBB238_4: # in Loop: Header=BB238_2 Depth=1
-; RV32I-NEXT: sltu a0, s2, a5
+; RV32I-NEXT: sltu a0, s2, a4
; RV32I-NEXT: .LBB238_5: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB238_2 Depth=1
-; RV32I-NEXT: mv a2, a5
-; RV32I-NEXT: mv a3, a4
+; RV32I-NEXT: mv a2, a4
+; RV32I-NEXT: mv a3, a5
; RV32I-NEXT: beqz a0, .LBB238_1
; RV32I-NEXT: # %bb.6: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB238_2 Depth=1
; RV32I-NEXT: mv a2, s2
-; RV32I-NEXT: mv a3, s1
+; RV32I-NEXT: mv a3, s0
; RV32I-NEXT: j .LBB238_1
; RV32I-NEXT: .LBB238_7: # %atomicrmw.end
-; RV32I-NEXT: mv a0, a5
-; RV32I-NEXT: mv a1, a4
+; RV32I-NEXT: mv a0, a4
+; RV32I-NEXT: mv a1, a5
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -28950,46 +28950,46 @@ define i64 @atomicrmw_umin_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: mv s0, a0
-; RV32IA-NEXT: lw a5, 0(a0)
-; RV32IA-NEXT: lw a4, 4(a0)
-; RV32IA-NEXT: mv s1, a2
+; RV32IA-NEXT: mv s0, a2
+; RV32IA-NEXT: mv s1, a0
+; RV32IA-NEXT: lw a4, 0(a0)
+; RV32IA-NEXT: lw a5, 4(a0)
; RV32IA-NEXT: mv s2, a1
; RV32IA-NEXT: j .LBB238_2
; RV32IA-NEXT: .LBB238_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB238_2 Depth=1
-; RV32IA-NEXT: sw a5, 8(sp)
-; RV32IA-NEXT: sw a4, 12(sp)
+; RV32IA-NEXT: sw a4, 8(sp)
+; RV32IA-NEXT: sw a5, 12(sp)
; RV32IA-NEXT: addi a1, sp, 8
; RV32IA-NEXT: li a4, 4
; RV32IA-NEXT: li a5, 2
-; RV32IA-NEXT: mv a0, s0
+; RV32IA-NEXT: mv a0, s1
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 8(sp)
-; RV32IA-NEXT: lw a4, 12(sp)
+; RV32IA-NEXT: lw a4, 8(sp)
+; RV32IA-NEXT: lw a5, 12(sp)
; RV32IA-NEXT: bnez a0, .LBB238_7
; RV32IA-NEXT: .LBB238_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: beq a4, s1, .LBB238_4
+; RV32IA-NEXT: beq a5, s0, .LBB238_4
; RV32IA-NEXT: # %bb.3: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB238_2 Depth=1
-; RV32IA-NEXT: sltu a0, s1, a4
+; RV32IA-NEXT: sltu a0, s0, a5
; RV32IA-NEXT: j .LBB238_5
; RV32IA-NEXT: .LBB238_4: # in Loop: Header=BB238_2 Depth=1
-; RV32IA-NEXT: sltu a0, s2, a5
+; RV32IA-NEXT: sltu a0, s2, a4
; RV32IA-NEXT: .LBB238_5: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB238_2 Depth=1
-; RV32IA-NEXT: mv a2, a5
-; RV32IA-NEXT: mv a3, a4
+; RV32IA-NEXT: mv a2, a4
+; RV32IA-NEXT: mv a3, a5
; RV32IA-NEXT: beqz a0, .LBB238_1
; RV32IA-NEXT: # %bb.6: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB238_2 Depth=1
; RV32IA-NEXT: mv a2, s2
-; RV32IA-NEXT: mv a3, s1
+; RV32IA-NEXT: mv a3, s0
; RV32IA-NEXT: j .LBB238_1
; RV32IA-NEXT: .LBB238_7: # %atomicrmw.end
-; RV32IA-NEXT: mv a0, a5
-; RV32IA-NEXT: mv a1, a4
+; RV32IA-NEXT: mv a0, a4
+; RV32IA-NEXT: mv a1, a5
; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -29054,46 +29054,46 @@ define i64 @atomicrmw_umin_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lw a5, 0(a0)
-; RV32I-NEXT: lw a4, 4(a0)
-; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: mv s1, a0
+; RV32I-NEXT: lw a4, 0(a0)
+; RV32I-NEXT: lw a5, 4(a0)
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: j .LBB239_2
; RV32I-NEXT: .LBB239_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB239_2 Depth=1
-; RV32I-NEXT: sw a5, 8(sp)
-; RV32I-NEXT: sw a4, 12(sp)
+; RV32I-NEXT: sw a4, 8(sp)
+; RV32I-NEXT: sw a5, 12(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: li a5, 5
-; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 8(sp)
-; RV32I-NEXT: lw a4, 12(sp)
+; RV32I-NEXT: lw a4, 8(sp)
+; RV32I-NEXT: lw a5, 12(sp)
; RV32I-NEXT: bnez a0, .LBB239_7
; RV32I-NEXT: .LBB239_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT: beq a4, s1, .LBB239_4
+; RV32I-NEXT: beq a5, s0, .LBB239_4
; RV32I-NEXT: # %bb.3: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB239_2 Depth=1
-; RV32I-NEXT: sltu a0, s1, a4
+; RV32I-NEXT: sltu a0, s0, a5
; RV32I-NEXT: j .LBB239_5
; RV32I-NEXT: .LBB239_4: # in Loop: Header=BB239_2 Depth=1
-; RV32I-NEXT: sltu a0, s2, a5
+; RV32I-NEXT: sltu a0, s2, a4
; RV32I-NEXT: .LBB239_5: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB239_2 Depth=1
-; RV32I-NEXT: mv a2, a5
-; RV32I-NEXT: mv a3, a4
+; RV32I-NEXT: mv a2, a4
+; RV32I-NEXT: mv a3, a5
; RV32I-NEXT: beqz a0, .LBB239_1
; RV32I-NEXT: # %bb.6: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB239_2 Depth=1
; RV32I-NEXT: mv a2, s2
-; RV32I-NEXT: mv a3, s1
+; RV32I-NEXT: mv a3, s0
; RV32I-NEXT: j .LBB239_1
; RV32I-NEXT: .LBB239_7: # %atomicrmw.end
-; RV32I-NEXT: mv a0, a5
-; RV32I-NEXT: mv a1, a4
+; RV32I-NEXT: mv a0, a4
+; RV32I-NEXT: mv a1, a5
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -29108,46 +29108,46 @@ define i64 @atomicrmw_umin_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: mv s0, a0
-; RV32IA-NEXT: lw a5, 0(a0)
-; RV32IA-NEXT: lw a4, 4(a0)
-; RV32IA-NEXT: mv s1, a2
+; RV32IA-NEXT: mv s0, a2
+; RV32IA-NEXT: mv s1, a0
+; RV32IA-NEXT: lw a4, 0(a0)
+; RV32IA-NEXT: lw a5, 4(a0)
; RV32IA-NEXT: mv s2, a1
; RV32IA-NEXT: j .LBB239_2
; RV32IA-NEXT: .LBB239_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB239_2 Depth=1
-; RV32IA-NEXT: sw a5, 8(sp)
-; RV32IA-NEXT: sw a4, 12(sp)
+; RV32IA-NEXT: sw a4, 8(sp)
+; RV32IA-NEXT: sw a5, 12(sp)
; RV32IA-NEXT: addi a1, sp, 8
; RV32IA-NEXT: li a4, 5
; RV32IA-NEXT: li a5, 5
-; RV32IA-NEXT: mv a0, s0
+; RV32IA-NEXT: mv a0, s1
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 8(sp)
-; RV32IA-NEXT: lw a4, 12(sp)
+; RV32IA-NEXT: lw a4, 8(sp)
+; RV32IA-NEXT: lw a5, 12(sp)
; RV32IA-NEXT: bnez a0, .LBB239_7
; RV32IA-NEXT: .LBB239_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: beq a4, s1, .LBB239_4
+; RV32IA-NEXT: beq a5, s0, .LBB239_4
; RV32IA-NEXT: # %bb.3: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB239_2 Depth=1
-; RV32IA-NEXT: sltu a0, s1, a4
+; RV32IA-NEXT: sltu a0, s0, a5
; RV32IA-NEXT: j .LBB239_5
; RV32IA-NEXT: .LBB239_4: # in Loop: Header=BB239_2 Depth=1
-; RV32IA-NEXT: sltu a0, s2, a5
+; RV32IA-NEXT: sltu a0, s2, a4
; RV32IA-NEXT: .LBB239_5: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB239_2 Depth=1
-; RV32IA-NEXT: mv a2, a5
-; RV32IA-NEXT: mv a3, a4
+; RV32IA-NEXT: mv a2, a4
+; RV32IA-NEXT: mv a3, a5
; RV32IA-NEXT: beqz a0, .LBB239_1
; RV32IA-NEXT: # %bb.6: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB239_2 Depth=1
; RV32IA-NEXT: mv a2, s2
-; RV32IA-NEXT: mv a3, s1
+; RV32IA-NEXT: mv a3, s0
; RV32IA-NEXT: j .LBB239_1
; RV32IA-NEXT: .LBB239_7: # %atomicrmw.end
-; RV32IA-NEXT: mv a0, a5
-; RV32IA-NEXT: mv a1, a4
+; RV32IA-NEXT: mv a0, a4
+; RV32IA-NEXT: mv a1, a5
; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/atomic-signext.ll b/llvm/test/CodeGen/RISCV/atomic-signext.ll
index 06594e35be8703..ae7103c609b5bc 100644
--- a/llvm/test/CodeGen/RISCV/atomic-signext.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-signext.ll
@@ -3182,46 +3182,46 @@ define signext i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lw a5, 0(a0)
-; RV32I-NEXT: lw a4, 4(a0)
-; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: mv s1, a0
+; RV32I-NEXT: lw a4, 0(a0)
+; RV32I-NEXT: lw a5, 4(a0)
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: j .LBB43_2
; RV32I-NEXT: .LBB43_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB43_2 Depth=1
-; RV32I-NEXT: sw a5, 8(sp)
-; RV32I-NEXT: sw a4, 12(sp)
+; RV32I-NEXT: sw a4, 8(sp)
+; RV32I-NEXT: sw a5, 12(sp)
; RV32I-NEXT: addi a1, sp, 8
-; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: li a5, 0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 8(sp)
-; RV32I-NEXT: lw a4, 12(sp)
+; RV32I-NEXT: lw a4, 8(sp)
+; RV32I-NEXT: lw a5, 12(sp)
; RV32I-NEXT: bnez a0, .LBB43_7
; RV32I-NEXT: .LBB43_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT: beq a4, s1, .LBB43_4
+; RV32I-NEXT: beq a5, s0, .LBB43_4
; RV32I-NEXT: # %bb.3: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB43_2 Depth=1
-; RV32I-NEXT: slt a0, s1, a4
+; RV32I-NEXT: slt a0, s0, a5
; RV32I-NEXT: j .LBB43_5
; RV32I-NEXT: .LBB43_4: # in Loop: Header=BB43_2 Depth=1
-; RV32I-NEXT: sltu a0, s2, a5
+; RV32I-NEXT: sltu a0, s2, a4
; RV32I-NEXT: .LBB43_5: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB43_2 Depth=1
-; RV32I-NEXT: mv a2, a5
-; RV32I-NEXT: mv a3, a4
+; RV32I-NEXT: mv a2, a4
+; RV32I-NEXT: mv a3, a5
; RV32I-NEXT: bnez a0, .LBB43_1
; RV32I-NEXT: # %bb.6: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB43_2 Depth=1
; RV32I-NEXT: mv a2, s2
-; RV32I-NEXT: mv a3, s1
+; RV32I-NEXT: mv a3, s0
; RV32I-NEXT: j .LBB43_1
; RV32I-NEXT: .LBB43_7: # %atomicrmw.end
-; RV32I-NEXT: mv a0, a5
-; RV32I-NEXT: mv a1, a4
+; RV32I-NEXT: mv a0, a4
+; RV32I-NEXT: mv a1, a5
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -3236,46 +3236,46 @@ define signext i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: mv s0, a0
-; RV32IA-NEXT: lw a5, 0(a0)
-; RV32IA-NEXT: lw a4, 4(a0)
-; RV32IA-NEXT: mv s1, a2
+; RV32IA-NEXT: mv s0, a2
+; RV32IA-NEXT: mv s1, a0
+; RV32IA-NEXT: lw a4, 0(a0)
+; RV32IA-NEXT: lw a5, 4(a0)
; RV32IA-NEXT: mv s2, a1
; RV32IA-NEXT: j .LBB43_2
; RV32IA-NEXT: .LBB43_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB43_2 Depth=1
-; RV32IA-NEXT: sw a5, 8(sp)
-; RV32IA-NEXT: sw a4, 12(sp)
+; RV32IA-NEXT: sw a4, 8(sp)
+; RV32IA-NEXT: sw a5, 12(sp)
; RV32IA-NEXT: addi a1, sp, 8
-; RV32IA-NEXT: mv a0, s0
+; RV32IA-NEXT: mv a0, s1
; RV32IA-NEXT: li a4, 0
; RV32IA-NEXT: li a5, 0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 8(sp)
-; RV32IA-NEXT: lw a4, 12(sp)
+; RV32IA-NEXT: lw a4, 8(sp)
+; RV32IA-NEXT: lw a5, 12(sp)
; RV32IA-NEXT: bnez a0, .LBB43_7
; RV32IA-NEXT: .LBB43_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: beq a4, s1, .LBB43_4
+; RV32IA-NEXT: beq a5, s0, .LBB43_4
; RV32IA-NEXT: # %bb.3: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB43_2 Depth=1
-; RV32IA-NEXT: slt a0, s1, a4
+; RV32IA-NEXT: slt a0, s0, a5
; RV32IA-NEXT: j .LBB43_5
; RV32IA-NEXT: .LBB43_4: # in Loop: Header=BB43_2 Depth=1
-; RV32IA-NEXT: sltu a0, s2, a5
+; RV32IA-NEXT: sltu a0, s2, a4
; RV32IA-NEXT: .LBB43_5: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB43_2 Depth=1
-; RV32IA-NEXT: mv a2, a5
-; RV32IA-NEXT: mv a3, a4
+; RV32IA-NEXT: mv a2, a4
+; RV32IA-NEXT: mv a3, a5
; RV32IA-NEXT: bnez a0, .LBB43_1
; RV32IA-NEXT: # %bb.6: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB43_2 Depth=1
; RV32IA-NEXT: mv a2, s2
-; RV32IA-NEXT: mv a3, s1
+; RV32IA-NEXT: mv a3, s0
; RV32IA-NEXT: j .LBB43_1
; RV32IA-NEXT: .LBB43_7: # %atomicrmw.end
-; RV32IA-NEXT: mv a0, a5
-; RV32IA-NEXT: mv a1, a4
+; RV32IA-NEXT: mv a0, a4
+; RV32IA-NEXT: mv a1, a5
; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -3335,46 +3335,46 @@ define signext i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lw a5, 0(a0)
-; RV32I-NEXT: lw a4, 4(a0)
-; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: mv s1, a0
+; RV32I-NEXT: lw a4, 0(a0)
+; RV32I-NEXT: lw a5, 4(a0)
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: j .LBB44_2
; RV32I-NEXT: .LBB44_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB44_2 Depth=1
-; RV32I-NEXT: sw a5, 8(sp)
-; RV32I-NEXT: sw a4, 12(sp)
+; RV32I-NEXT: sw a4, 8(sp)
+; RV32I-NEXT: sw a5, 12(sp)
; RV32I-NEXT: addi a1, sp, 8
-; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: li a5, 0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 8(sp)
-; RV32I-NEXT: lw a4, 12(sp)
+; RV32I-NEXT: lw a4, 8(sp)
+; RV32I-NEXT: lw a5, 12(sp)
; RV32I-NEXT: bnez a0, .LBB44_7
; RV32I-NEXT: .LBB44_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT: beq a4, s1, .LBB44_4
+; RV32I-NEXT: beq a5, s0, .LBB44_4
; RV32I-NEXT: # %bb.3: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB44_2 Depth=1
-; RV32I-NEXT: slt a0, s1, a4
+; RV32I-NEXT: slt a0, s0, a5
; RV32I-NEXT: j .LBB44_5
; RV32I-NEXT: .LBB44_4: # in Loop: Header=BB44_2 Depth=1
-; RV32I-NEXT: sltu a0, s2, a5
+; RV32I-NEXT: sltu a0, s2, a4
; RV32I-NEXT: .LBB44_5: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB44_2 Depth=1
-; RV32I-NEXT: mv a2, a5
-; RV32I-NEXT: mv a3, a4
+; RV32I-NEXT: mv a2, a4
+; RV32I-NEXT: mv a3, a5
; RV32I-NEXT: beqz a0, .LBB44_1
; RV32I-NEXT: # %bb.6: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB44_2 Depth=1
; RV32I-NEXT: mv a2, s2
-; RV32I-NEXT: mv a3, s1
+; RV32I-NEXT: mv a3, s0
; RV32I-NEXT: j .LBB44_1
; RV32I-NEXT: .LBB44_7: # %atomicrmw.end
-; RV32I-NEXT: mv a0, a5
-; RV32I-NEXT: mv a1, a4
+; RV32I-NEXT: mv a0, a4
+; RV32I-NEXT: mv a1, a5
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -3389,46 +3389,46 @@ define signext i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: mv s0, a0
-; RV32IA-NEXT: lw a5, 0(a0)
-; RV32IA-NEXT: lw a4, 4(a0)
-; RV32IA-NEXT: mv s1, a2
+; RV32IA-NEXT: mv s0, a2
+; RV32IA-NEXT: mv s1, a0
+; RV32IA-NEXT: lw a4, 0(a0)
+; RV32IA-NEXT: lw a5, 4(a0)
; RV32IA-NEXT: mv s2, a1
; RV32IA-NEXT: j .LBB44_2
; RV32IA-NEXT: .LBB44_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB44_2 Depth=1
-; RV32IA-NEXT: sw a5, 8(sp)
-; RV32IA-NEXT: sw a4, 12(sp)
+; RV32IA-NEXT: sw a4, 8(sp)
+; RV32IA-NEXT: sw a5, 12(sp)
; RV32IA-NEXT: addi a1, sp, 8
-; RV32IA-NEXT: mv a0, s0
+; RV32IA-NEXT: mv a0, s1
; RV32IA-NEXT: li a4, 0
; RV32IA-NEXT: li a5, 0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 8(sp)
-; RV32IA-NEXT: lw a4, 12(sp)
+; RV32IA-NEXT: lw a4, 8(sp)
+; RV32IA-NEXT: lw a5, 12(sp)
; RV32IA-NEXT: bnez a0, .LBB44_7
; RV32IA-NEXT: .LBB44_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: beq a4, s1, .LBB44_4
+; RV32IA-NEXT: beq a5, s0, .LBB44_4
; RV32IA-NEXT: # %bb.3: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB44_2 Depth=1
-; RV32IA-NEXT: slt a0, s1, a4
+; RV32IA-NEXT: slt a0, s0, a5
; RV32IA-NEXT: j .LBB44_5
; RV32IA-NEXT: .LBB44_4: # in Loop: Header=BB44_2 Depth=1
-; RV32IA-NEXT: sltu a0, s2, a5
+; RV32IA-NEXT: sltu a0, s2, a4
; RV32IA-NEXT: .LBB44_5: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB44_2 Depth=1
-; RV32IA-NEXT: mv a2, a5
-; RV32IA-NEXT: mv a3, a4
+; RV32IA-NEXT: mv a2, a4
+; RV32IA-NEXT: mv a3, a5
; RV32IA-NEXT: beqz a0, .LBB44_1
; RV32IA-NEXT: # %bb.6: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB44_2 Depth=1
; RV32IA-NEXT: mv a2, s2
-; RV32IA-NEXT: mv a3, s1
+; RV32IA-NEXT: mv a3, s0
; RV32IA-NEXT: j .LBB44_1
; RV32IA-NEXT: .LBB44_7: # %atomicrmw.end
-; RV32IA-NEXT: mv a0, a5
-; RV32IA-NEXT: mv a1, a4
+; RV32IA-NEXT: mv a0, a4
+; RV32IA-NEXT: mv a1, a5
; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -3488,46 +3488,46 @@ define signext i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lw a5, 0(a0)
-; RV32I-NEXT: lw a4, 4(a0)
-; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: mv s1, a0
+; RV32I-NEXT: lw a4, 0(a0)
+; RV32I-NEXT: lw a5, 4(a0)
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: j .LBB45_2
; RV32I-NEXT: .LBB45_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB45_2 Depth=1
-; RV32I-NEXT: sw a5, 8(sp)
-; RV32I-NEXT: sw a4, 12(sp)
+; RV32I-NEXT: sw a4, 8(sp)
+; RV32I-NEXT: sw a5, 12(sp)
; RV32I-NEXT: addi a1, sp, 8
-; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: li a5, 0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 8(sp)
-; RV32I-NEXT: lw a4, 12(sp)
+; RV32I-NEXT: lw a4, 8(sp)
+; RV32I-NEXT: lw a5, 12(sp)
; RV32I-NEXT: bnez a0, .LBB45_7
; RV32I-NEXT: .LBB45_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT: beq a4, s1, .LBB45_4
+; RV32I-NEXT: beq a5, s0, .LBB45_4
; RV32I-NEXT: # %bb.3: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB45_2 Depth=1
-; RV32I-NEXT: sltu a0, s1, a4
+; RV32I-NEXT: sltu a0, s0, a5
; RV32I-NEXT: j .LBB45_5
; RV32I-NEXT: .LBB45_4: # in Loop: Header=BB45_2 Depth=1
-; RV32I-NEXT: sltu a0, s2, a5
+; RV32I-NEXT: sltu a0, s2, a4
; RV32I-NEXT: .LBB45_5: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB45_2 Depth=1
-; RV32I-NEXT: mv a2, a5
-; RV32I-NEXT: mv a3, a4
+; RV32I-NEXT: mv a2, a4
+; RV32I-NEXT: mv a3, a5
; RV32I-NEXT: bnez a0, .LBB45_1
; RV32I-NEXT: # %bb.6: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB45_2 Depth=1
; RV32I-NEXT: mv a2, s2
-; RV32I-NEXT: mv a3, s1
+; RV32I-NEXT: mv a3, s0
; RV32I-NEXT: j .LBB45_1
; RV32I-NEXT: .LBB45_7: # %atomicrmw.end
-; RV32I-NEXT: mv a0, a5
-; RV32I-NEXT: mv a1, a4
+; RV32I-NEXT: mv a0, a4
+; RV32I-NEXT: mv a1, a5
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -3542,46 +3542,46 @@ define signext i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: mv s0, a0
-; RV32IA-NEXT: lw a5, 0(a0)
-; RV32IA-NEXT: lw a4, 4(a0)
-; RV32IA-NEXT: mv s1, a2
+; RV32IA-NEXT: mv s0, a2
+; RV32IA-NEXT: mv s1, a0
+; RV32IA-NEXT: lw a4, 0(a0)
+; RV32IA-NEXT: lw a5, 4(a0)
; RV32IA-NEXT: mv s2, a1
; RV32IA-NEXT: j .LBB45_2
; RV32IA-NEXT: .LBB45_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB45_2 Depth=1
-; RV32IA-NEXT: sw a5, 8(sp)
-; RV32IA-NEXT: sw a4, 12(sp)
+; RV32IA-NEXT: sw a4, 8(sp)
+; RV32IA-NEXT: sw a5, 12(sp)
; RV32IA-NEXT: addi a1, sp, 8
-; RV32IA-NEXT: mv a0, s0
+; RV32IA-NEXT: mv a0, s1
; RV32IA-NEXT: li a4, 0
; RV32IA-NEXT: li a5, 0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 8(sp)
-; RV32IA-NEXT: lw a4, 12(sp)
+; RV32IA-NEXT: lw a4, 8(sp)
+; RV32IA-NEXT: lw a5, 12(sp)
; RV32IA-NEXT: bnez a0, .LBB45_7
; RV32IA-NEXT: .LBB45_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: beq a4, s1, .LBB45_4
+; RV32IA-NEXT: beq a5, s0, .LBB45_4
; RV32IA-NEXT: # %bb.3: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB45_2 Depth=1
-; RV32IA-NEXT: sltu a0, s1, a4
+; RV32IA-NEXT: sltu a0, s0, a5
; RV32IA-NEXT: j .LBB45_5
; RV32IA-NEXT: .LBB45_4: # in Loop: Header=BB45_2 Depth=1
-; RV32IA-NEXT: sltu a0, s2, a5
+; RV32IA-NEXT: sltu a0, s2, a4
; RV32IA-NEXT: .LBB45_5: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB45_2 Depth=1
-; RV32IA-NEXT: mv a2, a5
-; RV32IA-NEXT: mv a3, a4
+; RV32IA-NEXT: mv a2, a4
+; RV32IA-NEXT: mv a3, a5
; RV32IA-NEXT: bnez a0, .LBB45_1
; RV32IA-NEXT: # %bb.6: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB45_2 Depth=1
; RV32IA-NEXT: mv a2, s2
-; RV32IA-NEXT: mv a3, s1
+; RV32IA-NEXT: mv a3, s0
; RV32IA-NEXT: j .LBB45_1
; RV32IA-NEXT: .LBB45_7: # %atomicrmw.end
-; RV32IA-NEXT: mv a0, a5
-; RV32IA-NEXT: mv a1, a4
+; RV32IA-NEXT: mv a0, a4
+; RV32IA-NEXT: mv a1, a5
; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -3641,46 +3641,46 @@ define signext i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lw a5, 0(a0)
-; RV32I-NEXT: lw a4, 4(a0)
-; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: mv s1, a0
+; RV32I-NEXT: lw a4, 0(a0)
+; RV32I-NEXT: lw a5, 4(a0)
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: j .LBB46_2
; RV32I-NEXT: .LBB46_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB46_2 Depth=1
-; RV32I-NEXT: sw a5, 8(sp)
-; RV32I-NEXT: sw a4, 12(sp)
+; RV32I-NEXT: sw a4, 8(sp)
+; RV32I-NEXT: sw a5, 12(sp)
; RV32I-NEXT: addi a1, sp, 8
-; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: li a5, 0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 8(sp)
-; RV32I-NEXT: lw a4, 12(sp)
+; RV32I-NEXT: lw a4, 8(sp)
+; RV32I-NEXT: lw a5, 12(sp)
; RV32I-NEXT: bnez a0, .LBB46_7
; RV32I-NEXT: .LBB46_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT: beq a4, s1, .LBB46_4
+; RV32I-NEXT: beq a5, s0, .LBB46_4
; RV32I-NEXT: # %bb.3: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB46_2 Depth=1
-; RV32I-NEXT: sltu a0, s1, a4
+; RV32I-NEXT: sltu a0, s0, a5
; RV32I-NEXT: j .LBB46_5
; RV32I-NEXT: .LBB46_4: # in Loop: Header=BB46_2 Depth=1
-; RV32I-NEXT: sltu a0, s2, a5
+; RV32I-NEXT: sltu a0, s2, a4
; RV32I-NEXT: .LBB46_5: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB46_2 Depth=1
-; RV32I-NEXT: mv a2, a5
-; RV32I-NEXT: mv a3, a4
+; RV32I-NEXT: mv a2, a4
+; RV32I-NEXT: mv a3, a5
; RV32I-NEXT: beqz a0, .LBB46_1
; RV32I-NEXT: # %bb.6: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB46_2 Depth=1
; RV32I-NEXT: mv a2, s2
-; RV32I-NEXT: mv a3, s1
+; RV32I-NEXT: mv a3, s0
; RV32I-NEXT: j .LBB46_1
; RV32I-NEXT: .LBB46_7: # %atomicrmw.end
-; RV32I-NEXT: mv a0, a5
-; RV32I-NEXT: mv a1, a4
+; RV32I-NEXT: mv a0, a4
+; RV32I-NEXT: mv a1, a5
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -3695,46 +3695,46 @@ define signext i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: mv s0, a0
-; RV32IA-NEXT: lw a5, 0(a0)
-; RV32IA-NEXT: lw a4, 4(a0)
-; RV32IA-NEXT: mv s1, a2
+; RV32IA-NEXT: mv s0, a2
+; RV32IA-NEXT: mv s1, a0
+; RV32IA-NEXT: lw a4, 0(a0)
+; RV32IA-NEXT: lw a5, 4(a0)
; RV32IA-NEXT: mv s2, a1
; RV32IA-NEXT: j .LBB46_2
; RV32IA-NEXT: .LBB46_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB46_2 Depth=1
-; RV32IA-NEXT: sw a5, 8(sp)
-; RV32IA-NEXT: sw a4, 12(sp)
+; RV32IA-NEXT: sw a4, 8(sp)
+; RV32IA-NEXT: sw a5, 12(sp)
; RV32IA-NEXT: addi a1, sp, 8
-; RV32IA-NEXT: mv a0, s0
+; RV32IA-NEXT: mv a0, s1
; RV32IA-NEXT: li a4, 0
; RV32IA-NEXT: li a5, 0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 8(sp)
-; RV32IA-NEXT: lw a4, 12(sp)
+; RV32IA-NEXT: lw a4, 8(sp)
+; RV32IA-NEXT: lw a5, 12(sp)
; RV32IA-NEXT: bnez a0, .LBB46_7
; RV32IA-NEXT: .LBB46_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: beq a4, s1, .LBB46_4
+; RV32IA-NEXT: beq a5, s0, .LBB46_4
; RV32IA-NEXT: # %bb.3: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB46_2 Depth=1
-; RV32IA-NEXT: sltu a0, s1, a4
+; RV32IA-NEXT: sltu a0, s0, a5
; RV32IA-NEXT: j .LBB46_5
; RV32IA-NEXT: .LBB46_4: # in Loop: Header=BB46_2 Depth=1
-; RV32IA-NEXT: sltu a0, s2, a5
+; RV32IA-NEXT: sltu a0, s2, a4
; RV32IA-NEXT: .LBB46_5: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB46_2 Depth=1
-; RV32IA-NEXT: mv a2, a5
-; RV32IA-NEXT: mv a3, a4
+; RV32IA-NEXT: mv a2, a4
+; RV32IA-NEXT: mv a3, a5
; RV32IA-NEXT: beqz a0, .LBB46_1
; RV32IA-NEXT: # %bb.6: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB46_2 Depth=1
; RV32IA-NEXT: mv a2, s2
-; RV32IA-NEXT: mv a3, s1
+; RV32IA-NEXT: mv a3, s0
; RV32IA-NEXT: j .LBB46_1
; RV32IA-NEXT: .LBB46_7: # %atomicrmw.end
-; RV32IA-NEXT: mv a0, a5
-; RV32IA-NEXT: mv a1, a4
+; RV32IA-NEXT: mv a0, a4
+; RV32IA-NEXT: mv a1, a5
; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/atomicrmw-cond-sub-clamp.ll b/llvm/test/CodeGen/RISCV/atomicrmw-cond-sub-clamp.ll
index f22115130117a8..038ddd427b0319 100644
--- a/llvm/test/CodeGen/RISCV/atomicrmw-cond-sub-clamp.ll
+++ b/llvm/test/CodeGen/RISCV/atomicrmw-cond-sub-clamp.ll
@@ -518,22 +518,22 @@ define i64 @atomicrmw_usub_cond_i64(ptr %ptr, i64 %val) {
; RV32I-NEXT: .cfi_offset s0, -8
; RV32I-NEXT: .cfi_offset s1, -12
; RV32I-NEXT: .cfi_offset s2, -16
-; RV32I-NEXT: mv s0, a0
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: mv s1, a0
; RV32I-NEXT: lw a5, 0(a0)
; RV32I-NEXT: lw a4, 4(a0)
-; RV32I-NEXT: mv s1, a2
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: j .LBB3_3
; RV32I-NEXT: .LBB3_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB3_3 Depth=1
-; RV32I-NEXT: sltu a0, a4, s1
+; RV32I-NEXT: sltu a0, a4, s0
; RV32I-NEXT: .LBB3_2: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB3_3 Depth=1
; RV32I-NEXT: xori a0, a0, 1
; RV32I-NEXT: neg a0, a0
; RV32I-NEXT: and a1, a0, s2
; RV32I-NEXT: sltu a2, a5, a1
-; RV32I-NEXT: and a0, a0, s1
+; RV32I-NEXT: and a0, a0, s0
; RV32I-NEXT: sub a3, a4, a0
; RV32I-NEXT: sub a3, a3, a2
; RV32I-NEXT: sub a2, a5, a1
@@ -542,14 +542,14 @@ define i64 @atomicrmw_usub_cond_i64(ptr %ptr, i64 %val) {
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: li a5, 5
-; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: call __atomic_compare_exchange_8
; RV32I-NEXT: lw a5, 8(sp)
; RV32I-NEXT: lw a4, 12(sp)
; RV32I-NEXT: bnez a0, .LBB3_5
; RV32I-NEXT: .LBB3_3: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT: bne a4, s1, .LBB3_1
+; RV32I-NEXT: bne a4, s0, .LBB3_1
; RV32I-NEXT: # %bb.4: # in Loop: Header=BB3_3 Depth=1
; RV32I-NEXT: sltu a0, a5, s2
; RV32I-NEXT: j .LBB3_2
@@ -580,22 +580,22 @@ define i64 @atomicrmw_usub_cond_i64(ptr %ptr, i64 %val) {
; RV32IA-NEXT: .cfi_offset s0, -8
; RV32IA-NEXT: .cfi_offset s1, -12
; RV32IA-NEXT: .cfi_offset s2, -16
-; RV32IA-NEXT: mv s0, a0
+; RV32IA-NEXT: mv s0, a2
+; RV32IA-NEXT: mv s1, a0
; RV32IA-NEXT: lw a5, 0(a0)
; RV32IA-NEXT: lw a4, 4(a0)
-; RV32IA-NEXT: mv s1, a2
; RV32IA-NEXT: mv s2, a1
; RV32IA-NEXT: j .LBB3_3
; RV32IA-NEXT: .LBB3_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB3_3 Depth=1
-; RV32IA-NEXT: sltu a0, a4, s1
+; RV32IA-NEXT: sltu a0, a4, s0
; RV32IA-NEXT: .LBB3_2: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB3_3 Depth=1
; RV32IA-NEXT: xori a0, a0, 1
; RV32IA-NEXT: neg a0, a0
; RV32IA-NEXT: and a1, a0, s2
; RV32IA-NEXT: sltu a2, a5, a1
-; RV32IA-NEXT: and a0, a0, s1
+; RV32IA-NEXT: and a0, a0, s0
; RV32IA-NEXT: sub a3, a4, a0
; RV32IA-NEXT: sub a3, a3, a2
; RV32IA-NEXT: sub a2, a5, a1
@@ -604,14 +604,14 @@ define i64 @atomicrmw_usub_cond_i64(ptr %ptr, i64 %val) {
; RV32IA-NEXT: addi a1, sp, 8
; RV32IA-NEXT: li a4, 5
; RV32IA-NEXT: li a5, 5
-; RV32IA-NEXT: mv a0, s0
+; RV32IA-NEXT: mv a0, s1
; RV32IA-NEXT: call __atomic_compare_exchange_8
; RV32IA-NEXT: lw a5, 8(sp)
; RV32IA-NEXT: lw a4, 12(sp)
; RV32IA-NEXT: bnez a0, .LBB3_5
; RV32IA-NEXT: .LBB3_3: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: bne a4, s1, .LBB3_1
+; RV32IA-NEXT: bne a4, s0, .LBB3_1
; RV32IA-NEXT: # %bb.4: # in Loop: Header=BB3_3 Depth=1
; RV32IA-NEXT: sltu a0, a5, s2
; RV32IA-NEXT: j .LBB3_2
@@ -741,11 +741,11 @@ define i8 @atomicrmw_usub_sat_i8(ptr %ptr, i8 %val) {
; RV32IA-LABEL: atomicrmw_usub_sat_i8:
; RV32IA: # %bb.0:
; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a3, a0, 3
-; RV32IA-NEXT: andi a0, a3, 24
-; RV32IA-NEXT: li a5, 255
+; RV32IA-NEXT: slli a0, a0, 3
+; RV32IA-NEXT: li a3, 255
+; RV32IA-NEXT: sll a3, a3, a0
; RV32IA-NEXT: lw a4, 0(a2)
-; RV32IA-NEXT: sll a3, a5, a3
+; RV32IA-NEXT: andi a0, a0, 24
; RV32IA-NEXT: not a3, a3
; RV32IA-NEXT: andi a1, a1, 255
; RV32IA-NEXT: .LBB4_1: # %atomicrmw.start
@@ -818,11 +818,11 @@ define i8 @atomicrmw_usub_sat_i8(ptr %ptr, i8 %val) {
; RV64IA-LABEL: atomicrmw_usub_sat_i8:
; RV64IA: # %bb.0:
; RV64IA-NEXT: andi a2, a0, -4
-; RV64IA-NEXT: slli a4, a0, 3
-; RV64IA-NEXT: andi a0, a4, 24
-; RV64IA-NEXT: li a5, 255
+; RV64IA-NEXT: slli a0, a0, 3
+; RV64IA-NEXT: li a3, 255
+; RV64IA-NEXT: sllw a4, a3, a0
; RV64IA-NEXT: lw a3, 0(a2)
-; RV64IA-NEXT: sllw a4, a5, a4
+; RV64IA-NEXT: andi a0, a0, 24
; RV64IA-NEXT: not a4, a4
; RV64IA-NEXT: andi a1, a1, 255
; RV64IA-NEXT: .LBB4_1: # %atomicrmw.start
@@ -1172,43 +1172,43 @@ define i64 @atomicrmw_usub_sat_i64(ptr %ptr, i64 %val) {
; RV32I-NEXT: .cfi_offset s0, -8
; RV32I-NEXT: .cfi_offset s1, -12
; RV32I-NEXT: .cfi_offset s2, -16
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lw a5, 0(a0)
-; RV32I-NEXT: lw a4, 4(a0)
-; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: mv s1, a0
+; RV32I-NEXT: lw a4, 0(a0)
+; RV32I-NEXT: lw a5, 4(a0)
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: j .LBB7_3
; RV32I-NEXT: .LBB7_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB7_3 Depth=1
-; RV32I-NEXT: sltu a2, a4, a0
+; RV32I-NEXT: sltu a2, a5, a0
; RV32I-NEXT: .LBB7_2: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB7_3 Depth=1
; RV32I-NEXT: addi a3, a2, -1
; RV32I-NEXT: and a2, a3, a1
; RV32I-NEXT: and a3, a3, a0
-; RV32I-NEXT: sw a5, 8(sp)
-; RV32I-NEXT: sw a4, 12(sp)
+; RV32I-NEXT: sw a4, 8(sp)
+; RV32I-NEXT: sw a5, 12(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: li a5, 5
-; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 8(sp)
-; RV32I-NEXT: lw a4, 12(sp)
+; RV32I-NEXT: lw a4, 8(sp)
+; RV32I-NEXT: lw a5, 12(sp)
; RV32I-NEXT: bnez a0, .LBB7_5
; RV32I-NEXT: .LBB7_3: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT: sltu a0, a5, s2
-; RV32I-NEXT: sub a1, a4, s1
+; RV32I-NEXT: sltu a0, a4, s2
+; RV32I-NEXT: sub a1, a5, s0
; RV32I-NEXT: sub a0, a1, a0
-; RV32I-NEXT: sub a1, a5, s2
-; RV32I-NEXT: bne a0, a4, .LBB7_1
+; RV32I-NEXT: sub a1, a4, s2
+; RV32I-NEXT: bne a0, a5, .LBB7_1
; RV32I-NEXT: # %bb.4: # in Loop: Header=BB7_3 Depth=1
-; RV32I-NEXT: sltu a2, a5, a1
+; RV32I-NEXT: sltu a2, a4, a1
; RV32I-NEXT: j .LBB7_2
; RV32I-NEXT: .LBB7_5: # %atomicrmw.end
-; RV32I-NEXT: mv a0, a5
-; RV32I-NEXT: mv a1, a4
+; RV32I-NEXT: mv a0, a4
+; RV32I-NEXT: mv a1, a5
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -1233,43 +1233,43 @@ define i64 @atomicrmw_usub_sat_i64(ptr %ptr, i64 %val) {
; RV32IA-NEXT: .cfi_offset s0, -8
; RV32IA-NEXT: .cfi_offset s1, -12
; RV32IA-NEXT: .cfi_offset s2, -16
-; RV32IA-NEXT: mv s0, a0
-; RV32IA-NEXT: lw a5, 0(a0)
-; RV32IA-NEXT: lw a4, 4(a0)
-; RV32IA-NEXT: mv s1, a2
+; RV32IA-NEXT: mv s0, a2
+; RV32IA-NEXT: mv s1, a0
+; RV32IA-NEXT: lw a4, 0(a0)
+; RV32IA-NEXT: lw a5, 4(a0)
; RV32IA-NEXT: mv s2, a1
; RV32IA-NEXT: j .LBB7_3
; RV32IA-NEXT: .LBB7_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB7_3 Depth=1
-; RV32IA-NEXT: sltu a2, a4, a0
+; RV32IA-NEXT: sltu a2, a5, a0
; RV32IA-NEXT: .LBB7_2: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB7_3 Depth=1
; RV32IA-NEXT: addi a3, a2, -1
; RV32IA-NEXT: and a2, a3, a1
; RV32IA-NEXT: and a3, a3, a0
-; RV32IA-NEXT: sw a5, 8(sp)
-; RV32IA-NEXT: sw a4, 12(sp)
+; RV32IA-NEXT: sw a4, 8(sp)
+; RV32IA-NEXT: sw a5, 12(sp)
; RV32IA-NEXT: addi a1, sp, 8
; RV32IA-NEXT: li a4, 5
; RV32IA-NEXT: li a5, 5
-; RV32IA-NEXT: mv a0, s0
+; RV32IA-NEXT: mv a0, s1
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 8(sp)
-; RV32IA-NEXT: lw a4, 12(sp)
+; RV32IA-NEXT: lw a4, 8(sp)
+; RV32IA-NEXT: lw a5, 12(sp)
; RV32IA-NEXT: bnez a0, .LBB7_5
; RV32IA-NEXT: .LBB7_3: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: sltu a0, a5, s2
-; RV32IA-NEXT: sub a1, a4, s1
+; RV32IA-NEXT: sltu a0, a4, s2
+; RV32IA-NEXT: sub a1, a5, s0
; RV32IA-NEXT: sub a0, a1, a0
-; RV32IA-NEXT: sub a1, a5, s2
-; RV32IA-NEXT: bne a0, a4, .LBB7_1
+; RV32IA-NEXT: sub a1, a4, s2
+; RV32IA-NEXT: bne a0, a5, .LBB7_1
; RV32IA-NEXT: # %bb.4: # in Loop: Header=BB7_3 Depth=1
-; RV32IA-NEXT: sltu a2, a5, a1
+; RV32IA-NEXT: sltu a2, a4, a1
; RV32IA-NEXT: j .LBB7_2
; RV32IA-NEXT: .LBB7_5: # %atomicrmw.end
-; RV32IA-NEXT: mv a0, a5
-; RV32IA-NEXT: mv a1, a4
+; RV32IA-NEXT: mv a0, a4
+; RV32IA-NEXT: mv a1, a5
; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll
index 14a1e6b9827174..de85c8ca17c15e 100644
--- a/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll
+++ b/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll
@@ -57,11 +57,11 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) {
; RV32IA-LABEL: atomicrmw_uinc_wrap_i8:
; RV32IA: # %bb.0:
; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a3, a0, 3
-; RV32IA-NEXT: andi a0, a3, 24
-; RV32IA-NEXT: li a5, 255
+; RV32IA-NEXT: slli a0, a0, 3
+; RV32IA-NEXT: li a3, 255
+; RV32IA-NEXT: sll a3, a3, a0
; RV32IA-NEXT: lw a4, 0(a2)
-; RV32IA-NEXT: sll a3, a5, a3
+; RV32IA-NEXT: andi a0, a0, 24
; RV32IA-NEXT: not a3, a3
; RV32IA-NEXT: andi a1, a1, 255
; RV32IA-NEXT: .LBB0_1: # %atomicrmw.start
@@ -135,11 +135,11 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) {
; RV64IA-LABEL: atomicrmw_uinc_wrap_i8:
; RV64IA: # %bb.0:
; RV64IA-NEXT: andi a2, a0, -4
-; RV64IA-NEXT: slli a4, a0, 3
-; RV64IA-NEXT: andi a0, a4, 24
-; RV64IA-NEXT: li a5, 255
+; RV64IA-NEXT: slli a0, a0, 3
+; RV64IA-NEXT: li a3, 255
+; RV64IA-NEXT: sllw a4, a3, a0
; RV64IA-NEXT: lw a3, 0(a2)
-; RV64IA-NEXT: sllw a4, a5, a4
+; RV64IA-NEXT: andi a0, a0, 24
; RV64IA-NEXT: not a4, a4
; RV64IA-NEXT: andi a1, a1, 255
; RV64IA-NEXT: .LBB0_1: # %atomicrmw.start
@@ -493,42 +493,42 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) {
; RV32I-NEXT: .cfi_offset s0, -8
; RV32I-NEXT: .cfi_offset s1, -12
; RV32I-NEXT: .cfi_offset s2, -16
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lw a5, 0(a0)
-; RV32I-NEXT: lw a4, 4(a0)
-; RV32I-NEXT: mv s1, a2
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: mv s1, a0
+; RV32I-NEXT: lw a4, 0(a0)
+; RV32I-NEXT: lw a5, 4(a0)
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: j .LBB3_3
; RV32I-NEXT: .LBB3_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB3_3 Depth=1
-; RV32I-NEXT: sltu a0, a4, s1
+; RV32I-NEXT: sltu a0, a5, s0
; RV32I-NEXT: .LBB3_2: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB3_3 Depth=1
-; RV32I-NEXT: addi a1, a5, 1
+; RV32I-NEXT: addi a1, a4, 1
; RV32I-NEXT: seqz a2, a1
-; RV32I-NEXT: add a3, a4, a2
+; RV32I-NEXT: add a3, a5, a2
; RV32I-NEXT: neg a0, a0
; RV32I-NEXT: and a2, a0, a1
; RV32I-NEXT: and a3, a0, a3
-; RV32I-NEXT: sw a5, 8(sp)
-; RV32I-NEXT: sw a4, 12(sp)
+; RV32I-NEXT: sw a4, 8(sp)
+; RV32I-NEXT: sw a5, 12(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: li a5, 5
-; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 8(sp)
-; RV32I-NEXT: lw a4, 12(sp)
+; RV32I-NEXT: lw a4, 8(sp)
+; RV32I-NEXT: lw a5, 12(sp)
; RV32I-NEXT: bnez a0, .LBB3_5
; RV32I-NEXT: .LBB3_3: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT: bne a4, s1, .LBB3_1
+; RV32I-NEXT: bne a5, s0, .LBB3_1
; RV32I-NEXT: # %bb.4: # in Loop: Header=BB3_3 Depth=1
-; RV32I-NEXT: sltu a0, a5, s2
+; RV32I-NEXT: sltu a0, a4, s2
; RV32I-NEXT: j .LBB3_2
; RV32I-NEXT: .LBB3_5: # %atomicrmw.end
-; RV32I-NEXT: mv a0, a5
-; RV32I-NEXT: mv a1, a4
+; RV32I-NEXT: mv a0, a4
+; RV32I-NEXT: mv a1, a5
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -553,42 +553,42 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) {
; RV32IA-NEXT: .cfi_offset s0, -8
; RV32IA-NEXT: .cfi_offset s1, -12
; RV32IA-NEXT: .cfi_offset s2, -16
-; RV32IA-NEXT: mv s0, a0
-; RV32IA-NEXT: lw a5, 0(a0)
-; RV32IA-NEXT: lw a4, 4(a0)
-; RV32IA-NEXT: mv s1, a2
+; RV32IA-NEXT: mv s0, a2
+; RV32IA-NEXT: mv s1, a0
+; RV32IA-NEXT: lw a4, 0(a0)
+; RV32IA-NEXT: lw a5, 4(a0)
; RV32IA-NEXT: mv s2, a1
; RV32IA-NEXT: j .LBB3_3
; RV32IA-NEXT: .LBB3_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB3_3 Depth=1
-; RV32IA-NEXT: sltu a0, a4, s1
+; RV32IA-NEXT: sltu a0, a5, s0
; RV32IA-NEXT: .LBB3_2: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB3_3 Depth=1
-; RV32IA-NEXT: addi a1, a5, 1
+; RV32IA-NEXT: addi a1, a4, 1
; RV32IA-NEXT: seqz a2, a1
-; RV32IA-NEXT: add a3, a4, a2
+; RV32IA-NEXT: add a3, a5, a2
; RV32IA-NEXT: neg a0, a0
; RV32IA-NEXT: and a2, a0, a1
; RV32IA-NEXT: and a3, a0, a3
-; RV32IA-NEXT: sw a5, 8(sp)
-; RV32IA-NEXT: sw a4, 12(sp)
+; RV32IA-NEXT: sw a4, 8(sp)
+; RV32IA-NEXT: sw a5, 12(sp)
; RV32IA-NEXT: addi a1, sp, 8
; RV32IA-NEXT: li a4, 5
; RV32IA-NEXT: li a5, 5
-; RV32IA-NEXT: mv a0, s0
+; RV32IA-NEXT: mv a0, s1
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 8(sp)
-; RV32IA-NEXT: lw a4, 12(sp)
+; RV32IA-NEXT: lw a4, 8(sp)
+; RV32IA-NEXT: lw a5, 12(sp)
; RV32IA-NEXT: bnez a0, .LBB3_5
; RV32IA-NEXT: .LBB3_3: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: bne a4, s1, .LBB3_1
+; RV32IA-NEXT: bne a5, s0, .LBB3_1
; RV32IA-NEXT: # %bb.4: # in Loop: Header=BB3_3 Depth=1
-; RV32IA-NEXT: sltu a0, a5, s2
+; RV32IA-NEXT: sltu a0, a4, s2
; RV32IA-NEXT: j .LBB3_2
; RV32IA-NEXT: .LBB3_5: # %atomicrmw.end
-; RV32IA-NEXT: mv a0, a5
-; RV32IA-NEXT: mv a1, a4
+; RV32IA-NEXT: mv a0, a4
+; RV32IA-NEXT: mv a1, a5
; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -1281,10 +1281,10 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) {
; RV32I-NEXT: .cfi_offset s0, -8
; RV32I-NEXT: .cfi_offset s1, -12
; RV32I-NEXT: .cfi_offset s2, -16
-; RV32I-NEXT: mv s0, a0
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: mv s1, a0
; RV32I-NEXT: lw a5, 0(a0)
; RV32I-NEXT: lw a4, 4(a0)
-; RV32I-NEXT: mv s1, a2
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: j .LBB7_2
; RV32I-NEXT: .LBB7_1: # %atomicrmw.start
@@ -1294,17 +1294,17 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) {
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: li a5, 5
-; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: call __atomic_compare_exchange_8
; RV32I-NEXT: lw a5, 8(sp)
; RV32I-NEXT: lw a4, 12(sp)
; RV32I-NEXT: bnez a0, .LBB7_7
; RV32I-NEXT: .LBB7_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT: beq a4, s1, .LBB7_4
+; RV32I-NEXT: beq a4, s0, .LBB7_4
; RV32I-NEXT: # %bb.3: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB7_2 Depth=1
-; RV32I-NEXT: sltu a0, s1, a4
+; RV32I-NEXT: sltu a0, s0, a4
; RV32I-NEXT: j .LBB7_5
; RV32I-NEXT: .LBB7_4: # in Loop: Header=BB7_2 Depth=1
; RV32I-NEXT: sltu a0, s2, a5
@@ -1314,7 +1314,7 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) {
; RV32I-NEXT: seqz a1, a1
; RV32I-NEXT: or a0, a1, a0
; RV32I-NEXT: mv a2, s2
-; RV32I-NEXT: mv a3, s1
+; RV32I-NEXT: mv a3, s0
; RV32I-NEXT: bnez a0, .LBB7_1
; RV32I-NEXT: # %bb.6: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB7_2 Depth=1
@@ -1349,10 +1349,10 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) {
; RV32IA-NEXT: .cfi_offset s0, -8
; RV32IA-NEXT: .cfi_offset s1, -12
; RV32IA-NEXT: .cfi_offset s2, -16
-; RV32IA-NEXT: mv s0, a0
+; RV32IA-NEXT: mv s0, a2
+; RV32IA-NEXT: mv s1, a0
; RV32IA-NEXT: lw a5, 0(a0)
; RV32IA-NEXT: lw a4, 4(a0)
-; RV32IA-NEXT: mv s1, a2
; RV32IA-NEXT: mv s2, a1
; RV32IA-NEXT: j .LBB7_2
; RV32IA-NEXT: .LBB7_1: # %atomicrmw.start
@@ -1362,17 +1362,17 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) {
; RV32IA-NEXT: addi a1, sp, 8
; RV32IA-NEXT: li a4, 5
; RV32IA-NEXT: li a5, 5
-; RV32IA-NEXT: mv a0, s0
+; RV32IA-NEXT: mv a0, s1
; RV32IA-NEXT: call __atomic_compare_exchange_8
; RV32IA-NEXT: lw a5, 8(sp)
; RV32IA-NEXT: lw a4, 12(sp)
; RV32IA-NEXT: bnez a0, .LBB7_7
; RV32IA-NEXT: .LBB7_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: beq a4, s1, .LBB7_4
+; RV32IA-NEXT: beq a4, s0, .LBB7_4
; RV32IA-NEXT: # %bb.3: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB7_2 Depth=1
-; RV32IA-NEXT: sltu a0, s1, a4
+; RV32IA-NEXT: sltu a0, s0, a4
; RV32IA-NEXT: j .LBB7_5
; RV32IA-NEXT: .LBB7_4: # in Loop: Header=BB7_2 Depth=1
; RV32IA-NEXT: sltu a0, s2, a5
@@ -1382,7 +1382,7 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) {
; RV32IA-NEXT: seqz a1, a1
; RV32IA-NEXT: or a0, a1, a0
; RV32IA-NEXT: mv a2, s2
-; RV32IA-NEXT: mv a3, s1
+; RV32IA-NEXT: mv a3, s0
; RV32IA-NEXT: bnez a0, .LBB7_1
; RV32IA-NEXT: # %bb.6: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB7_2 Depth=1
diff --git a/llvm/test/CodeGen/RISCV/bf16-promote.ll b/llvm/test/CodeGen/RISCV/bf16-promote.ll
index c17450a80de963..296d94f4a4cd46 100644
--- a/llvm/test/CodeGen/RISCV/bf16-promote.ll
+++ b/llvm/test/CodeGen/RISCV/bf16-promote.ll
@@ -110,13 +110,13 @@ define void @test_fadd(ptr %p, ptr %q) nounwind {
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
-; RV64-NEXT: lhu a1, 0(a1)
; RV64-NEXT: mv s0, a0
-; RV64-NEXT: lhu a0, 0(a0)
-; RV64-NEXT: slli a1, a1, 16
-; RV64-NEXT: fmv.w.x fa5, a1
+; RV64-NEXT: lhu a0, 0(a1)
+; RV64-NEXT: lhu a1, 0(s0)
; RV64-NEXT: slli a0, a0, 16
-; RV64-NEXT: fmv.w.x fa4, a0
+; RV64-NEXT: fmv.w.x fa5, a0
+; RV64-NEXT: slli a1, a1, 16
+; RV64-NEXT: fmv.w.x fa4, a1
; RV64-NEXT: fadd.s fa0, fa4, fa5
; RV64-NEXT: call __truncsfbf2
; RV64-NEXT: fmv.x.w a0, fa0
@@ -131,13 +131,13 @@ define void @test_fadd(ptr %p, ptr %q) nounwind {
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32-NEXT: lhu a1, 0(a1)
; RV32-NEXT: mv s0, a0
-; RV32-NEXT: lhu a0, 0(a0)
-; RV32-NEXT: slli a1, a1, 16
-; RV32-NEXT: fmv.w.x fa5, a1
+; RV32-NEXT: lhu a0, 0(a1)
+; RV32-NEXT: lhu a1, 0(s0)
; RV32-NEXT: slli a0, a0, 16
-; RV32-NEXT: fmv.w.x fa4, a0
+; RV32-NEXT: fmv.w.x fa5, a0
+; RV32-NEXT: slli a1, a1, 16
+; RV32-NEXT: fmv.w.x fa4, a1
; RV32-NEXT: fadd.s fa0, fa4, fa5
; RV32-NEXT: call __truncsfbf2
; RV32-NEXT: fmv.x.w a0, fa0
diff --git a/llvm/test/CodeGen/RISCV/bfloat-convert.ll b/llvm/test/CodeGen/RISCV/bfloat-convert.ll
index c2c21a30d4e4c5..c09acf5efb4ab2 100644
--- a/llvm/test/CodeGen/RISCV/bfloat-convert.ll
+++ b/llvm/test/CodeGen/RISCV/bfloat-convert.ll
@@ -52,12 +52,12 @@ define i16 @fcvt_si_bf16_sat(bfloat %a) nounwind {
; CHECK32ZFBFMIN: # %bb.0: # %start
; CHECK32ZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0
; CHECK32ZFBFMIN-NEXT: feq.s a0, fa5, fa5
-; CHECK32ZFBFMIN-NEXT: neg a0, a0
; CHECK32ZFBFMIN-NEXT: lui a1, %hi(.LCPI1_0)
; CHECK32ZFBFMIN-NEXT: flw fa4, %lo(.LCPI1_0)(a1)
; CHECK32ZFBFMIN-NEXT: lui a1, 815104
; CHECK32ZFBFMIN-NEXT: fmv.w.x fa3, a1
; CHECK32ZFBFMIN-NEXT: fmax.s fa5, fa5, fa3
+; CHECK32ZFBFMIN-NEXT: neg a0, a0
; CHECK32ZFBFMIN-NEXT: fmin.s fa5, fa5, fa4
; CHECK32ZFBFMIN-NEXT: fcvt.w.s a1, fa5, rtz
; CHECK32ZFBFMIN-NEXT: and a0, a0, a1
@@ -84,12 +84,12 @@ define i16 @fcvt_si_bf16_sat(bfloat %a) nounwind {
; CHECK64ZFBFMIN: # %bb.0: # %start
; CHECK64ZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0
; CHECK64ZFBFMIN-NEXT: feq.s a0, fa5, fa5
-; CHECK64ZFBFMIN-NEXT: neg a0, a0
; CHECK64ZFBFMIN-NEXT: lui a1, %hi(.LCPI1_0)
; CHECK64ZFBFMIN-NEXT: flw fa4, %lo(.LCPI1_0)(a1)
; CHECK64ZFBFMIN-NEXT: lui a1, 815104
; CHECK64ZFBFMIN-NEXT: fmv.w.x fa3, a1
; CHECK64ZFBFMIN-NEXT: fmax.s fa5, fa5, fa3
+; CHECK64ZFBFMIN-NEXT: neg a0, a0
; CHECK64ZFBFMIN-NEXT: fmin.s fa5, fa5, fa4
; CHECK64ZFBFMIN-NEXT: fcvt.l.s a1, fa5, rtz
; CHECK64ZFBFMIN-NEXT: and a0, a0, a1
@@ -163,10 +163,10 @@ define i16 @fcvt_ui_bf16_sat(bfloat %a) nounwind {
;
; RV32ID-LABEL: fcvt_ui_bf16_sat:
; RV32ID: # %bb.0: # %start
+; RV32ID-NEXT: lui a0, %hi(.LCPI3_0)
+; RV32ID-NEXT: flw fa5, %lo(.LCPI3_0)(a0)
; RV32ID-NEXT: fmv.x.w a0, fa0
; RV32ID-NEXT: slli a0, a0, 16
-; RV32ID-NEXT: lui a1, %hi(.LCPI3_0)
-; RV32ID-NEXT: flw fa5, %lo(.LCPI3_0)(a1)
; RV32ID-NEXT: fmv.w.x fa4, a0
; RV32ID-NEXT: fmv.w.x fa3, zero
; RV32ID-NEXT: fmax.s fa4, fa4, fa3
@@ -187,10 +187,10 @@ define i16 @fcvt_ui_bf16_sat(bfloat %a) nounwind {
;
; RV64ID-LABEL: fcvt_ui_bf16_sat:
; RV64ID: # %bb.0: # %start
+; RV64ID-NEXT: lui a0, %hi(.LCPI3_0)
+; RV64ID-NEXT: flw fa5, %lo(.LCPI3_0)(a0)
; RV64ID-NEXT: fmv.x.w a0, fa0
; RV64ID-NEXT: slli a0, a0, 16
-; RV64ID-NEXT: lui a1, %hi(.LCPI3_0)
-; RV64ID-NEXT: flw fa5, %lo(.LCPI3_0)(a1)
; RV64ID-NEXT: fmv.w.x fa4, a0
; RV64ID-NEXT: fmv.w.x fa3, zero
; RV64ID-NEXT: fmax.s fa4, fa4, fa3
@@ -1251,7 +1251,7 @@ define double @fcvt_d_bf16(bfloat %a) nounwind {
;
; R32IDZFBFMIN-LABEL: fcvt_d_bf16:
; R32IDZFBFMIN: # %bb.0:
-; R32IDZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0
+; R32IDZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0, dyn
; R32IDZFBFMIN-NEXT: fcvt.d.s fa0, fa5
; R32IDZFBFMIN-NEXT: ret
;
@@ -1275,7 +1275,7 @@ define double @fcvt_d_bf16(bfloat %a) nounwind {
;
; RV64IDZFBFMIN-LABEL: fcvt_d_bf16:
; RV64IDZFBFMIN: # %bb.0:
-; RV64IDZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0
+; RV64IDZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0, dyn
; RV64IDZFBFMIN-NEXT: fcvt.d.s fa0, fa5
; RV64IDZFBFMIN-NEXT: ret
;
diff --git a/llvm/test/CodeGen/RISCV/bittest.ll b/llvm/test/CodeGen/RISCV/bittest.ll
index d280e5ee46b7c0..d69ab0550a0344 100644
--- a/llvm/test/CodeGen/RISCV/bittest.ll
+++ b/llvm/test/CodeGen/RISCV/bittest.ll
@@ -751,10 +751,10 @@ define signext i32 @bit_31_nz_select_i32(i32 signext %a, i32 signext %b, i32 sig
define i64 @bit_10_z_select_i64(i64 %a, i64 %b, i64 %c) {
; RV32-LABEL: bit_10_z_select_i64:
; RV32: # %bb.0:
-; RV32-NEXT: andi a6, a0, 1024
; RV32-NEXT: mv a1, a3
+; RV32-NEXT: andi a3, a0, 1024
; RV32-NEXT: mv a0, a2
-; RV32-NEXT: beqz a6, .LBB23_2
+; RV32-NEXT: beqz a3, .LBB23_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a0, a4
; RV32-NEXT: mv a1, a5
@@ -779,11 +779,11 @@ define i64 @bit_10_z_select_i64(i64 %a, i64 %b, i64 %c) {
define i64 @bit_10_nz_select_i64(i64 %a, i64 %b, i64 %c) {
; RV32I-LABEL: bit_10_nz_select_i64:
; RV32I: # %bb.0:
-; RV32I-NEXT: slli a0, a0, 21
-; RV32I-NEXT: srli a6, a0, 31
; RV32I-NEXT: mv a1, a3
+; RV32I-NEXT: slli a0, a0, 21
+; RV32I-NEXT: srli a3, a0, 31
; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: bnez a6, .LBB24_2
+; RV32I-NEXT: bnez a3, .LBB24_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: mv a0, a4
; RV32I-NEXT: mv a1, a5
@@ -802,10 +802,10 @@ define i64 @bit_10_nz_select_i64(i64 %a, i64 %b, i64 %c) {
;
; RV32ZBS-LABEL: bit_10_nz_select_i64:
; RV32ZBS: # %bb.0:
-; RV32ZBS-NEXT: bexti a6, a0, 10
; RV32ZBS-NEXT: mv a1, a3
+; RV32ZBS-NEXT: bexti a3, a0, 10
; RV32ZBS-NEXT: mv a0, a2
-; RV32ZBS-NEXT: bnez a6, .LBB24_2
+; RV32ZBS-NEXT: bnez a3, .LBB24_2
; RV32ZBS-NEXT: # %bb.1:
; RV32ZBS-NEXT: mv a0, a4
; RV32ZBS-NEXT: mv a1, a5
@@ -814,10 +814,10 @@ define i64 @bit_10_nz_select_i64(i64 %a, i64 %b, i64 %c) {
;
; RV32XTHEADBS-LABEL: bit_10_nz_select_i64:
; RV32XTHEADBS: # %bb.0:
-; RV32XTHEADBS-NEXT: th.tst a6, a0, 10
; RV32XTHEADBS-NEXT: mv a1, a3
+; RV32XTHEADBS-NEXT: th.tst a3, a0, 10
; RV32XTHEADBS-NEXT: mv a0, a2
-; RV32XTHEADBS-NEXT: bnez a6, .LBB24_2
+; RV32XTHEADBS-NEXT: bnez a3, .LBB24_2
; RV32XTHEADBS-NEXT: # %bb.1:
; RV32XTHEADBS-NEXT: mv a0, a4
; RV32XTHEADBS-NEXT: mv a1, a5
@@ -832,10 +832,10 @@ define i64 @bit_10_nz_select_i64(i64 %a, i64 %b, i64 %c) {
define i64 @bit_11_z_select_i64(i64 %a, i64 %b, i64 %c) {
; RV32-LABEL: bit_11_z_select_i64:
; RV32: # %bb.0:
-; RV32-NEXT: slli a6, a0, 20
; RV32-NEXT: mv a1, a3
+; RV32-NEXT: slli a3, a0, 20
; RV32-NEXT: mv a0, a2
-; RV32-NEXT: bgez a6, .LBB25_2
+; RV32-NEXT: bgez a3, .LBB25_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a0, a4
; RV32-NEXT: mv a1, a5
@@ -860,11 +860,11 @@ define i64 @bit_11_z_select_i64(i64 %a, i64 %b, i64 %c) {
define i64 @bit_11_nz_select_i64(i64 %a, i64 %b, i64 %c) {
; RV32I-LABEL: bit_11_nz_select_i64:
; RV32I: # %bb.0:
-; RV32I-NEXT: slli a0, a0, 20
-; RV32I-NEXT: srli a6, a0, 31
; RV32I-NEXT: mv a1, a3
+; RV32I-NEXT: slli a0, a0, 20
+; RV32I-NEXT: srli a3, a0, 31
; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: bnez a6, .LBB26_2
+; RV32I-NEXT: bnez a3, .LBB26_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: mv a0, a4
; RV32I-NEXT: mv a1, a5
@@ -883,10 +883,10 @@ define i64 @bit_11_nz_select_i64(i64 %a, i64 %b, i64 %c) {
;
; RV32ZBS-LABEL: bit_11_nz_select_i64:
; RV32ZBS: # %bb.0:
-; RV32ZBS-NEXT: bexti a6, a0, 11
; RV32ZBS-NEXT: mv a1, a3
+; RV32ZBS-NEXT: bexti a3, a0, 11
; RV32ZBS-NEXT: mv a0, a2
-; RV32ZBS-NEXT: bnez a6, .LBB26_2
+; RV32ZBS-NEXT: bnez a3, .LBB26_2
; RV32ZBS-NEXT: # %bb.1:
; RV32ZBS-NEXT: mv a0, a4
; RV32ZBS-NEXT: mv a1, a5
@@ -895,10 +895,10 @@ define i64 @bit_11_nz_select_i64(i64 %a, i64 %b, i64 %c) {
;
; RV32XTHEADBS-LABEL: bit_11_nz_select_i64:
; RV32XTHEADBS: # %bb.0:
-; RV32XTHEADBS-NEXT: th.tst a6, a0, 11
; RV32XTHEADBS-NEXT: mv a1, a3
+; RV32XTHEADBS-NEXT: th.tst a3, a0, 11
; RV32XTHEADBS-NEXT: mv a0, a2
-; RV32XTHEADBS-NEXT: bnez a6, .LBB26_2
+; RV32XTHEADBS-NEXT: bnez a3, .LBB26_2
; RV32XTHEADBS-NEXT: # %bb.1:
; RV32XTHEADBS-NEXT: mv a0, a4
; RV32XTHEADBS-NEXT: mv a1, a5
@@ -913,10 +913,10 @@ define i64 @bit_11_nz_select_i64(i64 %a, i64 %b, i64 %c) {
define i64 @bit_20_z_select_i64(i64 %a, i64 %b, i64 %c) {
; RV32-LABEL: bit_20_z_select_i64:
; RV32: # %bb.0:
-; RV32-NEXT: slli a6, a0, 11
; RV32-NEXT: mv a1, a3
+; RV32-NEXT: slli a3, a0, 11
; RV32-NEXT: mv a0, a2
-; RV32-NEXT: bgez a6, .LBB27_2
+; RV32-NEXT: bgez a3, .LBB27_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a0, a4
; RV32-NEXT: mv a1, a5
@@ -941,11 +941,11 @@ define i64 @bit_20_z_select_i64(i64 %a, i64 %b, i64 %c) {
define i64 @bit_20_nz_select_i64(i64 %a, i64 %b, i64 %c) {
; RV32I-LABEL: bit_20_nz_select_i64:
; RV32I: # %bb.0:
-; RV32I-NEXT: slli a0, a0, 11
-; RV32I-NEXT: srli a6, a0, 31
; RV32I-NEXT: mv a1, a3
+; RV32I-NEXT: slli a0, a0, 11
+; RV32I-NEXT: srli a3, a0, 31
; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: bnez a6, .LBB28_2
+; RV32I-NEXT: bnez a3, .LBB28_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: mv a0, a4
; RV32I-NEXT: mv a1, a5
@@ -964,10 +964,10 @@ define i64 @bit_20_nz_select_i64(i64 %a, i64 %b, i64 %c) {
;
; RV32ZBS-LABEL: bit_20_nz_select_i64:
; RV32ZBS: # %bb.0:
-; RV32ZBS-NEXT: bexti a6, a0, 20
; RV32ZBS-NEXT: mv a1, a3
+; RV32ZBS-NEXT: bexti a3, a0, 20
; RV32ZBS-NEXT: mv a0, a2
-; RV32ZBS-NEXT: bnez a6, .LBB28_2
+; RV32ZBS-NEXT: bnez a3, .LBB28_2
; RV32ZBS-NEXT: # %bb.1:
; RV32ZBS-NEXT: mv a0, a4
; RV32ZBS-NEXT: mv a1, a5
@@ -976,10 +976,10 @@ define i64 @bit_20_nz_select_i64(i64 %a, i64 %b, i64 %c) {
;
; RV32XTHEADBS-LABEL: bit_20_nz_select_i64:
; RV32XTHEADBS: # %bb.0:
-; RV32XTHEADBS-NEXT: th.tst a6, a0, 20
; RV32XTHEADBS-NEXT: mv a1, a3
+; RV32XTHEADBS-NEXT: th.tst a3, a0, 20
; RV32XTHEADBS-NEXT: mv a0, a2
-; RV32XTHEADBS-NEXT: bnez a6, .LBB28_2
+; RV32XTHEADBS-NEXT: bnez a3, .LBB28_2
; RV32XTHEADBS-NEXT: # %bb.1:
; RV32XTHEADBS-NEXT: mv a0, a4
; RV32XTHEADBS-NEXT: mv a1, a5
@@ -1021,10 +1021,10 @@ define i64 @bit_31_z_select_i64(i64 %a, i64 %b, i64 %c) {
define i64 @bit_31_nz_select_i64(i64 %a, i64 %b, i64 %c) {
; RV32-LABEL: bit_31_nz_select_i64:
; RV32: # %bb.0:
-; RV32-NEXT: srli a6, a0, 31
; RV32-NEXT: mv a1, a3
+; RV32-NEXT: srli a3, a0, 31
; RV32-NEXT: mv a0, a2
-; RV32-NEXT: bnez a6, .LBB30_2
+; RV32-NEXT: bnez a3, .LBB30_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a0, a4
; RV32-NEXT: mv a1, a5
@@ -1049,14 +1049,14 @@ define i64 @bit_31_nz_select_i64(i64 %a, i64 %b, i64 %c) {
define i64 @bit_32_z_select_i64(i64 %a, i64 %b, i64 %c) {
; RV32-LABEL: bit_32_z_select_i64:
; RV32: # %bb.0:
-; RV32-NEXT: andi a6, a1, 1
-; RV32-NEXT: mv a1, a3
+; RV32-NEXT: andi a1, a1, 1
; RV32-NEXT: mv a0, a2
-; RV32-NEXT: beqz a6, .LBB31_2
+; RV32-NEXT: beqz a1, .LBB31_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a0, a4
-; RV32-NEXT: mv a1, a5
+; RV32-NEXT: mv a3, a5
; RV32-NEXT: .LBB31_2:
+; RV32-NEXT: mv a1, a3
; RV32-NEXT: ret
;
; RV64-LABEL: bit_32_z_select_i64:
@@ -1077,14 +1077,14 @@ define i64 @bit_32_z_select_i64(i64 %a, i64 %b, i64 %c) {
define i64 @bit_32_nz_select_i64(i64 %a, i64 %b, i64 %c) {
; RV32-LABEL: bit_32_nz_select_i64:
; RV32: # %bb.0:
-; RV32-NEXT: andi a6, a1, 1
-; RV32-NEXT: mv a1, a3
+; RV32-NEXT: andi a1, a1, 1
; RV32-NEXT: mv a0, a2
-; RV32-NEXT: bnez a6, .LBB32_2
+; RV32-NEXT: bnez a1, .LBB32_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a0, a4
-; RV32-NEXT: mv a1, a5
+; RV32-NEXT: mv a3, a5
; RV32-NEXT: .LBB32_2:
+; RV32-NEXT: mv a1, a3
; RV32-NEXT: ret
;
; RV64-LABEL: bit_32_nz_select_i64:
@@ -1105,14 +1105,14 @@ define i64 @bit_32_nz_select_i64(i64 %a, i64 %b, i64 %c) {
define i64 @bit_55_z_select_i64(i64 %a, i64 %b, i64 %c) {
; RV32-LABEL: bit_55_z_select_i64:
; RV32: # %bb.0:
-; RV32-NEXT: slli a6, a1, 8
-; RV32-NEXT: mv a1, a3
+; RV32-NEXT: slli a1, a1, 8
; RV32-NEXT: mv a0, a2
-; RV32-NEXT: bgez a6, .LBB33_2
+; RV32-NEXT: bgez a1, .LBB33_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a0, a4
-; RV32-NEXT: mv a1, a5
+; RV32-NEXT: mv a3, a5
; RV32-NEXT: .LBB33_2:
+; RV32-NEXT: mv a1, a3
; RV32-NEXT: ret
;
; RV64-LABEL: bit_55_z_select_i64:
@@ -1134,14 +1134,14 @@ define i64 @bit_55_nz_select_i64(i64 %a, i64 %b, i64 %c) {
; RV32I-LABEL: bit_55_nz_select_i64:
; RV32I: # %bb.0:
; RV32I-NEXT: slli a1, a1, 8
-; RV32I-NEXT: srli a6, a1, 31
-; RV32I-NEXT: mv a1, a3
+; RV32I-NEXT: srli a1, a1, 31
; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: bnez a6, .LBB34_2
+; RV32I-NEXT: bnez a1, .LBB34_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: mv a0, a4
-; RV32I-NEXT: mv a1, a5
+; RV32I-NEXT: mv a3, a5
; RV32I-NEXT: .LBB34_2:
+; RV32I-NEXT: mv a1, a3
; RV32I-NEXT: ret
;
; RV64-LABEL: bit_55_nz_select_i64:
@@ -1156,26 +1156,26 @@ define i64 @bit_55_nz_select_i64(i64 %a, i64 %b, i64 %c) {
;
; RV32ZBS-LABEL: bit_55_nz_select_i64:
; RV32ZBS: # %bb.0:
-; RV32ZBS-NEXT: bexti a6, a1, 23
-; RV32ZBS-NEXT: mv a1, a3
+; RV32ZBS-NEXT: bexti a1, a1, 23
; RV32ZBS-NEXT: mv a0, a2
-; RV32ZBS-NEXT: bnez a6, .LBB34_2
+; RV32ZBS-NEXT: bnez a1, .LBB34_2
; RV32ZBS-NEXT: # %bb.1:
; RV32ZBS-NEXT: mv a0, a4
-; RV32ZBS-NEXT: mv a1, a5
+; RV32ZBS-NEXT: mv a3, a5
; RV32ZBS-NEXT: .LBB34_2:
+; RV32ZBS-NEXT: mv a1, a3
; RV32ZBS-NEXT: ret
;
; RV32XTHEADBS-LABEL: bit_55_nz_select_i64:
; RV32XTHEADBS: # %bb.0:
-; RV32XTHEADBS-NEXT: th.tst a6, a1, 23
-; RV32XTHEADBS-NEXT: mv a1, a3
+; RV32XTHEADBS-NEXT: th.tst a1, a1, 23
; RV32XTHEADBS-NEXT: mv a0, a2
-; RV32XTHEADBS-NEXT: bnez a6, .LBB34_2
+; RV32XTHEADBS-NEXT: bnez a1, .LBB34_2
; RV32XTHEADBS-NEXT: # %bb.1:
; RV32XTHEADBS-NEXT: mv a0, a4
-; RV32XTHEADBS-NEXT: mv a1, a5
+; RV32XTHEADBS-NEXT: mv a3, a5
; RV32XTHEADBS-NEXT: .LBB34_2:
+; RV32XTHEADBS-NEXT: mv a1, a3
; RV32XTHEADBS-NEXT: ret
%1 = and i64 %a, 36028797018963968
%2 = icmp ne i64 %1, 0
@@ -1212,14 +1212,14 @@ define i64 @bit_63_z_select_i64(i64 %a, i64 %b, i64 %c) {
define i64 @bit_63_nz_select_i64(i64 %a, i64 %b, i64 %c) {
; RV32-LABEL: bit_63_nz_select_i64:
; RV32: # %bb.0:
-; RV32-NEXT: srli a6, a1, 31
-; RV32-NEXT: mv a1, a3
+; RV32-NEXT: srli a1, a1, 31
; RV32-NEXT: mv a0, a2
-; RV32-NEXT: bnez a6, .LBB36_2
+; RV32-NEXT: bnez a1, .LBB36_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a0, a4
-; RV32-NEXT: mv a1, a5
+; RV32-NEXT: mv a3, a5
; RV32-NEXT: .LBB36_2:
+; RV32-NEXT: mv a1, a3
; RV32-NEXT: ret
;
; RV64-LABEL: bit_63_nz_select_i64:
@@ -2108,10 +2108,10 @@ define signext i32 @bit_32_1_nz_select_i32(i32 signext %a, i32 signext %b, i32 s
define i64 @bit_10_1_z_select_i64(i64 %a, i64 %b, i64 %c) {
; RV32-LABEL: bit_10_1_z_select_i64:
; RV32: # %bb.0:
-; RV32-NEXT: andi a6, a0, 1023
; RV32-NEXT: mv a1, a3
+; RV32-NEXT: andi a3, a0, 1023
; RV32-NEXT: mv a0, a2
-; RV32-NEXT: beqz a6, .LBB71_2
+; RV32-NEXT: beqz a3, .LBB71_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a0, a4
; RV32-NEXT: mv a1, a5
@@ -2136,10 +2136,10 @@ define i64 @bit_10_1_z_select_i64(i64 %a, i64 %b, i64 %c) {
define i64 @bit_10_1_nz_select_i64(i64 %a, i64 %b, i64 %c) {
; RV32-LABEL: bit_10_1_nz_select_i64:
; RV32: # %bb.0:
-; RV32-NEXT: andi a6, a0, 1023
; RV32-NEXT: mv a1, a3
+; RV32-NEXT: andi a3, a0, 1023
; RV32-NEXT: mv a0, a2
-; RV32-NEXT: bnez a6, .LBB72_2
+; RV32-NEXT: bnez a3, .LBB72_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a0, a4
; RV32-NEXT: mv a1, a5
@@ -2164,10 +2164,10 @@ define i64 @bit_10_1_nz_select_i64(i64 %a, i64 %b, i64 %c) {
define i64 @bit_11_1_z_select_i64(i64 %a, i64 %b, i64 %c) {
; RV32-LABEL: bit_11_1_z_select_i64:
; RV32: # %bb.0:
-; RV32-NEXT: andi a6, a0, 2047
; RV32-NEXT: mv a1, a3
+; RV32-NEXT: andi a3, a0, 2047
; RV32-NEXT: mv a0, a2
-; RV32-NEXT: beqz a6, .LBB73_2
+; RV32-NEXT: beqz a3, .LBB73_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a0, a4
; RV32-NEXT: mv a1, a5
@@ -2192,10 +2192,10 @@ define i64 @bit_11_1_z_select_i64(i64 %a, i64 %b, i64 %c) {
define i64 @bit_11_1_nz_select_i64(i64 %a, i64 %b, i64 %c) {
; RV32-LABEL: bit_11_1_nz_select_i64:
; RV32: # %bb.0:
-; RV32-NEXT: andi a6, a0, 2047
; RV32-NEXT: mv a1, a3
+; RV32-NEXT: andi a3, a0, 2047
; RV32-NEXT: mv a0, a2
-; RV32-NEXT: bnez a6, .LBB74_2
+; RV32-NEXT: bnez a3, .LBB74_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a0, a4
; RV32-NEXT: mv a1, a5
@@ -2220,10 +2220,10 @@ define i64 @bit_11_1_nz_select_i64(i64 %a, i64 %b, i64 %c) {
define i64 @bit_16_1_z_select_i64(i64 %a, i64 %b, i64 %c) {
; RV32-LABEL: bit_16_1_z_select_i64:
; RV32: # %bb.0:
-; RV32-NEXT: slli a6, a0, 16
; RV32-NEXT: mv a1, a3
+; RV32-NEXT: slli a3, a0, 16
; RV32-NEXT: mv a0, a2
-; RV32-NEXT: beqz a6, .LBB75_2
+; RV32-NEXT: beqz a3, .LBB75_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a0, a4
; RV32-NEXT: mv a1, a5
@@ -2276,10 +2276,10 @@ define i64 @bit_16_1_nz_select_i64(i64 %a, i64 %b, i64 %c) {
define i64 @bit_20_1_z_select_i64(i64 %a, i64 %b, i64 %c) {
; RV32-LABEL: bit_20_1_z_select_i64:
; RV32: # %bb.0:
-; RV32-NEXT: slli a6, a0, 12
; RV32-NEXT: mv a1, a3
+; RV32-NEXT: slli a3, a0, 12
; RV32-NEXT: mv a0, a2
-; RV32-NEXT: beqz a6, .LBB77_2
+; RV32-NEXT: beqz a3, .LBB77_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a0, a4
; RV32-NEXT: mv a1, a5
@@ -2304,10 +2304,10 @@ define i64 @bit_20_1_z_select_i64(i64 %a, i64 %b, i64 %c) {
define i64 @bit_20_1_nz_select_i64(i64 %a, i64 %b, i64 %c) {
; RV32-LABEL: bit_20_1_nz_select_i64:
; RV32: # %bb.0:
-; RV32-NEXT: slli a6, a0, 12
; RV32-NEXT: mv a1, a3
+; RV32-NEXT: slli a3, a0, 12
; RV32-NEXT: mv a0, a2
-; RV32-NEXT: bnez a6, .LBB78_2
+; RV32-NEXT: bnez a3, .LBB78_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a0, a4
; RV32-NEXT: mv a1, a5
@@ -2332,10 +2332,10 @@ define i64 @bit_20_1_nz_select_i64(i64 %a, i64 %b, i64 %c) {
define i64 @bit_31_1_z_select_i64(i64 %a, i64 %b, i64 %c) {
; RV32-LABEL: bit_31_1_z_select_i64:
; RV32: # %bb.0:
-; RV32-NEXT: slli a6, a0, 1
; RV32-NEXT: mv a1, a3
+; RV32-NEXT: slli a3, a0, 1
; RV32-NEXT: mv a0, a2
-; RV32-NEXT: beqz a6, .LBB79_2
+; RV32-NEXT: beqz a3, .LBB79_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a0, a4
; RV32-NEXT: mv a1, a5
@@ -2360,10 +2360,10 @@ define i64 @bit_31_1_z_select_i64(i64 %a, i64 %b, i64 %c) {
define i64 @bit_31_1_nz_select_i64(i64 %a, i64 %b, i64 %c) {
; RV32-LABEL: bit_31_1_nz_select_i64:
; RV32: # %bb.0:
-; RV32-NEXT: slli a6, a0, 1
; RV32-NEXT: mv a1, a3
+; RV32-NEXT: slli a3, a0, 1
; RV32-NEXT: mv a0, a2
-; RV32-NEXT: bnez a6, .LBB80_2
+; RV32-NEXT: bnez a3, .LBB80_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a0, a4
; RV32-NEXT: mv a1, a5
@@ -2444,14 +2444,14 @@ define i64 @bit_55_1_z_select_i64(i64 %a, i64 %b, i64 %c) {
; RV32: # %bb.0:
; RV32-NEXT: slli a1, a1, 9
; RV32-NEXT: srli a1, a1, 9
-; RV32-NEXT: or a6, a0, a1
-; RV32-NEXT: mv a1, a3
+; RV32-NEXT: or a1, a0, a1
; RV32-NEXT: mv a0, a2
-; RV32-NEXT: beqz a6, .LBB83_2
+; RV32-NEXT: beqz a1, .LBB83_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a0, a4
-; RV32-NEXT: mv a1, a5
+; RV32-NEXT: mv a3, a5
; RV32-NEXT: .LBB83_2:
+; RV32-NEXT: mv a1, a3
; RV32-NEXT: ret
;
; RV64-LABEL: bit_55_1_z_select_i64:
@@ -2474,14 +2474,14 @@ define i64 @bit_55_1_nz_select_i64(i64 %a, i64 %b, i64 %c) {
; RV32: # %bb.0:
; RV32-NEXT: slli a1, a1, 9
; RV32-NEXT: srli a1, a1, 9
-; RV32-NEXT: or a6, a0, a1
-; RV32-NEXT: mv a1, a3
+; RV32-NEXT: or a1, a0, a1
; RV32-NEXT: mv a0, a2
-; RV32-NEXT: bnez a6, .LBB84_2
+; RV32-NEXT: bnez a1, .LBB84_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a0, a4
-; RV32-NEXT: mv a1, a5
+; RV32-NEXT: mv a3, a5
; RV32-NEXT: .LBB84_2:
+; RV32-NEXT: mv a1, a3
; RV32-NEXT: ret
;
; RV64-LABEL: bit_55_1_nz_select_i64:
@@ -2504,14 +2504,14 @@ define i64 @bit_63_1_z_select_i64(i64 %a, i64 %b, i64 %c) {
; RV32I: # %bb.0:
; RV32I-NEXT: slli a1, a1, 1
; RV32I-NEXT: srli a1, a1, 1
-; RV32I-NEXT: or a6, a0, a1
-; RV32I-NEXT: mv a1, a3
+; RV32I-NEXT: or a1, a0, a1
; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: beqz a6, .LBB85_2
+; RV32I-NEXT: beqz a1, .LBB85_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: mv a0, a4
-; RV32I-NEXT: mv a1, a5
+; RV32I-NEXT: mv a3, a5
; RV32I-NEXT: .LBB85_2:
+; RV32I-NEXT: mv a1, a3
; RV32I-NEXT: ret
;
; RV64-LABEL: bit_63_1_z_select_i64:
@@ -2527,28 +2527,28 @@ define i64 @bit_63_1_z_select_i64(i64 %a, i64 %b, i64 %c) {
; RV32ZBS-LABEL: bit_63_1_z_select_i64:
; RV32ZBS: # %bb.0:
; RV32ZBS-NEXT: bclri a1, a1, 31
-; RV32ZBS-NEXT: or a6, a0, a1
-; RV32ZBS-NEXT: mv a1, a3
+; RV32ZBS-NEXT: or a1, a0, a1
; RV32ZBS-NEXT: mv a0, a2
-; RV32ZBS-NEXT: beqz a6, .LBB85_2
+; RV32ZBS-NEXT: beqz a1, .LBB85_2
; RV32ZBS-NEXT: # %bb.1:
; RV32ZBS-NEXT: mv a0, a4
-; RV32ZBS-NEXT: mv a1, a5
+; RV32ZBS-NEXT: mv a3, a5
; RV32ZBS-NEXT: .LBB85_2:
+; RV32ZBS-NEXT: mv a1, a3
; RV32ZBS-NEXT: ret
;
; RV32XTHEADBS-LABEL: bit_63_1_z_select_i64:
; RV32XTHEADBS: # %bb.0:
; RV32XTHEADBS-NEXT: slli a1, a1, 1
; RV32XTHEADBS-NEXT: srli a1, a1, 1
-; RV32XTHEADBS-NEXT: or a6, a0, a1
-; RV32XTHEADBS-NEXT: mv a1, a3
+; RV32XTHEADBS-NEXT: or a1, a0, a1
; RV32XTHEADBS-NEXT: mv a0, a2
-; RV32XTHEADBS-NEXT: beqz a6, .LBB85_2
+; RV32XTHEADBS-NEXT: beqz a1, .LBB85_2
; RV32XTHEADBS-NEXT: # %bb.1:
; RV32XTHEADBS-NEXT: mv a0, a4
-; RV32XTHEADBS-NEXT: mv a1, a5
+; RV32XTHEADBS-NEXT: mv a3, a5
; RV32XTHEADBS-NEXT: .LBB85_2:
+; RV32XTHEADBS-NEXT: mv a1, a3
; RV32XTHEADBS-NEXT: ret
%1 = and i64 %a, 9223372036854775807
%2 = icmp eq i64 %1, 0
@@ -2561,14 +2561,14 @@ define i64 @bit_63_1_nz_select_i64(i64 %a, i64 %b, i64 %c) {
; RV32I: # %bb.0:
; RV32I-NEXT: slli a1, a1, 1
; RV32I-NEXT: srli a1, a1, 1
-; RV32I-NEXT: or a6, a0, a1
-; RV32I-NEXT: mv a1, a3
+; RV32I-NEXT: or a1, a0, a1
; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: bnez a6, .LBB86_2
+; RV32I-NEXT: bnez a1, .LBB86_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: mv a0, a4
-; RV32I-NEXT: mv a1, a5
+; RV32I-NEXT: mv a3, a5
; RV32I-NEXT: .LBB86_2:
+; RV32I-NEXT: mv a1, a3
; RV32I-NEXT: ret
;
; RV64-LABEL: bit_63_1_nz_select_i64:
@@ -2584,28 +2584,28 @@ define i64 @bit_63_1_nz_select_i64(i64 %a, i64 %b, i64 %c) {
; RV32ZBS-LABEL: bit_63_1_nz_select_i64:
; RV32ZBS: # %bb.0:
; RV32ZBS-NEXT: bclri a1, a1, 31
-; RV32ZBS-NEXT: or a6, a0, a1
-; RV32ZBS-NEXT: mv a1, a3
+; RV32ZBS-NEXT: or a1, a0, a1
; RV32ZBS-NEXT: mv a0, a2
-; RV32ZBS-NEXT: bnez a6, .LBB86_2
+; RV32ZBS-NEXT: bnez a1, .LBB86_2
; RV32ZBS-NEXT: # %bb.1:
; RV32ZBS-NEXT: mv a0, a4
-; RV32ZBS-NEXT: mv a1, a5
+; RV32ZBS-NEXT: mv a3, a5
; RV32ZBS-NEXT: .LBB86_2:
+; RV32ZBS-NEXT: mv a1, a3
; RV32ZBS-NEXT: ret
;
; RV32XTHEADBS-LABEL: bit_63_1_nz_select_i64:
; RV32XTHEADBS: # %bb.0:
; RV32XTHEADBS-NEXT: slli a1, a1, 1
; RV32XTHEADBS-NEXT: srli a1, a1, 1
-; RV32XTHEADBS-NEXT: or a6, a0, a1
-; RV32XTHEADBS-NEXT: mv a1, a3
+; RV32XTHEADBS-NEXT: or a1, a0, a1
; RV32XTHEADBS-NEXT: mv a0, a2
-; RV32XTHEADBS-NEXT: bnez a6, .LBB86_2
+; RV32XTHEADBS-NEXT: bnez a1, .LBB86_2
; RV32XTHEADBS-NEXT: # %bb.1:
; RV32XTHEADBS-NEXT: mv a0, a4
-; RV32XTHEADBS-NEXT: mv a1, a5
+; RV32XTHEADBS-NEXT: mv a3, a5
; RV32XTHEADBS-NEXT: .LBB86_2:
+; RV32XTHEADBS-NEXT: mv a1, a3
; RV32XTHEADBS-NEXT: ret
%1 = and i64 %a, 9223372036854775807
%2 = icmp ne i64 %1, 0
@@ -2616,14 +2616,14 @@ define i64 @bit_63_1_nz_select_i64(i64 %a, i64 %b, i64 %c) {
define i64 @bit_64_1_z_select_i64(i64 %a, i64 %b, i64 %c) {
; RV32-LABEL: bit_64_1_z_select_i64:
; RV32: # %bb.0:
-; RV32-NEXT: or a6, a0, a1
-; RV32-NEXT: mv a1, a3
+; RV32-NEXT: or a1, a0, a1
; RV32-NEXT: mv a0, a2
-; RV32-NEXT: beqz a6, .LBB87_2
+; RV32-NEXT: beqz a1, .LBB87_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a0, a4
-; RV32-NEXT: mv a1, a5
+; RV32-NEXT: mv a3, a5
; RV32-NEXT: .LBB87_2:
+; RV32-NEXT: mv a1, a3
; RV32-NEXT: ret
;
; RV64-LABEL: bit_64_1_z_select_i64:
@@ -2643,14 +2643,14 @@ define i64 @bit_64_1_z_select_i64(i64 %a, i64 %b, i64 %c) {
define i64 @bit_64_1_nz_select_i64(i64 %a, i64 %b, i64 %c) {
; RV32-LABEL: bit_64_1_nz_select_i64:
; RV32: # %bb.0:
-; RV32-NEXT: or a6, a0, a1
-; RV32-NEXT: mv a1, a3
+; RV32-NEXT: or a1, a0, a1
; RV32-NEXT: mv a0, a2
-; RV32-NEXT: bnez a6, .LBB88_2
+; RV32-NEXT: bnez a1, .LBB88_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a0, a4
-; RV32-NEXT: mv a1, a5
+; RV32-NEXT: mv a3, a5
; RV32-NEXT: .LBB88_2:
+; RV32-NEXT: mv a1, a3
; RV32-NEXT: ret
;
; RV64-LABEL: bit_64_1_nz_select_i64:
diff --git a/llvm/test/CodeGen/RISCV/branch-relaxation.ll b/llvm/test/CodeGen/RISCV/branch-relaxation.ll
index b1efeaa2be2d99..a9c596cebcf2ae 100644
--- a/llvm/test/CodeGen/RISCV/branch-relaxation.ll
+++ b/llvm/test/CodeGen/RISCV/branch-relaxation.ll
@@ -1222,21 +1222,21 @@ branch_2:
define void @relax_jal_spill_64() {
; CHECK-RV32-LABEL: relax_jal_spill_64:
; CHECK-RV32: # %bb.0:
-; CHECK-RV32-NEXT: addi sp, sp, -272
-; CHECK-RV32-NEXT: .cfi_def_cfa_offset 272
-; CHECK-RV32-NEXT: sw ra, 268(sp) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: sw s0, 264(sp) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: sw s1, 260(sp) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: sw s2, 256(sp) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: sw s3, 252(sp) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: sw s4, 248(sp) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: sw s5, 244(sp) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: sw s6, 240(sp) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: sw s7, 236(sp) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: sw s8, 232(sp) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: sw s9, 228(sp) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: sw s10, 224(sp) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: sw s11, 220(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: addi sp, sp, -240
+; CHECK-RV32-NEXT: .cfi_def_cfa_offset 240
+; CHECK-RV32-NEXT: sw ra, 236(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: sw s0, 232(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: sw s1, 228(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: sw s2, 224(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: sw s3, 220(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: sw s4, 216(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: sw s5, 212(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: sw s6, 208(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: sw s7, 204(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: sw s8, 200(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: sw s9, 196(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: sw s10, 192(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: sw s11, 188(sp) # 4-byte Folded Spill
; CHECK-RV32-NEXT: .cfi_offset ra, -4
; CHECK-RV32-NEXT: .cfi_offset s0, -8
; CHECK-RV32-NEXT: .cfi_offset s1, -12
@@ -1256,143 +1256,144 @@ define void @relax_jal_spill_64() {
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: li t0, 5
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: sw t0, 216(sp) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: sw t1, 212(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: sw t0, 180(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: sw t1, 176(sp) # 4-byte Folded Spill
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: li t1, 6
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: sw t1, 208(sp) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: sw t2, 204(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: sw t2, 172(sp) # 4-byte Folded Spill
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: li t2, 7
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: sw t2, 200(sp) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: sw t3, 196(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: sw t3, 168(sp) # 4-byte Folded Spill
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: li s0, 8
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: sw s0, 192(sp) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: sw s1, 188(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: sw s0, 164(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: sw s1, 160(sp) # 4-byte Folded Spill
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: li s1, 9
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: sw s1, 184(sp) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: sw s2, 180(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: sw s2, 156(sp) # 4-byte Folded Spill
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: li a0, 10
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: sw a1, 176(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: sw a1, 152(sp) # 4-byte Folded Spill
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: li a1, 11
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: sw a1, 172(sp) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: sw a2, 168(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: sw a2, 148(sp) # 4-byte Folded Spill
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: li a2, 12
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: sw a2, 164(sp) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: sw a3, 160(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: sw a3, 144(sp) # 4-byte Folded Spill
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: li a3, 13
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: sw a3, 156(sp) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: sw a4, 152(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: sw a4, 140(sp) # 4-byte Folded Spill
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: li a4, 14
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: sw a4, 148(sp) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: sw a5, 144(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: sw a5, 136(sp) # 4-byte Folded Spill
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: li a5, 15
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: sw a5, 140(sp) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: sw a6, 136(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: sw a6, 132(sp) # 4-byte Folded Spill
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: li a6, 16
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: sw a6, 132(sp) # 4-byte Folded Spill
; CHECK-RV32-NEXT: sw a7, 128(sp) # 4-byte Folded Spill
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: li a7, 17
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: sw a7, 124(sp) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: sw t0, 120(sp) # 4-byte Folded Spill
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: li s2, 18
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: sw s2, 116(sp) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: sw s3, 112(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: sw s3, 124(sp) # 4-byte Folded Spill
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: li s3, 19
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: sw s3, 108(sp) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: sw s4, 104(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: sw s4, 120(sp) # 4-byte Folded Spill
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: li s4, 20
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: sw s4, 100(sp) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: sw s5, 96(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: sw s5, 116(sp) # 4-byte Folded Spill
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: li s5, 21
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: sw s5, 92(sp) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: sw s6, 88(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: sw s6, 112(sp) # 4-byte Folded Spill
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: li s6, 22
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: sw s6, 84(sp) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: sw s7, 80(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: sw s7, 108(sp) # 4-byte Folded Spill
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: li s7, 23
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: sw s7, 76(sp) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: sw s8, 72(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: sw s8, 104(sp) # 4-byte Folded Spill
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: li s8, 24
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: sw s8, 68(sp) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: sw s9, 64(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: sw s9, 100(sp) # 4-byte Folded Spill
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: li s9, 25
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: sw s9, 60(sp) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: sw s10, 56(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: sw s10, 96(sp) # 4-byte Folded Spill
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: li s10, 26
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: sw s10, 52(sp) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: sw s11, 48(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: sw s11, 92(sp) # 4-byte Folded Spill
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: li s11, 27
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: sw s11, 44(sp) # 4-byte Folded Spill
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: li t3, 28
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: sw t3, 40(sp) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: sw t4, 36(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: sw t4, 88(sp) # 4-byte Folded Spill
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: li t4, 29
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: sw t4, 32(sp) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: sw t5, 28(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: sw t5, 84(sp) # 4-byte Folded Spill
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: li t5, 30
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: mv a1, t6
+; CHECK-RV32-NEXT: sw t5, 184(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: mv t5, t6
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: li t6, 31
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: sw a1, 24(sp) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: xor a1, a1, s0
-; CHECK-RV32-NEXT: sw t6, 20(sp) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: sw t5, 16(sp) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: xor a2, t5, t6
-; CHECK-RV32-NEXT: or a1, a2, a1
-; CHECK-RV32-NEXT: beqz a1, .LBB4_1
+; CHECK-RV32-NEXT: sw t5, 80(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: sw s0, 72(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: xor s0, t5, s0
+; CHECK-RV32-NEXT: sw t6, 76(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: lw t5, 184(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: xor t6, t5, t6
+; CHECK-RV32-NEXT: or s0, t6, s0
+; CHECK-RV32-NEXT: mv t5, s1
+; CHECK-RV32-NEXT: mv t6, a1
+; CHECK-RV32-NEXT: sw a2, 8(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: sw a3, 12(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: sw s2, 24(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: sw s3, 32(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: sw s4, 40(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: sw s5, 48(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: mv s3, a4
+; CHECK-RV32-NEXT: mv s5, a5
+; CHECK-RV32-NEXT: mv a2, t1
+; CHECK-RV32-NEXT: mv a1, t2
+; CHECK-RV32-NEXT: mv s4, a6
+; CHECK-RV32-NEXT: sw a7, 20(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: sw t0, 16(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: sw s6, 28(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: sw s7, 36(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: sw s8, 44(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: sw s9, 52(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: sw s10, 56(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: sw s11, 60(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: sw t3, 64(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: sw t4, 68(sp) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: beqz s0, .LBB4_1
; CHECK-RV32-NEXT: # %bb.3:
-; CHECK-RV32-NEXT: jump .LBB4_2, a1
+; CHECK-RV32-NEXT: jump .LBB4_2, a3
; CHECK-RV32-NEXT: .LBB4_1: # %branch_1
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: .zero 1048576
@@ -1401,152 +1402,152 @@ define void @relax_jal_spill_64() {
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: # reg use ra
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: lw t0, 216(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT: lw t1, 212(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw t0, 180(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw t1, 176(sp) # 4-byte Folded Reload
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: # reg use t0
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: lw t1, 208(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT: lw t2, 204(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: mv t1, a2
+; CHECK-RV32-NEXT: lw t2, 172(sp) # 4-byte Folded Reload
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: # reg use t1
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: lw t2, 200(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT: lw t3, 196(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: mv t2, a1
+; CHECK-RV32-NEXT: lw t3, 168(sp) # 4-byte Folded Reload
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: # reg use t2
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: lw s0, 192(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT: lw s1, 188(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw s0, 164(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw s1, 160(sp) # 4-byte Folded Reload
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: # reg use s0
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: lw s1, 184(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT: lw s2, 180(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: mv s1, t5
+; CHECK-RV32-NEXT: lw s2, 156(sp) # 4-byte Folded Reload
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: # reg use s1
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: lw a1, 176(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw a1, 152(sp) # 4-byte Folded Reload
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: # reg use a0
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: lw a1, 172(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT: lw a2, 168(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: mv a1, t6
+; CHECK-RV32-NEXT: lw a2, 148(sp) # 4-byte Folded Reload
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: # reg use a1
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: lw a2, 164(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT: lw a3, 160(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw a2, 8(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw a3, 144(sp) # 4-byte Folded Reload
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: # reg use a2
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: lw a3, 156(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT: lw a4, 152(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw a3, 12(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw a4, 140(sp) # 4-byte Folded Reload
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: # reg use a3
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: lw a4, 148(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT: lw a5, 144(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: mv a4, s3
+; CHECK-RV32-NEXT: lw a5, 136(sp) # 4-byte Folded Reload
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: # reg use a4
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: lw a5, 140(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT: lw a6, 136(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: mv a5, s5
+; CHECK-RV32-NEXT: lw a6, 132(sp) # 4-byte Folded Reload
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: # reg use a5
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: lw a6, 132(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: mv a6, s4
; CHECK-RV32-NEXT: lw a7, 128(sp) # 4-byte Folded Reload
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: # reg use a6
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: lw a7, 124(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT: lw t0, 120(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw a7, 20(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw t0, 16(sp) # 4-byte Folded Reload
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: # reg use a7
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: lw s2, 116(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT: lw s3, 112(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw s2, 24(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw s3, 124(sp) # 4-byte Folded Reload
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: # reg use s2
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: lw s3, 108(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT: lw s4, 104(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw s3, 32(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw s4, 120(sp) # 4-byte Folded Reload
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: # reg use s3
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: lw s4, 100(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT: lw s5, 96(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw s4, 40(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw s5, 116(sp) # 4-byte Folded Reload
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: # reg use s4
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: lw s5, 92(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT: lw s6, 88(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw s5, 48(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw s6, 112(sp) # 4-byte Folded Reload
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: # reg use s5
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: lw s6, 84(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT: lw s7, 80(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw s6, 28(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw s7, 108(sp) # 4-byte Folded Reload
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: # reg use s6
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: lw s7, 76(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT: lw s8, 72(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw s7, 36(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw s8, 104(sp) # 4-byte Folded Reload
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: # reg use s7
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: lw s8, 68(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT: lw s9, 64(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw s8, 44(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw s9, 100(sp) # 4-byte Folded Reload
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: # reg use s8
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: lw s9, 60(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT: lw s10, 56(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw s9, 52(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw s10, 96(sp) # 4-byte Folded Reload
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: # reg use s9
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: lw s10, 52(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT: lw s11, 48(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw s10, 56(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw s11, 92(sp) # 4-byte Folded Reload
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: # reg use s10
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: lw s11, 44(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw s11, 60(sp) # 4-byte Folded Reload
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: # reg use s11
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: lw t3, 40(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT: lw t4, 36(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw t3, 64(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw t4, 88(sp) # 4-byte Folded Reload
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: # reg use t3
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: lw t4, 32(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT: lw t5, 28(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw t4, 68(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw t5, 84(sp) # 4-byte Folded Reload
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: # reg use t4
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: lw t5, 16(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT: lw t6, 24(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw t5, 184(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw t6, 80(sp) # 4-byte Folded Reload
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: # reg use t5
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: lw t6, 20(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw t6, 76(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw s0, 72(sp) # 4-byte Folded Reload
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: # reg use t6
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: lw ra, 268(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT: lw s0, 264(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT: lw s1, 260(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT: lw s2, 256(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT: lw s3, 252(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT: lw s4, 248(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT: lw s5, 244(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT: lw s6, 240(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT: lw s7, 236(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT: lw s8, 232(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT: lw s9, 228(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT: lw s10, 224(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT: lw s11, 220(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw ra, 236(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw s0, 232(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw s1, 228(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw s2, 224(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw s3, 220(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw s4, 216(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw s5, 212(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw s6, 208(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw s7, 204(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw s8, 200(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw s9, 196(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw s10, 192(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw s11, 188(sp) # 4-byte Folded Reload
; CHECK-RV32-NEXT: .cfi_restore ra
; CHECK-RV32-NEXT: .cfi_restore s0
; CHECK-RV32-NEXT: .cfi_restore s1
@@ -1560,7 +1561,7 @@ define void @relax_jal_spill_64() {
; CHECK-RV32-NEXT: .cfi_restore s9
; CHECK-RV32-NEXT: .cfi_restore s10
; CHECK-RV32-NEXT: .cfi_restore s11
-; CHECK-RV32-NEXT: addi sp, sp, 272
+; CHECK-RV32-NEXT: addi sp, sp, 240
; CHECK-RV32-NEXT: .cfi_def_cfa_offset 0
; CHECK-RV32-NEXT: ret
;
@@ -1917,489 +1918,476 @@ define void @relax_jal_spill_64_adjust_spill_slot() {
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: li t0, 5
; CHECK-RV32-NEXT: #NO_APP
+; CHECK-RV32-NEXT: sw ra, 0(sp)
; CHECK-RV32-NEXT: lui a0, 1
; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: sw t0, -4(a0) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: sw t0, -8(a0) # 4-byte Folded Spill
; CHECK-RV32-NEXT: lui a0, 1
; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: sw t1, -8(a0) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: sw t1, -12(a0) # 4-byte Folded Spill
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: li t1, 6
; CHECK-RV32-NEXT: #NO_APP
; CHECK-RV32-NEXT: lui a0, 1
; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: sw t1, -12(a0) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: lui a0, 1
-; CHECK-RV32-NEXT: add a0, sp, a0
; CHECK-RV32-NEXT: sw t2, -16(a0) # 4-byte Folded Spill
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: li t2, 7
; CHECK-RV32-NEXT: #NO_APP
; CHECK-RV32-NEXT: lui a0, 1
; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: sw t2, -20(a0) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: lui a0, 1
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: sw t3, -24(a0) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: sw t3, -20(a0) # 4-byte Folded Spill
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: li s0, 8
; CHECK-RV32-NEXT: #NO_APP
; CHECK-RV32-NEXT: lui a0, 1
; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: sw s0, -28(a0) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: sw s1, -24(a0) # 4-byte Folded Spill
; CHECK-RV32-NEXT: lui a0, 1
; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: sw s1, -32(a0) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: sw s0, -52(a0) # 4-byte Folded Spill
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: li s1, 9
; CHECK-RV32-NEXT: #NO_APP
; CHECK-RV32-NEXT: lui a0, 1
; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: sw s1, -36(a0) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: lui a0, 1
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: sw s2, -40(a0) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: sw s2, -28(a0) # 4-byte Folded Spill
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: li a0, 10
; CHECK-RV32-NEXT: #NO_APP
; CHECK-RV32-NEXT: lui a2, 1
; CHECK-RV32-NEXT: add a2, sp, a2
-; CHECK-RV32-NEXT: sw a1, -44(a2) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: sw a1, -32(a2) # 4-byte Folded Spill
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: li a1, 11
; CHECK-RV32-NEXT: #NO_APP
; CHECK-RV32-NEXT: lui a3, 1
; CHECK-RV32-NEXT: add a3, sp, a3
-; CHECK-RV32-NEXT: sw a1, -48(a3) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: lui a1, 1
-; CHECK-RV32-NEXT: add a1, sp, a1
-; CHECK-RV32-NEXT: sw a2, -52(a1) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: sw a2, -36(a3) # 4-byte Folded Spill
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: li a2, 12
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: lui a1, 1
-; CHECK-RV32-NEXT: add a1, sp, a1
-; CHECK-RV32-NEXT: sw a2, -56(a1) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: lui a1, 1
-; CHECK-RV32-NEXT: add a1, sp, a1
-; CHECK-RV32-NEXT: sw a3, -60(a1) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: lui a4, 1
+; CHECK-RV32-NEXT: add a4, sp, a4
+; CHECK-RV32-NEXT: sw a3, -40(a4) # 4-byte Folded Spill
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: li a3, 13
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: lui a1, 1
-; CHECK-RV32-NEXT: add a1, sp, a1
-; CHECK-RV32-NEXT: sw a3, -64(a1) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: lui a1, 1
-; CHECK-RV32-NEXT: add a1, sp, a1
-; CHECK-RV32-NEXT: sw a4, -68(a1) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: lui a5, 1
+; CHECK-RV32-NEXT: add a5, sp, a5
+; CHECK-RV32-NEXT: sw a4, -44(a5) # 4-byte Folded Spill
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: li a4, 14
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: lui a1, 1
-; CHECK-RV32-NEXT: add a1, sp, a1
-; CHECK-RV32-NEXT: sw a4, -72(a1) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: lui a1, 1
-; CHECK-RV32-NEXT: add a1, sp, a1
-; CHECK-RV32-NEXT: sw a5, -76(a1) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: lui a6, 1
+; CHECK-RV32-NEXT: add a6, sp, a6
+; CHECK-RV32-NEXT: sw a5, -48(a6) # 4-byte Folded Spill
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: li a5, 15
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: lui a1, 1
-; CHECK-RV32-NEXT: add a1, sp, a1
-; CHECK-RV32-NEXT: sw a5, -80(a1) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: lui a1, 1
-; CHECK-RV32-NEXT: add a1, sp, a1
-; CHECK-RV32-NEXT: sw a6, -84(a1) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: lui a7, 1
+; CHECK-RV32-NEXT: add a7, sp, a7
+; CHECK-RV32-NEXT: sw a6, -56(a7) # 4-byte Folded Spill
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: li a6, 16
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: lui a1, 1
-; CHECK-RV32-NEXT: add a1, sp, a1
-; CHECK-RV32-NEXT: sw a6, -88(a1) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: lui a1, 1
-; CHECK-RV32-NEXT: add a1, sp, a1
-; CHECK-RV32-NEXT: sw a7, -92(a1) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: lui t0, 1
+; CHECK-RV32-NEXT: add t0, sp, t0
+; CHECK-RV32-NEXT: sw a7, -60(t0) # 4-byte Folded Spill
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: li a7, 17
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: lui a1, 1
-; CHECK-RV32-NEXT: add a1, sp, a1
-; CHECK-RV32-NEXT: sw a7, -96(a1) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: lui a1, 1
-; CHECK-RV32-NEXT: add a1, sp, a1
-; CHECK-RV32-NEXT: sw t0, -100(a1) # 4-byte Folded Spill
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: li s2, 18
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: lui a1, 1
-; CHECK-RV32-NEXT: add a1, sp, a1
-; CHECK-RV32-NEXT: sw s2, -104(a1) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: lui a1, 1
-; CHECK-RV32-NEXT: add a1, sp, a1
-; CHECK-RV32-NEXT: sw s3, -108(a1) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: lui t3, 1
+; CHECK-RV32-NEXT: add t3, sp, t3
+; CHECK-RV32-NEXT: sw s3, -64(t3) # 4-byte Folded Spill
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: li s3, 19
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: lui a1, 1
-; CHECK-RV32-NEXT: add a1, sp, a1
-; CHECK-RV32-NEXT: sw s3, -112(a1) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: lui a1, 1
-; CHECK-RV32-NEXT: add a1, sp, a1
-; CHECK-RV32-NEXT: sw s4, -116(a1) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: lui t3, 1
+; CHECK-RV32-NEXT: add t3, sp, t3
+; CHECK-RV32-NEXT: sw s4, -68(t3) # 4-byte Folded Spill
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: li s4, 20
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: lui a1, 1
-; CHECK-RV32-NEXT: add a1, sp, a1
-; CHECK-RV32-NEXT: sw s4, -120(a1) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: lui a1, 1
-; CHECK-RV32-NEXT: add a1, sp, a1
-; CHECK-RV32-NEXT: sw s5, -124(a1) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: lui t3, 1
+; CHECK-RV32-NEXT: add t3, sp, t3
+; CHECK-RV32-NEXT: sw s5, -72(t3) # 4-byte Folded Spill
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: li s5, 21
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: lui a1, 1
-; CHECK-RV32-NEXT: add a1, sp, a1
-; CHECK-RV32-NEXT: sw s5, -128(a1) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: lui a1, 1
-; CHECK-RV32-NEXT: add a1, sp, a1
-; CHECK-RV32-NEXT: sw s6, -132(a1) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: lui t3, 1
+; CHECK-RV32-NEXT: add t3, sp, t3
+; CHECK-RV32-NEXT: sw s6, -76(t3) # 4-byte Folded Spill
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: li s6, 22
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: lui a1, 1
-; CHECK-RV32-NEXT: add a1, sp, a1
-; CHECK-RV32-NEXT: sw s6, -136(a1) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: lui a1, 1
-; CHECK-RV32-NEXT: add a1, sp, a1
-; CHECK-RV32-NEXT: sw s7, -140(a1) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: lui t3, 1
+; CHECK-RV32-NEXT: add t3, sp, t3
+; CHECK-RV32-NEXT: sw s7, -80(t3) # 4-byte Folded Spill
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: li s7, 23
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: lui a1, 1
-; CHECK-RV32-NEXT: add a1, sp, a1
-; CHECK-RV32-NEXT: sw s7, -144(a1) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: lui a1, 1
-; CHECK-RV32-NEXT: add a1, sp, a1
-; CHECK-RV32-NEXT: sw s8, -148(a1) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: lui t3, 1
+; CHECK-RV32-NEXT: add t3, sp, t3
+; CHECK-RV32-NEXT: sw s8, -84(t3) # 4-byte Folded Spill
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: li s8, 24
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: lui a1, 1
-; CHECK-RV32-NEXT: add a1, sp, a1
-; CHECK-RV32-NEXT: sw s8, -152(a1) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: lui a1, 1
-; CHECK-RV32-NEXT: add a1, sp, a1
-; CHECK-RV32-NEXT: sw s9, -156(a1) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: lui t3, 1
+; CHECK-RV32-NEXT: add t3, sp, t3
+; CHECK-RV32-NEXT: sw s9, -88(t3) # 4-byte Folded Spill
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: li s9, 25
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: lui a1, 1
-; CHECK-RV32-NEXT: add a1, sp, a1
-; CHECK-RV32-NEXT: sw s9, -160(a1) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: lui a1, 1
-; CHECK-RV32-NEXT: add a1, sp, a1
-; CHECK-RV32-NEXT: sw s10, -164(a1) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: lui t3, 1
+; CHECK-RV32-NEXT: add t3, sp, t3
+; CHECK-RV32-NEXT: sw s10, -92(t3) # 4-byte Folded Spill
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: li s10, 26
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: lui a1, 1
-; CHECK-RV32-NEXT: add a1, sp, a1
-; CHECK-RV32-NEXT: sw s10, -168(a1) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: lui a1, 1
-; CHECK-RV32-NEXT: add a1, sp, a1
-; CHECK-RV32-NEXT: sw s11, -172(a1) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: lui t3, 1
+; CHECK-RV32-NEXT: add t3, sp, t3
+; CHECK-RV32-NEXT: sw s11, -96(t3) # 4-byte Folded Spill
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: li s11, 27
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: lui a1, 1
-; CHECK-RV32-NEXT: add a1, sp, a1
-; CHECK-RV32-NEXT: sw s11, -176(a1) # 4-byte Folded Spill
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: li t3, 28
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: lui a1, 1
-; CHECK-RV32-NEXT: add a1, sp, a1
-; CHECK-RV32-NEXT: sw t3, -180(a1) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: lui a1, 1
-; CHECK-RV32-NEXT: add a1, sp, a1
-; CHECK-RV32-NEXT: sw t4, -184(a1) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: lui t5, 1
+; CHECK-RV32-NEXT: add t5, sp, t5
+; CHECK-RV32-NEXT: sw t4, -100(t5) # 4-byte Folded Spill
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: li t4, 29
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: lui a1, 1
-; CHECK-RV32-NEXT: add a1, sp, a1
-; CHECK-RV32-NEXT: sw t4, -188(a1) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: lui a1, 1
-; CHECK-RV32-NEXT: add a1, sp, a1
-; CHECK-RV32-NEXT: sw t5, -192(a1) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: lui t6, 1
+; CHECK-RV32-NEXT: add t6, sp, t6
+; CHECK-RV32-NEXT: sw t5, -104(t6) # 4-byte Folded Spill
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: li t5, 30
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: mv a1, t6
+; CHECK-RV32-NEXT: lui ra, 1
+; CHECK-RV32-NEXT: add ra, sp, ra
+; CHECK-RV32-NEXT: sw t5, -4(ra) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: mv t5, t6
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: li t6, 31
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: lui a2, 1
-; CHECK-RV32-NEXT: add a2, sp, a2
-; CHECK-RV32-NEXT: sw s0, -208(a2) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: lui a2, 1
-; CHECK-RV32-NEXT: add a2, sp, a2
-; CHECK-RV32-NEXT: sw a1, -196(a2) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: xor a1, a1, s0
-; CHECK-RV32-NEXT: lui a2, 1
-; CHECK-RV32-NEXT: add a2, sp, a2
-; CHECK-RV32-NEXT: sw t6, -200(a2) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: lui a2, 1
-; CHECK-RV32-NEXT: add a2, sp, a2
-; CHECK-RV32-NEXT: sw t5, -204(a2) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: xor a2, t5, t6
-; CHECK-RV32-NEXT: or a1, a2, a1
-; CHECK-RV32-NEXT: beqz a1, .LBB5_1
+; CHECK-RV32-NEXT: lui ra, 1
+; CHECK-RV32-NEXT: add ra, sp, ra
+; CHECK-RV32-NEXT: sw t5, -108(ra) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: xor t5, t5, s0
+; CHECK-RV32-NEXT: lui ra, 1
+; CHECK-RV32-NEXT: add ra, sp, ra
+; CHECK-RV32-NEXT: sw t5, -116(ra) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: lw ra, 0(sp)
+; CHECK-RV32-NEXT: lui t5, 1
+; CHECK-RV32-NEXT: add t5, sp, t5
+; CHECK-RV32-NEXT: sw t6, -112(t5) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: lui t5, 1
+; CHECK-RV32-NEXT: add t5, sp, t5
+; CHECK-RV32-NEXT: lw t5, -4(t5) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: xor t6, t5, t6
+; CHECK-RV32-NEXT: lui t5, 1
+; CHECK-RV32-NEXT: add t5, sp, t5
+; CHECK-RV32-NEXT: lw t5, -116(t5) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: or t6, t6, t5
+; CHECK-RV32-NEXT: mv t5, s1
+; CHECK-RV32-NEXT: lui s1, 1
+; CHECK-RV32-NEXT: add s1, sp, s1
+; CHECK-RV32-NEXT: sw a1, -184(s1) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: lui a1, 1
+; CHECK-RV32-NEXT: add a1, sp, a1
+; CHECK-RV32-NEXT: sw a2, -180(a1) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: lui a1, 1
+; CHECK-RV32-NEXT: add a1, sp, a1
+; CHECK-RV32-NEXT: sw a3, -176(a1) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: lui a1, 1
+; CHECK-RV32-NEXT: add a1, sp, a1
+; CHECK-RV32-NEXT: sw s2, -164(a1) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: lui a1, 1
+; CHECK-RV32-NEXT: add a1, sp, a1
+; CHECK-RV32-NEXT: sw s3, -156(a1) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: lui a1, 1
+; CHECK-RV32-NEXT: add a1, sp, a1
+; CHECK-RV32-NEXT: sw s4, -148(a1) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: lui a1, 1
+; CHECK-RV32-NEXT: add a1, sp, a1
+; CHECK-RV32-NEXT: sw s5, -140(a1) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: mv s3, a4
+; CHECK-RV32-NEXT: mv s5, a5
+; CHECK-RV32-NEXT: mv a2, t1
+; CHECK-RV32-NEXT: mv a1, t2
+; CHECK-RV32-NEXT: mv s4, a6
+; CHECK-RV32-NEXT: lui a3, 1
+; CHECK-RV32-NEXT: add a3, sp, a3
+; CHECK-RV32-NEXT: sw a7, -168(a3) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: lui a3, 1
+; CHECK-RV32-NEXT: add a3, sp, a3
+; CHECK-RV32-NEXT: sw t0, -172(a3) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: lui a3, 1
+; CHECK-RV32-NEXT: add a3, sp, a3
+; CHECK-RV32-NEXT: sw s6, -160(a3) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: lui a3, 1
+; CHECK-RV32-NEXT: add a3, sp, a3
+; CHECK-RV32-NEXT: sw s7, -152(a3) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: lui a3, 1
+; CHECK-RV32-NEXT: add a3, sp, a3
+; CHECK-RV32-NEXT: sw s8, -144(a3) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: lui a3, 1
+; CHECK-RV32-NEXT: add a3, sp, a3
+; CHECK-RV32-NEXT: sw s9, -136(a3) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: lui a3, 1
+; CHECK-RV32-NEXT: add a3, sp, a3
+; CHECK-RV32-NEXT: sw s10, -132(a3) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: lui a3, 1
+; CHECK-RV32-NEXT: add a3, sp, a3
+; CHECK-RV32-NEXT: sw s11, -128(a3) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: lui a3, 1
+; CHECK-RV32-NEXT: add a3, sp, a3
+; CHECK-RV32-NEXT: sw t3, -124(a3) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: lui a3, 1
+; CHECK-RV32-NEXT: add a3, sp, a3
+; CHECK-RV32-NEXT: sw t4, -120(a3) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: lui a3, 1
+; CHECK-RV32-NEXT: add a3, sp, a3
+; CHECK-RV32-NEXT: sw s0, -116(a3) # 4-byte Folded Spill
+; CHECK-RV32-NEXT: beqz t6, .LBB5_1
; CHECK-RV32-NEXT: # %bb.3:
-; CHECK-RV32-NEXT: jump .LBB5_2, a1
+; CHECK-RV32-NEXT: jump .LBB5_2, a3
; CHECK-RV32-NEXT: .LBB5_1: # %branch_1
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: .zero 1048576
; CHECK-RV32-NEXT: #NO_APP
; CHECK-RV32-NEXT: .LBB5_2: # %branch_2
+; CHECK-RV32-NEXT: lui a3, 1
+; CHECK-RV32-NEXT: add a3, sp, a3
+; CHECK-RV32-NEXT: lw s0, -52(a3) # 4-byte Folded Reload
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: # reg use ra
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: lui a1, 1
-; CHECK-RV32-NEXT: add a1, sp, a1
-; CHECK-RV32-NEXT: lw t0, -4(a1) # 4-byte Folded Reload
-; CHECK-RV32-NEXT: lui a1, 1
-; CHECK-RV32-NEXT: add a1, sp, a1
-; CHECK-RV32-NEXT: lw t1, -8(a1) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lui a3, 1
+; CHECK-RV32-NEXT: add a3, sp, a3
+; CHECK-RV32-NEXT: lw t0, -8(a3) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lui a3, 1
+; CHECK-RV32-NEXT: add a3, sp, a3
+; CHECK-RV32-NEXT: lw t1, -12(a3) # 4-byte Folded Reload
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: # reg use t0
; CHECK-RV32-NEXT: #NO_APP
-; CHECK-RV32-NEXT: lui a1, 1
-; CHECK-RV32-NEXT: add a1, sp, a1
-; CHECK-RV32-NEXT: lw t1, -12(a1) # 4-byte Folded Reload
-; CHECK-RV32-NEXT: lui a1, 1
-; CHECK-RV32-NEXT: add a1, sp, a1
-; CHECK-RV32-NEXT: lw t2, -16(a1) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: mv t1, a2
+; CHECK-RV32-NEXT: lui a2, 1
+; CHECK-RV32-NEXT: add a2, sp, a2
+; CHECK-RV32-NEXT: lw t2, -16(a2) # 4-byte Folded Reload
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: # reg use t1
; CHECK-RV32-NEXT: #NO_APP
+; CHECK-RV32-NEXT: mv t2, a1
; CHECK-RV32-NEXT: lui a1, 1
; CHECK-RV32-NEXT: add a1, sp, a1
-; CHECK-RV32-NEXT: lw t2, -20(a1) # 4-byte Folded Reload
-; CHECK-RV32-NEXT: lui a1, 1
-; CHECK-RV32-NEXT: add a1, sp, a1
-; CHECK-RV32-NEXT: lw t3, -24(a1) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw t3, -20(a1) # 4-byte Folded Reload
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: # reg use t2
; CHECK-RV32-NEXT: #NO_APP
; CHECK-RV32-NEXT: lui a1, 1
; CHECK-RV32-NEXT: add a1, sp, a1
-; CHECK-RV32-NEXT: lw s0, -28(a1) # 4-byte Folded Reload
-; CHECK-RV32-NEXT: lui a1, 1
-; CHECK-RV32-NEXT: add a1, sp, a1
-; CHECK-RV32-NEXT: lw s1, -32(a1) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw s1, -24(a1) # 4-byte Folded Reload
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: # reg use s0
; CHECK-RV32-NEXT: #NO_APP
+; CHECK-RV32-NEXT: mv s1, t5
; CHECK-RV32-NEXT: lui a1, 1
; CHECK-RV32-NEXT: add a1, sp, a1
-; CHECK-RV32-NEXT: lw s1, -36(a1) # 4-byte Folded Reload
-; CHECK-RV32-NEXT: lui a1, 1
-; CHECK-RV32-NEXT: add a1, sp, a1
-; CHECK-RV32-NEXT: lw s2, -40(a1) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw s2, -28(a1) # 4-byte Folded Reload
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: # reg use s1
; CHECK-RV32-NEXT: #NO_APP
; CHECK-RV32-NEXT: lui a1, 1
; CHECK-RV32-NEXT: add a1, sp, a1
-; CHECK-RV32-NEXT: lw a1, -44(a1) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw a1, -32(a1) # 4-byte Folded Reload
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: # reg use a0
; CHECK-RV32-NEXT: #NO_APP
; CHECK-RV32-NEXT: lui a0, 1
; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: lw a1, -48(a0) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw a1, -184(a0) # 4-byte Folded Reload
; CHECK-RV32-NEXT: lui a0, 1
; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: lw a2, -52(a0) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw a2, -36(a0) # 4-byte Folded Reload
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: # reg use a1
; CHECK-RV32-NEXT: #NO_APP
; CHECK-RV32-NEXT: lui a0, 1
; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: lw a2, -56(a0) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw a2, -180(a0) # 4-byte Folded Reload
; CHECK-RV32-NEXT: lui a0, 1
; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: lw a3, -60(a0) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw a3, -40(a0) # 4-byte Folded Reload
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: # reg use a2
; CHECK-RV32-NEXT: #NO_APP
; CHECK-RV32-NEXT: lui a0, 1
; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: lw a3, -64(a0) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw a3, -176(a0) # 4-byte Folded Reload
; CHECK-RV32-NEXT: lui a0, 1
; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: lw a4, -68(a0) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw a4, -44(a0) # 4-byte Folded Reload
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: # reg use a3
; CHECK-RV32-NEXT: #NO_APP
+; CHECK-RV32-NEXT: mv a4, s3
; CHECK-RV32-NEXT: lui a0, 1
; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: lw a4, -72(a0) # 4-byte Folded Reload
-; CHECK-RV32-NEXT: lui a0, 1
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: lw a5, -76(a0) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw a5, -48(a0) # 4-byte Folded Reload
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: # reg use a4
; CHECK-RV32-NEXT: #NO_APP
+; CHECK-RV32-NEXT: mv a5, s5
; CHECK-RV32-NEXT: lui a0, 1
; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: lw a5, -80(a0) # 4-byte Folded Reload
-; CHECK-RV32-NEXT: lui a0, 1
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: lw a6, -84(a0) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw a6, -56(a0) # 4-byte Folded Reload
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: # reg use a5
; CHECK-RV32-NEXT: #NO_APP
+; CHECK-RV32-NEXT: mv a6, s4
; CHECK-RV32-NEXT: lui a0, 1
; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: lw a6, -88(a0) # 4-byte Folded Reload
-; CHECK-RV32-NEXT: lui a0, 1
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: lw a7, -92(a0) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw a7, -60(a0) # 4-byte Folded Reload
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: # reg use a6
; CHECK-RV32-NEXT: #NO_APP
; CHECK-RV32-NEXT: lui a0, 1
; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: lw a7, -96(a0) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw a7, -168(a0) # 4-byte Folded Reload
; CHECK-RV32-NEXT: lui a0, 1
; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: lw t0, -100(a0) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw t0, -172(a0) # 4-byte Folded Reload
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: # reg use a7
; CHECK-RV32-NEXT: #NO_APP
; CHECK-RV32-NEXT: lui a0, 1
; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: lw s2, -104(a0) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw s2, -164(a0) # 4-byte Folded Reload
; CHECK-RV32-NEXT: lui a0, 1
; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: lw s3, -108(a0) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw s3, -64(a0) # 4-byte Folded Reload
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: # reg use s2
; CHECK-RV32-NEXT: #NO_APP
; CHECK-RV32-NEXT: lui a0, 1
; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: lw s3, -112(a0) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw s3, -156(a0) # 4-byte Folded Reload
; CHECK-RV32-NEXT: lui a0, 1
; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: lw s4, -116(a0) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw s4, -68(a0) # 4-byte Folded Reload
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: # reg use s3
; CHECK-RV32-NEXT: #NO_APP
; CHECK-RV32-NEXT: lui a0, 1
; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: lw s4, -120(a0) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw s4, -148(a0) # 4-byte Folded Reload
; CHECK-RV32-NEXT: lui a0, 1
; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: lw s5, -124(a0) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw s5, -72(a0) # 4-byte Folded Reload
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: # reg use s4
; CHECK-RV32-NEXT: #NO_APP
; CHECK-RV32-NEXT: lui a0, 1
; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: lw s5, -128(a0) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw s5, -140(a0) # 4-byte Folded Reload
; CHECK-RV32-NEXT: lui a0, 1
; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: lw s6, -132(a0) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw s6, -76(a0) # 4-byte Folded Reload
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: # reg use s5
; CHECK-RV32-NEXT: #NO_APP
; CHECK-RV32-NEXT: lui a0, 1
; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: lw s6, -136(a0) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw s6, -160(a0) # 4-byte Folded Reload
; CHECK-RV32-NEXT: lui a0, 1
; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: lw s7, -140(a0) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw s7, -80(a0) # 4-byte Folded Reload
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: # reg use s6
; CHECK-RV32-NEXT: #NO_APP
; CHECK-RV32-NEXT: lui a0, 1
; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: lw s7, -144(a0) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw s7, -152(a0) # 4-byte Folded Reload
; CHECK-RV32-NEXT: lui a0, 1
; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: lw s8, -148(a0) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw s8, -84(a0) # 4-byte Folded Reload
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: # reg use s7
; CHECK-RV32-NEXT: #NO_APP
; CHECK-RV32-NEXT: lui a0, 1
; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: lw s8, -152(a0) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw s8, -144(a0) # 4-byte Folded Reload
; CHECK-RV32-NEXT: lui a0, 1
; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: lw s9, -156(a0) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw s9, -88(a0) # 4-byte Folded Reload
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: # reg use s8
; CHECK-RV32-NEXT: #NO_APP
; CHECK-RV32-NEXT: lui a0, 1
; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: lw s9, -160(a0) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw s9, -136(a0) # 4-byte Folded Reload
; CHECK-RV32-NEXT: lui a0, 1
; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: lw s10, -164(a0) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw s10, -92(a0) # 4-byte Folded Reload
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: # reg use s9
; CHECK-RV32-NEXT: #NO_APP
; CHECK-RV32-NEXT: lui a0, 1
; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: lw s10, -168(a0) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw s10, -132(a0) # 4-byte Folded Reload
; CHECK-RV32-NEXT: lui a0, 1
; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: lw s11, -172(a0) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw s11, -96(a0) # 4-byte Folded Reload
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: # reg use s10
; CHECK-RV32-NEXT: #NO_APP
; CHECK-RV32-NEXT: lui a0, 1
; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: lw s11, -176(a0) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw s11, -128(a0) # 4-byte Folded Reload
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: # reg use s11
; CHECK-RV32-NEXT: #NO_APP
; CHECK-RV32-NEXT: lui a0, 1
; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: lw t3, -180(a0) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw t3, -124(a0) # 4-byte Folded Reload
; CHECK-RV32-NEXT: lui a0, 1
; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: lw t4, -184(a0) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw t4, -100(a0) # 4-byte Folded Reload
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: # reg use t3
; CHECK-RV32-NEXT: #NO_APP
; CHECK-RV32-NEXT: lui a0, 1
; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: lw t4, -188(a0) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw t4, -120(a0) # 4-byte Folded Reload
; CHECK-RV32-NEXT: lui a0, 1
; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: lw t5, -192(a0) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw t5, -104(a0) # 4-byte Folded Reload
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: # reg use t4
; CHECK-RV32-NEXT: #NO_APP
; CHECK-RV32-NEXT: lui a0, 1
; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: lw t5, -204(a0) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw t5, -4(a0) # 4-byte Folded Reload
; CHECK-RV32-NEXT: lui a0, 1
; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: lw t6, -196(a0) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw t6, -108(a0) # 4-byte Folded Reload
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: # reg use t5
; CHECK-RV32-NEXT: #NO_APP
; CHECK-RV32-NEXT: lui a0, 1
; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: lw s0, -208(a0) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw t6, -112(a0) # 4-byte Folded Reload
; CHECK-RV32-NEXT: lui a0, 1
; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: lw t6, -200(a0) # 4-byte Folded Reload
+; CHECK-RV32-NEXT: lw s0, -116(a0) # 4-byte Folded Reload
; CHECK-RV32-NEXT: #APP
; CHECK-RV32-NEXT: # reg use t6
; CHECK-RV32-NEXT: #NO_APP
diff --git a/llvm/test/CodeGen/RISCV/condops.ll b/llvm/test/CodeGen/RISCV/condops.ll
index 622365cf13bcef..fbc9bc39942dd3 100644
--- a/llvm/test/CodeGen/RISCV/condops.ll
+++ b/llvm/test/CodeGen/RISCV/condops.ll
@@ -1374,14 +1374,14 @@ define i64 @seteq(i64 %a, i64 %b, i64 %rs1, i64 %rs2) {
; RV32I: # %bb.0:
; RV32I-NEXT: xor a1, a1, a3
; RV32I-NEXT: xor a0, a0, a2
-; RV32I-NEXT: or a2, a0, a1
-; RV32I-NEXT: mv a1, a5
+; RV32I-NEXT: or a1, a0, a1
; RV32I-NEXT: mv a0, a4
-; RV32I-NEXT: beqz a2, .LBB23_2
+; RV32I-NEXT: beqz a1, .LBB23_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: mv a0, a6
-; RV32I-NEXT: mv a1, a7
+; RV32I-NEXT: mv a5, a7
; RV32I-NEXT: .LBB23_2:
+; RV32I-NEXT: mv a1, a5
; RV32I-NEXT: ret
;
; RV64I-LABEL: seteq:
@@ -1451,14 +1451,14 @@ define i64 @setne(i64 %a, i64 %b, i64 %rs1, i64 %rs2) {
; RV32I: # %bb.0:
; RV32I-NEXT: xor a1, a1, a3
; RV32I-NEXT: xor a0, a0, a2
-; RV32I-NEXT: or a2, a0, a1
-; RV32I-NEXT: mv a1, a5
+; RV32I-NEXT: or a1, a0, a1
; RV32I-NEXT: mv a0, a4
-; RV32I-NEXT: bnez a2, .LBB24_2
+; RV32I-NEXT: bnez a1, .LBB24_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: mv a0, a6
-; RV32I-NEXT: mv a1, a7
+; RV32I-NEXT: mv a5, a7
; RV32I-NEXT: .LBB24_2:
+; RV32I-NEXT: mv a1, a5
; RV32I-NEXT: ret
;
; RV64I-LABEL: setne:
@@ -2222,14 +2222,14 @@ define i64 @setule(i64 %a, i64 %b, i64 %rs1, i64 %rs2) {
define i64 @seteq_zero(i64 %a, i64 %rs1, i64 %rs2) {
; RV32I-LABEL: seteq_zero:
; RV32I: # %bb.0:
-; RV32I-NEXT: or a6, a0, a1
-; RV32I-NEXT: mv a1, a3
+; RV32I-NEXT: or a1, a0, a1
; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: beqz a6, .LBB33_2
+; RV32I-NEXT: beqz a1, .LBB33_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: mv a0, a4
-; RV32I-NEXT: mv a1, a5
+; RV32I-NEXT: mv a3, a5
; RV32I-NEXT: .LBB33_2:
+; RV32I-NEXT: mv a1, a3
; RV32I-NEXT: ret
;
; RV64I-LABEL: seteq_zero:
@@ -2290,14 +2290,14 @@ define i64 @seteq_zero(i64 %a, i64 %rs1, i64 %rs2) {
define i64 @setne_zero(i64 %a, i64 %rs1, i64 %rs2) {
; RV32I-LABEL: setne_zero:
; RV32I: # %bb.0:
-; RV32I-NEXT: or a6, a0, a1
-; RV32I-NEXT: mv a1, a3
+; RV32I-NEXT: or a1, a0, a1
; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: bnez a6, .LBB34_2
+; RV32I-NEXT: bnez a1, .LBB34_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: mv a0, a4
-; RV32I-NEXT: mv a1, a5
+; RV32I-NEXT: mv a3, a5
; RV32I-NEXT: .LBB34_2:
+; RV32I-NEXT: mv a1, a3
; RV32I-NEXT: ret
;
; RV64I-LABEL: setne_zero:
@@ -2359,14 +2359,14 @@ define i64 @seteq_constant(i64 %a, i64 %rs1, i64 %rs2) {
; RV32I-LABEL: seteq_constant:
; RV32I: # %bb.0:
; RV32I-NEXT: xori a0, a0, 123
-; RV32I-NEXT: or a6, a0, a1
-; RV32I-NEXT: mv a1, a3
+; RV32I-NEXT: or a1, a0, a1
; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: beqz a6, .LBB35_2
+; RV32I-NEXT: beqz a1, .LBB35_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: mv a0, a4
-; RV32I-NEXT: mv a1, a5
+; RV32I-NEXT: mv a3, a5
; RV32I-NEXT: .LBB35_2:
+; RV32I-NEXT: mv a1, a3
; RV32I-NEXT: ret
;
; RV64I-LABEL: seteq_constant:
@@ -2434,14 +2434,14 @@ define i64 @setne_constant(i64 %a, i64 %rs1, i64 %rs2) {
; RV32I-LABEL: setne_constant:
; RV32I: # %bb.0:
; RV32I-NEXT: xori a0, a0, 456
-; RV32I-NEXT: or a6, a0, a1
-; RV32I-NEXT: mv a1, a3
+; RV32I-NEXT: or a1, a0, a1
; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: bnez a6, .LBB36_2
+; RV32I-NEXT: bnez a1, .LBB36_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: mv a0, a4
-; RV32I-NEXT: mv a1, a5
+; RV32I-NEXT: mv a3, a5
; RV32I-NEXT: .LBB36_2:
+; RV32I-NEXT: mv a1, a3
; RV32I-NEXT: ret
;
; RV64I-LABEL: setne_constant:
@@ -2509,14 +2509,14 @@ define i64 @seteq_2048(i64 %a, i64 %rs1, i64 %rs2) {
; RV32I-LABEL: seteq_2048:
; RV32I: # %bb.0:
; RV32I-NEXT: binvi a0, a0, 11
-; RV32I-NEXT: or a6, a0, a1
-; RV32I-NEXT: mv a1, a3
+; RV32I-NEXT: or a1, a0, a1
; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: beqz a6, .LBB37_2
+; RV32I-NEXT: beqz a1, .LBB37_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: mv a0, a4
-; RV32I-NEXT: mv a1, a5
+; RV32I-NEXT: mv a3, a5
; RV32I-NEXT: .LBB37_2:
+; RV32I-NEXT: mv a1, a3
; RV32I-NEXT: ret
;
; RV64I-LABEL: seteq_2048:
@@ -2585,14 +2585,14 @@ define i64 @seteq_neg2048(i64 %a, i64 %rs1, i64 %rs2) {
; RV32I: # %bb.0:
; RV32I-NEXT: not a1, a1
; RV32I-NEXT: xori a0, a0, -2048
-; RV32I-NEXT: or a6, a0, a1
-; RV32I-NEXT: mv a1, a3
+; RV32I-NEXT: or a1, a0, a1
; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: beqz a6, .LBB38_2
+; RV32I-NEXT: beqz a1, .LBB38_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: mv a0, a4
-; RV32I-NEXT: mv a1, a5
+; RV32I-NEXT: mv a3, a5
; RV32I-NEXT: .LBB38_2:
+; RV32I-NEXT: mv a1, a3
; RV32I-NEXT: ret
;
; RV64I-LABEL: seteq_neg2048:
@@ -2663,14 +2663,14 @@ define i64 @setne_neg2048(i64 %a, i64 %rs1, i64 %rs2) {
; RV32I: # %bb.0:
; RV32I-NEXT: not a1, a1
; RV32I-NEXT: xori a0, a0, -2048
-; RV32I-NEXT: or a6, a0, a1
-; RV32I-NEXT: mv a1, a3
+; RV32I-NEXT: or a1, a0, a1
; RV32I-NEXT: mv a0, a2
-; RV32I-NEXT: bnez a6, .LBB39_2
+; RV32I-NEXT: bnez a1, .LBB39_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: mv a0, a4
-; RV32I-NEXT: mv a1, a5
+; RV32I-NEXT: mv a3, a5
; RV32I-NEXT: .LBB39_2:
+; RV32I-NEXT: mv a1, a3
; RV32I-NEXT: ret
;
; RV64I-LABEL: setne_neg2048:
diff --git a/llvm/test/CodeGen/RISCV/double-convert.ll b/llvm/test/CodeGen/RISCV/double-convert.ll
index a8b141618bbb3a..a72055ab2baa3d 100644
--- a/llvm/test/CodeGen/RISCV/double-convert.ll
+++ b/llvm/test/CodeGen/RISCV/double-convert.ll
@@ -1629,8 +1629,8 @@ define signext i16 @fcvt_w_s_sat_i16(double %a) nounwind {
; RV32IFD-NEXT: lui a0, %hi(.LCPI26_1)
; RV32IFD-NEXT: fld fa4, %lo(.LCPI26_1)(a0)
; RV32IFD-NEXT: feq.d a0, fa0, fa0
-; RV32IFD-NEXT: neg a0, a0
; RV32IFD-NEXT: fmax.d fa5, fa0, fa5
+; RV32IFD-NEXT: neg a0, a0
; RV32IFD-NEXT: fmin.d fa5, fa5, fa4
; RV32IFD-NEXT: fcvt.w.d a1, fa5, rtz
; RV32IFD-NEXT: and a0, a0, a1
@@ -1643,8 +1643,8 @@ define signext i16 @fcvt_w_s_sat_i16(double %a) nounwind {
; RV64IFD-NEXT: lui a0, %hi(.LCPI26_1)
; RV64IFD-NEXT: fld fa4, %lo(.LCPI26_1)(a0)
; RV64IFD-NEXT: feq.d a0, fa0, fa0
-; RV64IFD-NEXT: neg a0, a0
; RV64IFD-NEXT: fmax.d fa5, fa0, fa5
+; RV64IFD-NEXT: neg a0, a0
; RV64IFD-NEXT: fmin.d fa5, fa5, fa4
; RV64IFD-NEXT: fcvt.l.d a1, fa5, rtz
; RV64IFD-NEXT: and a0, a0, a1
@@ -1658,26 +1658,26 @@ define signext i16 @fcvt_w_s_sat_i16(double %a) nounwind {
; RV32IZFINXZDINX-NEXT: lui a4, %hi(.LCPI26_1)
; RV32IZFINXZDINX-NEXT: lw a5, %lo(.LCPI26_1+4)(a4)
; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI26_1)(a4)
-; RV32IZFINXZDINX-NEXT: feq.d a6, a0, a0
-; RV32IZFINXZDINX-NEXT: neg a6, a6
-; RV32IZFINXZDINX-NEXT: fmax.d a0, a0, a2
-; RV32IZFINXZDINX-NEXT: fmin.d a0, a0, a4
-; RV32IZFINXZDINX-NEXT: fcvt.w.d a0, a0, rtz
-; RV32IZFINXZDINX-NEXT: and a0, a6, a0
+; RV32IZFINXZDINX-NEXT: fmax.d a2, a0, a2
+; RV32IZFINXZDINX-NEXT: feq.d a0, a0, a0
+; RV32IZFINXZDINX-NEXT: neg a0, a0
+; RV32IZFINXZDINX-NEXT: fmin.d a2, a2, a4
+; RV32IZFINXZDINX-NEXT: fcvt.w.d a1, a2, rtz
+; RV32IZFINXZDINX-NEXT: and a0, a0, a1
; RV32IZFINXZDINX-NEXT: ret
;
; RV64IZFINXZDINX-LABEL: fcvt_w_s_sat_i16:
; RV64IZFINXZDINX: # %bb.0: # %start
-; RV64IZFINXZDINX-NEXT: feq.d a1, a0, a0
-; RV64IZFINXZDINX-NEXT: neg a1, a1
+; RV64IZFINXZDINX-NEXT: li a1, -505
+; RV64IZFINXZDINX-NEXT: slli a1, a1, 53
; RV64IZFINXZDINX-NEXT: lui a2, %hi(.LCPI26_0)
; RV64IZFINXZDINX-NEXT: ld a2, %lo(.LCPI26_0)(a2)
-; RV64IZFINXZDINX-NEXT: li a3, -505
-; RV64IZFINXZDINX-NEXT: slli a3, a3, 53
-; RV64IZFINXZDINX-NEXT: fmax.d a0, a0, a3
-; RV64IZFINXZDINX-NEXT: fmin.d a0, a0, a2
-; RV64IZFINXZDINX-NEXT: fcvt.l.d a0, a0, rtz
-; RV64IZFINXZDINX-NEXT: and a0, a1, a0
+; RV64IZFINXZDINX-NEXT: fmax.d a1, a0, a1
+; RV64IZFINXZDINX-NEXT: feq.d a0, a0, a0
+; RV64IZFINXZDINX-NEXT: neg a0, a0
+; RV64IZFINXZDINX-NEXT: fmin.d a1, a1, a2
+; RV64IZFINXZDINX-NEXT: fcvt.l.d a1, a1, rtz
+; RV64IZFINXZDINX-NEXT: and a0, a0, a1
; RV64IZFINXZDINX-NEXT: ret
;
; RV32I-LABEL: fcvt_w_s_sat_i16:
@@ -2004,8 +2004,8 @@ define signext i8 @fcvt_w_s_sat_i8(double %a) nounwind {
; RV32IFD-NEXT: lui a0, %hi(.LCPI30_1)
; RV32IFD-NEXT: fld fa4, %lo(.LCPI30_1)(a0)
; RV32IFD-NEXT: feq.d a0, fa0, fa0
-; RV32IFD-NEXT: neg a0, a0
; RV32IFD-NEXT: fmax.d fa5, fa0, fa5
+; RV32IFD-NEXT: neg a0, a0
; RV32IFD-NEXT: fmin.d fa5, fa5, fa4
; RV32IFD-NEXT: fcvt.w.d a1, fa5, rtz
; RV32IFD-NEXT: and a0, a0, a1
@@ -2018,8 +2018,8 @@ define signext i8 @fcvt_w_s_sat_i8(double %a) nounwind {
; RV64IFD-NEXT: lui a0, %hi(.LCPI30_1)
; RV64IFD-NEXT: fld fa4, %lo(.LCPI30_1)(a0)
; RV64IFD-NEXT: feq.d a0, fa0, fa0
-; RV64IFD-NEXT: neg a0, a0
; RV64IFD-NEXT: fmax.d fa5, fa0, fa5
+; RV64IFD-NEXT: neg a0, a0
; RV64IFD-NEXT: fmin.d fa5, fa5, fa4
; RV64IFD-NEXT: fcvt.l.d a1, fa5, rtz
; RV64IFD-NEXT: and a0, a0, a1
@@ -2033,12 +2033,12 @@ define signext i8 @fcvt_w_s_sat_i8(double %a) nounwind {
; RV32IZFINXZDINX-NEXT: lui a4, %hi(.LCPI30_1)
; RV32IZFINXZDINX-NEXT: lw a5, %lo(.LCPI30_1+4)(a4)
; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI30_1)(a4)
-; RV32IZFINXZDINX-NEXT: feq.d a6, a0, a0
-; RV32IZFINXZDINX-NEXT: neg a6, a6
-; RV32IZFINXZDINX-NEXT: fmax.d a0, a0, a2
-; RV32IZFINXZDINX-NEXT: fmin.d a0, a0, a4
-; RV32IZFINXZDINX-NEXT: fcvt.w.d a0, a0, rtz
-; RV32IZFINXZDINX-NEXT: and a0, a6, a0
+; RV32IZFINXZDINX-NEXT: fmax.d a2, a0, a2
+; RV32IZFINXZDINX-NEXT: feq.d a0, a0, a0
+; RV32IZFINXZDINX-NEXT: neg a0, a0
+; RV32IZFINXZDINX-NEXT: fmin.d a2, a2, a4
+; RV32IZFINXZDINX-NEXT: fcvt.w.d a1, a2, rtz
+; RV32IZFINXZDINX-NEXT: and a0, a0, a1
; RV32IZFINXZDINX-NEXT: ret
;
; RV64IZFINXZDINX-LABEL: fcvt_w_s_sat_i8:
diff --git a/llvm/test/CodeGen/RISCV/double-mem.ll b/llvm/test/CodeGen/RISCV/double-mem.ll
index 38cb52b6f4b302..dba9489e7511db 100644
--- a/llvm/test/CodeGen/RISCV/double-mem.ll
+++ b/llvm/test/CodeGen/RISCV/double-mem.ll
@@ -93,17 +93,17 @@ define dso_local double @fld_fsd_global(double %a, double %b) nounwind {
;
; RV32IZFINXZDINX-LABEL: fld_fsd_global:
; RV32IZFINXZDINX: # %bb.0:
+; RV32IZFINXZDINX-NEXT: lui a4, %hi(G)
; RV32IZFINXZDINX-NEXT: fadd.d a0, a0, a2
-; RV32IZFINXZDINX-NEXT: lui a2, %hi(G)
-; RV32IZFINXZDINX-NEXT: lw a4, %lo(G)(a2)
-; RV32IZFINXZDINX-NEXT: lw a5, %lo(G+4)(a2)
-; RV32IZFINXZDINX-NEXT: addi a3, a2, %lo(G)
-; RV32IZFINXZDINX-NEXT: sw a0, %lo(G)(a2)
-; RV32IZFINXZDINX-NEXT: sw a1, %lo(G+4)(a2)
-; RV32IZFINXZDINX-NEXT: lw a4, 72(a3)
-; RV32IZFINXZDINX-NEXT: lw a5, 76(a3)
-; RV32IZFINXZDINX-NEXT: sw a0, 72(a3)
-; RV32IZFINXZDINX-NEXT: sw a1, 76(a3)
+; RV32IZFINXZDINX-NEXT: lw a2, %lo(G)(a4)
+; RV32IZFINXZDINX-NEXT: lw a3, %lo(G+4)(a4)
+; RV32IZFINXZDINX-NEXT: addi a2, a4, %lo(G)
+; RV32IZFINXZDINX-NEXT: sw a0, %lo(G)(a4)
+; RV32IZFINXZDINX-NEXT: sw a1, %lo(G+4)(a4)
+; RV32IZFINXZDINX-NEXT: lw a4, 72(a2)
+; RV32IZFINXZDINX-NEXT: lw a5, 76(a2)
+; RV32IZFINXZDINX-NEXT: sw a0, 72(a2)
+; RV32IZFINXZDINX-NEXT: sw a1, 76(a2)
; RV32IZFINXZDINX-NEXT: ret
;
; RV64IZFINXZDINX-LABEL: fld_fsd_global:
diff --git a/llvm/test/CodeGen/RISCV/double-select-fcmp.ll b/llvm/test/CodeGen/RISCV/double-select-fcmp.ll
index 654a4609caa23f..e7ff991413013b 100644
--- a/llvm/test/CodeGen/RISCV/double-select-fcmp.ll
+++ b/llvm/test/CodeGen/RISCV/double-select-fcmp.ll
@@ -577,9 +577,9 @@ define i32 @select_fcmp_oeq_1_2(double %a, double %b) {
;
; CHECKRV32ZDINX-LABEL: select_fcmp_oeq_1_2:
; CHECKRV32ZDINX: # %bb.0:
+; CHECKRV32ZDINX-NEXT: li a4, 2
; CHECKRV32ZDINX-NEXT: feq.d a0, a0, a2
-; CHECKRV32ZDINX-NEXT: li a1, 2
-; CHECKRV32ZDINX-NEXT: sub a0, a1, a0
+; CHECKRV32ZDINX-NEXT: sub a0, a4, a0
; CHECKRV32ZDINX-NEXT: ret
;
; CHECKRV64ZDINX-LABEL: select_fcmp_oeq_1_2:
diff --git a/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll b/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll
index 0a3b5d47e5650a..cfde765873386d 100644
--- a/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll
+++ b/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll
@@ -24,31 +24,31 @@ define void @_Z3foov() {
; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_49)
; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_49)
; CHECK-NEXT: vsetivli zero, 2, e16, m2, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vle16.v v10, (a0)
; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_48)
; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_48)
-; CHECK-NEXT: vle8.v v10, (a0)
+; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vs1r.v v10, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_46)
; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_46)
-; CHECK-NEXT: vle16.v v10, (a0)
+; CHECK-NEXT: vle16.v v12, (a0)
; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_45)
; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_45)
-; CHECK-NEXT: vle16.v v12, (a0)
+; CHECK-NEXT: vle16.v v14, (a0)
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 1
-; CHECK-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: vs2r.v v12, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: vs2r.v v14, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: add a0, a0, a1
+; CHECK-NEXT: vs2r.v v16, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_40)
@@ -58,27 +58,26 @@ define void @_Z3foov() {
; CHECK-NEXT: lui a0, 1048572
; CHECK-NEXT: addi a0, a0, 928
; CHECK-NEXT: vmsbc.vx v0, v8, a0
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 1
-; CHECK-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: vl2r.v v12, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: vl2r.v v14, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_44)
+; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_44)
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: vl2r.v v8, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: add a1, a1, a2
+; CHECK-NEXT: vl2r.v v10, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: add a1, a1, a2
+; CHECK-NEXT: vl2r.v v12, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: add a1, a1, a2
+; CHECK-NEXT: vl2r.v v14, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vle16.v v14, (a0)
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl1r.v v14, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vl1r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, zero, e16, m2, tu, mu
-; CHECK-NEXT: vsext.vf2 v8, v14, v0.t
-; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_44)
-; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_44)
-; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT: vle16.v v14, (a0)
+; CHECK-NEXT: vsext.vf2 v8, v16, v0.t
; CHECK-NEXT: lui a0, %hi(var_47)
; CHECK-NEXT: addi a0, a0, %lo(var_47)
; CHECK-NEXT: vsseg4e16.v v8, (a0)
diff --git a/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll b/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll
index 7523119c4ff778..8a91c46bcdaff0 100644
--- a/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll
+++ b/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll
@@ -280,17 +280,20 @@ define half @caller_half_32(<32 x half> %A) nounwind {
; ZHINX32-NEXT: sw s9, 68(sp) # 4-byte Folded Spill
; ZHINX32-NEXT: sw s10, 64(sp) # 4-byte Folded Spill
; ZHINX32-NEXT: sw s11, 60(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT: lh t0, 112(sp)
-; ZHINX32-NEXT: sh t0, 58(sp) # 2-byte Folded Spill
-; ZHINX32-NEXT: lh t0, 116(sp)
-; ZHINX32-NEXT: sh t0, 56(sp) # 2-byte Folded Spill
-; ZHINX32-NEXT: lh t0, 120(sp)
-; ZHINX32-NEXT: sh t0, 54(sp) # 2-byte Folded Spill
-; ZHINX32-NEXT: lh t0, 124(sp)
-; ZHINX32-NEXT: sh t0, 52(sp) # 2-byte Folded Spill
-; ZHINX32-NEXT: lh t6, 128(sp)
-; ZHINX32-NEXT: lh t4, 132(sp)
-; ZHINX32-NEXT: lh t5, 136(sp)
+; ZHINX32-NEXT: sh a7, 58(sp) # 2-byte Folded Spill
+; ZHINX32-NEXT: sh a6, 56(sp) # 2-byte Folded Spill
+; ZHINX32-NEXT: sh a5, 54(sp) # 2-byte Folded Spill
+; ZHINX32-NEXT: sh a4, 52(sp) # 2-byte Folded Spill
+; ZHINX32-NEXT: mv a7, a3
+; ZHINX32-NEXT: mv a6, a2
+; ZHINX32-NEXT: mv a5, a1
+; ZHINX32-NEXT: lh t3, 112(sp)
+; ZHINX32-NEXT: lh t4, 116(sp)
+; ZHINX32-NEXT: lh t5, 120(sp)
+; ZHINX32-NEXT: lh t6, 124(sp)
+; ZHINX32-NEXT: lh t0, 128(sp)
+; ZHINX32-NEXT: lh t1, 132(sp)
+; ZHINX32-NEXT: lh t2, 136(sp)
; ZHINX32-NEXT: lh s0, 140(sp)
; ZHINX32-NEXT: lh s1, 144(sp)
; ZHINX32-NEXT: lh s2, 148(sp)
@@ -304,14 +307,14 @@ define half @caller_half_32(<32 x half> %A) nounwind {
; ZHINX32-NEXT: lh s10, 180(sp)
; ZHINX32-NEXT: lh s11, 184(sp)
; ZHINX32-NEXT: lh ra, 188(sp)
-; ZHINX32-NEXT: lh t0, 192(sp)
-; ZHINX32-NEXT: lh t1, 196(sp)
-; ZHINX32-NEXT: lh t2, 200(sp)
-; ZHINX32-NEXT: lh t3, 204(sp)
-; ZHINX32-NEXT: sh t0, 32(sp)
-; ZHINX32-NEXT: sh t1, 34(sp)
-; ZHINX32-NEXT: sh t2, 36(sp)
-; ZHINX32-NEXT: sh t3, 38(sp)
+; ZHINX32-NEXT: lh a1, 192(sp)
+; ZHINX32-NEXT: lh a2, 196(sp)
+; ZHINX32-NEXT: lh a3, 200(sp)
+; ZHINX32-NEXT: lh a4, 204(sp)
+; ZHINX32-NEXT: sh a1, 32(sp)
+; ZHINX32-NEXT: sh a2, 34(sp)
+; ZHINX32-NEXT: sh a3, 36(sp)
+; ZHINX32-NEXT: sh a4, 38(sp)
; ZHINX32-NEXT: sh s9, 24(sp)
; ZHINX32-NEXT: sh s10, 26(sp)
; ZHINX32-NEXT: sh s11, 28(sp)
@@ -324,14 +327,17 @@ define half @caller_half_32(<32 x half> %A) nounwind {
; ZHINX32-NEXT: sh s2, 10(sp)
; ZHINX32-NEXT: sh s3, 12(sp)
; ZHINX32-NEXT: sh s4, 14(sp)
-; ZHINX32-NEXT: sh t6, 0(sp)
-; ZHINX32-NEXT: sh t4, 2(sp)
-; ZHINX32-NEXT: sh t5, 4(sp)
+; ZHINX32-NEXT: sh t0, 0(sp)
+; ZHINX32-NEXT: sh t1, 2(sp)
+; ZHINX32-NEXT: sh t2, 4(sp)
; ZHINX32-NEXT: sh s0, 6(sp)
-; ZHINX32-NEXT: lh t3, 58(sp) # 2-byte Folded Reload
-; ZHINX32-NEXT: lh t4, 56(sp) # 2-byte Folded Reload
-; ZHINX32-NEXT: lh t5, 54(sp) # 2-byte Folded Reload
-; ZHINX32-NEXT: lh t6, 52(sp) # 2-byte Folded Reload
+; ZHINX32-NEXT: mv a1, a5
+; ZHINX32-NEXT: mv a2, a6
+; ZHINX32-NEXT: mv a3, a7
+; ZHINX32-NEXT: lh a4, 52(sp) # 2-byte Folded Reload
+; ZHINX32-NEXT: lh a5, 54(sp) # 2-byte Folded Reload
+; ZHINX32-NEXT: lh a6, 56(sp) # 2-byte Folded Reload
+; ZHINX32-NEXT: lh a7, 58(sp) # 2-byte Folded Reload
; ZHINX32-NEXT: call callee_half_32
; ZHINX32-NEXT: lw ra, 108(sp) # 4-byte Folded Reload
; ZHINX32-NEXT: lw s0, 104(sp) # 4-byte Folded Reload
@@ -365,17 +371,20 @@ define half @caller_half_32(<32 x half> %A) nounwind {
; ZHINX64-NEXT: sd s9, 72(sp) # 8-byte Folded Spill
; ZHINX64-NEXT: sd s10, 64(sp) # 8-byte Folded Spill
; ZHINX64-NEXT: sd s11, 56(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT: lh t0, 160(sp)
-; ZHINX64-NEXT: sh t0, 54(sp) # 2-byte Folded Spill
-; ZHINX64-NEXT: lh t0, 168(sp)
-; ZHINX64-NEXT: sh t0, 52(sp) # 2-byte Folded Spill
-; ZHINX64-NEXT: lh t0, 176(sp)
-; ZHINX64-NEXT: sh t0, 50(sp) # 2-byte Folded Spill
-; ZHINX64-NEXT: lh t0, 184(sp)
-; ZHINX64-NEXT: sh t0, 48(sp) # 2-byte Folded Spill
-; ZHINX64-NEXT: lh t6, 192(sp)
-; ZHINX64-NEXT: lh t4, 200(sp)
-; ZHINX64-NEXT: lh t5, 208(sp)
+; ZHINX64-NEXT: sh a7, 54(sp) # 2-byte Folded Spill
+; ZHINX64-NEXT: sh a6, 52(sp) # 2-byte Folded Spill
+; ZHINX64-NEXT: sh a5, 50(sp) # 2-byte Folded Spill
+; ZHINX64-NEXT: sh a4, 48(sp) # 2-byte Folded Spill
+; ZHINX64-NEXT: mv a7, a3
+; ZHINX64-NEXT: mv a6, a2
+; ZHINX64-NEXT: mv a5, a1
+; ZHINX64-NEXT: lh t3, 160(sp)
+; ZHINX64-NEXT: lh t4, 168(sp)
+; ZHINX64-NEXT: lh t5, 176(sp)
+; ZHINX64-NEXT: lh t6, 184(sp)
+; ZHINX64-NEXT: lh t0, 192(sp)
+; ZHINX64-NEXT: lh t1, 200(sp)
+; ZHINX64-NEXT: lh t2, 208(sp)
; ZHINX64-NEXT: lh s0, 216(sp)
; ZHINX64-NEXT: lh s1, 224(sp)
; ZHINX64-NEXT: lh s2, 232(sp)
@@ -389,14 +398,14 @@ define half @caller_half_32(<32 x half> %A) nounwind {
; ZHINX64-NEXT: lh s10, 296(sp)
; ZHINX64-NEXT: lh s11, 304(sp)
; ZHINX64-NEXT: lh ra, 312(sp)
-; ZHINX64-NEXT: lh t0, 320(sp)
-; ZHINX64-NEXT: lh t1, 328(sp)
-; ZHINX64-NEXT: lh t2, 336(sp)
-; ZHINX64-NEXT: lh t3, 344(sp)
-; ZHINX64-NEXT: sh t0, 32(sp)
-; ZHINX64-NEXT: sh t1, 34(sp)
-; ZHINX64-NEXT: sh t2, 36(sp)
-; ZHINX64-NEXT: sh t3, 38(sp)
+; ZHINX64-NEXT: lh a1, 320(sp)
+; ZHINX64-NEXT: lh a2, 328(sp)
+; ZHINX64-NEXT: lh a3, 336(sp)
+; ZHINX64-NEXT: lh a4, 344(sp)
+; ZHINX64-NEXT: sh a1, 32(sp)
+; ZHINX64-NEXT: sh a2, 34(sp)
+; ZHINX64-NEXT: sh a3, 36(sp)
+; ZHINX64-NEXT: sh a4, 38(sp)
; ZHINX64-NEXT: sh s9, 24(sp)
; ZHINX64-NEXT: sh s10, 26(sp)
; ZHINX64-NEXT: sh s11, 28(sp)
@@ -409,14 +418,17 @@ define half @caller_half_32(<32 x half> %A) nounwind {
; ZHINX64-NEXT: sh s2, 10(sp)
; ZHINX64-NEXT: sh s3, 12(sp)
; ZHINX64-NEXT: sh s4, 14(sp)
-; ZHINX64-NEXT: sh t6, 0(sp)
-; ZHINX64-NEXT: sh t4, 2(sp)
-; ZHINX64-NEXT: sh t5, 4(sp)
+; ZHINX64-NEXT: sh t0, 0(sp)
+; ZHINX64-NEXT: sh t1, 2(sp)
+; ZHINX64-NEXT: sh t2, 4(sp)
; ZHINX64-NEXT: sh s0, 6(sp)
-; ZHINX64-NEXT: lh t3, 54(sp) # 2-byte Folded Reload
-; ZHINX64-NEXT: lh t4, 52(sp) # 2-byte Folded Reload
-; ZHINX64-NEXT: lh t5, 50(sp) # 2-byte Folded Reload
-; ZHINX64-NEXT: lh t6, 48(sp) # 2-byte Folded Reload
+; ZHINX64-NEXT: mv a1, a5
+; ZHINX64-NEXT: mv a2, a6
+; ZHINX64-NEXT: mv a3, a7
+; ZHINX64-NEXT: lh a4, 48(sp) # 2-byte Folded Reload
+; ZHINX64-NEXT: lh a5, 50(sp) # 2-byte Folded Reload
+; ZHINX64-NEXT: lh a6, 52(sp) # 2-byte Folded Reload
+; ZHINX64-NEXT: lh a7, 54(sp) # 2-byte Folded Reload
; ZHINX64-NEXT: call callee_half_32
; ZHINX64-NEXT: ld ra, 152(sp) # 8-byte Folded Reload
; ZHINX64-NEXT: ld s0, 144(sp) # 8-byte Folded Reload
@@ -874,17 +886,20 @@ define float @caller_float_32(<32 x float> %A) nounwind {
; ZHINX32-NEXT: sw s9, 116(sp) # 4-byte Folded Spill
; ZHINX32-NEXT: sw s10, 112(sp) # 4-byte Folded Spill
; ZHINX32-NEXT: sw s11, 108(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT: lw t0, 160(sp)
-; ZHINX32-NEXT: sw t0, 104(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT: lw t0, 164(sp)
-; ZHINX32-NEXT: sw t0, 100(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT: lw t0, 168(sp)
-; ZHINX32-NEXT: sw t0, 96(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT: lw t0, 172(sp)
-; ZHINX32-NEXT: sw t0, 92(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT: lw t6, 176(sp)
-; ZHINX32-NEXT: lw t4, 180(sp)
-; ZHINX32-NEXT: lw t5, 184(sp)
+; ZHINX32-NEXT: sw a7, 104(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT: sw a6, 100(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT: sw a5, 96(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT: sw a4, 92(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT: mv a7, a3
+; ZHINX32-NEXT: mv a6, a2
+; ZHINX32-NEXT: mv a5, a1
+; ZHINX32-NEXT: lw t3, 160(sp)
+; ZHINX32-NEXT: lw t4, 164(sp)
+; ZHINX32-NEXT: lw t5, 168(sp)
+; ZHINX32-NEXT: lw t6, 172(sp)
+; ZHINX32-NEXT: lw t0, 176(sp)
+; ZHINX32-NEXT: lw t1, 180(sp)
+; ZHINX32-NEXT: lw t2, 184(sp)
; ZHINX32-NEXT: lw s0, 188(sp)
; ZHINX32-NEXT: lw s1, 192(sp)
; ZHINX32-NEXT: lw s2, 196(sp)
@@ -898,14 +913,14 @@ define float @caller_float_32(<32 x float> %A) nounwind {
; ZHINX32-NEXT: lw s10, 228(sp)
; ZHINX32-NEXT: lw s11, 232(sp)
; ZHINX32-NEXT: lw ra, 236(sp)
-; ZHINX32-NEXT: lw t0, 240(sp)
-; ZHINX32-NEXT: lw t1, 244(sp)
-; ZHINX32-NEXT: lw t2, 248(sp)
-; ZHINX32-NEXT: lw t3, 252(sp)
-; ZHINX32-NEXT: sw t0, 64(sp)
-; ZHINX32-NEXT: sw t1, 68(sp)
-; ZHINX32-NEXT: sw t2, 72(sp)
-; ZHINX32-NEXT: sw t3, 76(sp)
+; ZHINX32-NEXT: lw a1, 240(sp)
+; ZHINX32-NEXT: lw a2, 244(sp)
+; ZHINX32-NEXT: lw a3, 248(sp)
+; ZHINX32-NEXT: lw a4, 252(sp)
+; ZHINX32-NEXT: sw a1, 64(sp)
+; ZHINX32-NEXT: sw a2, 68(sp)
+; ZHINX32-NEXT: sw a3, 72(sp)
+; ZHINX32-NEXT: sw a4, 76(sp)
; ZHINX32-NEXT: sw s9, 48(sp)
; ZHINX32-NEXT: sw s10, 52(sp)
; ZHINX32-NEXT: sw s11, 56(sp)
@@ -918,14 +933,17 @@ define float @caller_float_32(<32 x float> %A) nounwind {
; ZHINX32-NEXT: sw s2, 20(sp)
; ZHINX32-NEXT: sw s3, 24(sp)
; ZHINX32-NEXT: sw s4, 28(sp)
-; ZHINX32-NEXT: sw t6, 0(sp)
-; ZHINX32-NEXT: sw t4, 4(sp)
-; ZHINX32-NEXT: sw t5, 8(sp)
+; ZHINX32-NEXT: sw t0, 0(sp)
+; ZHINX32-NEXT: sw t1, 4(sp)
+; ZHINX32-NEXT: sw t2, 8(sp)
; ZHINX32-NEXT: sw s0, 12(sp)
-; ZHINX32-NEXT: lw t3, 104(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT: lw t4, 100(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT: lw t5, 96(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT: lw t6, 92(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT: mv a1, a5
+; ZHINX32-NEXT: mv a2, a6
+; ZHINX32-NEXT: mv a3, a7
+; ZHINX32-NEXT: lw a4, 92(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT: lw a5, 96(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT: lw a6, 100(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT: lw a7, 104(sp) # 4-byte Folded Reload
; ZHINX32-NEXT: call callee_float_32
; ZHINX32-NEXT: lw ra, 156(sp) # 4-byte Folded Reload
; ZHINX32-NEXT: lw s0, 152(sp) # 4-byte Folded Reload
@@ -959,17 +977,20 @@ define float @caller_float_32(<32 x float> %A) nounwind {
; ZHINX64-NEXT: sd s9, 120(sp) # 8-byte Folded Spill
; ZHINX64-NEXT: sd s10, 112(sp) # 8-byte Folded Spill
; ZHINX64-NEXT: sd s11, 104(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT: lw t0, 208(sp)
-; ZHINX64-NEXT: sw t0, 100(sp) # 4-byte Folded Spill
-; ZHINX64-NEXT: lw t0, 216(sp)
-; ZHINX64-NEXT: sw t0, 96(sp) # 4-byte Folded Spill
-; ZHINX64-NEXT: lw t0, 224(sp)
-; ZHINX64-NEXT: sw t0, 92(sp) # 4-byte Folded Spill
-; ZHINX64-NEXT: lw t0, 232(sp)
-; ZHINX64-NEXT: sw t0, 88(sp) # 4-byte Folded Spill
-; ZHINX64-NEXT: lw t6, 240(sp)
-; ZHINX64-NEXT: lw t4, 248(sp)
-; ZHINX64-NEXT: lw t5, 256(sp)
+; ZHINX64-NEXT: sw a7, 100(sp) # 4-byte Folded Spill
+; ZHINX64-NEXT: sw a6, 96(sp) # 4-byte Folded Spill
+; ZHINX64-NEXT: sw a5, 92(sp) # 4-byte Folded Spill
+; ZHINX64-NEXT: sw a4, 88(sp) # 4-byte Folded Spill
+; ZHINX64-NEXT: mv a7, a3
+; ZHINX64-NEXT: mv a6, a2
+; ZHINX64-NEXT: mv a5, a1
+; ZHINX64-NEXT: lw t3, 208(sp)
+; ZHINX64-NEXT: lw t4, 216(sp)
+; ZHINX64-NEXT: lw t5, 224(sp)
+; ZHINX64-NEXT: lw t6, 232(sp)
+; ZHINX64-NEXT: lw t0, 240(sp)
+; ZHINX64-NEXT: lw t1, 248(sp)
+; ZHINX64-NEXT: lw t2, 256(sp)
; ZHINX64-NEXT: lw s0, 264(sp)
; ZHINX64-NEXT: lw s1, 272(sp)
; ZHINX64-NEXT: lw s2, 280(sp)
@@ -983,14 +1004,14 @@ define float @caller_float_32(<32 x float> %A) nounwind {
; ZHINX64-NEXT: lw s10, 344(sp)
; ZHINX64-NEXT: lw s11, 352(sp)
; ZHINX64-NEXT: lw ra, 360(sp)
-; ZHINX64-NEXT: lw t0, 368(sp)
-; ZHINX64-NEXT: lw t1, 376(sp)
-; ZHINX64-NEXT: lw t2, 384(sp)
-; ZHINX64-NEXT: lw t3, 392(sp)
-; ZHINX64-NEXT: sw t0, 64(sp)
-; ZHINX64-NEXT: sw t1, 68(sp)
-; ZHINX64-NEXT: sw t2, 72(sp)
-; ZHINX64-NEXT: sw t3, 76(sp)
+; ZHINX64-NEXT: lw a1, 368(sp)
+; ZHINX64-NEXT: lw a2, 376(sp)
+; ZHINX64-NEXT: lw a3, 384(sp)
+; ZHINX64-NEXT: lw a4, 392(sp)
+; ZHINX64-NEXT: sw a1, 64(sp)
+; ZHINX64-NEXT: sw a2, 68(sp)
+; ZHINX64-NEXT: sw a3, 72(sp)
+; ZHINX64-NEXT: sw a4, 76(sp)
; ZHINX64-NEXT: sw s9, 48(sp)
; ZHINX64-NEXT: sw s10, 52(sp)
; ZHINX64-NEXT: sw s11, 56(sp)
@@ -1003,14 +1024,17 @@ define float @caller_float_32(<32 x float> %A) nounwind {
; ZHINX64-NEXT: sw s2, 20(sp)
; ZHINX64-NEXT: sw s3, 24(sp)
; ZHINX64-NEXT: sw s4, 28(sp)
-; ZHINX64-NEXT: sw t6, 0(sp)
-; ZHINX64-NEXT: sw t4, 4(sp)
-; ZHINX64-NEXT: sw t5, 8(sp)
+; ZHINX64-NEXT: sw t0, 0(sp)
+; ZHINX64-NEXT: sw t1, 4(sp)
+; ZHINX64-NEXT: sw t2, 8(sp)
; ZHINX64-NEXT: sw s0, 12(sp)
-; ZHINX64-NEXT: lw t3, 100(sp) # 4-byte Folded Reload
-; ZHINX64-NEXT: lw t4, 96(sp) # 4-byte Folded Reload
-; ZHINX64-NEXT: lw t5, 92(sp) # 4-byte Folded Reload
-; ZHINX64-NEXT: lw t6, 88(sp) # 4-byte Folded Reload
+; ZHINX64-NEXT: mv a1, a5
+; ZHINX64-NEXT: mv a2, a6
+; ZHINX64-NEXT: mv a3, a7
+; ZHINX64-NEXT: lw a4, 88(sp) # 4-byte Folded Reload
+; ZHINX64-NEXT: lw a5, 92(sp) # 4-byte Folded Reload
+; ZHINX64-NEXT: lw a6, 96(sp) # 4-byte Folded Reload
+; ZHINX64-NEXT: lw a7, 100(sp) # 4-byte Folded Reload
; ZHINX64-NEXT: call callee_float_32
; ZHINX64-NEXT: ld ra, 200(sp) # 8-byte Folded Reload
; ZHINX64-NEXT: ld s0, 192(sp) # 8-byte Folded Reload
@@ -1044,17 +1068,20 @@ define float @caller_float_32(<32 x float> %A) nounwind {
; ZFINX32-NEXT: sw s9, 116(sp) # 4-byte Folded Spill
; ZFINX32-NEXT: sw s10, 112(sp) # 4-byte Folded Spill
; ZFINX32-NEXT: sw s11, 108(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT: lw t0, 160(sp)
-; ZFINX32-NEXT: sw t0, 104(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT: lw t0, 164(sp)
-; ZFINX32-NEXT: sw t0, 100(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT: lw t0, 168(sp)
-; ZFINX32-NEXT: sw t0, 96(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT: lw t0, 172(sp)
-; ZFINX32-NEXT: sw t0, 92(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT: lw t6, 176(sp)
-; ZFINX32-NEXT: lw t4, 180(sp)
-; ZFINX32-NEXT: lw t5, 184(sp)
+; ZFINX32-NEXT: sw a7, 104(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT: sw a6, 100(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT: sw a5, 96(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT: sw a4, 92(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT: mv a7, a3
+; ZFINX32-NEXT: mv a6, a2
+; ZFINX32-NEXT: mv a5, a1
+; ZFINX32-NEXT: lw t3, 160(sp)
+; ZFINX32-NEXT: lw t4, 164(sp)
+; ZFINX32-NEXT: lw t5, 168(sp)
+; ZFINX32-NEXT: lw t6, 172(sp)
+; ZFINX32-NEXT: lw t0, 176(sp)
+; ZFINX32-NEXT: lw t1, 180(sp)
+; ZFINX32-NEXT: lw t2, 184(sp)
; ZFINX32-NEXT: lw s0, 188(sp)
; ZFINX32-NEXT: lw s1, 192(sp)
; ZFINX32-NEXT: lw s2, 196(sp)
@@ -1068,14 +1095,14 @@ define float @caller_float_32(<32 x float> %A) nounwind {
; ZFINX32-NEXT: lw s10, 228(sp)
; ZFINX32-NEXT: lw s11, 232(sp)
; ZFINX32-NEXT: lw ra, 236(sp)
-; ZFINX32-NEXT: lw t0, 240(sp)
-; ZFINX32-NEXT: lw t1, 244(sp)
-; ZFINX32-NEXT: lw t2, 248(sp)
-; ZFINX32-NEXT: lw t3, 252(sp)
-; ZFINX32-NEXT: sw t0, 64(sp)
-; ZFINX32-NEXT: sw t1, 68(sp)
-; ZFINX32-NEXT: sw t2, 72(sp)
-; ZFINX32-NEXT: sw t3, 76(sp)
+; ZFINX32-NEXT: lw a1, 240(sp)
+; ZFINX32-NEXT: lw a2, 244(sp)
+; ZFINX32-NEXT: lw a3, 248(sp)
+; ZFINX32-NEXT: lw a4, 252(sp)
+; ZFINX32-NEXT: sw a1, 64(sp)
+; ZFINX32-NEXT: sw a2, 68(sp)
+; ZFINX32-NEXT: sw a3, 72(sp)
+; ZFINX32-NEXT: sw a4, 76(sp)
; ZFINX32-NEXT: sw s9, 48(sp)
; ZFINX32-NEXT: sw s10, 52(sp)
; ZFINX32-NEXT: sw s11, 56(sp)
@@ -1088,14 +1115,17 @@ define float @caller_float_32(<32 x float> %A) nounwind {
; ZFINX32-NEXT: sw s2, 20(sp)
; ZFINX32-NEXT: sw s3, 24(sp)
; ZFINX32-NEXT: sw s4, 28(sp)
-; ZFINX32-NEXT: sw t6, 0(sp)
-; ZFINX32-NEXT: sw t4, 4(sp)
-; ZFINX32-NEXT: sw t5, 8(sp)
+; ZFINX32-NEXT: sw t0, 0(sp)
+; ZFINX32-NEXT: sw t1, 4(sp)
+; ZFINX32-NEXT: sw t2, 8(sp)
; ZFINX32-NEXT: sw s0, 12(sp)
-; ZFINX32-NEXT: lw t3, 104(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT: lw t4, 100(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT: lw t5, 96(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT: lw t6, 92(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT: mv a1, a5
+; ZFINX32-NEXT: mv a2, a6
+; ZFINX32-NEXT: mv a3, a7
+; ZFINX32-NEXT: lw a4, 92(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT: lw a5, 96(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT: lw a6, 100(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT: lw a7, 104(sp) # 4-byte Folded Reload
; ZFINX32-NEXT: call callee_float_32
; ZFINX32-NEXT: lw ra, 156(sp) # 4-byte Folded Reload
; ZFINX32-NEXT: lw s0, 152(sp) # 4-byte Folded Reload
@@ -1129,17 +1159,20 @@ define float @caller_float_32(<32 x float> %A) nounwind {
; ZFINX64-NEXT: sd s9, 120(sp) # 8-byte Folded Spill
; ZFINX64-NEXT: sd s10, 112(sp) # 8-byte Folded Spill
; ZFINX64-NEXT: sd s11, 104(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT: lw t0, 208(sp)
-; ZFINX64-NEXT: sw t0, 100(sp) # 4-byte Folded Spill
-; ZFINX64-NEXT: lw t0, 216(sp)
-; ZFINX64-NEXT: sw t0, 96(sp) # 4-byte Folded Spill
-; ZFINX64-NEXT: lw t0, 224(sp)
-; ZFINX64-NEXT: sw t0, 92(sp) # 4-byte Folded Spill
-; ZFINX64-NEXT: lw t0, 232(sp)
-; ZFINX64-NEXT: sw t0, 88(sp) # 4-byte Folded Spill
-; ZFINX64-NEXT: lw t6, 240(sp)
-; ZFINX64-NEXT: lw t4, 248(sp)
-; ZFINX64-NEXT: lw t5, 256(sp)
+; ZFINX64-NEXT: sw a7, 100(sp) # 4-byte Folded Spill
+; ZFINX64-NEXT: sw a6, 96(sp) # 4-byte Folded Spill
+; ZFINX64-NEXT: sw a5, 92(sp) # 4-byte Folded Spill
+; ZFINX64-NEXT: sw a4, 88(sp) # 4-byte Folded Spill
+; ZFINX64-NEXT: mv a7, a3
+; ZFINX64-NEXT: mv a6, a2
+; ZFINX64-NEXT: mv a5, a1
+; ZFINX64-NEXT: lw t3, 208(sp)
+; ZFINX64-NEXT: lw t4, 216(sp)
+; ZFINX64-NEXT: lw t5, 224(sp)
+; ZFINX64-NEXT: lw t6, 232(sp)
+; ZFINX64-NEXT: lw t0, 240(sp)
+; ZFINX64-NEXT: lw t1, 248(sp)
+; ZFINX64-NEXT: lw t2, 256(sp)
; ZFINX64-NEXT: lw s0, 264(sp)
; ZFINX64-NEXT: lw s1, 272(sp)
; ZFINX64-NEXT: lw s2, 280(sp)
@@ -1153,14 +1186,14 @@ define float @caller_float_32(<32 x float> %A) nounwind {
; ZFINX64-NEXT: lw s10, 344(sp)
; ZFINX64-NEXT: lw s11, 352(sp)
; ZFINX64-NEXT: lw ra, 360(sp)
-; ZFINX64-NEXT: lw t0, 368(sp)
-; ZFINX64-NEXT: lw t1, 376(sp)
-; ZFINX64-NEXT: lw t2, 384(sp)
-; ZFINX64-NEXT: lw t3, 392(sp)
-; ZFINX64-NEXT: sw t0, 64(sp)
-; ZFINX64-NEXT: sw t1, 68(sp)
-; ZFINX64-NEXT: sw t2, 72(sp)
-; ZFINX64-NEXT: sw t3, 76(sp)
+; ZFINX64-NEXT: lw a1, 368(sp)
+; ZFINX64-NEXT: lw a2, 376(sp)
+; ZFINX64-NEXT: lw a3, 384(sp)
+; ZFINX64-NEXT: lw a4, 392(sp)
+; ZFINX64-NEXT: sw a1, 64(sp)
+; ZFINX64-NEXT: sw a2, 68(sp)
+; ZFINX64-NEXT: sw a3, 72(sp)
+; ZFINX64-NEXT: sw a4, 76(sp)
; ZFINX64-NEXT: sw s9, 48(sp)
; ZFINX64-NEXT: sw s10, 52(sp)
; ZFINX64-NEXT: sw s11, 56(sp)
@@ -1173,14 +1206,17 @@ define float @caller_float_32(<32 x float> %A) nounwind {
; ZFINX64-NEXT: sw s2, 20(sp)
; ZFINX64-NEXT: sw s3, 24(sp)
; ZFINX64-NEXT: sw s4, 28(sp)
-; ZFINX64-NEXT: sw t6, 0(sp)
-; ZFINX64-NEXT: sw t4, 4(sp)
-; ZFINX64-NEXT: sw t5, 8(sp)
+; ZFINX64-NEXT: sw t0, 0(sp)
+; ZFINX64-NEXT: sw t1, 4(sp)
+; ZFINX64-NEXT: sw t2, 8(sp)
; ZFINX64-NEXT: sw s0, 12(sp)
-; ZFINX64-NEXT: lw t3, 100(sp) # 4-byte Folded Reload
-; ZFINX64-NEXT: lw t4, 96(sp) # 4-byte Folded Reload
-; ZFINX64-NEXT: lw t5, 92(sp) # 4-byte Folded Reload
-; ZFINX64-NEXT: lw t6, 88(sp) # 4-byte Folded Reload
+; ZFINX64-NEXT: mv a1, a5
+; ZFINX64-NEXT: mv a2, a6
+; ZFINX64-NEXT: mv a3, a7
+; ZFINX64-NEXT: lw a4, 88(sp) # 4-byte Folded Reload
+; ZFINX64-NEXT: lw a5, 92(sp) # 4-byte Folded Reload
+; ZFINX64-NEXT: lw a6, 96(sp) # 4-byte Folded Reload
+; ZFINX64-NEXT: lw a7, 100(sp) # 4-byte Folded Reload
; ZFINX64-NEXT: call callee_float_32
; ZFINX64-NEXT: ld ra, 200(sp) # 8-byte Folded Reload
; ZFINX64-NEXT: ld s0, 192(sp) # 8-byte Folded Reload
@@ -1214,17 +1250,20 @@ define float @caller_float_32(<32 x float> %A) nounwind {
; ZDINX32-NEXT: sw s9, 116(sp) # 4-byte Folded Spill
; ZDINX32-NEXT: sw s10, 112(sp) # 4-byte Folded Spill
; ZDINX32-NEXT: sw s11, 108(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT: lw t0, 160(sp)
-; ZDINX32-NEXT: sw t0, 104(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT: lw t0, 164(sp)
-; ZDINX32-NEXT: sw t0, 100(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT: lw t0, 168(sp)
-; ZDINX32-NEXT: sw t0, 96(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT: lw t0, 172(sp)
-; ZDINX32-NEXT: sw t0, 92(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT: lw t6, 176(sp)
-; ZDINX32-NEXT: lw t4, 180(sp)
-; ZDINX32-NEXT: lw t5, 184(sp)
+; ZDINX32-NEXT: sw a7, 104(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT: sw a6, 100(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT: sw a5, 96(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT: sw a4, 92(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT: mv a7, a3
+; ZDINX32-NEXT: mv a6, a2
+; ZDINX32-NEXT: mv a5, a1
+; ZDINX32-NEXT: lw t3, 160(sp)
+; ZDINX32-NEXT: lw t4, 164(sp)
+; ZDINX32-NEXT: lw t5, 168(sp)
+; ZDINX32-NEXT: lw t6, 172(sp)
+; ZDINX32-NEXT: lw t0, 176(sp)
+; ZDINX32-NEXT: lw t1, 180(sp)
+; ZDINX32-NEXT: lw t2, 184(sp)
; ZDINX32-NEXT: lw s0, 188(sp)
; ZDINX32-NEXT: lw s1, 192(sp)
; ZDINX32-NEXT: lw s2, 196(sp)
@@ -1238,14 +1277,14 @@ define float @caller_float_32(<32 x float> %A) nounwind {
; ZDINX32-NEXT: lw s10, 228(sp)
; ZDINX32-NEXT: lw s11, 232(sp)
; ZDINX32-NEXT: lw ra, 236(sp)
-; ZDINX32-NEXT: lw t0, 240(sp)
-; ZDINX32-NEXT: lw t1, 244(sp)
-; ZDINX32-NEXT: lw t2, 248(sp)
-; ZDINX32-NEXT: lw t3, 252(sp)
-; ZDINX32-NEXT: sw t0, 64(sp)
-; ZDINX32-NEXT: sw t1, 68(sp)
-; ZDINX32-NEXT: sw t2, 72(sp)
-; ZDINX32-NEXT: sw t3, 76(sp)
+; ZDINX32-NEXT: lw a1, 240(sp)
+; ZDINX32-NEXT: lw a2, 244(sp)
+; ZDINX32-NEXT: lw a3, 248(sp)
+; ZDINX32-NEXT: lw a4, 252(sp)
+; ZDINX32-NEXT: sw a1, 64(sp)
+; ZDINX32-NEXT: sw a2, 68(sp)
+; ZDINX32-NEXT: sw a3, 72(sp)
+; ZDINX32-NEXT: sw a4, 76(sp)
; ZDINX32-NEXT: sw s9, 48(sp)
; ZDINX32-NEXT: sw s10, 52(sp)
; ZDINX32-NEXT: sw s11, 56(sp)
@@ -1258,14 +1297,17 @@ define float @caller_float_32(<32 x float> %A) nounwind {
; ZDINX32-NEXT: sw s2, 20(sp)
; ZDINX32-NEXT: sw s3, 24(sp)
; ZDINX32-NEXT: sw s4, 28(sp)
-; ZDINX32-NEXT: sw t6, 0(sp)
-; ZDINX32-NEXT: sw t4, 4(sp)
-; ZDINX32-NEXT: sw t5, 8(sp)
+; ZDINX32-NEXT: sw t0, 0(sp)
+; ZDINX32-NEXT: sw t1, 4(sp)
+; ZDINX32-NEXT: sw t2, 8(sp)
; ZDINX32-NEXT: sw s0, 12(sp)
-; ZDINX32-NEXT: lw t3, 104(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT: lw t4, 100(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT: lw t5, 96(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT: lw t6, 92(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT: mv a1, a5
+; ZDINX32-NEXT: mv a2, a6
+; ZDINX32-NEXT: mv a3, a7
+; ZDINX32-NEXT: lw a4, 92(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT: lw a5, 96(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT: lw a6, 100(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT: lw a7, 104(sp) # 4-byte Folded Reload
; ZDINX32-NEXT: call callee_float_32
; ZDINX32-NEXT: lw ra, 156(sp) # 4-byte Folded Reload
; ZDINX32-NEXT: lw s0, 152(sp) # 4-byte Folded Reload
@@ -1299,17 +1341,20 @@ define float @caller_float_32(<32 x float> %A) nounwind {
; ZDINX64-NEXT: sd s9, 120(sp) # 8-byte Folded Spill
; ZDINX64-NEXT: sd s10, 112(sp) # 8-byte Folded Spill
; ZDINX64-NEXT: sd s11, 104(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT: lw t0, 208(sp)
-; ZDINX64-NEXT: sw t0, 100(sp) # 4-byte Folded Spill
-; ZDINX64-NEXT: lw t0, 216(sp)
-; ZDINX64-NEXT: sw t0, 96(sp) # 4-byte Folded Spill
-; ZDINX64-NEXT: lw t0, 224(sp)
-; ZDINX64-NEXT: sw t0, 92(sp) # 4-byte Folded Spill
-; ZDINX64-NEXT: lw t0, 232(sp)
-; ZDINX64-NEXT: sw t0, 88(sp) # 4-byte Folded Spill
-; ZDINX64-NEXT: lw t6, 240(sp)
-; ZDINX64-NEXT: lw t4, 248(sp)
-; ZDINX64-NEXT: lw t5, 256(sp)
+; ZDINX64-NEXT: sw a7, 100(sp) # 4-byte Folded Spill
+; ZDINX64-NEXT: sw a6, 96(sp) # 4-byte Folded Spill
+; ZDINX64-NEXT: sw a5, 92(sp) # 4-byte Folded Spill
+; ZDINX64-NEXT: sw a4, 88(sp) # 4-byte Folded Spill
+; ZDINX64-NEXT: mv a7, a3
+; ZDINX64-NEXT: mv a6, a2
+; ZDINX64-NEXT: mv a5, a1
+; ZDINX64-NEXT: lw t3, 208(sp)
+; ZDINX64-NEXT: lw t4, 216(sp)
+; ZDINX64-NEXT: lw t5, 224(sp)
+; ZDINX64-NEXT: lw t6, 232(sp)
+; ZDINX64-NEXT: lw t0, 240(sp)
+; ZDINX64-NEXT: lw t1, 248(sp)
+; ZDINX64-NEXT: lw t2, 256(sp)
; ZDINX64-NEXT: lw s0, 264(sp)
; ZDINX64-NEXT: lw s1, 272(sp)
; ZDINX64-NEXT: lw s2, 280(sp)
@@ -1323,14 +1368,14 @@ define float @caller_float_32(<32 x float> %A) nounwind {
; ZDINX64-NEXT: lw s10, 344(sp)
; ZDINX64-NEXT: lw s11, 352(sp)
; ZDINX64-NEXT: lw ra, 360(sp)
-; ZDINX64-NEXT: lw t0, 368(sp)
-; ZDINX64-NEXT: lw t1, 376(sp)
-; ZDINX64-NEXT: lw t2, 384(sp)
-; ZDINX64-NEXT: lw t3, 392(sp)
-; ZDINX64-NEXT: sw t0, 64(sp)
-; ZDINX64-NEXT: sw t1, 68(sp)
-; ZDINX64-NEXT: sw t2, 72(sp)
-; ZDINX64-NEXT: sw t3, 76(sp)
+; ZDINX64-NEXT: lw a1, 368(sp)
+; ZDINX64-NEXT: lw a2, 376(sp)
+; ZDINX64-NEXT: lw a3, 384(sp)
+; ZDINX64-NEXT: lw a4, 392(sp)
+; ZDINX64-NEXT: sw a1, 64(sp)
+; ZDINX64-NEXT: sw a2, 68(sp)
+; ZDINX64-NEXT: sw a3, 72(sp)
+; ZDINX64-NEXT: sw a4, 76(sp)
; ZDINX64-NEXT: sw s9, 48(sp)
; ZDINX64-NEXT: sw s10, 52(sp)
; ZDINX64-NEXT: sw s11, 56(sp)
@@ -1343,14 +1388,17 @@ define float @caller_float_32(<32 x float> %A) nounwind {
; ZDINX64-NEXT: sw s2, 20(sp)
; ZDINX64-NEXT: sw s3, 24(sp)
; ZDINX64-NEXT: sw s4, 28(sp)
-; ZDINX64-NEXT: sw t6, 0(sp)
-; ZDINX64-NEXT: sw t4, 4(sp)
-; ZDINX64-NEXT: sw t5, 8(sp)
+; ZDINX64-NEXT: sw t0, 0(sp)
+; ZDINX64-NEXT: sw t1, 4(sp)
+; ZDINX64-NEXT: sw t2, 8(sp)
; ZDINX64-NEXT: sw s0, 12(sp)
-; ZDINX64-NEXT: lw t3, 100(sp) # 4-byte Folded Reload
-; ZDINX64-NEXT: lw t4, 96(sp) # 4-byte Folded Reload
-; ZDINX64-NEXT: lw t5, 92(sp) # 4-byte Folded Reload
-; ZDINX64-NEXT: lw t6, 88(sp) # 4-byte Folded Reload
+; ZDINX64-NEXT: mv a1, a5
+; ZDINX64-NEXT: mv a2, a6
+; ZDINX64-NEXT: mv a3, a7
+; ZDINX64-NEXT: lw a4, 88(sp) # 4-byte Folded Reload
+; ZDINX64-NEXT: lw a5, 92(sp) # 4-byte Folded Reload
+; ZDINX64-NEXT: lw a6, 96(sp) # 4-byte Folded Reload
+; ZDINX64-NEXT: lw a7, 100(sp) # 4-byte Folded Reload
; ZDINX64-NEXT: call callee_float_32
; ZDINX64-NEXT: ld ra, 200(sp) # 8-byte Folded Reload
; ZDINX64-NEXT: ld s0, 192(sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/float-convert.ll b/llvm/test/CodeGen/RISCV/float-convert.ll
index 031976b4fa2b21..5e73f862bca57c 100644
--- a/llvm/test/CodeGen/RISCV/float-convert.ll
+++ b/llvm/test/CodeGen/RISCV/float-convert.ll
@@ -1417,12 +1417,12 @@ define signext i16 @fcvt_w_s_sat_i16(float %a) nounwind {
; RV32IF-LABEL: fcvt_w_s_sat_i16:
; RV32IF: # %bb.0: # %start
; RV32IF-NEXT: feq.s a0, fa0, fa0
-; RV32IF-NEXT: neg a0, a0
; RV32IF-NEXT: lui a1, %hi(.LCPI24_0)
; RV32IF-NEXT: flw fa5, %lo(.LCPI24_0)(a1)
; RV32IF-NEXT: lui a1, 815104
; RV32IF-NEXT: fmv.w.x fa4, a1
; RV32IF-NEXT: fmax.s fa4, fa0, fa4
+; RV32IF-NEXT: neg a0, a0
; RV32IF-NEXT: fmin.s fa5, fa4, fa5
; RV32IF-NEXT: fcvt.w.s a1, fa5, rtz
; RV32IF-NEXT: and a0, a0, a1
@@ -1431,12 +1431,12 @@ define signext i16 @fcvt_w_s_sat_i16(float %a) nounwind {
; RV64IF-LABEL: fcvt_w_s_sat_i16:
; RV64IF: # %bb.0: # %start
; RV64IF-NEXT: feq.s a0, fa0, fa0
-; RV64IF-NEXT: neg a0, a0
; RV64IF-NEXT: lui a1, %hi(.LCPI24_0)
; RV64IF-NEXT: flw fa5, %lo(.LCPI24_0)(a1)
; RV64IF-NEXT: lui a1, 815104
; RV64IF-NEXT: fmv.w.x fa4, a1
; RV64IF-NEXT: fmax.s fa4, fa0, fa4
+; RV64IF-NEXT: neg a0, a0
; RV64IF-NEXT: fmin.s fa5, fa4, fa5
; RV64IF-NEXT: fcvt.l.s a1, fa5, rtz
; RV64IF-NEXT: and a0, a0, a1
diff --git a/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll b/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll
index a204b928304123..a5fc78445066f0 100644
--- a/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll
+++ b/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll
@@ -932,16 +932,16 @@ define i64 @fold_addi_from_different_bb(i64 %k, i64 %n, ptr %a) nounwind {
; RV32I-NEXT: lw a0, 12(s7)
; RV32I-NEXT: lw a1, 8(s7)
; RV32I-NEXT: add a0, a0, s4
+; RV32I-NEXT: addi s5, s5, 1
+; RV32I-NEXT: seqz a2, s5
+; RV32I-NEXT: add s6, s6, a2
+; RV32I-NEXT: xor a2, s5, s2
+; RV32I-NEXT: xor a3, s6, s1
+; RV32I-NEXT: or a2, a2, a3
; RV32I-NEXT: add s3, a1, s3
; RV32I-NEXT: sltu s4, s3, a1
-; RV32I-NEXT: addi s5, s5, 1
-; RV32I-NEXT: seqz a1, s5
-; RV32I-NEXT: add s6, s6, a1
-; RV32I-NEXT: xor a1, s5, s2
-; RV32I-NEXT: xor a2, s6, s1
-; RV32I-NEXT: or a1, a1, a2
; RV32I-NEXT: add s4, a0, s4
-; RV32I-NEXT: bnez a1, .LBB20_5
+; RV32I-NEXT: bnez a2, .LBB20_5
; RV32I-NEXT: .LBB20_6: # %for.cond.cleanup
; RV32I-NEXT: mv a0, s3
; RV32I-NEXT: mv a1, s4
@@ -997,16 +997,16 @@ define i64 @fold_addi_from_different_bb(i64 %k, i64 %n, ptr %a) nounwind {
; RV32I-MEDIUM-NEXT: lw a0, 12(s7)
; RV32I-MEDIUM-NEXT: lw a1, 8(s7)
; RV32I-MEDIUM-NEXT: add a0, a0, s4
+; RV32I-MEDIUM-NEXT: addi s5, s5, 1
+; RV32I-MEDIUM-NEXT: seqz a2, s5
+; RV32I-MEDIUM-NEXT: add s6, s6, a2
+; RV32I-MEDIUM-NEXT: xor a2, s5, s2
+; RV32I-MEDIUM-NEXT: xor a3, s6, s1
+; RV32I-MEDIUM-NEXT: or a2, a2, a3
; RV32I-MEDIUM-NEXT: add s3, a1, s3
; RV32I-MEDIUM-NEXT: sltu s4, s3, a1
-; RV32I-MEDIUM-NEXT: addi s5, s5, 1
-; RV32I-MEDIUM-NEXT: seqz a1, s5
-; RV32I-MEDIUM-NEXT: add s6, s6, a1
-; RV32I-MEDIUM-NEXT: xor a1, s5, s2
-; RV32I-MEDIUM-NEXT: xor a2, s6, s1
-; RV32I-MEDIUM-NEXT: or a1, a1, a2
; RV32I-MEDIUM-NEXT: add s4, a0, s4
-; RV32I-MEDIUM-NEXT: bnez a1, .LBB20_5
+; RV32I-MEDIUM-NEXT: bnez a2, .LBB20_5
; RV32I-MEDIUM-NEXT: .LBB20_6: # %for.cond.cleanup
; RV32I-MEDIUM-NEXT: mv a0, s3
; RV32I-MEDIUM-NEXT: mv a1, s4
@@ -1107,10 +1107,10 @@ define i64 @fold_addi_from_different_bb(i64 %k, i64 %n, ptr %a) nounwind {
; RV64I-LARGE-NEXT: # %bb.1: # %for.body.lr.ph
; RV64I-LARGE-NEXT: mv s0, a2
; RV64I-LARGE-NEXT: mv s1, a1
+; RV64I-LARGE-NEXT: li s2, 0
; RV64I-LARGE-NEXT: .Lpcrel_hi14:
; RV64I-LARGE-NEXT: auipc a1, %pcrel_hi(.LCPI20_0)
; RV64I-LARGE-NEXT: ld s3, %pcrel_lo(.Lpcrel_hi14)(a1)
-; RV64I-LARGE-NEXT: li s2, 0
; RV64I-LARGE-NEXT: slli a0, a0, 4
; RV64I-LARGE-NEXT: add s4, a2, a0
; RV64I-LARGE-NEXT: .LBB20_2: # %for.body
diff --git a/llvm/test/CodeGen/RISCV/half-convert.ll b/llvm/test/CodeGen/RISCV/half-convert.ll
index 0c84a08f1fd451..054a9041a79267 100644
--- a/llvm/test/CodeGen/RISCV/half-convert.ll
+++ b/llvm/test/CodeGen/RISCV/half-convert.ll
@@ -195,12 +195,12 @@ define i16 @fcvt_si_h_sat(half %a) nounwind {
; RV32IZFH: # %bb.0: # %start
; RV32IZFH-NEXT: fcvt.s.h fa5, fa0
; RV32IZFH-NEXT: feq.s a0, fa5, fa5
-; RV32IZFH-NEXT: neg a0, a0
; RV32IZFH-NEXT: lui a1, %hi(.LCPI1_0)
; RV32IZFH-NEXT: flw fa4, %lo(.LCPI1_0)(a1)
; RV32IZFH-NEXT: lui a1, 815104
; RV32IZFH-NEXT: fmv.w.x fa3, a1
; RV32IZFH-NEXT: fmax.s fa5, fa5, fa3
+; RV32IZFH-NEXT: neg a0, a0
; RV32IZFH-NEXT: fmin.s fa5, fa5, fa4
; RV32IZFH-NEXT: fcvt.w.s a1, fa5, rtz
; RV32IZFH-NEXT: and a0, a0, a1
@@ -210,12 +210,12 @@ define i16 @fcvt_si_h_sat(half %a) nounwind {
; RV64IZFH: # %bb.0: # %start
; RV64IZFH-NEXT: fcvt.s.h fa5, fa0
; RV64IZFH-NEXT: feq.s a0, fa5, fa5
-; RV64IZFH-NEXT: neg a0, a0
; RV64IZFH-NEXT: lui a1, %hi(.LCPI1_0)
; RV64IZFH-NEXT: flw fa4, %lo(.LCPI1_0)(a1)
; RV64IZFH-NEXT: lui a1, 815104
; RV64IZFH-NEXT: fmv.w.x fa3, a1
; RV64IZFH-NEXT: fmax.s fa5, fa5, fa3
+; RV64IZFH-NEXT: neg a0, a0
; RV64IZFH-NEXT: fmin.s fa5, fa5, fa4
; RV64IZFH-NEXT: fcvt.l.s a1, fa5, rtz
; RV64IZFH-NEXT: and a0, a0, a1
@@ -225,12 +225,12 @@ define i16 @fcvt_si_h_sat(half %a) nounwind {
; RV32IDZFH: # %bb.0: # %start
; RV32IDZFH-NEXT: fcvt.s.h fa5, fa0
; RV32IDZFH-NEXT: feq.s a0, fa5, fa5
-; RV32IDZFH-NEXT: neg a0, a0
; RV32IDZFH-NEXT: lui a1, %hi(.LCPI1_0)
; RV32IDZFH-NEXT: flw fa4, %lo(.LCPI1_0)(a1)
; RV32IDZFH-NEXT: lui a1, 815104
; RV32IDZFH-NEXT: fmv.w.x fa3, a1
; RV32IDZFH-NEXT: fmax.s fa5, fa5, fa3
+; RV32IDZFH-NEXT: neg a0, a0
; RV32IDZFH-NEXT: fmin.s fa5, fa5, fa4
; RV32IDZFH-NEXT: fcvt.w.s a1, fa5, rtz
; RV32IDZFH-NEXT: and a0, a0, a1
@@ -240,12 +240,12 @@ define i16 @fcvt_si_h_sat(half %a) nounwind {
; RV64IDZFH: # %bb.0: # %start
; RV64IDZFH-NEXT: fcvt.s.h fa5, fa0
; RV64IDZFH-NEXT: feq.s a0, fa5, fa5
-; RV64IDZFH-NEXT: neg a0, a0
; RV64IDZFH-NEXT: lui a1, %hi(.LCPI1_0)
; RV64IDZFH-NEXT: flw fa4, %lo(.LCPI1_0)(a1)
; RV64IDZFH-NEXT: lui a1, 815104
; RV64IDZFH-NEXT: fmv.w.x fa3, a1
; RV64IDZFH-NEXT: fmax.s fa5, fa5, fa3
+; RV64IDZFH-NEXT: neg a0, a0
; RV64IDZFH-NEXT: fmin.s fa5, fa5, fa4
; RV64IDZFH-NEXT: fcvt.l.s a1, fa5, rtz
; RV64IDZFH-NEXT: and a0, a0, a1
@@ -400,12 +400,12 @@ define i16 @fcvt_si_h_sat(half %a) nounwind {
; RV32ID-ILP32-NEXT: call __extendhfsf2
; RV32ID-ILP32-NEXT: fmv.w.x fa5, a0
; RV32ID-ILP32-NEXT: feq.s a0, fa5, fa5
-; RV32ID-ILP32-NEXT: neg a0, a0
; RV32ID-ILP32-NEXT: lui a1, %hi(.LCPI1_0)
; RV32ID-ILP32-NEXT: flw fa4, %lo(.LCPI1_0)(a1)
; RV32ID-ILP32-NEXT: lui a1, 815104
; RV32ID-ILP32-NEXT: fmv.w.x fa3, a1
; RV32ID-ILP32-NEXT: fmax.s fa5, fa5, fa3
+; RV32ID-ILP32-NEXT: neg a0, a0
; RV32ID-ILP32-NEXT: fmin.s fa5, fa5, fa4
; RV32ID-ILP32-NEXT: fcvt.w.s a1, fa5, rtz
; RV32ID-ILP32-NEXT: and a0, a0, a1
@@ -420,12 +420,12 @@ define i16 @fcvt_si_h_sat(half %a) nounwind {
; RV64ID-LP64-NEXT: call __extendhfsf2
; RV64ID-LP64-NEXT: fmv.w.x fa5, a0
; RV64ID-LP64-NEXT: feq.s a0, fa5, fa5
-; RV64ID-LP64-NEXT: neg a0, a0
; RV64ID-LP64-NEXT: lui a1, %hi(.LCPI1_0)
; RV64ID-LP64-NEXT: flw fa4, %lo(.LCPI1_0)(a1)
; RV64ID-LP64-NEXT: lui a1, 815104
; RV64ID-LP64-NEXT: fmv.w.x fa3, a1
; RV64ID-LP64-NEXT: fmax.s fa5, fa5, fa3
+; RV64ID-LP64-NEXT: neg a0, a0
; RV64ID-LP64-NEXT: fmin.s fa5, fa5, fa4
; RV64ID-LP64-NEXT: fcvt.l.s a1, fa5, rtz
; RV64ID-LP64-NEXT: and a0, a0, a1
@@ -439,12 +439,12 @@ define i16 @fcvt_si_h_sat(half %a) nounwind {
; RV32ID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32ID-NEXT: call __extendhfsf2
; RV32ID-NEXT: feq.s a0, fa0, fa0
-; RV32ID-NEXT: neg a0, a0
; RV32ID-NEXT: lui a1, %hi(.LCPI1_0)
; RV32ID-NEXT: flw fa5, %lo(.LCPI1_0)(a1)
; RV32ID-NEXT: lui a1, 815104
; RV32ID-NEXT: fmv.w.x fa4, a1
; RV32ID-NEXT: fmax.s fa4, fa0, fa4
+; RV32ID-NEXT: neg a0, a0
; RV32ID-NEXT: fmin.s fa5, fa4, fa5
; RV32ID-NEXT: fcvt.w.s a1, fa5, rtz
; RV32ID-NEXT: and a0, a0, a1
@@ -458,12 +458,12 @@ define i16 @fcvt_si_h_sat(half %a) nounwind {
; RV64ID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-NEXT: call __extendhfsf2
; RV64ID-NEXT: feq.s a0, fa0, fa0
-; RV64ID-NEXT: neg a0, a0
; RV64ID-NEXT: lui a1, %hi(.LCPI1_0)
; RV64ID-NEXT: flw fa5, %lo(.LCPI1_0)(a1)
; RV64ID-NEXT: lui a1, 815104
; RV64ID-NEXT: fmv.w.x fa4, a1
; RV64ID-NEXT: fmax.s fa4, fa0, fa4
+; RV64ID-NEXT: neg a0, a0
; RV64ID-NEXT: fmin.s fa5, fa4, fa5
; RV64ID-NEXT: fcvt.l.s a1, fa5, rtz
; RV64ID-NEXT: and a0, a0, a1
@@ -475,12 +475,12 @@ define i16 @fcvt_si_h_sat(half %a) nounwind {
; CHECK32-IZFHMIN: # %bb.0: # %start
; CHECK32-IZFHMIN-NEXT: fcvt.s.h fa5, fa0
; CHECK32-IZFHMIN-NEXT: feq.s a0, fa5, fa5
-; CHECK32-IZFHMIN-NEXT: neg a0, a0
; CHECK32-IZFHMIN-NEXT: lui a1, %hi(.LCPI1_0)
; CHECK32-IZFHMIN-NEXT: flw fa4, %lo(.LCPI1_0)(a1)
; CHECK32-IZFHMIN-NEXT: lui a1, 815104
; CHECK32-IZFHMIN-NEXT: fmv.w.x fa3, a1
; CHECK32-IZFHMIN-NEXT: fmax.s fa5, fa5, fa3
+; CHECK32-IZFHMIN-NEXT: neg a0, a0
; CHECK32-IZFHMIN-NEXT: fmin.s fa5, fa5, fa4
; CHECK32-IZFHMIN-NEXT: fcvt.w.s a1, fa5, rtz
; CHECK32-IZFHMIN-NEXT: and a0, a0, a1
@@ -490,12 +490,12 @@ define i16 @fcvt_si_h_sat(half %a) nounwind {
; CHECK64-IZFHMIN: # %bb.0: # %start
; CHECK64-IZFHMIN-NEXT: fcvt.s.h fa5, fa0
; CHECK64-IZFHMIN-NEXT: feq.s a0, fa5, fa5
-; CHECK64-IZFHMIN-NEXT: neg a0, a0
; CHECK64-IZFHMIN-NEXT: lui a1, %hi(.LCPI1_0)
; CHECK64-IZFHMIN-NEXT: flw fa4, %lo(.LCPI1_0)(a1)
; CHECK64-IZFHMIN-NEXT: lui a1, 815104
; CHECK64-IZFHMIN-NEXT: fmv.w.x fa3, a1
; CHECK64-IZFHMIN-NEXT: fmax.s fa5, fa5, fa3
+; CHECK64-IZFHMIN-NEXT: neg a0, a0
; CHECK64-IZFHMIN-NEXT: fmin.s fa5, fa5, fa4
; CHECK64-IZFHMIN-NEXT: fcvt.l.s a1, fa5, rtz
; CHECK64-IZFHMIN-NEXT: and a0, a0, a1
@@ -6297,12 +6297,12 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind {
; RV32IZFH: # %bb.0: # %start
; RV32IZFH-NEXT: fcvt.s.h fa5, fa0
; RV32IZFH-NEXT: feq.s a0, fa5, fa5
-; RV32IZFH-NEXT: neg a0, a0
; RV32IZFH-NEXT: lui a1, %hi(.LCPI32_0)
; RV32IZFH-NEXT: flw fa4, %lo(.LCPI32_0)(a1)
; RV32IZFH-NEXT: lui a1, 815104
; RV32IZFH-NEXT: fmv.w.x fa3, a1
; RV32IZFH-NEXT: fmax.s fa5, fa5, fa3
+; RV32IZFH-NEXT: neg a0, a0
; RV32IZFH-NEXT: fmin.s fa5, fa5, fa4
; RV32IZFH-NEXT: fcvt.w.s a1, fa5, rtz
; RV32IZFH-NEXT: and a0, a0, a1
@@ -6312,12 +6312,12 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind {
; RV64IZFH: # %bb.0: # %start
; RV64IZFH-NEXT: fcvt.s.h fa5, fa0
; RV64IZFH-NEXT: feq.s a0, fa5, fa5
-; RV64IZFH-NEXT: neg a0, a0
; RV64IZFH-NEXT: lui a1, %hi(.LCPI32_0)
; RV64IZFH-NEXT: flw fa4, %lo(.LCPI32_0)(a1)
; RV64IZFH-NEXT: lui a1, 815104
; RV64IZFH-NEXT: fmv.w.x fa3, a1
; RV64IZFH-NEXT: fmax.s fa5, fa5, fa3
+; RV64IZFH-NEXT: neg a0, a0
; RV64IZFH-NEXT: fmin.s fa5, fa5, fa4
; RV64IZFH-NEXT: fcvt.l.s a1, fa5, rtz
; RV64IZFH-NEXT: and a0, a0, a1
@@ -6327,12 +6327,12 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind {
; RV32IDZFH: # %bb.0: # %start
; RV32IDZFH-NEXT: fcvt.s.h fa5, fa0
; RV32IDZFH-NEXT: feq.s a0, fa5, fa5
-; RV32IDZFH-NEXT: neg a0, a0
; RV32IDZFH-NEXT: lui a1, %hi(.LCPI32_0)
; RV32IDZFH-NEXT: flw fa4, %lo(.LCPI32_0)(a1)
; RV32IDZFH-NEXT: lui a1, 815104
; RV32IDZFH-NEXT: fmv.w.x fa3, a1
; RV32IDZFH-NEXT: fmax.s fa5, fa5, fa3
+; RV32IDZFH-NEXT: neg a0, a0
; RV32IDZFH-NEXT: fmin.s fa5, fa5, fa4
; RV32IDZFH-NEXT: fcvt.w.s a1, fa5, rtz
; RV32IDZFH-NEXT: and a0, a0, a1
@@ -6342,12 +6342,12 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind {
; RV64IDZFH: # %bb.0: # %start
; RV64IDZFH-NEXT: fcvt.s.h fa5, fa0
; RV64IDZFH-NEXT: feq.s a0, fa5, fa5
-; RV64IDZFH-NEXT: neg a0, a0
; RV64IDZFH-NEXT: lui a1, %hi(.LCPI32_0)
; RV64IDZFH-NEXT: flw fa4, %lo(.LCPI32_0)(a1)
; RV64IDZFH-NEXT: lui a1, 815104
; RV64IDZFH-NEXT: fmv.w.x fa3, a1
; RV64IDZFH-NEXT: fmax.s fa5, fa5, fa3
+; RV64IDZFH-NEXT: neg a0, a0
; RV64IDZFH-NEXT: fmin.s fa5, fa5, fa4
; RV64IDZFH-NEXT: fcvt.l.s a1, fa5, rtz
; RV64IDZFH-NEXT: and a0, a0, a1
@@ -6506,12 +6506,12 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind {
; RV32ID-ILP32-NEXT: call __extendhfsf2
; RV32ID-ILP32-NEXT: fmv.w.x fa5, a0
; RV32ID-ILP32-NEXT: feq.s a0, fa5, fa5
-; RV32ID-ILP32-NEXT: neg a0, a0
; RV32ID-ILP32-NEXT: lui a1, %hi(.LCPI32_0)
; RV32ID-ILP32-NEXT: flw fa4, %lo(.LCPI32_0)(a1)
; RV32ID-ILP32-NEXT: lui a1, 815104
; RV32ID-ILP32-NEXT: fmv.w.x fa3, a1
; RV32ID-ILP32-NEXT: fmax.s fa5, fa5, fa3
+; RV32ID-ILP32-NEXT: neg a0, a0
; RV32ID-ILP32-NEXT: fmin.s fa5, fa5, fa4
; RV32ID-ILP32-NEXT: fcvt.w.s a1, fa5, rtz
; RV32ID-ILP32-NEXT: and a0, a0, a1
@@ -6526,12 +6526,12 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind {
; RV64ID-LP64-NEXT: call __extendhfsf2
; RV64ID-LP64-NEXT: fmv.w.x fa5, a0
; RV64ID-LP64-NEXT: feq.s a0, fa5, fa5
-; RV64ID-LP64-NEXT: neg a0, a0
; RV64ID-LP64-NEXT: lui a1, %hi(.LCPI32_0)
; RV64ID-LP64-NEXT: flw fa4, %lo(.LCPI32_0)(a1)
; RV64ID-LP64-NEXT: lui a1, 815104
; RV64ID-LP64-NEXT: fmv.w.x fa3, a1
; RV64ID-LP64-NEXT: fmax.s fa5, fa5, fa3
+; RV64ID-LP64-NEXT: neg a0, a0
; RV64ID-LP64-NEXT: fmin.s fa5, fa5, fa4
; RV64ID-LP64-NEXT: fcvt.l.s a1, fa5, rtz
; RV64ID-LP64-NEXT: and a0, a0, a1
@@ -6545,12 +6545,12 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind {
; RV32ID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32ID-NEXT: call __extendhfsf2
; RV32ID-NEXT: feq.s a0, fa0, fa0
-; RV32ID-NEXT: neg a0, a0
; RV32ID-NEXT: lui a1, %hi(.LCPI32_0)
; RV32ID-NEXT: flw fa5, %lo(.LCPI32_0)(a1)
; RV32ID-NEXT: lui a1, 815104
; RV32ID-NEXT: fmv.w.x fa4, a1
; RV32ID-NEXT: fmax.s fa4, fa0, fa4
+; RV32ID-NEXT: neg a0, a0
; RV32ID-NEXT: fmin.s fa5, fa4, fa5
; RV32ID-NEXT: fcvt.w.s a1, fa5, rtz
; RV32ID-NEXT: and a0, a0, a1
@@ -6564,12 +6564,12 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind {
; RV64ID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-NEXT: call __extendhfsf2
; RV64ID-NEXT: feq.s a0, fa0, fa0
-; RV64ID-NEXT: neg a0, a0
; RV64ID-NEXT: lui a1, %hi(.LCPI32_0)
; RV64ID-NEXT: flw fa5, %lo(.LCPI32_0)(a1)
; RV64ID-NEXT: lui a1, 815104
; RV64ID-NEXT: fmv.w.x fa4, a1
; RV64ID-NEXT: fmax.s fa4, fa0, fa4
+; RV64ID-NEXT: neg a0, a0
; RV64ID-NEXT: fmin.s fa5, fa4, fa5
; RV64ID-NEXT: fcvt.l.s a1, fa5, rtz
; RV64ID-NEXT: and a0, a0, a1
@@ -6581,12 +6581,12 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind {
; CHECK32-IZFHMIN: # %bb.0: # %start
; CHECK32-IZFHMIN-NEXT: fcvt.s.h fa5, fa0
; CHECK32-IZFHMIN-NEXT: feq.s a0, fa5, fa5
-; CHECK32-IZFHMIN-NEXT: neg a0, a0
; CHECK32-IZFHMIN-NEXT: lui a1, %hi(.LCPI32_0)
; CHECK32-IZFHMIN-NEXT: flw fa4, %lo(.LCPI32_0)(a1)
; CHECK32-IZFHMIN-NEXT: lui a1, 815104
; CHECK32-IZFHMIN-NEXT: fmv.w.x fa3, a1
; CHECK32-IZFHMIN-NEXT: fmax.s fa5, fa5, fa3
+; CHECK32-IZFHMIN-NEXT: neg a0, a0
; CHECK32-IZFHMIN-NEXT: fmin.s fa5, fa5, fa4
; CHECK32-IZFHMIN-NEXT: fcvt.w.s a1, fa5, rtz
; CHECK32-IZFHMIN-NEXT: and a0, a0, a1
@@ -6596,12 +6596,12 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind {
; CHECK64-IZFHMIN: # %bb.0: # %start
; CHECK64-IZFHMIN-NEXT: fcvt.s.h fa5, fa0
; CHECK64-IZFHMIN-NEXT: feq.s a0, fa5, fa5
-; CHECK64-IZFHMIN-NEXT: neg a0, a0
; CHECK64-IZFHMIN-NEXT: lui a1, %hi(.LCPI32_0)
; CHECK64-IZFHMIN-NEXT: flw fa4, %lo(.LCPI32_0)(a1)
; CHECK64-IZFHMIN-NEXT: lui a1, 815104
; CHECK64-IZFHMIN-NEXT: fmv.w.x fa3, a1
; CHECK64-IZFHMIN-NEXT: fmax.s fa5, fa5, fa3
+; CHECK64-IZFHMIN-NEXT: neg a0, a0
; CHECK64-IZFHMIN-NEXT: fmin.s fa5, fa5, fa4
; CHECK64-IZFHMIN-NEXT: fcvt.l.s a1, fa5, rtz
; CHECK64-IZFHMIN-NEXT: and a0, a0, a1
diff --git a/llvm/test/CodeGen/RISCV/llvm.exp10.ll b/llvm/test/CodeGen/RISCV/llvm.exp10.ll
index a1f8bd4ab12bea..15a123400fd4fa 100644
--- a/llvm/test/CodeGen/RISCV/llvm.exp10.ll
+++ b/llvm/test/CodeGen/RISCV/llvm.exp10.ll
@@ -187,12 +187,12 @@ define <3 x half> @exp10_v3f16(<3 x half> %x) {
; RV32IFD-NEXT: .cfi_offset fs0, -24
; RV32IFD-NEXT: .cfi_offset fs1, -32
; RV32IFD-NEXT: .cfi_offset fs2, -40
-; RV32IFD-NEXT: lhu a2, 8(a1)
-; RV32IFD-NEXT: lhu a3, 0(a1)
-; RV32IFD-NEXT: lhu a1, 4(a1)
; RV32IFD-NEXT: mv s0, a0
-; RV32IFD-NEXT: fmv.w.x fs0, a2
-; RV32IFD-NEXT: fmv.w.x fs1, a3
+; RV32IFD-NEXT: lhu a0, 8(a1)
+; RV32IFD-NEXT: lhu a2, 0(a1)
+; RV32IFD-NEXT: lhu a1, 4(a1)
+; RV32IFD-NEXT: fmv.w.x fs0, a0
+; RV32IFD-NEXT: fmv.w.x fs1, a2
; RV32IFD-NEXT: fmv.w.x fa0, a1
; RV32IFD-NEXT: call __extendhfsf2
; RV32IFD-NEXT: call exp10f
diff --git a/llvm/test/CodeGen/RISCV/llvm.frexp.ll b/llvm/test/CodeGen/RISCV/llvm.frexp.ll
index 3f615d23d3eaf6..4a77b4d32cdda6 100644
--- a/llvm/test/CodeGen/RISCV/llvm.frexp.ll
+++ b/llvm/test/CodeGen/RISCV/llvm.frexp.ll
@@ -61,8 +61,8 @@ define { half, i32 } @test_frexp_f16_i32(half %a) nounwind {
; RV32IZFINXZDINX-NEXT: addi a1, sp, 8
; RV32IZFINXZDINX-NEXT: call frexpf
; RV32IZFINXZDINX-NEXT: call __truncsfhf2
-; RV32IZFINXZDINX-NEXT: lw a1, 8(sp)
; RV32IZFINXZDINX-NEXT: # kill: def $x10_w killed $x10_w def $x10
+; RV32IZFINXZDINX-NEXT: lw a1, 8(sp)
; RV32IZFINXZDINX-NEXT: lui a2, 1048560
; RV32IZFINXZDINX-NEXT: or a0, a0, a2
; RV32IZFINXZDINX-NEXT: # kill: def $x10_w killed $x10_w killed $x10
@@ -78,8 +78,8 @@ define { half, i32 } @test_frexp_f16_i32(half %a) nounwind {
; RV64IZFINXZDINX-NEXT: mv a1, sp
; RV64IZFINXZDINX-NEXT: call frexpf
; RV64IZFINXZDINX-NEXT: call __truncsfhf2
-; RV64IZFINXZDINX-NEXT: ld a1, 0(sp)
; RV64IZFINXZDINX-NEXT: # kill: def $x10_w killed $x10_w def $x10
+; RV64IZFINXZDINX-NEXT: ld a1, 0(sp)
; RV64IZFINXZDINX-NEXT: lui a2, 1048560
; RV64IZFINXZDINX-NEXT: or a0, a0, a2
; RV64IZFINXZDINX-NEXT: # kill: def $x10_w killed $x10_w killed $x10
diff --git a/llvm/test/CodeGen/RISCV/machine-combiner.ll b/llvm/test/CodeGen/RISCV/machine-combiner.ll
index ebf232cc458ba0..a18f5d6902dca7 100644
--- a/llvm/test/CodeGen/RISCV/machine-combiner.ll
+++ b/llvm/test/CodeGen/RISCV/machine-combiner.ll
@@ -10,9 +10,9 @@
define double @test_reassoc_fadd1(double %a0, double %a1, double %a2, double %a3) {
; CHECK-LABEL: test_reassoc_fadd1:
; CHECK: # %bb.0:
-; CHECK-NEXT: fadd.d fa5, fa0, fa1
-; CHECK-NEXT: fadd.d fa4, fa2, fa3
-; CHECK-NEXT: fadd.d fa0, fa5, fa4
+; CHECK-NEXT: fadd.d fa5, fa2, fa3
+; CHECK-NEXT: fadd.d fa4, fa0, fa1
+; CHECK-NEXT: fadd.d fa0, fa4, fa5
; CHECK-NEXT: ret
%t0 = fadd nsz reassoc double %a0, %a1
%t1 = fadd nsz reassoc double %t0, %a2
@@ -23,9 +23,9 @@ define double @test_reassoc_fadd1(double %a0, double %a1, double %a2, double %a3
define double @test_reassoc_fadd2(double %a0, double %a1, double %a2, double %a3) {
; CHECK-LABEL: test_reassoc_fadd2:
; CHECK: # %bb.0:
-; CHECK-NEXT: fadd.d fa5, fa0, fa1
-; CHECK-NEXT: fadd.d fa4, fa2, fa3
-; CHECK-NEXT: fadd.d fa0, fa4, fa5
+; CHECK-NEXT: fadd.d fa5, fa2, fa3
+; CHECK-NEXT: fadd.d fa4, fa0, fa1
+; CHECK-NEXT: fadd.d fa0, fa5, fa4
; CHECK-NEXT: ret
%t0 = fadd nsz reassoc double %a0, %a1
%t1 = fadd nsz reassoc double %a2, %t0
@@ -36,9 +36,9 @@ define double @test_reassoc_fadd2(double %a0, double %a1, double %a2, double %a3
define double @test_reassoc_fadd3(double %a0, double %a1, double %a2, double %a3) {
; CHECK-LABEL: test_reassoc_fadd3:
; CHECK: # %bb.0:
-; CHECK-NEXT: fadd.d fa5, fa0, fa1
-; CHECK-NEXT: fadd.d fa4, fa3, fa2
-; CHECK-NEXT: fadd.d fa0, fa4, fa5
+; CHECK-NEXT: fadd.d fa5, fa3, fa2
+; CHECK-NEXT: fadd.d fa4, fa0, fa1
+; CHECK-NEXT: fadd.d fa0, fa5, fa4
; CHECK-NEXT: ret
%t0 = fadd nsz reassoc double %a0, %a1
%t1 = fadd nsz reassoc double %t0, %a2
@@ -49,9 +49,9 @@ define double @test_reassoc_fadd3(double %a0, double %a1, double %a2, double %a3
define double @test_reassoc_fadd4(double %a0, double %a1, double %a2, double %a3) {
; CHECK-LABEL: test_reassoc_fadd4:
; CHECK: # %bb.0:
-; CHECK-NEXT: fadd.d fa5, fa0, fa1
-; CHECK-NEXT: fadd.d fa4, fa3, fa2
-; CHECK-NEXT: fadd.d fa0, fa4, fa5
+; CHECK-NEXT: fadd.d fa5, fa3, fa2
+; CHECK-NEXT: fadd.d fa4, fa0, fa1
+; CHECK-NEXT: fadd.d fa0, fa5, fa4
; CHECK-NEXT: ret
%t0 = fadd nsz reassoc double %a0, %a1
%t1 = fadd nsz reassoc double %a2, %t0
@@ -62,9 +62,9 @@ define double @test_reassoc_fadd4(double %a0, double %a1, double %a2, double %a3
define double @test_reassoc_fmul1(double %a0, double %a1, double %a2, double %a3) {
; CHECK-LABEL: test_reassoc_fmul1:
; CHECK: # %bb.0:
-; CHECK-NEXT: fmul.d fa5, fa0, fa1
-; CHECK-NEXT: fmul.d fa4, fa2, fa3
-; CHECK-NEXT: fmul.d fa0, fa5, fa4
+; CHECK-NEXT: fmul.d fa5, fa2, fa3
+; CHECK-NEXT: fmul.d fa4, fa0, fa1
+; CHECK-NEXT: fmul.d fa0, fa4, fa5
; CHECK-NEXT: ret
%t0 = fmul nsz reassoc double %a0, %a1
%t1 = fmul nsz reassoc double %t0, %a2
@@ -75,9 +75,9 @@ define double @test_reassoc_fmul1(double %a0, double %a1, double %a2, double %a3
define double @test_reassoc_fmul2(double %a0, double %a1, double %a2, double %a3) {
; CHECK-LABEL: test_reassoc_fmul2:
; CHECK: # %bb.0:
-; CHECK-NEXT: fmul.d fa5, fa0, fa1
-; CHECK-NEXT: fmul.d fa4, fa2, fa3
-; CHECK-NEXT: fmul.d fa0, fa4, fa5
+; CHECK-NEXT: fmul.d fa5, fa2, fa3
+; CHECK-NEXT: fmul.d fa4, fa0, fa1
+; CHECK-NEXT: fmul.d fa0, fa5, fa4
; CHECK-NEXT: ret
%t0 = fmul nsz reassoc double %a0, %a1
%t1 = fmul nsz reassoc double %a2, %t0
@@ -88,9 +88,9 @@ define double @test_reassoc_fmul2(double %a0, double %a1, double %a2, double %a3
define double @test_reassoc_fmul3(double %a0, double %a1, double %a2, double %a3) {
; CHECK-LABEL: test_reassoc_fmul3:
; CHECK: # %bb.0:
-; CHECK-NEXT: fmul.d fa5, fa0, fa1
-; CHECK-NEXT: fmul.d fa4, fa3, fa2
-; CHECK-NEXT: fmul.d fa0, fa4, fa5
+; CHECK-NEXT: fmul.d fa5, fa3, fa2
+; CHECK-NEXT: fmul.d fa4, fa0, fa1
+; CHECK-NEXT: fmul.d fa0, fa5, fa4
; CHECK-NEXT: ret
%t0 = fmul nsz reassoc double %a0, %a1
%t1 = fmul nsz reassoc double %t0, %a2
@@ -101,9 +101,9 @@ define double @test_reassoc_fmul3(double %a0, double %a1, double %a2, double %a3
define double @test_reassoc_fmul4(double %a0, double %a1, double %a2, double %a3) {
; CHECK-LABEL: test_reassoc_fmul4:
; CHECK: # %bb.0:
-; CHECK-NEXT: fmul.d fa5, fa0, fa1
-; CHECK-NEXT: fmul.d fa4, fa3, fa2
-; CHECK-NEXT: fmul.d fa0, fa4, fa5
+; CHECK-NEXT: fmul.d fa5, fa3, fa2
+; CHECK-NEXT: fmul.d fa4, fa0, fa1
+; CHECK-NEXT: fmul.d fa0, fa5, fa4
; CHECK-NEXT: ret
%t0 = fmul nsz reassoc double %a0, %a1
%t1 = fmul nsz reassoc double %a2, %t0
@@ -114,11 +114,11 @@ define double @test_reassoc_fmul4(double %a0, double %a1, double %a2, double %a3
define double @test_reassoc_big1(double %a0, double %a1, double %a2, double %a3, double %a4, double %a5, double %a6) {
; CHECK-LABEL: test_reassoc_big1:
; CHECK: # %bb.0:
-; CHECK-NEXT: fadd.d fa1, fa0, fa1
-; CHECK-NEXT: fadd.d fa3, fa2, fa3
; CHECK-NEXT: fadd.d fa5, fa4, fa5
-; CHECK-NEXT: fadd.d fa4, fa1, fa3
+; CHECK-NEXT: fadd.d fa4, fa2, fa3
+; CHECK-NEXT: fadd.d fa3, fa0, fa1
; CHECK-NEXT: fadd.d fa5, fa5, fa6
+; CHECK-NEXT: fadd.d fa4, fa3, fa4
; CHECK-NEXT: fadd.d fa0, fa4, fa5
; CHECK-NEXT: ret
%t0 = fadd nsz reassoc double %a0, %a1
@@ -133,18 +133,18 @@ define double @test_reassoc_big1(double %a0, double %a1, double %a2, double %a3,
define double @test_reassoc_big2(double %a0, double %a1, i32 %a2, double %a3, i32 %a4, double %a5) {
; CHECK-LABEL: test_reassoc_big2:
; CHECK: # %bb.0:
-; CHECK-NEXT: fadd.d fa5, fa0, fa1
; CHECK-NEXT: fsub.d fa4, fa3, fa2
-; CHECK-NEXT: fadd.d fa3, fa2, fa1
-; CHECK-NEXT: fcvt.d.w ft0, a0
-; CHECK-NEXT: fcvt.d.w ft1, a1
-; CHECK-NEXT: fmul.d fa2, fa2, ft0
-; CHECK-NEXT: fmul.d fa1, ft1, fa1
-; CHECK-NEXT: fsub.d fa5, fa4, fa5
-; CHECK-NEXT: fmul.d fa4, fa0, fa3
-; CHECK-NEXT: fmul.d fa3, fa1, fa2
-; CHECK-NEXT: fmul.d fa5, fa5, fa4
-; CHECK-NEXT: fmul.d fa0, fa5, fa3
+; CHECK-NEXT: fadd.d fa3, fa0, fa1
+; CHECK-NEXT: fadd.d ft0, fa2, fa1
+; CHECK-NEXT: fcvt.d.w fa5, a1
+; CHECK-NEXT: fcvt.d.w ft1, a0
+; CHECK-NEXT: fmul.d fa5, fa5, fa1
+; CHECK-NEXT: fmul.d fa2, fa2, ft1
+; CHECK-NEXT: fsub.d fa4, fa4, fa3
+; CHECK-NEXT: fmul.d fa3, fa0, ft0
+; CHECK-NEXT: fmul.d fa5, fa5, fa2
+; CHECK-NEXT: fmul.d fa4, fa4, fa3
+; CHECK-NEXT: fmul.d fa0, fa4, fa5
; CHECK-NEXT: ret
%cvt1 = sitofp i32 %a2 to double
%cvt2 = sitofp i32 %a4 to double
@@ -245,9 +245,9 @@ define double @test_fnmsub(double %a0, double %a1, double %a2) {
define double @test_reassoc_fsub1(double %a0, double %a1, double %a2, double %a3) {
; CHECK-LABEL: test_reassoc_fsub1:
; CHECK: # %bb.0:
-; CHECK-NEXT: fadd.d fa5, fa0, fa1
-; CHECK-NEXT: fsub.d fa4, fa2, fa3
-; CHECK-NEXT: fadd.d fa0, fa5, fa4
+; CHECK-NEXT: fsub.d fa5, fa2, fa3
+; CHECK-NEXT: fadd.d fa4, fa0, fa1
+; CHECK-NEXT: fadd.d fa0, fa4, fa5
; CHECK-NEXT: ret
%t0 = fadd nsz reassoc double %a0, %a1
%t1 = fadd nsz reassoc double %t0, %a2
@@ -258,9 +258,9 @@ define double @test_reassoc_fsub1(double %a0, double %a1, double %a2, double %a3
define double @test_reassoc_fsub2(double %a0, double %a1, double %a2, double %a3) {
; CHECK-LABEL: test_reassoc_fsub2:
; CHECK: # %bb.0:
-; CHECK-NEXT: fadd.d fa5, fa0, fa1
-; CHECK-NEXT: fsub.d fa4, fa2, fa3
-; CHECK-NEXT: fsub.d fa0, fa5, fa4
+; CHECK-NEXT: fsub.d fa5, fa2, fa3
+; CHECK-NEXT: fadd.d fa4, fa0, fa1
+; CHECK-NEXT: fsub.d fa0, fa4, fa5
; CHECK-NEXT: ret
%t0 = fadd nsz reassoc double %a0, %a1
%t1 = fsub nsz reassoc double %t0, %a2
@@ -271,9 +271,9 @@ define double @test_reassoc_fsub2(double %a0, double %a1, double %a2, double %a3
define double @test_reassoc_fsub3(double %a0, double %a1, double %a2, double %a3) {
; CHECK-LABEL: test_reassoc_fsub3:
; CHECK: # %bb.0:
-; CHECK-NEXT: fadd.d fa5, fa0, fa1
-; CHECK-NEXT: fadd.d fa4, fa2, fa3
-; CHECK-NEXT: fsub.d fa0, fa5, fa4
+; CHECK-NEXT: fadd.d fa5, fa2, fa3
+; CHECK-NEXT: fadd.d fa4, fa0, fa1
+; CHECK-NEXT: fsub.d fa0, fa4, fa5
; CHECK-NEXT: ret
%t0 = fadd nsz reassoc double %a0, %a1
%t1 = fsub nsz reassoc double %t0, %a2
@@ -284,9 +284,9 @@ define double @test_reassoc_fsub3(double %a0, double %a1, double %a2, double %a3
define double @test_reassoc_fsub4(double %a0, double %a1, double %a2, double %a3) {
; CHECK-LABEL: test_reassoc_fsub4:
; CHECK: # %bb.0:
-; CHECK-NEXT: fadd.d fa5, fa0, fa1
-; CHECK-NEXT: fsub.d fa4, fa2, fa3
-; CHECK-NEXT: fadd.d fa0, fa4, fa5
+; CHECK-NEXT: fsub.d fa5, fa2, fa3
+; CHECK-NEXT: fadd.d fa4, fa0, fa1
+; CHECK-NEXT: fadd.d fa0, fa5, fa4
; CHECK-NEXT: ret
%t0 = fadd nsz reassoc double %a0, %a1
%t1 = fadd nsz reassoc double %a2, %t0
@@ -297,9 +297,9 @@ define double @test_reassoc_fsub4(double %a0, double %a1, double %a2, double %a3
define double @test_reassoc_fsub5(double %a0, double %a1, double %a2, double %a3) {
; CHECK-LABEL: test_reassoc_fsub5:
; CHECK: # %bb.0:
-; CHECK-NEXT: fadd.d fa5, fa0, fa1
-; CHECK-NEXT: fadd.d fa4, fa2, fa3
-; CHECK-NEXT: fsub.d fa0, fa4, fa5
+; CHECK-NEXT: fadd.d fa5, fa2, fa3
+; CHECK-NEXT: fadd.d fa4, fa0, fa1
+; CHECK-NEXT: fsub.d fa0, fa5, fa4
; CHECK-NEXT: ret
%t0 = fadd nsz reassoc double %a0, %a1
%t1 = fsub nsz reassoc double %a2, %t0
@@ -310,9 +310,9 @@ define double @test_reassoc_fsub5(double %a0, double %a1, double %a2, double %a3
define double @test_reassoc_fsub6(double %a0, double %a1, double %a2, double %a3) {
; CHECK-LABEL: test_reassoc_fsub6:
; CHECK: # %bb.0:
-; CHECK-NEXT: fadd.d fa5, fa0, fa1
-; CHECK-NEXT: fsub.d fa4, fa2, fa3
-; CHECK-NEXT: fsub.d fa0, fa4, fa5
+; CHECK-NEXT: fsub.d fa5, fa2, fa3
+; CHECK-NEXT: fadd.d fa4, fa0, fa1
+; CHECK-NEXT: fsub.d fa0, fa5, fa4
; CHECK-NEXT: ret
%t0 = fadd nsz reassoc double %a0, %a1
%t1 = fsub nsz reassoc double %a2, %t0
@@ -323,9 +323,9 @@ define double @test_reassoc_fsub6(double %a0, double %a1, double %a2, double %a3
define double @test_reassoc_fsub7(double %a0, double %a1, double %a2, double %a3) {
; CHECK-LABEL: test_reassoc_fsub7:
; CHECK: # %bb.0:
-; CHECK-NEXT: fadd.d fa5, fa0, fa1
-; CHECK-NEXT: fsub.d fa4, fa3, fa2
-; CHECK-NEXT: fsub.d fa0, fa4, fa5
+; CHECK-NEXT: fsub.d fa5, fa3, fa2
+; CHECK-NEXT: fadd.d fa4, fa0, fa1
+; CHECK-NEXT: fsub.d fa0, fa5, fa4
; CHECK-NEXT: ret
%t0 = fadd nsz reassoc double %a0, %a1
%t1 = fadd nsz reassoc double %t0, %a2
@@ -336,9 +336,9 @@ define double @test_reassoc_fsub7(double %a0, double %a1, double %a2, double %a3
define double @test_reassoc_fsub8(double %a0, double %a1, double %a2, double %a3) {
; CHECK-LABEL: test_reassoc_fsub8:
; CHECK: # %bb.0:
-; CHECK-NEXT: fadd.d fa5, fa0, fa1
-; CHECK-NEXT: fsub.d fa4, fa3, fa2
-; CHECK-NEXT: fadd.d fa0, fa4, fa5
+; CHECK-NEXT: fsub.d fa5, fa3, fa2
+; CHECK-NEXT: fadd.d fa4, fa0, fa1
+; CHECK-NEXT: fadd.d fa0, fa5, fa4
; CHECK-NEXT: ret
%t0 = fadd nsz reassoc double %a0, %a1
%t1 = fsub nsz reassoc double %t0, %a2
@@ -349,9 +349,9 @@ define double @test_reassoc_fsub8(double %a0, double %a1, double %a2, double %a3
define double @test_reassoc_fsub9(double %a0, double %a1, double %a2, double %a3) {
; CHECK-LABEL: test_reassoc_fsub9:
; CHECK: # %bb.0:
-; CHECK-NEXT: fadd.d fa5, fa0, fa1
-; CHECK-NEXT: fadd.d fa4, fa3, fa2
-; CHECK-NEXT: fsub.d fa0, fa4, fa5
+; CHECK-NEXT: fadd.d fa5, fa3, fa2
+; CHECK-NEXT: fadd.d fa4, fa0, fa1
+; CHECK-NEXT: fsub.d fa0, fa5, fa4
; CHECK-NEXT: ret
%t0 = fadd nsz reassoc double %a0, %a1
%t1 = fsub nsz reassoc double %t0, %a2
@@ -362,9 +362,9 @@ define double @test_reassoc_fsub9(double %a0, double %a1, double %a2, double %a3
define double @test_reassoc_fsub10(double %a0, double %a1, double %a2, double %a3) {
; CHECK-LABEL: test_reassoc_fsub10:
; CHECK: # %bb.0:
-; CHECK-NEXT: fadd.d fa5, fa0, fa1
-; CHECK-NEXT: fsub.d fa4, fa3, fa2
-; CHECK-NEXT: fsub.d fa0, fa4, fa5
+; CHECK-NEXT: fsub.d fa5, fa3, fa2
+; CHECK-NEXT: fadd.d fa4, fa0, fa1
+; CHECK-NEXT: fsub.d fa0, fa5, fa4
; CHECK-NEXT: ret
%t0 = fadd nsz reassoc double %a0, %a1
%t1 = fadd nsz reassoc double %a2, %t0
@@ -375,9 +375,9 @@ define double @test_reassoc_fsub10(double %a0, double %a1, double %a2, double %a
define double @test_reassoc_fsub11(double %a0, double %a1, double %a2, double %a3) {
; CHECK-LABEL: test_reassoc_fsub11:
; CHECK: # %bb.0:
-; CHECK-NEXT: fadd.d fa5, fa0, fa1
-; CHECK-NEXT: fadd.d fa4, fa3, fa2
-; CHECK-NEXT: fsub.d fa0, fa4, fa5
+; CHECK-NEXT: fadd.d fa5, fa3, fa2
+; CHECK-NEXT: fadd.d fa4, fa0, fa1
+; CHECK-NEXT: fsub.d fa0, fa5, fa4
; CHECK-NEXT: ret
%t0 = fadd nsz reassoc double %a0, %a1
%t1 = fsub nsz reassoc double %a2, %t0
@@ -388,9 +388,9 @@ define double @test_reassoc_fsub11(double %a0, double %a1, double %a2, double %a
define double @test_reassoc_fsub12(double %a0, double %a1, double %a2, double %a3) {
; CHECK-LABEL: test_reassoc_fsub12:
; CHECK: # %bb.0:
-; CHECK-NEXT: fadd.d fa5, fa0, fa1
-; CHECK-NEXT: fsub.d fa4, fa3, fa2
-; CHECK-NEXT: fadd.d fa0, fa4, fa5
+; CHECK-NEXT: fsub.d fa5, fa3, fa2
+; CHECK-NEXT: fadd.d fa4, fa0, fa1
+; CHECK-NEXT: fadd.d fa0, fa5, fa4
; CHECK-NEXT: ret
%t0 = fadd nsz reassoc double %a0, %a1
%t1 = fsub nsz reassoc double %a2, %t0
@@ -687,9 +687,9 @@ define i64 @test_reassoc_xor_i64(i64 %a0, i64 %a1, i64 %a2, i64 %a3) {
define i8 @test_reassoc_mul_i8(i8 %a0, i8 %a1, i8 %a2, i8 %a3) {
; CHECK-LABEL: test_reassoc_mul_i8:
; CHECK: # %bb.0:
+; CHECK-NEXT: mul a2, a2, a3
; CHECK-NEXT: mul a0, a0, a1
-; CHECK-NEXT: mul a1, a2, a3
-; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: mul a0, a0, a2
; CHECK-NEXT: ret
%t0 = mul i8 %a0, %a1
%t1 = mul i8 %t0, %a2
@@ -700,9 +700,9 @@ define i8 @test_reassoc_mul_i8(i8 %a0, i8 %a1, i8 %a2, i8 %a3) {
define i16 @test_reassoc_mul_i16(i16 %a0, i16 %a1, i16 %a2, i16 %a3) {
; CHECK-LABEL: test_reassoc_mul_i16:
; CHECK: # %bb.0:
+; CHECK-NEXT: mul a2, a2, a3
; CHECK-NEXT: mul a0, a0, a1
-; CHECK-NEXT: mul a1, a2, a3
-; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: mul a0, a0, a2
; CHECK-NEXT: ret
%t0 = mul i16 %a0, %a1
%t1 = mul i16 %t0, %a2
@@ -713,9 +713,9 @@ define i16 @test_reassoc_mul_i16(i16 %a0, i16 %a1, i16 %a2, i16 %a3) {
define i32 @test_reassoc_mul_i32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
; CHECK-LABEL: test_reassoc_mul_i32:
; CHECK: # %bb.0:
+; CHECK-NEXT: mul a2, a2, a3
; CHECK-NEXT: mul a0, a0, a1
-; CHECK-NEXT: mul a1, a2, a3
-; CHECK-NEXT: mulw a0, a0, a1
+; CHECK-NEXT: mulw a0, a0, a2
; CHECK-NEXT: ret
%t0 = mul i32 %a0, %a1
%t1 = mul i32 %t0, %a2
@@ -726,9 +726,9 @@ define i32 @test_reassoc_mul_i32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
define i64 @test_reassoc_mul_i64(i64 %a0, i64 %a1, i64 %a2, i64 %a3) {
; CHECK-LABEL: test_reassoc_mul_i64:
; CHECK: # %bb.0:
+; CHECK-NEXT: mul a2, a2, a3
; CHECK-NEXT: mul a0, a0, a1
-; CHECK-NEXT: mul a1, a2, a3
-; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: mul a0, a0, a2
; CHECK-NEXT: ret
%t0 = mul i64 %a0, %a1
%t1 = mul i64 %t0, %a2
@@ -995,9 +995,9 @@ define i64 @test_reassoc_max_i64(i64 %a0, i64 %a1, i64 %a2, i64 %a3) {
define half @test_fmin_f16(half %a0, half %a1, half %a2, half %a3) {
; CHECK-LABEL: test_fmin_f16:
; CHECK: # %bb.0:
-; CHECK-NEXT: fmin.h fa5, fa0, fa1
-; CHECK-NEXT: fmin.h fa4, fa2, fa3
-; CHECK-NEXT: fmin.h fa0, fa5, fa4
+; CHECK-NEXT: fmin.h fa5, fa2, fa3
+; CHECK-NEXT: fmin.h fa4, fa0, fa1
+; CHECK-NEXT: fmin.h fa0, fa4, fa5
; CHECK-NEXT: ret
%t0 = call half @llvm.minnum.f16(half %a0, half %a1)
%t1 = call half @llvm.minnum.f16(half %t0, half %a2)
@@ -1008,9 +1008,9 @@ define half @test_fmin_f16(half %a0, half %a1, half %a2, half %a3) {
define float @test_fmin_f32(float %a0, float %a1, float %a2, float %a3) {
; CHECK-LABEL: test_fmin_f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: fmin.s fa5, fa0, fa1
-; CHECK-NEXT: fmin.s fa4, fa2, fa3
-; CHECK-NEXT: fmin.s fa0, fa5, fa4
+; CHECK-NEXT: fmin.s fa5, fa2, fa3
+; CHECK-NEXT: fmin.s fa4, fa0, fa1
+; CHECK-NEXT: fmin.s fa0, fa4, fa5
; CHECK-NEXT: ret
%t0 = call float @llvm.minnum.f32(float %a0, float %a1)
%t1 = call float @llvm.minnum.f32(float %t0, float %a2)
@@ -1021,9 +1021,9 @@ define float @test_fmin_f32(float %a0, float %a1, float %a2, float %a3) {
define double @test_fmin_f64(double %a0, double %a1, double %a2, double %a3) {
; CHECK-LABEL: test_fmin_f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: fmin.d fa5, fa0, fa1
-; CHECK-NEXT: fmin.d fa4, fa2, fa3
-; CHECK-NEXT: fmin.d fa0, fa5, fa4
+; CHECK-NEXT: fmin.d fa5, fa2, fa3
+; CHECK-NEXT: fmin.d fa4, fa0, fa1
+; CHECK-NEXT: fmin.d fa0, fa4, fa5
; CHECK-NEXT: ret
%t0 = call double @llvm.minnum.f64(double %a0, double %a1)
%t1 = call double @llvm.minnum.f64(double %t0, double %a2)
@@ -1034,9 +1034,9 @@ define double @test_fmin_f64(double %a0, double %a1, double %a2, double %a3) {
define half @test_fmax_f16(half %a0, half %a1, half %a2, half %a3) {
; CHECK-LABEL: test_fmax_f16:
; CHECK: # %bb.0:
-; CHECK-NEXT: fmax.h fa5, fa0, fa1
-; CHECK-NEXT: fmax.h fa4, fa2, fa3
-; CHECK-NEXT: fmax.h fa0, fa5, fa4
+; CHECK-NEXT: fmax.h fa5, fa2, fa3
+; CHECK-NEXT: fmax.h fa4, fa0, fa1
+; CHECK-NEXT: fmax.h fa0, fa4, fa5
; CHECK-NEXT: ret
%t0 = call half @llvm.maxnum.f16(half %a0, half %a1)
%t1 = call half @llvm.maxnum.f16(half %t0, half %a2)
@@ -1047,9 +1047,9 @@ define half @test_fmax_f16(half %a0, half %a1, half %a2, half %a3) {
define float @test_fmax_f32(float %a0, float %a1, float %a2, float %a3) {
; CHECK-LABEL: test_fmax_f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: fmax.s fa5, fa0, fa1
-; CHECK-NEXT: fmax.s fa4, fa2, fa3
-; CHECK-NEXT: fmax.s fa0, fa5, fa4
+; CHECK-NEXT: fmax.s fa5, fa2, fa3
+; CHECK-NEXT: fmax.s fa4, fa0, fa1
+; CHECK-NEXT: fmax.s fa0, fa4, fa5
; CHECK-NEXT: ret
%t0 = call float @llvm.maxnum.f32(float %a0, float %a1)
%t1 = call float @llvm.maxnum.f32(float %t0, float %a2)
@@ -1060,9 +1060,9 @@ define float @test_fmax_f32(float %a0, float %a1, float %a2, float %a3) {
define double @test_fmax_f64(double %a0, double %a1, double %a2, double %a3) {
; CHECK-LABEL: test_fmax_f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: fmax.d fa5, fa0, fa1
-; CHECK-NEXT: fmax.d fa4, fa2, fa3
-; CHECK-NEXT: fmax.d fa0, fa5, fa4
+; CHECK-NEXT: fmax.d fa5, fa2, fa3
+; CHECK-NEXT: fmax.d fa4, fa0, fa1
+; CHECK-NEXT: fmax.d fa0, fa4, fa5
; CHECK-NEXT: ret
%t0 = call double @llvm.maxnum.f64(double %a0, double %a1)
%t1 = call double @llvm.maxnum.f64(double %t0, double %a2)
diff --git a/llvm/test/CodeGen/RISCV/machinelicm-address-pseudos.ll b/llvm/test/CodeGen/RISCV/machinelicm-address-pseudos.ll
index 27297c97871832..af8105644b57db 100644
--- a/llvm/test/CodeGen/RISCV/machinelicm-address-pseudos.ll
+++ b/llvm/test/CodeGen/RISCV/machinelicm-address-pseudos.ll
@@ -97,10 +97,10 @@ ret:
define void @test_la_tls_ie(i32 signext %n) {
; RV32I-LABEL: test_la_tls_ie:
; RV32I: # %bb.0: # %entry
-; RV32I-NEXT: .Lpcrel_hi2:
-; RV32I-NEXT: auipc a1, %tls_ie_pcrel_hi(ie)
-; RV32I-NEXT: lw a2, %pcrel_lo(.Lpcrel_hi2)(a1)
; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: .Lpcrel_hi2:
+; RV32I-NEXT: auipc a2, %tls_ie_pcrel_hi(ie)
+; RV32I-NEXT: lw a2, %pcrel_lo(.Lpcrel_hi2)(a2)
; RV32I-NEXT: add a2, a2, tp
; RV32I-NEXT: .LBB2_1: # %loop
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
@@ -112,10 +112,10 @@ define void @test_la_tls_ie(i32 signext %n) {
;
; RV64I-LABEL: test_la_tls_ie:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: .Lpcrel_hi2:
-; RV64I-NEXT: auipc a1, %tls_ie_pcrel_hi(ie)
-; RV64I-NEXT: ld a2, %pcrel_lo(.Lpcrel_hi2)(a1)
; RV64I-NEXT: li a1, 0
+; RV64I-NEXT: .Lpcrel_hi2:
+; RV64I-NEXT: auipc a2, %tls_ie_pcrel_hi(ie)
+; RV64I-NEXT: ld a2, %pcrel_lo(.Lpcrel_hi2)(a2)
; RV64I-NEXT: add a2, a2, tp
; RV64I-NEXT: .LBB2_1: # %loop
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
diff --git a/llvm/test/CodeGen/RISCV/macro-fusion-lui-addi.ll b/llvm/test/CodeGen/RISCV/macro-fusion-lui-addi.ll
index b45365e7a8b631..c2882fd46c17da 100644
--- a/llvm/test/CodeGen/RISCV/macro-fusion-lui-addi.ll
+++ b/llvm/test/CodeGen/RISCV/macro-fusion-lui-addi.ll
@@ -18,16 +18,16 @@ define void @foo(i32 signext %0, i32 signext %1) {
;
; FUSION-LABEL: foo:
; FUSION: # %bb.0:
-; FUSION-NEXT: fcvt.s.w fa0, a1
; FUSION-NEXT: lui a0, %hi(.L.str)
; FUSION-NEXT: addi a0, a0, %lo(.L.str)
+; FUSION-NEXT: fcvt.s.w fa0, a1
; FUSION-NEXT: tail bar
;
; FUSION-POSTRA-LABEL: foo:
; FUSION-POSTRA: # %bb.0:
-; FUSION-POSTRA-NEXT: fcvt.s.w fa0, a1
; FUSION-POSTRA-NEXT: lui a0, %hi(.L.str)
; FUSION-POSTRA-NEXT: addi a0, a0, %lo(.L.str)
+; FUSION-POSTRA-NEXT: fcvt.s.w fa0, a1
; FUSION-POSTRA-NEXT: tail bar
%3 = sitofp i32 %1 to float
tail call void @bar(ptr @.str, float %3)
diff --git a/llvm/test/CodeGen/RISCV/misched-mem-clustering.mir b/llvm/test/CodeGen/RISCV/misched-mem-clustering.mir
index 08df378f271897..21398d315ec93a 100644
--- a/llvm/test/CodeGen/RISCV/misched-mem-clustering.mir
+++ b/llvm/test/CodeGen/RISCV/misched-mem-clustering.mir
@@ -1,15 +1,15 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -mtriple=riscv64 -x mir -mcpu=sifive-p470 -verify-misched -enable-post-misched=false \
# RUN: -riscv-postmisched-load-store-clustering=false -debug-only=machine-scheduler \
-# RUN: -start-before=machine-scheduler -stop-after=postmisched -o - 2>&1 < %s \
+# RUN: -start-before=machine-scheduler -stop-after=postmisched -misched-regpressure=false -o - 2>&1 < %s \
# RUN: | FileCheck -check-prefix=NOPOSTMISCHED %s
# RUN: llc -mtriple=riscv64 -x mir -mcpu=sifive-p470 -mattr=+use-postra-scheduler -verify-misched -enable-post-misched=true \
# RUN: -riscv-postmisched-load-store-clustering=false -debug-only=machine-scheduler \
-# RUN: -start-before=machine-scheduler -stop-after=postmisched -o - 2>&1 < %s \
+# RUN: -start-before=machine-scheduler -stop-after=postmisched -misched-regpressure=false -o - 2>&1 < %s \
# RUN: | FileCheck -check-prefix=NOCLUSTER %s
# RUN: llc -mtriple=riscv64 -x mir -mcpu=sifive-p470 -mattr=+use-postra-scheduler -verify-misched -enable-post-misched=true \
# RUN: -debug-only=machine-scheduler \
-# RUN: -start-before=machine-scheduler -stop-after=postmisched -o - 2>&1 < %s \
+# RUN: -start-before=machine-scheduler -stop-after=postmisched -misched-regpressure=false -o - 2>&1 < %s \
# RUN: | FileCheck -check-prefix=MEMCLUSTER %s
# REQUIRES: asserts
diff --git a/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll b/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll
index 95b106f4d35ba0..6412f0c8ff1cf0 100644
--- a/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll
@@ -1081,18 +1081,18 @@ define i1 @usubo_ult_cmp_dominates_i64(i64 %x, i64 %y, ptr %p, i1 %cond) {
; RV32-NEXT: .cfi_offset s4, -24
; RV32-NEXT: .cfi_offset s5, -28
; RV32-NEXT: .cfi_offset s6, -32
-; RV32-NEXT: mv s2, a5
-; RV32-NEXT: andi a5, a5, 1
-; RV32-NEXT: beqz a5, .LBB32_8
+; RV32-NEXT: mv s5, a5
+; RV32-NEXT: mv s3, a1
+; RV32-NEXT: andi a1, a5, 1
+; RV32-NEXT: beqz a1, .LBB32_8
; RV32-NEXT: # %bb.1: # %t
; RV32-NEXT: mv s0, a4
-; RV32-NEXT: mv s3, a3
+; RV32-NEXT: mv s2, a3
; RV32-NEXT: mv s1, a2
-; RV32-NEXT: mv s5, a1
; RV32-NEXT: mv s4, a0
-; RV32-NEXT: beq a1, a3, .LBB32_3
+; RV32-NEXT: beq s3, a3, .LBB32_3
; RV32-NEXT: # %bb.2: # %t
-; RV32-NEXT: sltu s6, s5, s3
+; RV32-NEXT: sltu s6, s3, s2
; RV32-NEXT: j .LBB32_4
; RV32-NEXT: .LBB32_3:
; RV32-NEXT: sltu s6, s4, s1
@@ -1103,18 +1103,18 @@ define i1 @usubo_ult_cmp_dominates_i64(i64 %x, i64 %y, ptr %p, i1 %cond) {
; RV32-NEXT: # %bb.5: # %end
; RV32-NEXT: sltu a1, s4, s1
; RV32-NEXT: mv a0, a1
-; RV32-NEXT: beq s5, s3, .LBB32_7
+; RV32-NEXT: beq s3, s2, .LBB32_7
; RV32-NEXT: # %bb.6: # %end
-; RV32-NEXT: sltu a0, s5, s3
+; RV32-NEXT: sltu a0, s3, s2
; RV32-NEXT: .LBB32_7: # %end
-; RV32-NEXT: sub a2, s5, s3
+; RV32-NEXT: sub a2, s3, s2
; RV32-NEXT: sub a2, a2, a1
; RV32-NEXT: sub a1, s4, s1
; RV32-NEXT: sw a1, 0(s0)
; RV32-NEXT: sw a2, 4(s0)
; RV32-NEXT: j .LBB32_9
; RV32-NEXT: .LBB32_8: # %f
-; RV32-NEXT: mv a0, s2
+; RV32-NEXT: mv a0, s5
; RV32-NEXT: .LBB32_9: # %f
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
@@ -1153,13 +1153,13 @@ define i1 @usubo_ult_cmp_dominates_i64(i64 %x, i64 %y, ptr %p, i1 %cond) {
; RV64-NEXT: .cfi_offset s3, -40
; RV64-NEXT: .cfi_offset s4, -48
; RV64-NEXT: mv s0, a3
-; RV64-NEXT: andi a3, a3, 1
-; RV64-NEXT: beqz a3, .LBB32_3
+; RV64-NEXT: mv s2, a1
+; RV64-NEXT: andi a1, a3, 1
+; RV64-NEXT: beqz a1, .LBB32_3
; RV64-NEXT: # %bb.1: # %t
; RV64-NEXT: mv s1, a2
-; RV64-NEXT: mv s2, a1
; RV64-NEXT: mv s3, a0
-; RV64-NEXT: sltu s4, a0, a1
+; RV64-NEXT: sltu s4, a0, s2
; RV64-NEXT: mv a0, s4
; RV64-NEXT: call call
; RV64-NEXT: bgeu s3, s2, .LBB32_3
diff --git a/llvm/test/CodeGen/RISCV/rv32zbb-zbkb.ll b/llvm/test/CodeGen/RISCV/rv32zbb-zbkb.ll
index 4e958f5699adbf..87f6f62ce68ddd 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbb-zbkb.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbb-zbkb.ll
@@ -138,25 +138,25 @@ declare i64 @llvm.fshl.i64(i64, i64, i64)
define i64 @rol_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: rol_i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: slli a3, a2, 26
-; CHECK-NEXT: srli a3, a3, 31
+; CHECK-NEXT: slli a5, a2, 26
+; CHECK-NEXT: srli a5, a5, 31
; CHECK-NEXT: mv a4, a1
-; CHECK-NEXT: bnez a3, .LBB7_2
+; CHECK-NEXT: bnez a5, .LBB7_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a4, a0
; CHECK-NEXT: .LBB7_2:
-; CHECK-NEXT: sll a5, a4, a2
-; CHECK-NEXT: bnez a3, .LBB7_4
+; CHECK-NEXT: sll a3, a4, a2
+; CHECK-NEXT: bnez a5, .LBB7_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: mv a0, a1
; CHECK-NEXT: .LBB7_4:
; CHECK-NEXT: srli a1, a0, 1
-; CHECK-NEXT: not a6, a2
-; CHECK-NEXT: srl a3, a1, a6
-; CHECK-NEXT: or a3, a5, a3
+; CHECK-NEXT: not a5, a2
+; CHECK-NEXT: srl a1, a1, a5
+; CHECK-NEXT: or a3, a3, a1
; CHECK-NEXT: sll a0, a0, a2
; CHECK-NEXT: srli a4, a4, 1
-; CHECK-NEXT: srl a1, a4, a6
+; CHECK-NEXT: srl a1, a4, a5
; CHECK-NEXT: or a1, a0, a1
; CHECK-NEXT: mv a0, a3
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll b/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll
index 41ff1133ff823c..c22d01987b7bd8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll
@@ -382,21 +382,22 @@ define fastcc <vscale x 32 x i32> @ret_nxv32i32_call_nxv32i32_nxv32i32_nxv32i32_
; RV32-NEXT: mul a1, a1, a3
; RV32-NEXT: sub sp, sp, a1
; RV32-NEXT: andi sp, sp, -128
+; RV32-NEXT: vmv8r.v v0, v16
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: add a3, a2, a1
-; RV32-NEXT: vl8re32.v v24, (a3)
+; RV32-NEXT: vl8re32.v v16, (a3)
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 3
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 128
-; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV32-NEXT: add a3, a0, a1
-; RV32-NEXT: vl8re32.v v24, (a3)
+; RV32-NEXT: vl8re32.v v16, (a3)
; RV32-NEXT: addi a3, sp, 128
-; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
-; RV32-NEXT: vl8re32.v v0, (a2)
-; RV32-NEXT: vl8re32.v v24, (a0)
+; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: vl8re32.v v24, (a2)
+; RV32-NEXT: vl8re32.v v16, (a0)
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: add a0, sp, a0
@@ -406,9 +407,9 @@ define fastcc <vscale x 32 x i32> @ret_nxv32i32_call_nxv32i32_nxv32i32_nxv32i32_
; RV32-NEXT: slli a2, a2, 5
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 128
-; RV32-NEXT: vs8r.v v24, (a2)
+; RV32-NEXT: vs8r.v v16, (a2)
; RV32-NEXT: add a0, a0, a1
-; RV32-NEXT: vs8r.v v16, (a0)
+; RV32-NEXT: vs8r.v v0, (a0)
; RV32-NEXT: add a1, a2, a1
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 5
@@ -422,7 +423,7 @@ define fastcc <vscale x 32 x i32> @ret_nxv32i32_call_nxv32i32_nxv32i32_nxv32i32_
; RV32-NEXT: addi a3, sp, 128
; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload
; RV32-NEXT: vs8r.v v8, (a1)
-; RV32-NEXT: vmv8r.v v8, v0
+; RV32-NEXT: vmv8r.v v8, v24
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: add a1, sp, a1
@@ -454,21 +455,22 @@ define fastcc <vscale x 32 x i32> @ret_nxv32i32_call_nxv32i32_nxv32i32_nxv32i32_
; RV64-NEXT: mul a1, a1, a3
; RV64-NEXT: sub sp, sp, a1
; RV64-NEXT: andi sp, sp, -128
+; RV64-NEXT: vmv8r.v v0, v16
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: add a3, a2, a1
-; RV64-NEXT: vl8re32.v v24, (a3)
+; RV64-NEXT: vl8re32.v v16, (a3)
; RV64-NEXT: csrr a3, vlenb
; RV64-NEXT: slli a3, a3, 3
; RV64-NEXT: add a3, sp, a3
; RV64-NEXT: addi a3, a3, 128
-; RV64-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
+; RV64-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV64-NEXT: add a3, a0, a1
-; RV64-NEXT: vl8re32.v v24, (a3)
+; RV64-NEXT: vl8re32.v v16, (a3)
; RV64-NEXT: addi a3, sp, 128
-; RV64-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
-; RV64-NEXT: vl8re32.v v0, (a2)
-; RV64-NEXT: vl8re32.v v24, (a0)
+; RV64-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
+; RV64-NEXT: vl8re32.v v24, (a2)
+; RV64-NEXT: vl8re32.v v16, (a0)
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a0, a0, 4
; RV64-NEXT: add a0, sp, a0
@@ -478,9 +480,9 @@ define fastcc <vscale x 32 x i32> @ret_nxv32i32_call_nxv32i32_nxv32i32_nxv32i32_
; RV64-NEXT: slli a2, a2, 5
; RV64-NEXT: add a2, sp, a2
; RV64-NEXT: addi a2, a2, 128
-; RV64-NEXT: vs8r.v v24, (a2)
+; RV64-NEXT: vs8r.v v16, (a2)
; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vs8r.v v16, (a0)
+; RV64-NEXT: vs8r.v v0, (a0)
; RV64-NEXT: add a1, a2, a1
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a0, a0, 5
@@ -494,7 +496,7 @@ define fastcc <vscale x 32 x i32> @ret_nxv32i32_call_nxv32i32_nxv32i32_nxv32i32_
; RV64-NEXT: addi a3, sp, 128
; RV64-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload
; RV64-NEXT: vs8r.v v8, (a1)
-; RV64-NEXT: vmv8r.v v8, v0
+; RV64-NEXT: vmv8r.v v8, v24
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: add a1, sp, a1
diff --git a/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll
index 2c05f27460bcdb..ee2946a20b17e1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll
@@ -673,9 +673,9 @@ declare <vscale x 8 x half> @llvm.vp.ceil.nxv8f16(<vscale x 8 x half>, <vscale x
define <vscale x 8 x half> @vp_ceil_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_ceil_vv_nxv8f16:
; ZVFH: # %bb.0:
+; ZVFH-NEXT: vmv1r.v v10, v0
; ZVFH-NEXT: lui a1, %hi(.LCPI18_0)
; ZVFH-NEXT: flh fa5, %lo(.LCPI18_0)(a1)
-; ZVFH-NEXT: vmv1r.v v10, v0
; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT: vfabs.v v12, v8, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu
@@ -759,9 +759,9 @@ declare <vscale x 16 x half> @llvm.vp.ceil.nxv16f16(<vscale x 16 x half>, <vscal
define <vscale x 16 x half> @vp_ceil_vv_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_ceil_vv_nxv16f16:
; ZVFH: # %bb.0:
+; ZVFH-NEXT: vmv1r.v v12, v0
; ZVFH-NEXT: lui a1, %hi(.LCPI20_0)
; ZVFH-NEXT: flh fa5, %lo(.LCPI20_0)(a1)
-; ZVFH-NEXT: vmv1r.v v12, v0
; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; ZVFH-NEXT: vfabs.v v16, v8, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu
@@ -845,9 +845,9 @@ declare <vscale x 32 x half> @llvm.vp.ceil.nxv32f16(<vscale x 32 x half>, <vscal
define <vscale x 32 x half> @vp_ceil_vv_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_ceil_vv_nxv32f16:
; ZVFH: # %bb.0:
+; ZVFH-NEXT: vmv1r.v v16, v0
; ZVFH-NEXT: lui a1, %hi(.LCPI22_0)
; ZVFH-NEXT: flh fa5, %lo(.LCPI22_0)(a1)
-; ZVFH-NEXT: vmv1r.v v16, v0
; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; ZVFH-NEXT: vfabs.v v24, v8, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu
@@ -1290,9 +1290,9 @@ declare <vscale x 2 x double> @llvm.vp.ceil.nxv2f64(<vscale x 2 x double>, <vsca
define <vscale x 2 x double> @vp_ceil_vv_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv2f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: lui a1, %hi(.LCPI36_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI36_0)(a1)
-; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
@@ -1334,9 +1334,9 @@ declare <vscale x 4 x double> @llvm.vp.ceil.nxv4f64(<vscale x 4 x double>, <vsca
define <vscale x 4 x double> @vp_ceil_vv_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv4f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: lui a1, %hi(.LCPI38_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI38_0)(a1)
-; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
@@ -1378,9 +1378,9 @@ declare <vscale x 7 x double> @llvm.vp.ceil.nxv7f64(<vscale x 7 x double>, <vsca
define <vscale x 7 x double> @vp_ceil_vv_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv7f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI40_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI40_0)(a1)
-; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
@@ -1422,9 +1422,9 @@ declare <vscale x 8 x double> @llvm.vp.ceil.nxv8f64(<vscale x 8 x double>, <vsca
define <vscale x 8 x double> @vp_ceil_vv_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_vv_nxv8f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI42_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI42_0)(a1)
-; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
diff --git a/llvm/test/CodeGen/RISCV/rvv/constant-folding-crash.ll b/llvm/test/CodeGen/RISCV/rvv/constant-folding-crash.ll
index 7839b602706db1..ade1b4203148d8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/constant-folding-crash.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/constant-folding-crash.ll
@@ -18,20 +18,19 @@
define void @constant_folding_crash(ptr %v54, <4 x ptr> %lanes.a, <4 x ptr> %lanes.b, <4 x i1> %sel) {
; RV32-LABEL: constant_folding_crash:
; RV32: # %bb.0: # %entry
+; RV32-NEXT: vmv1r.v v10, v0
; RV32-NEXT: lw a0, 8(a0)
; RV32-NEXT: andi a0, a0, 1
; RV32-NEXT: seqz a0, a0
; RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; RV32-NEXT: vmv.v.x v10, a0
-; RV32-NEXT: vmsne.vi v10, v10, 0
-; RV32-NEXT: vmv1r.v v11, v0
-; RV32-NEXT: vmv1r.v v0, v10
+; RV32-NEXT: vmv.v.x v11, a0
+; RV32-NEXT: vmsne.vi v0, v11, 0
; RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32-NEXT: vmerge.vvm v8, v9, v8, v0
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; RV32-NEXT: vmv.v.i v8, 0
-; RV32-NEXT: vmv1r.v v0, v11
+; RV32-NEXT: vmv1r.v v0, v10
; RV32-NEXT: vmerge.vim v8, v8, 1, v0
; RV32-NEXT: vrgather.vi v9, v8, 0
; RV32-NEXT: vmsne.vi v0, v9, 0
@@ -42,20 +41,19 @@ define void @constant_folding_crash(ptr %v54, <4 x ptr> %lanes.a, <4 x ptr> %lan
;
; RV64-LABEL: constant_folding_crash:
; RV64: # %bb.0: # %entry
+; RV64-NEXT: vmv1r.v v12, v0
; RV64-NEXT: ld a0, 8(a0)
; RV64-NEXT: andi a0, a0, 1
; RV64-NEXT: seqz a0, a0
; RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; RV64-NEXT: vmv.v.x v12, a0
-; RV64-NEXT: vmsne.vi v12, v12, 0
-; RV64-NEXT: vmv1r.v v13, v0
-; RV64-NEXT: vmv1r.v v0, v12
+; RV64-NEXT: vmv.v.x v13, a0
+; RV64-NEXT: vmsne.vi v0, v13, 0
; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; RV64-NEXT: vmerge.vvm v8, v10, v8, v0
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; RV64-NEXT: vmv.v.i v8, 0
-; RV64-NEXT: vmv1r.v v0, v13
+; RV64-NEXT: vmv1r.v v0, v12
; RV64-NEXT: vmerge.vim v8, v8, 1, v0
; RV64-NEXT: vrgather.vi v9, v8, 0
; RV64-NEXT: vmsne.vi v0, v9, 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/dont-sink-splat-operands.ll b/llvm/test/CodeGen/RISCV/rvv/dont-sink-splat-operands.ll
index 2b4b8e979f3d71..9e466820d83fe5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/dont-sink-splat-operands.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/dont-sink-splat-operands.ll
@@ -87,33 +87,33 @@ define void @sink_splat_add_scalable(ptr nocapture %a, i32 signext %x) {
; NO-SINK-LABEL: sink_splat_add_scalable:
; NO-SINK: # %bb.0: # %entry
; NO-SINK-NEXT: csrr a5, vlenb
-; NO-SINK-NEXT: srli a2, a5, 1
-; NO-SINK-NEXT: li a3, 1024
-; NO-SINK-NEXT: bgeu a3, a2, .LBB1_2
+; NO-SINK-NEXT: srli a3, a5, 1
+; NO-SINK-NEXT: li a2, 1024
+; NO-SINK-NEXT: bgeu a2, a3, .LBB1_2
; NO-SINK-NEXT: # %bb.1:
-; NO-SINK-NEXT: li a3, 0
+; NO-SINK-NEXT: li a2, 0
; NO-SINK-NEXT: j .LBB1_5
; NO-SINK-NEXT: .LBB1_2: # %vector.ph
-; NO-SINK-NEXT: addi a3, a2, -1
-; NO-SINK-NEXT: andi a4, a3, 1024
-; NO-SINK-NEXT: xori a3, a4, 1024
+; NO-SINK-NEXT: addi a2, a3, -1
+; NO-SINK-NEXT: andi a4, a2, 1024
+; NO-SINK-NEXT: xori a2, a4, 1024
; NO-SINK-NEXT: vsetvli a6, zero, e32, m2, ta, ma
; NO-SINK-NEXT: vmv.v.x v8, a1
; NO-SINK-NEXT: slli a5, a5, 1
; NO-SINK-NEXT: mv a6, a0
-; NO-SINK-NEXT: mv a7, a3
+; NO-SINK-NEXT: mv a7, a2
; NO-SINK-NEXT: .LBB1_3: # %vector.body
; NO-SINK-NEXT: # =>This Inner Loop Header: Depth=1
; NO-SINK-NEXT: vl2re32.v v10, (a6)
; NO-SINK-NEXT: vadd.vv v10, v10, v8
; NO-SINK-NEXT: vs2r.v v10, (a6)
-; NO-SINK-NEXT: sub a7, a7, a2
+; NO-SINK-NEXT: sub a7, a7, a3
; NO-SINK-NEXT: add a6, a6, a5
; NO-SINK-NEXT: bnez a7, .LBB1_3
; NO-SINK-NEXT: # %bb.4: # %middle.block
; NO-SINK-NEXT: beqz a4, .LBB1_7
; NO-SINK-NEXT: .LBB1_5: # %for.body.preheader
-; NO-SINK-NEXT: slli a2, a3, 2
+; NO-SINK-NEXT: slli a2, a2, 2
; NO-SINK-NEXT: add a2, a0, a2
; NO-SINK-NEXT: lui a3, 1
; NO-SINK-NEXT: add a0, a0, a3
@@ -130,32 +130,32 @@ define void @sink_splat_add_scalable(ptr nocapture %a, i32 signext %x) {
; SINK-LABEL: sink_splat_add_scalable:
; SINK: # %bb.0: # %entry
; SINK-NEXT: csrr a5, vlenb
-; SINK-NEXT: srli a2, a5, 1
-; SINK-NEXT: li a3, 1024
-; SINK-NEXT: bgeu a3, a2, .LBB1_2
+; SINK-NEXT: srli a3, a5, 1
+; SINK-NEXT: li a2, 1024
+; SINK-NEXT: bgeu a2, a3, .LBB1_2
; SINK-NEXT: # %bb.1:
-; SINK-NEXT: li a3, 0
+; SINK-NEXT: li a2, 0
; SINK-NEXT: j .LBB1_5
; SINK-NEXT: .LBB1_2: # %vector.ph
-; SINK-NEXT: addi a3, a2, -1
-; SINK-NEXT: andi a4, a3, 1024
-; SINK-NEXT: xori a3, a4, 1024
+; SINK-NEXT: addi a2, a3, -1
+; SINK-NEXT: andi a4, a2, 1024
+; SINK-NEXT: xori a2, a4, 1024
; SINK-NEXT: slli a5, a5, 1
; SINK-NEXT: mv a6, a0
-; SINK-NEXT: mv a7, a3
+; SINK-NEXT: mv a7, a2
; SINK-NEXT: vsetvli t0, zero, e32, m2, ta, ma
; SINK-NEXT: .LBB1_3: # %vector.body
; SINK-NEXT: # =>This Inner Loop Header: Depth=1
; SINK-NEXT: vl2re32.v v8, (a6)
; SINK-NEXT: vadd.vx v8, v8, a1
; SINK-NEXT: vs2r.v v8, (a6)
-; SINK-NEXT: sub a7, a7, a2
+; SINK-NEXT: sub a7, a7, a3
; SINK-NEXT: add a6, a6, a5
; SINK-NEXT: bnez a7, .LBB1_3
; SINK-NEXT: # %bb.4: # %middle.block
; SINK-NEXT: beqz a4, .LBB1_7
; SINK-NEXT: .LBB1_5: # %for.body.preheader
-; SINK-NEXT: slli a2, a3, 2
+; SINK-NEXT: slli a2, a2, 2
; SINK-NEXT: add a2, a0, a2
; SINK-NEXT: lui a3, 1
; SINK-NEXT: add a0, a0, a3
@@ -172,32 +172,32 @@ define void @sink_splat_add_scalable(ptr nocapture %a, i32 signext %x) {
; DEFAULT-LABEL: sink_splat_add_scalable:
; DEFAULT: # %bb.0: # %entry
; DEFAULT-NEXT: csrr a5, vlenb
-; DEFAULT-NEXT: srli a2, a5, 1
-; DEFAULT-NEXT: li a3, 1024
-; DEFAULT-NEXT: bgeu a3, a2, .LBB1_2
+; DEFAULT-NEXT: srli a3, a5, 1
+; DEFAULT-NEXT: li a2, 1024
+; DEFAULT-NEXT: bgeu a2, a3, .LBB1_2
; DEFAULT-NEXT: # %bb.1:
-; DEFAULT-NEXT: li a3, 0
+; DEFAULT-NEXT: li a2, 0
; DEFAULT-NEXT: j .LBB1_5
; DEFAULT-NEXT: .LBB1_2: # %vector.ph
-; DEFAULT-NEXT: addi a3, a2, -1
-; DEFAULT-NEXT: andi a4, a3, 1024
-; DEFAULT-NEXT: xori a3, a4, 1024
+; DEFAULT-NEXT: addi a2, a3, -1
+; DEFAULT-NEXT: andi a4, a2, 1024
+; DEFAULT-NEXT: xori a2, a4, 1024
; DEFAULT-NEXT: slli a5, a5, 1
; DEFAULT-NEXT: mv a6, a0
-; DEFAULT-NEXT: mv a7, a3
+; DEFAULT-NEXT: mv a7, a2
; DEFAULT-NEXT: vsetvli t0, zero, e32, m2, ta, ma
; DEFAULT-NEXT: .LBB1_3: # %vector.body
; DEFAULT-NEXT: # =>This Inner Loop Header: Depth=1
; DEFAULT-NEXT: vl2re32.v v8, (a6)
; DEFAULT-NEXT: vadd.vx v8, v8, a1
; DEFAULT-NEXT: vs2r.v v8, (a6)
-; DEFAULT-NEXT: sub a7, a7, a2
+; DEFAULT-NEXT: sub a7, a7, a3
; DEFAULT-NEXT: add a6, a6, a5
; DEFAULT-NEXT: bnez a7, .LBB1_3
; DEFAULT-NEXT: # %bb.4: # %middle.block
; DEFAULT-NEXT: beqz a4, .LBB1_7
; DEFAULT-NEXT: .LBB1_5: # %for.body.preheader
-; DEFAULT-NEXT: slli a2, a3, 2
+; DEFAULT-NEXT: slli a2, a2, 2
; DEFAULT-NEXT: add a2, a0, a2
; DEFAULT-NEXT: lui a3, 1
; DEFAULT-NEXT: add a0, a0, a3
@@ -407,32 +407,32 @@ define void @sink_splat_fadd_scalable(ptr nocapture %a, float %x) {
; NO-SINK-LABEL: sink_splat_fadd_scalable:
; NO-SINK: # %bb.0: # %entry
; NO-SINK-NEXT: csrr a1, vlenb
-; NO-SINK-NEXT: srli a2, a1, 2
-; NO-SINK-NEXT: li a3, 1024
-; NO-SINK-NEXT: bgeu a3, a2, .LBB4_2
+; NO-SINK-NEXT: srli a3, a1, 2
+; NO-SINK-NEXT: li a2, 1024
+; NO-SINK-NEXT: bgeu a2, a3, .LBB4_2
; NO-SINK-NEXT: # %bb.1:
-; NO-SINK-NEXT: li a3, 0
+; NO-SINK-NEXT: li a2, 0
; NO-SINK-NEXT: j .LBB4_5
; NO-SINK-NEXT: .LBB4_2: # %vector.ph
-; NO-SINK-NEXT: addi a3, a2, -1
-; NO-SINK-NEXT: andi a4, a3, 1024
-; NO-SINK-NEXT: xori a3, a4, 1024
+; NO-SINK-NEXT: addi a2, a3, -1
+; NO-SINK-NEXT: andi a4, a2, 1024
+; NO-SINK-NEXT: xori a2, a4, 1024
; NO-SINK-NEXT: vsetvli a5, zero, e32, m1, ta, ma
; NO-SINK-NEXT: vfmv.v.f v8, fa0
; NO-SINK-NEXT: mv a5, a0
-; NO-SINK-NEXT: mv a6, a3
+; NO-SINK-NEXT: mv a6, a2
; NO-SINK-NEXT: .LBB4_3: # %vector.body
; NO-SINK-NEXT: # =>This Inner Loop Header: Depth=1
; NO-SINK-NEXT: vl1re32.v v9, (a5)
; NO-SINK-NEXT: vfadd.vv v9, v9, v8
; NO-SINK-NEXT: vs1r.v v9, (a5)
-; NO-SINK-NEXT: sub a6, a6, a2
+; NO-SINK-NEXT: sub a6, a6, a3
; NO-SINK-NEXT: add a5, a5, a1
; NO-SINK-NEXT: bnez a6, .LBB4_3
; NO-SINK-NEXT: # %bb.4: # %middle.block
; NO-SINK-NEXT: beqz a4, .LBB4_7
; NO-SINK-NEXT: .LBB4_5: # %for.body.preheader
-; NO-SINK-NEXT: slli a1, a3, 2
+; NO-SINK-NEXT: slli a1, a2, 2
; NO-SINK-NEXT: add a1, a0, a1
; NO-SINK-NEXT: lui a2, 1
; NO-SINK-NEXT: add a0, a0, a2
@@ -449,31 +449,31 @@ define void @sink_splat_fadd_scalable(ptr nocapture %a, float %x) {
; SINK-LABEL: sink_splat_fadd_scalable:
; SINK: # %bb.0: # %entry
; SINK-NEXT: csrr a1, vlenb
-; SINK-NEXT: srli a2, a1, 2
-; SINK-NEXT: li a3, 1024
-; SINK-NEXT: bgeu a3, a2, .LBB4_2
+; SINK-NEXT: srli a3, a1, 2
+; SINK-NEXT: li a2, 1024
+; SINK-NEXT: bgeu a2, a3, .LBB4_2
; SINK-NEXT: # %bb.1:
-; SINK-NEXT: li a3, 0
+; SINK-NEXT: li a2, 0
; SINK-NEXT: j .LBB4_5
; SINK-NEXT: .LBB4_2: # %vector.ph
-; SINK-NEXT: addi a3, a2, -1
-; SINK-NEXT: andi a4, a3, 1024
-; SINK-NEXT: xori a3, a4, 1024
+; SINK-NEXT: addi a2, a3, -1
+; SINK-NEXT: andi a4, a2, 1024
+; SINK-NEXT: xori a2, a4, 1024
; SINK-NEXT: mv a5, a0
-; SINK-NEXT: mv a6, a3
+; SINK-NEXT: mv a6, a2
; SINK-NEXT: vsetvli a7, zero, e32, m1, ta, ma
; SINK-NEXT: .LBB4_3: # %vector.body
; SINK-NEXT: # =>This Inner Loop Header: Depth=1
; SINK-NEXT: vl1re32.v v8, (a5)
; SINK-NEXT: vfadd.vf v8, v8, fa0
; SINK-NEXT: vs1r.v v8, (a5)
-; SINK-NEXT: sub a6, a6, a2
+; SINK-NEXT: sub a6, a6, a3
; SINK-NEXT: add a5, a5, a1
; SINK-NEXT: bnez a6, .LBB4_3
; SINK-NEXT: # %bb.4: # %middle.block
; SINK-NEXT: beqz a4, .LBB4_7
; SINK-NEXT: .LBB4_5: # %for.body.preheader
-; SINK-NEXT: slli a1, a3, 2
+; SINK-NEXT: slli a1, a2, 2
; SINK-NEXT: add a1, a0, a1
; SINK-NEXT: lui a2, 1
; SINK-NEXT: add a0, a0, a2
@@ -490,31 +490,31 @@ define void @sink_splat_fadd_scalable(ptr nocapture %a, float %x) {
; DEFAULT-LABEL: sink_splat_fadd_scalable:
; DEFAULT: # %bb.0: # %entry
; DEFAULT-NEXT: csrr a1, vlenb
-; DEFAULT-NEXT: srli a2, a1, 2
-; DEFAULT-NEXT: li a3, 1024
-; DEFAULT-NEXT: bgeu a3, a2, .LBB4_2
+; DEFAULT-NEXT: srli a3, a1, 2
+; DEFAULT-NEXT: li a2, 1024
+; DEFAULT-NEXT: bgeu a2, a3, .LBB4_2
; DEFAULT-NEXT: # %bb.1:
-; DEFAULT-NEXT: li a3, 0
+; DEFAULT-NEXT: li a2, 0
; DEFAULT-NEXT: j .LBB4_5
; DEFAULT-NEXT: .LBB4_2: # %vector.ph
-; DEFAULT-NEXT: addi a3, a2, -1
-; DEFAULT-NEXT: andi a4, a3, 1024
-; DEFAULT-NEXT: xori a3, a4, 1024
+; DEFAULT-NEXT: addi a2, a3, -1
+; DEFAULT-NEXT: andi a4, a2, 1024
+; DEFAULT-NEXT: xori a2, a4, 1024
; DEFAULT-NEXT: mv a5, a0
-; DEFAULT-NEXT: mv a6, a3
+; DEFAULT-NEXT: mv a6, a2
; DEFAULT-NEXT: vsetvli a7, zero, e32, m1, ta, ma
; DEFAULT-NEXT: .LBB4_3: # %vector.body
; DEFAULT-NEXT: # =>This Inner Loop Header: Depth=1
; DEFAULT-NEXT: vl1re32.v v8, (a5)
; DEFAULT-NEXT: vfadd.vf v8, v8, fa0
; DEFAULT-NEXT: vs1r.v v8, (a5)
-; DEFAULT-NEXT: sub a6, a6, a2
+; DEFAULT-NEXT: sub a6, a6, a3
; DEFAULT-NEXT: add a5, a5, a1
; DEFAULT-NEXT: bnez a6, .LBB4_3
; DEFAULT-NEXT: # %bb.4: # %middle.block
; DEFAULT-NEXT: beqz a4, .LBB4_7
; DEFAULT-NEXT: .LBB4_5: # %for.body.preheader
-; DEFAULT-NEXT: slli a1, a3, 2
+; DEFAULT-NEXT: slli a1, a2, 2
; DEFAULT-NEXT: add a1, a0, a1
; DEFAULT-NEXT: lui a2, 1
; DEFAULT-NEXT: add a0, a0, a2
diff --git a/llvm/test/CodeGen/RISCV/rvv/expandload.ll b/llvm/test/CodeGen/RISCV/rvv/expandload.ll
index fac033e982e10e..63da328f811d88 100644
--- a/llvm/test/CodeGen/RISCV/rvv/expandload.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/expandload.ll
@@ -269,19 +269,19 @@ define <256 x i8> @test_expandload_v256i8(ptr %base, <256 x i1> %mask, <256 x i8
; CHECK-RV32-NEXT: addi a0, sp, 16
; CHECK-RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, ta, mu
-; CHECK-RV32-NEXT: viota.m v16, v0
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a0, a0, 4
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-RV32-NEXT: viota.m v24, v0
; CHECK-RV32-NEXT: csrr a0, vlenb
; CHECK-RV32-NEXT: li a1, 24
; CHECK-RV32-NEXT: mul a0, a0, a1
; CHECK-RV32-NEXT: add a0, sp, a0
; CHECK-RV32-NEXT: addi a0, a0, 16
; CHECK-RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-RV32-NEXT: vrgather.vv v8, v24, v16, v0.t
+; CHECK-RV32-NEXT: csrr a0, vlenb
+; CHECK-RV32-NEXT: slli a0, a0, 4
+; CHECK-RV32-NEXT: add a0, sp, a0
+; CHECK-RV32-NEXT: addi a0, a0, 16
+; CHECK-RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-RV32-NEXT: vrgather.vv v8, v16, v24, v0.t
; CHECK-RV32-NEXT: csrr a0, vlenb
; CHECK-RV32-NEXT: li a1, 24
; CHECK-RV32-NEXT: mul a0, a0, a1
@@ -340,7 +340,10 @@ define <256 x i8> @test_expandload_v256i8(ptr %base, <256 x i1> %mask, <256 x i8
; CHECK-RV64-NEXT: li a2, 128
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; CHECK-RV64-NEXT: vle8.v v8, (a1)
-; CHECK-RV64-NEXT: addi a1, sp, 16
+; CHECK-RV64-NEXT: csrr a1, vlenb
+; CHECK-RV64-NEXT: slli a1, a1, 3
+; CHECK-RV64-NEXT: add a1, sp, a1
+; CHECK-RV64-NEXT: addi a1, a1, 16
; CHECK-RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-RV64-NEXT: vslidedown.vi v9, v0, 1
@@ -349,12 +352,12 @@ define <256 x i8> @test_expandload_v256i8(ptr %base, <256 x i1> %mask, <256 x i8
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; CHECK-RV64-NEXT: vcpop.m a4, v0
; CHECK-RV64-NEXT: vsetvli zero, a4, e8, m8, ta, ma
-; CHECK-RV64-NEXT: vle8.v v24, (a0)
+; CHECK-RV64-NEXT: vle8.v v8, (a0)
; CHECK-RV64-NEXT: csrr a4, vlenb
; CHECK-RV64-NEXT: slli a4, a4, 4
; CHECK-RV64-NEXT: add a4, sp, a4
; CHECK-RV64-NEXT: addi a4, a4, 16
-; CHECK-RV64-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill
+; CHECK-RV64-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; CHECK-RV64-NEXT: vcpop.m a4, v7
; CHECK-RV64-NEXT: cpop a3, a3
@@ -363,25 +366,22 @@ define <256 x i8> @test_expandload_v256i8(ptr %base, <256 x i1> %mask, <256 x i8
; CHECK-RV64-NEXT: add a0, a0, a1
; CHECK-RV64-NEXT: vsetvli zero, a4, e8, m8, ta, ma
; CHECK-RV64-NEXT: vle8.v v8, (a0)
-; CHECK-RV64-NEXT: csrr a0, vlenb
-; CHECK-RV64-NEXT: slli a0, a0, 3
-; CHECK-RV64-NEXT: add a0, sp, a0
-; CHECK-RV64-NEXT: addi a0, a0, 16
+; CHECK-RV64-NEXT: addi a0, sp, 16
; CHECK-RV64-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, ta, mu
-; CHECK-RV64-NEXT: viota.m v16, v0
-; CHECK-RV64-NEXT: csrr a0, vlenb
-; CHECK-RV64-NEXT: slli a0, a0, 4
-; CHECK-RV64-NEXT: add a0, sp, a0
-; CHECK-RV64-NEXT: addi a0, a0, 16
-; CHECK-RV64-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-RV64-NEXT: viota.m v24, v0
; CHECK-RV64-NEXT: csrr a0, vlenb
; CHECK-RV64-NEXT: li a1, 24
; CHECK-RV64-NEXT: mul a0, a0, a1
; CHECK-RV64-NEXT: add a0, sp, a0
; CHECK-RV64-NEXT: addi a0, a0, 16
; CHECK-RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-RV64-NEXT: vrgather.vv v8, v24, v16, v0.t
+; CHECK-RV64-NEXT: csrr a0, vlenb
+; CHECK-RV64-NEXT: slli a0, a0, 4
+; CHECK-RV64-NEXT: add a0, sp, a0
+; CHECK-RV64-NEXT: addi a0, a0, 16
+; CHECK-RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-RV64-NEXT: vrgather.vv v8, v16, v24, v0.t
; CHECK-RV64-NEXT: csrr a0, vlenb
; CHECK-RV64-NEXT: li a1, 24
; CHECK-RV64-NEXT: mul a0, a0, a1
@@ -399,16 +399,15 @@ define <256 x i8> @test_expandload_v256i8(ptr %base, <256 x i1> %mask, <256 x i8
; CHECK-RV64-NEXT: slli a0, a0, 3
; CHECK-RV64-NEXT: add a0, sp, a0
; CHECK-RV64-NEXT: addi a0, a0, 16
+; CHECK-RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-RV64-NEXT: addi a0, sp, 16
; CHECK-RV64-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; CHECK-RV64-NEXT: csrr a0, vlenb
; CHECK-RV64-NEXT: slli a0, a0, 4
; CHECK-RV64-NEXT: add a0, sp, a0
; CHECK-RV64-NEXT: addi a0, a0, 16
-; CHECK-RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-RV64-NEXT: addi a0, sp, 16
; CHECK-RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-RV64-NEXT: vrgather.vv v8, v24, v16, v0.t
-; CHECK-RV64-NEXT: vmv.v.v v16, v8
+; CHECK-RV64-NEXT: vrgather.vv v16, v24, v8, v0.t
; CHECK-RV64-NEXT: csrr a0, vlenb
; CHECK-RV64-NEXT: li a1, 24
; CHECK-RV64-NEXT: mul a0, a0, a1
@@ -662,31 +661,25 @@ define <128 x i16> @test_expandload_v128i16(ptr %base, <128 x i1> %mask, <128 x
; CHECK-RV32-NEXT: addi sp, sp, -16
; CHECK-RV32-NEXT: .cfi_def_cfa_offset 16
; CHECK-RV32-NEXT: csrr a1, vlenb
-; CHECK-RV32-NEXT: li a2, 40
-; CHECK-RV32-NEXT: mul a1, a1, a2
+; CHECK-RV32-NEXT: slli a1, a1, 5
; CHECK-RV32-NEXT: sub sp, sp, a1
-; CHECK-RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb
+; CHECK-RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
; CHECK-RV32-NEXT: csrr a1, vlenb
-; CHECK-RV32-NEXT: li a2, 24
-; CHECK-RV32-NEXT: mul a1, a1, a2
+; CHECK-RV32-NEXT: slli a1, a1, 4
; CHECK-RV32-NEXT: add a1, sp, a1
; CHECK-RV32-NEXT: addi a1, a1, 16
; CHECK-RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
-; CHECK-RV32-NEXT: csrr a1, vlenb
-; CHECK-RV32-NEXT: slli a1, a1, 5
-; CHECK-RV32-NEXT: add a1, sp, a1
-; CHECK-RV32-NEXT: addi a1, a1, 16
-; CHECK-RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-RV32-NEXT: li a1, 64
; CHECK-RV32-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; CHECK-RV32-NEXT: vcpop.m a2, v0
; CHECK-RV32-NEXT: vsetvli zero, a2, e16, m8, ta, ma
-; CHECK-RV32-NEXT: vle16.v v8, (a0)
+; CHECK-RV32-NEXT: vle16.v v24, (a0)
; CHECK-RV32-NEXT: csrr a2, vlenb
-; CHECK-RV32-NEXT: slli a2, a2, 4
+; CHECK-RV32-NEXT: li a3, 24
+; CHECK-RV32-NEXT: mul a2, a2, a3
; CHECK-RV32-NEXT: add a2, sp, a2
; CHECK-RV32-NEXT: addi a2, a2, 16
-; CHECK-RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; CHECK-RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
; CHECK-RV32-NEXT: vsetivli zero, 8, e8, m1, ta, ma
; CHECK-RV32-NEXT: vslidedown.vi v7, v0, 8
; CHECK-RV32-NEXT: vsetvli zero, a1, e8, m4, ta, ma
@@ -702,67 +695,40 @@ define <128 x i16> @test_expandload_v128i16(ptr %base, <128 x i1> %mask, <128 x
; CHECK-RV32-NEXT: slli a3, a3, 1
; CHECK-RV32-NEXT: add a0, a0, a3
; CHECK-RV32-NEXT: vsetvli zero, a2, e16, m8, ta, ma
-; CHECK-RV32-NEXT: vle16.v v8, (a0)
+; CHECK-RV32-NEXT: vle16.v v16, (a0)
; CHECK-RV32-NEXT: csrr a0, vlenb
; CHECK-RV32-NEXT: slli a0, a0, 3
; CHECK-RV32-NEXT: add a0, sp, a0
; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; CHECK-RV32-NEXT: vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-RV32-NEXT: viota.m v8, v0
-; CHECK-RV32-NEXT: addi a0, sp, 16
-; CHECK-RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-RV32-NEXT: viota.m v24, v0
; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a0, a0, 4
+; CHECK-RV32-NEXT: li a1, 24
+; CHECK-RV32-NEXT: mul a0, a0, a1
; CHECK-RV32-NEXT: add a0, sp, a0
; CHECK-RV32-NEXT: addi a0, a0, 16
; CHECK-RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a0, a0, 5
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-RV32-NEXT: addi a0, sp, 16
-; CHECK-RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; CHECK-RV32-NEXT: vrgather.vv v8, v16, v24, v0.t
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a0, a0, 5
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
+; CHECK-RV32-NEXT: addi a0, sp, 16
; CHECK-RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-RV32-NEXT: viota.m v16, v7
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a0, a0, 4
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; CHECK-RV32-NEXT: viota.m v8, v7
; CHECK-RV32-NEXT: vmv1r.v v0, v7
; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a0, a0, 3
+; CHECK-RV32-NEXT: slli a0, a0, 4
; CHECK-RV32-NEXT: add a0, sp, a0
; CHECK-RV32-NEXT: addi a0, a0, 16
; CHECK-RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a0, a0, 4
+; CHECK-RV32-NEXT: slli a0, a0, 3
; CHECK-RV32-NEXT: add a0, sp, a0
; CHECK-RV32-NEXT: addi a0, a0, 16
; CHECK-RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: li a1, 24
-; CHECK-RV32-NEXT: mul a0, a0, a1
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
+; CHECK-RV32-NEXT: vrgather.vv v16, v24, v8, v0.t
+; CHECK-RV32-NEXT: addi a0, sp, 16
; CHECK-RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-RV32-NEXT: vrgather.vv v8, v16, v24, v0.t
-; CHECK-RV32-NEXT: vmv.v.v v16, v8
; CHECK-RV32-NEXT: csrr a0, vlenb
; CHECK-RV32-NEXT: slli a0, a0, 5
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: li a1, 40
-; CHECK-RV32-NEXT: mul a0, a0, a1
; CHECK-RV32-NEXT: add sp, sp, a0
; CHECK-RV32-NEXT: .cfi_def_cfa sp, 16
; CHECK-RV32-NEXT: addi sp, sp, 16
@@ -774,12 +740,11 @@ define <128 x i16> @test_expandload_v128i16(ptr %base, <128 x i1> %mask, <128 x
; CHECK-RV64-NEXT: addi sp, sp, -16
; CHECK-RV64-NEXT: .cfi_def_cfa_offset 16
; CHECK-RV64-NEXT: csrr a1, vlenb
-; CHECK-RV64-NEXT: li a2, 40
-; CHECK-RV64-NEXT: mul a1, a1, a2
+; CHECK-RV64-NEXT: slli a1, a1, 5
; CHECK-RV64-NEXT: sub sp, sp, a1
-; CHECK-RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb
+; CHECK-RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
; CHECK-RV64-NEXT: csrr a1, vlenb
-; CHECK-RV64-NEXT: slli a1, a1, 5
+; CHECK-RV64-NEXT: slli a1, a1, 4
; CHECK-RV64-NEXT: add a1, sp, a1
; CHECK-RV64-NEXT: addi a1, a1, 16
; CHECK-RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
@@ -787,13 +752,13 @@ define <128 x i16> @test_expandload_v128i16(ptr %base, <128 x i1> %mask, <128 x
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; CHECK-RV64-NEXT: vcpop.m a2, v0
; CHECK-RV64-NEXT: vsetvli zero, a2, e16, m8, ta, ma
-; CHECK-RV64-NEXT: vle16.v v16, (a0)
+; CHECK-RV64-NEXT: vle16.v v24, (a0)
; CHECK-RV64-NEXT: csrr a2, vlenb
; CHECK-RV64-NEXT: li a3, 24
; CHECK-RV64-NEXT: mul a2, a2, a3
; CHECK-RV64-NEXT: add a2, sp, a2
; CHECK-RV64-NEXT: addi a2, a2, 16
-; CHECK-RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
+; CHECK-RV64-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
; CHECK-RV64-NEXT: vsetivli zero, 8, e8, m1, ta, ma
; CHECK-RV64-NEXT: vslidedown.vi v7, v0, 8
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, ta, ma
@@ -806,27 +771,20 @@ define <128 x i16> @test_expandload_v128i16(ptr %base, <128 x i1> %mask, <128 x
; CHECK-RV64-NEXT: vsetvli zero, a2, e16, m8, ta, ma
; CHECK-RV64-NEXT: vle16.v v16, (a0)
; CHECK-RV64-NEXT: csrr a0, vlenb
-; CHECK-RV64-NEXT: slli a0, a0, 4
+; CHECK-RV64-NEXT: slli a0, a0, 3
; CHECK-RV64-NEXT: add a0, sp, a0
; CHECK-RV64-NEXT: addi a0, a0, 16
; CHECK-RV64-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; CHECK-RV64-NEXT: vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-RV64-NEXT: viota.m v16, v0
-; CHECK-RV64-NEXT: addi a0, sp, 16
-; CHECK-RV64-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; CHECK-RV64-NEXT: viota.m v24, v0
; CHECK-RV64-NEXT: csrr a0, vlenb
; CHECK-RV64-NEXT: li a1, 24
; CHECK-RV64-NEXT: mul a0, a0, a1
; CHECK-RV64-NEXT: add a0, sp, a0
; CHECK-RV64-NEXT: addi a0, a0, 16
; CHECK-RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-RV64-NEXT: addi a0, sp, 16
-; CHECK-RV64-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; CHECK-RV64-NEXT: vrgather.vv v8, v16, v24, v0.t
-; CHECK-RV64-NEXT: csrr a0, vlenb
-; CHECK-RV64-NEXT: slli a0, a0, 3
-; CHECK-RV64-NEXT: add a0, sp, a0
-; CHECK-RV64-NEXT: addi a0, a0, 16
+; CHECK-RV64-NEXT: addi a0, sp, 16
; CHECK-RV64-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-RV64-NEXT: viota.m v16, v7
; CHECK-RV64-NEXT: csrr a0, vlenb
@@ -837,12 +795,12 @@ define <128 x i16> @test_expandload_v128i16(ptr %base, <128 x i1> %mask, <128 x
; CHECK-RV64-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; CHECK-RV64-NEXT: vmv1r.v v0, v7
; CHECK-RV64-NEXT: csrr a0, vlenb
-; CHECK-RV64-NEXT: slli a0, a0, 5
+; CHECK-RV64-NEXT: slli a0, a0, 4
; CHECK-RV64-NEXT: add a0, sp, a0
; CHECK-RV64-NEXT: addi a0, a0, 16
; CHECK-RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-RV64-NEXT: csrr a0, vlenb
-; CHECK-RV64-NEXT: slli a0, a0, 4
+; CHECK-RV64-NEXT: slli a0, a0, 3
; CHECK-RV64-NEXT: add a0, sp, a0
; CHECK-RV64-NEXT: addi a0, a0, 16
; CHECK-RV64-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
@@ -853,14 +811,10 @@ define <128 x i16> @test_expandload_v128i16(ptr %base, <128 x i1> %mask, <128 x
; CHECK-RV64-NEXT: addi a0, a0, 16
; CHECK-RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-RV64-NEXT: vrgather.vv v16, v24, v8, v0.t
-; CHECK-RV64-NEXT: csrr a0, vlenb
-; CHECK-RV64-NEXT: slli a0, a0, 3
-; CHECK-RV64-NEXT: add a0, sp, a0
-; CHECK-RV64-NEXT: addi a0, a0, 16
+; CHECK-RV64-NEXT: addi a0, sp, 16
; CHECK-RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-RV64-NEXT: csrr a0, vlenb
-; CHECK-RV64-NEXT: li a1, 40
-; CHECK-RV64-NEXT: mul a0, a0, a1
+; CHECK-RV64-NEXT: slli a0, a0, 5
; CHECK-RV64-NEXT: add sp, sp, a0
; CHECK-RV64-NEXT: .cfi_def_cfa sp, 16
; CHECK-RV64-NEXT: addi sp, sp, 16
@@ -1069,12 +1023,11 @@ define <64 x i32> @test_expandload_v64i32(ptr %base, <64 x i1> %mask, <64 x i32>
; CHECK-RV32-NEXT: addi sp, sp, -16
; CHECK-RV32-NEXT: .cfi_def_cfa_offset 16
; CHECK-RV32-NEXT: csrr a1, vlenb
-; CHECK-RV32-NEXT: li a2, 40
-; CHECK-RV32-NEXT: mul a1, a1, a2
+; CHECK-RV32-NEXT: slli a1, a1, 5
; CHECK-RV32-NEXT: sub sp, sp, a1
-; CHECK-RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb
+; CHECK-RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
; CHECK-RV32-NEXT: csrr a1, vlenb
-; CHECK-RV32-NEXT: slli a1, a1, 5
+; CHECK-RV32-NEXT: slli a1, a1, 4
; CHECK-RV32-NEXT: add a1, sp, a1
; CHECK-RV32-NEXT: addi a1, a1, 16
; CHECK-RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
@@ -1082,13 +1035,13 @@ define <64 x i32> @test_expandload_v64i32(ptr %base, <64 x i1> %mask, <64 x i32>
; CHECK-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; CHECK-RV32-NEXT: vcpop.m a2, v0
; CHECK-RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
-; CHECK-RV32-NEXT: vle32.v v16, (a0)
+; CHECK-RV32-NEXT: vle32.v v24, (a0)
; CHECK-RV32-NEXT: csrr a2, vlenb
; CHECK-RV32-NEXT: li a3, 24
; CHECK-RV32-NEXT: mul a2, a2, a3
; CHECK-RV32-NEXT: add a2, sp, a2
; CHECK-RV32-NEXT: addi a2, a2, 16
-; CHECK-RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
+; CHECK-RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
; CHECK-RV32-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; CHECK-RV32-NEXT: vslidedown.vi v7, v0, 4
; CHECK-RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
@@ -1100,27 +1053,20 @@ define <64 x i32> @test_expandload_v64i32(ptr %base, <64 x i1> %mask, <64 x i32>
; CHECK-RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; CHECK-RV32-NEXT: vle32.v v16, (a0)
; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a0, a0, 4
+; CHECK-RV32-NEXT: slli a0, a0, 3
; CHECK-RV32-NEXT: add a0, sp, a0
; CHECK-RV32-NEXT: addi a0, a0, 16
; CHECK-RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; CHECK-RV32-NEXT: vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-RV32-NEXT: viota.m v16, v0
-; CHECK-RV32-NEXT: addi a0, sp, 16
-; CHECK-RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; CHECK-RV32-NEXT: viota.m v24, v0
; CHECK-RV32-NEXT: csrr a0, vlenb
; CHECK-RV32-NEXT: li a1, 24
; CHECK-RV32-NEXT: mul a0, a0, a1
; CHECK-RV32-NEXT: add a0, sp, a0
; CHECK-RV32-NEXT: addi a0, a0, 16
; CHECK-RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-RV32-NEXT: addi a0, sp, 16
-; CHECK-RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; CHECK-RV32-NEXT: vrgather.vv v8, v16, v24, v0.t
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a0, a0, 3
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
+; CHECK-RV32-NEXT: addi a0, sp, 16
; CHECK-RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-RV32-NEXT: viota.m v16, v7
; CHECK-RV32-NEXT: csrr a0, vlenb
@@ -1131,12 +1077,12 @@ define <64 x i32> @test_expandload_v64i32(ptr %base, <64 x i1> %mask, <64 x i32>
; CHECK-RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; CHECK-RV32-NEXT: vmv1r.v v0, v7
; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a0, a0, 5
+; CHECK-RV32-NEXT: slli a0, a0, 4
; CHECK-RV32-NEXT: add a0, sp, a0
; CHECK-RV32-NEXT: addi a0, a0, 16
; CHECK-RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a0, a0, 4
+; CHECK-RV32-NEXT: slli a0, a0, 3
; CHECK-RV32-NEXT: add a0, sp, a0
; CHECK-RV32-NEXT: addi a0, a0, 16
; CHECK-RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
@@ -1147,14 +1093,10 @@ define <64 x i32> @test_expandload_v64i32(ptr %base, <64 x i1> %mask, <64 x i32>
; CHECK-RV32-NEXT: addi a0, a0, 16
; CHECK-RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-RV32-NEXT: vrgather.vv v16, v24, v8, v0.t
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a0, a0, 3
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
+; CHECK-RV32-NEXT: addi a0, sp, 16
; CHECK-RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: li a1, 40
-; CHECK-RV32-NEXT: mul a0, a0, a1
+; CHECK-RV32-NEXT: slli a0, a0, 5
; CHECK-RV32-NEXT: add sp, sp, a0
; CHECK-RV32-NEXT: .cfi_def_cfa sp, 16
; CHECK-RV32-NEXT: addi sp, sp, 16
@@ -1166,12 +1108,11 @@ define <64 x i32> @test_expandload_v64i32(ptr %base, <64 x i1> %mask, <64 x i32>
; CHECK-RV64-NEXT: addi sp, sp, -16
; CHECK-RV64-NEXT: .cfi_def_cfa_offset 16
; CHECK-RV64-NEXT: csrr a1, vlenb
-; CHECK-RV64-NEXT: li a2, 40
-; CHECK-RV64-NEXT: mul a1, a1, a2
+; CHECK-RV64-NEXT: slli a1, a1, 5
; CHECK-RV64-NEXT: sub sp, sp, a1
-; CHECK-RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb
+; CHECK-RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
; CHECK-RV64-NEXT: csrr a1, vlenb
-; CHECK-RV64-NEXT: slli a1, a1, 5
+; CHECK-RV64-NEXT: slli a1, a1, 4
; CHECK-RV64-NEXT: add a1, sp, a1
; CHECK-RV64-NEXT: addi a1, a1, 16
; CHECK-RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
@@ -1179,13 +1120,13 @@ define <64 x i32> @test_expandload_v64i32(ptr %base, <64 x i1> %mask, <64 x i32>
; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; CHECK-RV64-NEXT: vcpop.m a2, v0
; CHECK-RV64-NEXT: vsetvli zero, a2, e32, m8, ta, ma
-; CHECK-RV64-NEXT: vle32.v v16, (a0)
+; CHECK-RV64-NEXT: vle32.v v24, (a0)
; CHECK-RV64-NEXT: csrr a2, vlenb
; CHECK-RV64-NEXT: li a3, 24
; CHECK-RV64-NEXT: mul a2, a2, a3
; CHECK-RV64-NEXT: add a2, sp, a2
; CHECK-RV64-NEXT: addi a2, a2, 16
-; CHECK-RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
+; CHECK-RV64-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
; CHECK-RV64-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; CHECK-RV64-NEXT: vslidedown.vi v7, v0, 4
; CHECK-RV64-NEXT: vsetvli zero, a1, e32, m8, ta, ma
@@ -1197,27 +1138,20 @@ define <64 x i32> @test_expandload_v64i32(ptr %base, <64 x i1> %mask, <64 x i32>
; CHECK-RV64-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; CHECK-RV64-NEXT: vle32.v v16, (a0)
; CHECK-RV64-NEXT: csrr a0, vlenb
-; CHECK-RV64-NEXT: slli a0, a0, 4
+; CHECK-RV64-NEXT: slli a0, a0, 3
; CHECK-RV64-NEXT: add a0, sp, a0
; CHECK-RV64-NEXT: addi a0, a0, 16
; CHECK-RV64-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; CHECK-RV64-NEXT: vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-RV64-NEXT: viota.m v16, v0
-; CHECK-RV64-NEXT: addi a0, sp, 16
-; CHECK-RV64-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; CHECK-RV64-NEXT: viota.m v24, v0
; CHECK-RV64-NEXT: csrr a0, vlenb
; CHECK-RV64-NEXT: li a1, 24
; CHECK-RV64-NEXT: mul a0, a0, a1
; CHECK-RV64-NEXT: add a0, sp, a0
; CHECK-RV64-NEXT: addi a0, a0, 16
; CHECK-RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-RV64-NEXT: addi a0, sp, 16
-; CHECK-RV64-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; CHECK-RV64-NEXT: vrgather.vv v8, v16, v24, v0.t
-; CHECK-RV64-NEXT: csrr a0, vlenb
-; CHECK-RV64-NEXT: slli a0, a0, 3
-; CHECK-RV64-NEXT: add a0, sp, a0
-; CHECK-RV64-NEXT: addi a0, a0, 16
+; CHECK-RV64-NEXT: addi a0, sp, 16
; CHECK-RV64-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-RV64-NEXT: viota.m v16, v7
; CHECK-RV64-NEXT: csrr a0, vlenb
@@ -1228,12 +1162,12 @@ define <64 x i32> @test_expandload_v64i32(ptr %base, <64 x i1> %mask, <64 x i32>
; CHECK-RV64-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; CHECK-RV64-NEXT: vmv1r.v v0, v7
; CHECK-RV64-NEXT: csrr a0, vlenb
-; CHECK-RV64-NEXT: slli a0, a0, 5
+; CHECK-RV64-NEXT: slli a0, a0, 4
; CHECK-RV64-NEXT: add a0, sp, a0
; CHECK-RV64-NEXT: addi a0, a0, 16
; CHECK-RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-RV64-NEXT: csrr a0, vlenb
-; CHECK-RV64-NEXT: slli a0, a0, 4
+; CHECK-RV64-NEXT: slli a0, a0, 3
; CHECK-RV64-NEXT: add a0, sp, a0
; CHECK-RV64-NEXT: addi a0, a0, 16
; CHECK-RV64-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
@@ -1244,14 +1178,10 @@ define <64 x i32> @test_expandload_v64i32(ptr %base, <64 x i1> %mask, <64 x i32>
; CHECK-RV64-NEXT: addi a0, a0, 16
; CHECK-RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-RV64-NEXT: vrgather.vv v16, v24, v8, v0.t
-; CHECK-RV64-NEXT: csrr a0, vlenb
-; CHECK-RV64-NEXT: slli a0, a0, 3
-; CHECK-RV64-NEXT: add a0, sp, a0
-; CHECK-RV64-NEXT: addi a0, a0, 16
+; CHECK-RV64-NEXT: addi a0, sp, 16
; CHECK-RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-RV64-NEXT: csrr a0, vlenb
-; CHECK-RV64-NEXT: li a1, 40
-; CHECK-RV64-NEXT: mul a0, a0, a1
+; CHECK-RV64-NEXT: slli a0, a0, 5
; CHECK-RV64-NEXT: add sp, sp, a0
; CHECK-RV64-NEXT: .cfi_def_cfa sp, 16
; CHECK-RV64-NEXT: addi sp, sp, 16
@@ -1420,25 +1350,24 @@ define <32 x i64> @test_expandload_v32i64(ptr %base, <32 x i1> %mask, <32 x i64>
; CHECK-RV32-NEXT: addi sp, sp, -16
; CHECK-RV32-NEXT: .cfi_def_cfa_offset 16
; CHECK-RV32-NEXT: csrr a1, vlenb
-; CHECK-RV32-NEXT: li a2, 40
-; CHECK-RV32-NEXT: mul a1, a1, a2
+; CHECK-RV32-NEXT: slli a1, a1, 5
; CHECK-RV32-NEXT: sub sp, sp, a1
-; CHECK-RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb
+; CHECK-RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
; CHECK-RV32-NEXT: csrr a1, vlenb
-; CHECK-RV32-NEXT: slli a1, a1, 5
+; CHECK-RV32-NEXT: slli a1, a1, 4
; CHECK-RV32-NEXT: add a1, sp, a1
; CHECK-RV32-NEXT: addi a1, a1, 16
; CHECK-RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-RV32-NEXT: vcpop.m a1, v0
; CHECK-RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-RV32-NEXT: vle64.v v16, (a0)
+; CHECK-RV32-NEXT: vle64.v v24, (a0)
; CHECK-RV32-NEXT: csrr a1, vlenb
; CHECK-RV32-NEXT: li a2, 24
; CHECK-RV32-NEXT: mul a1, a1, a2
; CHECK-RV32-NEXT: add a1, sp, a1
; CHECK-RV32-NEXT: addi a1, a1, 16
-; CHECK-RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; CHECK-RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
; CHECK-RV32-NEXT: vmv.x.s a1, v0
; CHECK-RV32-NEXT: zext.h a1, a1
; CHECK-RV32-NEXT: cpop a1, a1
@@ -1451,27 +1380,20 @@ define <32 x i64> @test_expandload_v32i64(ptr %base, <32 x i1> %mask, <32 x i64>
; CHECK-RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-RV32-NEXT: vle64.v v16, (a0)
; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a0, a0, 4
+; CHECK-RV32-NEXT: slli a0, a0, 3
; CHECK-RV32-NEXT: add a0, sp, a0
; CHECK-RV32-NEXT: addi a0, a0, 16
; CHECK-RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; CHECK-RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu
-; CHECK-RV32-NEXT: viota.m v16, v0
-; CHECK-RV32-NEXT: addi a0, sp, 16
-; CHECK-RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; CHECK-RV32-NEXT: viota.m v24, v0
; CHECK-RV32-NEXT: csrr a0, vlenb
; CHECK-RV32-NEXT: li a1, 24
; CHECK-RV32-NEXT: mul a0, a0, a1
; CHECK-RV32-NEXT: add a0, sp, a0
; CHECK-RV32-NEXT: addi a0, a0, 16
; CHECK-RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-RV32-NEXT: addi a0, sp, 16
-; CHECK-RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; CHECK-RV32-NEXT: vrgather.vv v8, v16, v24, v0.t
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a0, a0, 3
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
+; CHECK-RV32-NEXT: addi a0, sp, 16
; CHECK-RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-RV32-NEXT: viota.m v16, v7
; CHECK-RV32-NEXT: csrr a0, vlenb
@@ -1482,12 +1404,12 @@ define <32 x i64> @test_expandload_v32i64(ptr %base, <32 x i1> %mask, <32 x i64>
; CHECK-RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; CHECK-RV32-NEXT: vmv1r.v v0, v7
; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a0, a0, 5
+; CHECK-RV32-NEXT: slli a0, a0, 4
; CHECK-RV32-NEXT: add a0, sp, a0
; CHECK-RV32-NEXT: addi a0, a0, 16
; CHECK-RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a0, a0, 4
+; CHECK-RV32-NEXT: slli a0, a0, 3
; CHECK-RV32-NEXT: add a0, sp, a0
; CHECK-RV32-NEXT: addi a0, a0, 16
; CHECK-RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
@@ -1498,14 +1420,10 @@ define <32 x i64> @test_expandload_v32i64(ptr %base, <32 x i1> %mask, <32 x i64>
; CHECK-RV32-NEXT: addi a0, a0, 16
; CHECK-RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-RV32-NEXT: vrgather.vv v16, v24, v8, v0.t
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a0, a0, 3
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
+; CHECK-RV32-NEXT: addi a0, sp, 16
; CHECK-RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: li a1, 40
-; CHECK-RV32-NEXT: mul a0, a0, a1
+; CHECK-RV32-NEXT: slli a0, a0, 5
; CHECK-RV32-NEXT: add sp, sp, a0
; CHECK-RV32-NEXT: .cfi_def_cfa sp, 16
; CHECK-RV32-NEXT: addi sp, sp, 16
@@ -1517,25 +1435,24 @@ define <32 x i64> @test_expandload_v32i64(ptr %base, <32 x i1> %mask, <32 x i64>
; CHECK-RV64-NEXT: addi sp, sp, -16
; CHECK-RV64-NEXT: .cfi_def_cfa_offset 16
; CHECK-RV64-NEXT: csrr a1, vlenb
-; CHECK-RV64-NEXT: li a2, 40
-; CHECK-RV64-NEXT: mul a1, a1, a2
+; CHECK-RV64-NEXT: slli a1, a1, 5
; CHECK-RV64-NEXT: sub sp, sp, a1
-; CHECK-RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb
+; CHECK-RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
; CHECK-RV64-NEXT: csrr a1, vlenb
-; CHECK-RV64-NEXT: slli a1, a1, 5
+; CHECK-RV64-NEXT: slli a1, a1, 4
; CHECK-RV64-NEXT: add a1, sp, a1
; CHECK-RV64-NEXT: addi a1, a1, 16
; CHECK-RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-RV64-NEXT: vcpop.m a1, v0
; CHECK-RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-RV64-NEXT: vle64.v v16, (a0)
+; CHECK-RV64-NEXT: vle64.v v24, (a0)
; CHECK-RV64-NEXT: csrr a1, vlenb
; CHECK-RV64-NEXT: li a2, 24
; CHECK-RV64-NEXT: mul a1, a1, a2
; CHECK-RV64-NEXT: add a1, sp, a1
; CHECK-RV64-NEXT: addi a1, a1, 16
-; CHECK-RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; CHECK-RV64-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
; CHECK-RV64-NEXT: vmv.x.s a1, v0
; CHECK-RV64-NEXT: zext.h a1, a1
; CHECK-RV64-NEXT: cpopw a1, a1
@@ -1548,27 +1465,20 @@ define <32 x i64> @test_expandload_v32i64(ptr %base, <32 x i1> %mask, <32 x i64>
; CHECK-RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-RV64-NEXT: vle64.v v16, (a0)
; CHECK-RV64-NEXT: csrr a0, vlenb
-; CHECK-RV64-NEXT: slli a0, a0, 4
+; CHECK-RV64-NEXT: slli a0, a0, 3
; CHECK-RV64-NEXT: add a0, sp, a0
; CHECK-RV64-NEXT: addi a0, a0, 16
; CHECK-RV64-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; CHECK-RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu
-; CHECK-RV64-NEXT: viota.m v16, v0
-; CHECK-RV64-NEXT: addi a0, sp, 16
-; CHECK-RV64-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; CHECK-RV64-NEXT: viota.m v24, v0
; CHECK-RV64-NEXT: csrr a0, vlenb
; CHECK-RV64-NEXT: li a1, 24
; CHECK-RV64-NEXT: mul a0, a0, a1
; CHECK-RV64-NEXT: add a0, sp, a0
; CHECK-RV64-NEXT: addi a0, a0, 16
; CHECK-RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-RV64-NEXT: addi a0, sp, 16
-; CHECK-RV64-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; CHECK-RV64-NEXT: vrgather.vv v8, v16, v24, v0.t
-; CHECK-RV64-NEXT: csrr a0, vlenb
-; CHECK-RV64-NEXT: slli a0, a0, 3
-; CHECK-RV64-NEXT: add a0, sp, a0
-; CHECK-RV64-NEXT: addi a0, a0, 16
+; CHECK-RV64-NEXT: addi a0, sp, 16
; CHECK-RV64-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-RV64-NEXT: viota.m v16, v7
; CHECK-RV64-NEXT: csrr a0, vlenb
@@ -1579,12 +1489,12 @@ define <32 x i64> @test_expandload_v32i64(ptr %base, <32 x i1> %mask, <32 x i64>
; CHECK-RV64-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; CHECK-RV64-NEXT: vmv1r.v v0, v7
; CHECK-RV64-NEXT: csrr a0, vlenb
-; CHECK-RV64-NEXT: slli a0, a0, 5
+; CHECK-RV64-NEXT: slli a0, a0, 4
; CHECK-RV64-NEXT: add a0, sp, a0
; CHECK-RV64-NEXT: addi a0, a0, 16
; CHECK-RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-RV64-NEXT: csrr a0, vlenb
-; CHECK-RV64-NEXT: slli a0, a0, 4
+; CHECK-RV64-NEXT: slli a0, a0, 3
; CHECK-RV64-NEXT: add a0, sp, a0
; CHECK-RV64-NEXT: addi a0, a0, 16
; CHECK-RV64-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
@@ -1595,14 +1505,10 @@ define <32 x i64> @test_expandload_v32i64(ptr %base, <32 x i1> %mask, <32 x i64>
; CHECK-RV64-NEXT: addi a0, a0, 16
; CHECK-RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-RV64-NEXT: vrgather.vv v16, v24, v8, v0.t
-; CHECK-RV64-NEXT: csrr a0, vlenb
-; CHECK-RV64-NEXT: slli a0, a0, 3
-; CHECK-RV64-NEXT: add a0, sp, a0
-; CHECK-RV64-NEXT: addi a0, a0, 16
+; CHECK-RV64-NEXT: addi a0, sp, 16
; CHECK-RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-RV64-NEXT: csrr a0, vlenb
-; CHECK-RV64-NEXT: li a1, 40
-; CHECK-RV64-NEXT: mul a0, a0, a1
+; CHECK-RV64-NEXT: slli a0, a0, 5
; CHECK-RV64-NEXT: add sp, sp, a0
; CHECK-RV64-NEXT: .cfi_def_cfa sp, 16
; CHECK-RV64-NEXT: addi sp, sp, 16
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll
index 29f437829f3be0..087e55f904e8f9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll
@@ -261,9 +261,9 @@ declare <16 x half> @llvm.vp.ceil.v16f16(<16 x half>, <16 x i1>, i32)
define <16 x half> @vp_ceil_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_ceil_v16f16:
; ZVFH: # %bb.0:
+; ZVFH-NEXT: vmv1r.v v10, v0
; ZVFH-NEXT: lui a1, %hi(.LCPI6_0)
; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a1)
-; ZVFH-NEXT: vmv1r.v v10, v0
; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT: vfabs.v v12, v8, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu
@@ -561,9 +561,9 @@ declare <4 x double> @llvm.vp.ceil.v4f64(<4 x double>, <4 x i1>, i32)
define <4 x double> @vp_ceil_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_v4f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: lui a1, %hi(.LCPI18_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a1)
-; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
@@ -605,9 +605,9 @@ declare <8 x double> @llvm.vp.ceil.v8f64(<8 x double>, <8 x i1>, i32)
define <8 x double> @vp_ceil_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_v8f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: lui a1, %hi(.LCPI20_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a1)
-; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
@@ -649,9 +649,9 @@ declare <15 x double> @llvm.vp.ceil.v15f64(<15 x double>, <15 x i1>, i32)
define <15 x double> @vp_ceil_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_v15f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI22_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a1)
-; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
@@ -693,9 +693,9 @@ declare <16 x double> @llvm.vp.ceil.v16f64(<16 x double>, <16 x i1>, i32)
define <16 x double> @vp_ceil_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_v16f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI24_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1)
-; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
@@ -752,9 +752,9 @@ define <32 x double> @vp_ceil_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroex
; CHECK-NEXT: slli a2, a2, 3
; CHECK-NEXT: sub sp, sp, a2
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: vmv1r.v v0, v6
; CHECK-NEXT: lui a2, %hi(.LCPI26_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a2)
-; CHECK-NEXT: vmv1r.v v0, v6
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll
index a8ae3389fb2a56..35a1822337f4df 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll
@@ -1634,13 +1634,13 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev
; RV32-NEXT: slli a2, a2, 5
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 48
-; RV32-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
+; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: slli a2, a2, 3
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 48
-; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
-; RV32-NEXT: vand.vv v24, v8, v24, v0.t
+; RV32-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
+; RV32-NEXT: vand.vv v24, v24, v8, v0.t
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: li a3, 40
; RV32-NEXT: mul a2, a2, a3
@@ -1649,15 +1649,32 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev
; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
; RV32-NEXT: vsub.vv v8, v8, v24, v0.t
; RV32-NEXT: vand.vv v24, v8, v16, v0.t
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: li a3, 40
+; RV32-NEXT: mul a2, a2, a3
+; RV32-NEXT: add a2, sp, a2
+; RV32-NEXT: addi a2, a2, 48
+; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
; RV32-NEXT: vand.vv v8, v8, v16, v0.t
-; RV32-NEXT: vadd.vv v16, v24, v8, v0.t
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: li a3, 40
+; RV32-NEXT: mul a2, a2, a3
+; RV32-NEXT: add a2, sp, a2
+; RV32-NEXT: addi a2, a2, 48
+; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload
+; RV32-NEXT: vadd.vv v8, v16, v8, v0.t
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: slli a2, a2, 3
+; RV32-NEXT: add a2, sp, a2
+; RV32-NEXT: addi a2, a2, 48
+; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; RV32-NEXT: addi a2, sp, 24
; RV32-NEXT: addi a3, sp, 16
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v24, (a2), zero
+; RV32-NEXT: vlse64.v v16, (a2), zero
; RV32-NEXT: addi a2, sp, 48
-; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
+; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; RV32-NEXT: vlse64.v v8, (a3), zero
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: li a3, 40
@@ -1665,11 +1682,15 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 48
; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
-; RV32-NEXT: vmv8r.v v8, v16
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: slli a2, a2, 3
+; RV32-NEXT: add a2, sp, a2
+; RV32-NEXT: addi a2, a2, 48
+; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
-; RV32-NEXT: vsrl.vi v16, v16, 4, v0.t
-; RV32-NEXT: vadd.vv v16, v8, v16, v0.t
-; RV32-NEXT: vand.vv v16, v16, v24, v0.t
+; RV32-NEXT: vsrl.vi v24, v8, 4, v0.t
+; RV32-NEXT: vadd.vv v8, v8, v24, v0.t
+; RV32-NEXT: vand.vv v16, v8, v16, v0.t
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: li a2, 40
; RV32-NEXT: mul a1, a1, a2
@@ -1696,23 +1717,23 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev
; RV32-NEXT: addi a2, a2, 48
; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t
+; RV32-NEXT: vsrl.vi v24, v16, 1, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 5
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vand.vv v8, v8, v24, v0.t
+; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vand.vv v8, v24, v8, v0.t
; RV32-NEXT: vsub.vv v8, v16, v8, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vand.vv v16, v8, v24, v0.t
+; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vand.vv v24, v8, v16, v0.t
; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: vand.vv v8, v8, v24, v0.t
-; RV32-NEXT: vadd.vv v8, v16, v8, v0.t
+; RV32-NEXT: vand.vv v8, v8, v16, v0.t
+; RV32-NEXT: vadd.vv v8, v24, v8, v0.t
; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
; RV32-NEXT: addi a0, sp, 48
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll
index f86fc509983534..49200fb7fe7faf 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll
@@ -261,9 +261,9 @@ declare <16 x half> @llvm.vp.floor.v16f16(<16 x half>, <16 x i1>, i32)
define <16 x half> @vp_floor_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_floor_v16f16:
; ZVFH: # %bb.0:
+; ZVFH-NEXT: vmv1r.v v10, v0
; ZVFH-NEXT: lui a1, %hi(.LCPI6_0)
; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a1)
-; ZVFH-NEXT: vmv1r.v v10, v0
; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT: vfabs.v v12, v8, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu
@@ -561,9 +561,9 @@ declare <4 x double> @llvm.vp.floor.v4f64(<4 x double>, <4 x i1>, i32)
define <4 x double> @vp_floor_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_v4f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: lui a1, %hi(.LCPI18_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a1)
-; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
@@ -605,9 +605,9 @@ declare <8 x double> @llvm.vp.floor.v8f64(<8 x double>, <8 x i1>, i32)
define <8 x double> @vp_floor_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_v8f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: lui a1, %hi(.LCPI20_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a1)
-; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
@@ -649,9 +649,9 @@ declare <15 x double> @llvm.vp.floor.v15f64(<15 x double>, <15 x i1>, i32)
define <15 x double> @vp_floor_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_v15f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI22_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a1)
-; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
@@ -693,9 +693,9 @@ declare <16 x double> @llvm.vp.floor.v16f64(<16 x double>, <16 x i1>, i32)
define <16 x double> @vp_floor_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_v16f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI24_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1)
-; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
@@ -752,9 +752,9 @@ define <32 x double> @vp_floor_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroe
; CHECK-NEXT: slli a2, a2, 3
; CHECK-NEXT: sub sp, sp, a2
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: vmv1r.v v0, v6
; CHECK-NEXT: lui a2, %hi(.LCPI26_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a2)
-; CHECK-NEXT: vmv1r.v v0, v6
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll
index cefb246f3821a3..77d70647da1bee 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll
@@ -610,56 +610,72 @@ define <32 x double> @vfmax_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32
; CHECK-NEXT: slli a1, a1, 5
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
-; CHECK-NEXT: addi a1, a0, 128
-; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT: vle64.v v24, (a1)
+; CHECK-NEXT: vmv1r.v v25, v0
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: li a3, 24
; CHECK-NEXT: mul a1, a1, a3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: vle64.v v24, (a0)
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT: vmv1r.v v6, v0
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, a0, 128
+; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; CHECK-NEXT: vle64.v v16, (a1)
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vle64.v v16, (a0)
; CHECK-NEXT: li a1, 16
+; CHECK-NEXT: mv a0, a2
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vi v7, v0, 2
-; CHECK-NEXT: mv a0, a2
; CHECK-NEXT: bltu a2, a1, .LBB24_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a0, 16
; CHECK-NEXT: .LBB24_2:
-; CHECK-NEXT: vmv1r.v v0, v6
+; CHECK-NEXT: vmv1r.v v0, v25
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 24
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vmfeq.vv v26, v8, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v26
+; CHECK-NEXT: vmv8r.v v8, v16
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: li a1, 24
+; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0
+; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0
; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT: vmv1r.v v0, v6
-; CHECK-NEXT: vmfeq.vv v26, v16, v16, v0.t
+; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vmv1r.v v0, v25
+; CHECK-NEXT: vmfeq.vv v26, v8, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v26
-; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0
-; CHECK-NEXT: vmv1r.v v0, v6
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 24
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0
+; CHECK-NEXT: vmv1r.v v0, v25
+; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vfmax.vv v8, v8, v16, v0.t
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: li a1, 24
+; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
@@ -669,7 +685,7 @@ define <32 x double> @vfmax_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32
; CHECK-NEXT: and a0, a1, a0
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
@@ -677,8 +693,7 @@ define <32 x double> @vfmax_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32
; CHECK-NEXT: vmfeq.vv v25, v16, v16, v0.t
; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a1, 24
-; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
@@ -688,12 +703,13 @@ define <32 x double> @vfmax_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: vmfeq.vv v25, v8, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v25
-; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0
+; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0
; CHECK-NEXT: vmv1r.v v0, v7
-; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vfmax.vv v16, v16, v8, v0.t
+; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfmax.vv v16, v8, v16, v0.t
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: li a1, 24
+; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
@@ -718,18 +734,21 @@ define <32 x double> @vfmax_vv_v32f64_unmasked(<32 x double> %va, <32 x double>
; CHECK-NEXT: mul a1, a1, a3
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: addi a1, a0, 128
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT: vle64.v v24, (a1)
+; CHECK-NEXT: vle64.v v16, (a1)
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: vle64.v v24, (a0)
; CHECK-NEXT: li a1, 16
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: mv a0, a2
; CHECK-NEXT: bltu a2, a1, .LBB25_2
; CHECK-NEXT: # %bb.1:
@@ -738,36 +757,25 @@ define <32 x double> @vfmax_vv_v32f64_unmasked(<32 x double> %va, <32 x double>
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8
; CHECK-NEXT: vmfeq.vv v7, v24, v24
-; CHECK-NEXT: vmv8r.v v16, v24
-; CHECK-NEXT: vmerge.vvm v24, v8, v24, v0
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vmerge.vvm v16, v8, v24, v0
; CHECK-NEXT: vmv1r.v v0, v7
-; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vfmax.vv v8, v8, v24
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0
+; CHECK-NEXT: vfmax.vv v8, v8, v16
+; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: addi a0, a2, -16
; CHECK-NEXT: sltu a1, a2, a0
; CHECK-NEXT: addi a1, a1, -1
; CHECK-NEXT: and a0, a1, a0
-; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v16, v16
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
@@ -776,10 +784,7 @@ define <32 x double> @vfmax_vv_v32f64_unmasked(<32 x double> %va, <32 x double>
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0
; CHECK-NEXT: vfmax.vv v16, v16, v24
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: li a1, 24
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll
index 35bd8b7c17e6de..fc331f6c909cbf 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll
@@ -610,56 +610,72 @@ define <32 x double> @vfmin_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32
; CHECK-NEXT: slli a1, a1, 5
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
-; CHECK-NEXT: addi a1, a0, 128
-; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT: vle64.v v24, (a1)
+; CHECK-NEXT: vmv1r.v v25, v0
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: li a3, 24
; CHECK-NEXT: mul a1, a1, a3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: vle64.v v24, (a0)
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT: vmv1r.v v6, v0
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, a0, 128
+; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; CHECK-NEXT: vle64.v v16, (a1)
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vle64.v v16, (a0)
; CHECK-NEXT: li a1, 16
+; CHECK-NEXT: mv a0, a2
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vi v7, v0, 2
-; CHECK-NEXT: mv a0, a2
; CHECK-NEXT: bltu a2, a1, .LBB24_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a0, 16
; CHECK-NEXT: .LBB24_2:
-; CHECK-NEXT: vmv1r.v v0, v6
+; CHECK-NEXT: vmv1r.v v0, v25
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 24
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vmfeq.vv v26, v8, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v26
+; CHECK-NEXT: vmv8r.v v8, v16
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: li a1, 24
+; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0
+; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0
; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT: vmv1r.v v0, v6
-; CHECK-NEXT: vmfeq.vv v26, v16, v16, v0.t
+; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vmv1r.v v0, v25
+; CHECK-NEXT: vmfeq.vv v26, v8, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v26
-; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0
-; CHECK-NEXT: vmv1r.v v0, v6
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 24
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0
+; CHECK-NEXT: vmv1r.v v0, v25
+; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vfmin.vv v8, v8, v16, v0.t
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: li a1, 24
+; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
@@ -669,7 +685,7 @@ define <32 x double> @vfmin_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32
; CHECK-NEXT: and a0, a1, a0
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
@@ -677,8 +693,7 @@ define <32 x double> @vfmin_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32
; CHECK-NEXT: vmfeq.vv v25, v16, v16, v0.t
; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a1, 24
-; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
@@ -688,12 +703,13 @@ define <32 x double> @vfmin_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: vmfeq.vv v25, v8, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v25
-; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0
+; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0
; CHECK-NEXT: vmv1r.v v0, v7
-; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vfmin.vv v16, v16, v8, v0.t
+; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfmin.vv v16, v8, v16, v0.t
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: li a1, 24
+; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
@@ -718,18 +734,21 @@ define <32 x double> @vfmin_vv_v32f64_unmasked(<32 x double> %va, <32 x double>
; CHECK-NEXT: mul a1, a1, a3
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: addi a1, a0, 128
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT: vle64.v v24, (a1)
+; CHECK-NEXT: vle64.v v16, (a1)
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: vle64.v v24, (a0)
; CHECK-NEXT: li a1, 16
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: mv a0, a2
; CHECK-NEXT: bltu a2, a1, .LBB25_2
; CHECK-NEXT: # %bb.1:
@@ -738,36 +757,25 @@ define <32 x double> @vfmin_vv_v32f64_unmasked(<32 x double> %va, <32 x double>
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8
; CHECK-NEXT: vmfeq.vv v7, v24, v24
-; CHECK-NEXT: vmv8r.v v16, v24
-; CHECK-NEXT: vmerge.vvm v24, v8, v24, v0
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vmerge.vvm v16, v8, v24, v0
; CHECK-NEXT: vmv1r.v v0, v7
-; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vfmin.vv v8, v8, v24
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0
+; CHECK-NEXT: vfmin.vv v8, v8, v16
+; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: addi a0, a2, -16
; CHECK-NEXT: sltu a1, a2, a0
; CHECK-NEXT: addi a1, a1, -1
; CHECK-NEXT: and a0, a1, a0
-; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v16, v16
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
@@ -776,10 +784,7 @@ define <32 x double> @vfmin_vv_v32f64_unmasked(<32 x double> %va, <32 x double>
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0
; CHECK-NEXT: vfmin.vv v16, v16, v24
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: li a1, 24
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fshr-fshl-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fshr-fshl-vp.ll
index 86abfb771162fc..a68dc11f3d21e7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fshr-fshl-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fshr-fshl-vp.ll
@@ -674,10 +674,10 @@ define <16 x i64> @fshr_v16i64(<16 x i64> %a, <16 x i64> %b, <16 x i64> %c, <16
; CHECK-NEXT: slli a2, a2, 3
; CHECK-NEXT: sub sp, sp, a2
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vle64.v v24, (a0)
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: li a0, 63
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: vand.vx v8, v24, a0, v0.t
@@ -710,11 +710,11 @@ define <16 x i64> @fshl_v16i64(<16 x i64> %a, <16 x i64> %b, <16 x i64> %c, <16
; CHECK-NEXT: slli a2, a2, 3
; CHECK-NEXT: sub sp, sp, a2
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vmv8r.v v16, v8
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vle64.v v24, (a0)
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT: vmv8r.v v16, v8
; CHECK-NEXT: li a0, 63
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: vand.vx v8, v24, a0, v0.t
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
index 5581754b0721a5..d31579e45683b7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
@@ -283,14 +283,14 @@ define void @insert_v8i32_v2i32_0(ptr %vp, ptr %svp) {
define void @insert_v8i32_v2i32_2(ptr %vp, ptr %svp) {
; VLA-LABEL: insert_v8i32_v2i32_2:
; VLA: # %bb.0:
-; VLA-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; VLA-NEXT: vle32.v v8, (a0)
; VLA-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; VLA-NEXT: vle32.v v10, (a1)
+; VLA-NEXT: vle32.v v8, (a1)
+; VLA-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; VLA-NEXT: vle32.v v10, (a0)
; VLA-NEXT: vsetivli zero, 4, e32, m2, tu, ma
-; VLA-NEXT: vslideup.vi v8, v10, 2
+; VLA-NEXT: vslideup.vi v10, v8, 2
; VLA-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; VLA-NEXT: vse32.v v8, (a0)
+; VLA-NEXT: vse32.v v10, (a0)
; VLA-NEXT: ret
;
; VLS-LABEL: insert_v8i32_v2i32_2:
@@ -312,13 +312,12 @@ define void @insert_v8i32_v2i32_2(ptr %vp, ptr %svp) {
define void @insert_v8i32_v2i32_6(ptr %vp, ptr %svp) {
; VLA-LABEL: insert_v8i32_v2i32_6:
; VLA: # %bb.0:
-; VLA-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; VLA-NEXT: vle32.v v8, (a0)
; VLA-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; VLA-NEXT: vle32.v v10, (a1)
+; VLA-NEXT: vle32.v v8, (a1)
; VLA-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; VLA-NEXT: vslideup.vi v8, v10, 6
-; VLA-NEXT: vse32.v v8, (a0)
+; VLA-NEXT: vle32.v v10, (a0)
+; VLA-NEXT: vslideup.vi v10, v8, 6
+; VLA-NEXT: vse32.v v10, (a0)
; VLA-NEXT: ret
;
; VLS-LABEL: insert_v8i32_v2i32_6:
@@ -828,9 +827,9 @@ define void @insert_v2i64_nxv16i64_hi(ptr %psv, ptr %out) {
; RV32VLS-NEXT: vl1re64.v v8, (a0)
; RV32VLS-NEXT: addi a0, sp, 128
; RV32VLS-NEXT: vs1r.v v8, (a0)
+; RV32VLS-NEXT: addi a0, sp, 192
+; RV32VLS-NEXT: vl8re64.v v8, (a0)
; RV32VLS-NEXT: addi a0, sp, 64
-; RV32VLS-NEXT: addi a2, sp, 192
-; RV32VLS-NEXT: vl8re64.v v8, (a2)
; RV32VLS-NEXT: vl8re64.v v16, (a0)
; RV32VLS-NEXT: addi a0, a1, 128
; RV32VLS-NEXT: vs8r.v v8, (a0)
@@ -860,9 +859,9 @@ define void @insert_v2i64_nxv16i64_hi(ptr %psv, ptr %out) {
; RV64VLS-NEXT: vl1re64.v v8, (a0)
; RV64VLS-NEXT: addi a0, sp, 128
; RV64VLS-NEXT: vs1r.v v8, (a0)
+; RV64VLS-NEXT: addi a0, sp, 192
+; RV64VLS-NEXT: vl8re64.v v8, (a0)
; RV64VLS-NEXT: addi a0, sp, 64
-; RV64VLS-NEXT: addi a2, sp, 192
-; RV64VLS-NEXT: vl8re64.v v8, (a2)
; RV64VLS-NEXT: vl8re64.v v16, (a0)
; RV64VLS-NEXT: addi a0, a1, 128
; RV64VLS-NEXT: vs8r.v v8, (a0)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
index 49644fa4a9fb87..aaed16097d15c2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -1091,9 +1091,10 @@ define <512 x i8> @buildvec_not_vid_v512i8_indices_overflow_2() vscale_range(16,
define <8 x i32> @prefix_overwrite(<8 x i32> %vin, i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: prefix_overwrite:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma
-; CHECK-NEXT: vmv.s.x v8, a0
+; CHECK-NEXT: vsetivli zero, 8, e32, m1, tu, ma
; CHECK-NEXT: vmv.s.x v10, a1
+; CHECK-NEXT: vmv.s.x v8, a0
+; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; CHECK-NEXT: vslideup.vi v8, v10, 1
; CHECK-NEXT: vmv.s.x v10, a2
; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
index 1e77b3710928d2..2f73e3c7a2be6c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
@@ -157,18 +157,20 @@ define <8 x i64> @vrgather_permute_shuffle_uv_v8i64(<8 x i64> %x) {
define <8 x i64> @vrgather_shuffle_vv_v8i64(<8 x i64> %x, <8 x i64> %y) {
; RV32-LABEL: vrgather_shuffle_vv_v8i64:
; RV32: # %bb.0:
+; RV32-NEXT: lui a0, %hi(.LCPI11_0)
+; RV32-NEXT: addi a0, a0, %lo(.LCPI11_0)
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT: vmv.v.i v16, 2
-; RV32-NEXT: li a0, 5
-; RV32-NEXT: lui a1, %hi(.LCPI11_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI11_0)
-; RV32-NEXT: vle16.v v20, (a1)
-; RV32-NEXT: vslide1down.vx v21, v16, a0
+; RV32-NEXT: vle16.v v20, (a0)
+; RV32-NEXT: vmv.v.i v21, 2
+; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; RV32-NEXT: vrgatherei16.vv v16, v8, v20
; RV32-NEXT: li a0, 164
; RV32-NEXT: vmv.s.x v0, a0
+; RV32-NEXT: li a0, 5
+; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV32-NEXT: vslide1down.vx v8, v21, a0
; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu
-; RV32-NEXT: vrgatherei16.vv v16, v8, v20
-; RV32-NEXT: vrgatherei16.vv v16, v12, v21, v0.t
+; RV32-NEXT: vrgatherei16.vv v16, v12, v8, v0.t
; RV32-NEXT: vmv.v.v v8, v16
; RV32-NEXT: ret
;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleave-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleave-store.ll
index 67c18b5eef736e..123e2243647953 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleave-store.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleave-store.ll
@@ -9,9 +9,9 @@
define void @vector_interleave_store_v32i1_v16i1(<16 x i1> %a, <16 x i1> %b, ptr %p) {
; CHECK-LABEL: vector_interleave_store_v32i1_v16i1:
; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vslideup.vi v0, v8, 2
-; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
index bb4589a46bf41e..1ca34e9dfd1be3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
@@ -406,6 +406,7 @@ define <2 x i64> @mgather_v2i8_zextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
+; RV32ZVE32F-NEXT: sw zero, 12(a0)
; RV32ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: andi a1, a1, 255
@@ -414,7 +415,6 @@ define <2 x i64> @mgather_v2i8_zextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x
; RV32ZVE32F-NEXT: sw a2, 0(a0)
; RV32ZVE32F-NEXT: sw zero, 4(a0)
; RV32ZVE32F-NEXT: sw a1, 8(a0)
-; RV32ZVE32F-NEXT: sw zero, 12(a0)
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v2i8_zextload_v2i64:
@@ -1097,18 +1097,18 @@ define <2 x i64> @mgather_v2i16_zextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2
;
; RV32ZVE32F-LABEL: mgather_v2i16_zextload_v2i64:
; RV32ZVE32F: # %bb.0:
+; RV32ZVE32F-NEXT: lui a1, 16
; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vslidedown.vi v8, v9, 1
-; RV32ZVE32F-NEXT: vmv.x.s a1, v8
-; RV32ZVE32F-NEXT: lui a2, 16
-; RV32ZVE32F-NEXT: addi a2, a2, -1
-; RV32ZVE32F-NEXT: and a1, a1, a2
+; RV32ZVE32F-NEXT: vmv.x.s a2, v8
+; RV32ZVE32F-NEXT: addi a1, a1, -1
+; RV32ZVE32F-NEXT: and a2, a2, a1
; RV32ZVE32F-NEXT: vmv.x.s a3, v9
-; RV32ZVE32F-NEXT: and a2, a3, a2
-; RV32ZVE32F-NEXT: sw a2, 0(a0)
+; RV32ZVE32F-NEXT: and a1, a3, a1
+; RV32ZVE32F-NEXT: sw a1, 0(a0)
; RV32ZVE32F-NEXT: sw zero, 4(a0)
-; RV32ZVE32F-NEXT: sw a1, 8(a0)
+; RV32ZVE32F-NEXT: sw a2, 8(a0)
; RV32ZVE32F-NEXT: sw zero, 12(a0)
; RV32ZVE32F-NEXT: ret
;
@@ -2096,19 +2096,19 @@ define <2 x i64> @mgather_v2i32_sextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2
;
; RV32ZVE32F-LABEL: mgather_v2i32_sextload_v2i64:
; RV32ZVE32F: # %bb.0:
+; RV32ZVE32F-NEXT: addi a1, a0, 8
; RV32ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vslidedown.vi v8, v9, 1
-; RV32ZVE32F-NEXT: vmv.x.s a1, v8
-; RV32ZVE32F-NEXT: srai a1, a1, 31
-; RV32ZVE32F-NEXT: vmv.x.s a2, v9
+; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: srai a2, a2, 31
+; RV32ZVE32F-NEXT: vmv.x.s a3, v9
+; RV32ZVE32F-NEXT: srai a3, a3, 31
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vse32.v v9, (a0)
-; RV32ZVE32F-NEXT: addi a3, a0, 8
-; RV32ZVE32F-NEXT: vse32.v v8, (a3)
-; RV32ZVE32F-NEXT: sw a2, 4(a0)
-; RV32ZVE32F-NEXT: sw a1, 12(a0)
+; RV32ZVE32F-NEXT: vse32.v v8, (a1)
+; RV32ZVE32F-NEXT: sw a3, 4(a0)
+; RV32ZVE32F-NEXT: sw a2, 12(a0)
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v2i32_sextload_v2i64:
@@ -2160,15 +2160,15 @@ define <2 x i64> @mgather_v2i32_zextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2
;
; RV32ZVE32F-LABEL: mgather_v2i32_zextload_v2i64:
; RV32ZVE32F: # %bb.0:
+; RV32ZVE32F-NEXT: addi a1, a0, 8
; RV32ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
-; RV32ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV32ZVE32F-NEXT: sw zero, 4(a0)
; RV32ZVE32F-NEXT: sw zero, 12(a0)
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vse32.v v9, (a0)
-; RV32ZVE32F-NEXT: addi a0, a0, 8
-; RV32ZVE32F-NEXT: vse32.v v8, (a0)
+; RV32ZVE32F-NEXT: vslidedown.vi v8, v9, 1
+; RV32ZVE32F-NEXT: vse32.v v8, (a1)
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v2i32_zextload_v2i64:
@@ -6813,89 +6813,89 @@ define <8 x i64> @mgather_baseidx_v8i64(ptr %base, <8 x i64> %idxs, <8 x i1> %m,
; RV64ZVE32F-LABEL: mgather_baseidx_v8i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a6, v0
-; RV64ZVE32F-NEXT: andi a4, a6, 1
+; RV64ZVE32F-NEXT: vmv.x.s a7, v0
+; RV64ZVE32F-NEXT: andi a4, a7, 1
; RV64ZVE32F-NEXT: beqz a4, .LBB57_9
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: ld a4, 0(a2)
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: ld a4, 0(a4)
-; RV64ZVE32F-NEXT: andi a5, a6, 2
+; RV64ZVE32F-NEXT: andi a5, a7, 2
; RV64ZVE32F-NEXT: bnez a5, .LBB57_10
; RV64ZVE32F-NEXT: .LBB57_2:
; RV64ZVE32F-NEXT: ld a5, 8(a3)
-; RV64ZVE32F-NEXT: andi a7, a6, 4
-; RV64ZVE32F-NEXT: bnez a7, .LBB57_11
+; RV64ZVE32F-NEXT: andi a6, a7, 4
+; RV64ZVE32F-NEXT: bnez a6, .LBB57_11
; RV64ZVE32F-NEXT: .LBB57_3:
-; RV64ZVE32F-NEXT: ld a7, 16(a3)
-; RV64ZVE32F-NEXT: andi t0, a6, 8
+; RV64ZVE32F-NEXT: ld a6, 16(a3)
+; RV64ZVE32F-NEXT: andi t0, a7, 8
; RV64ZVE32F-NEXT: bnez t0, .LBB57_12
; RV64ZVE32F-NEXT: .LBB57_4:
; RV64ZVE32F-NEXT: ld t0, 24(a3)
-; RV64ZVE32F-NEXT: andi t1, a6, 16
+; RV64ZVE32F-NEXT: andi t1, a7, 16
; RV64ZVE32F-NEXT: bnez t1, .LBB57_13
; RV64ZVE32F-NEXT: .LBB57_5:
; RV64ZVE32F-NEXT: ld t1, 32(a3)
-; RV64ZVE32F-NEXT: andi t2, a6, 32
+; RV64ZVE32F-NEXT: andi t2, a7, 32
; RV64ZVE32F-NEXT: bnez t2, .LBB57_14
; RV64ZVE32F-NEXT: .LBB57_6:
; RV64ZVE32F-NEXT: ld t2, 40(a3)
-; RV64ZVE32F-NEXT: andi t3, a6, 64
+; RV64ZVE32F-NEXT: andi t3, a7, 64
; RV64ZVE32F-NEXT: bnez t3, .LBB57_15
; RV64ZVE32F-NEXT: .LBB57_7:
; RV64ZVE32F-NEXT: ld t3, 48(a3)
-; RV64ZVE32F-NEXT: andi a6, a6, -128
-; RV64ZVE32F-NEXT: bnez a6, .LBB57_16
+; RV64ZVE32F-NEXT: andi a7, a7, -128
+; RV64ZVE32F-NEXT: bnez a7, .LBB57_16
; RV64ZVE32F-NEXT: .LBB57_8:
; RV64ZVE32F-NEXT: ld a1, 56(a3)
; RV64ZVE32F-NEXT: j .LBB57_17
; RV64ZVE32F-NEXT: .LBB57_9:
; RV64ZVE32F-NEXT: ld a4, 0(a3)
-; RV64ZVE32F-NEXT: andi a5, a6, 2
+; RV64ZVE32F-NEXT: andi a5, a7, 2
; RV64ZVE32F-NEXT: beqz a5, .LBB57_2
; RV64ZVE32F-NEXT: .LBB57_10: # %cond.load1
; RV64ZVE32F-NEXT: ld a5, 8(a2)
; RV64ZVE32F-NEXT: slli a5, a5, 3
; RV64ZVE32F-NEXT: add a5, a1, a5
; RV64ZVE32F-NEXT: ld a5, 0(a5)
-; RV64ZVE32F-NEXT: andi a7, a6, 4
-; RV64ZVE32F-NEXT: beqz a7, .LBB57_3
+; RV64ZVE32F-NEXT: andi a6, a7, 4
+; RV64ZVE32F-NEXT: beqz a6, .LBB57_3
; RV64ZVE32F-NEXT: .LBB57_11: # %cond.load4
-; RV64ZVE32F-NEXT: ld a7, 16(a2)
-; RV64ZVE32F-NEXT: slli a7, a7, 3
-; RV64ZVE32F-NEXT: add a7, a1, a7
-; RV64ZVE32F-NEXT: ld a7, 0(a7)
-; RV64ZVE32F-NEXT: andi t0, a6, 8
+; RV64ZVE32F-NEXT: ld a6, 16(a2)
+; RV64ZVE32F-NEXT: slli a6, a6, 3
+; RV64ZVE32F-NEXT: add a6, a1, a6
+; RV64ZVE32F-NEXT: ld a6, 0(a6)
+; RV64ZVE32F-NEXT: andi t0, a7, 8
; RV64ZVE32F-NEXT: beqz t0, .LBB57_4
; RV64ZVE32F-NEXT: .LBB57_12: # %cond.load7
; RV64ZVE32F-NEXT: ld t0, 24(a2)
; RV64ZVE32F-NEXT: slli t0, t0, 3
; RV64ZVE32F-NEXT: add t0, a1, t0
; RV64ZVE32F-NEXT: ld t0, 0(t0)
-; RV64ZVE32F-NEXT: andi t1, a6, 16
+; RV64ZVE32F-NEXT: andi t1, a7, 16
; RV64ZVE32F-NEXT: beqz t1, .LBB57_5
; RV64ZVE32F-NEXT: .LBB57_13: # %cond.load10
; RV64ZVE32F-NEXT: ld t1, 32(a2)
; RV64ZVE32F-NEXT: slli t1, t1, 3
; RV64ZVE32F-NEXT: add t1, a1, t1
; RV64ZVE32F-NEXT: ld t1, 0(t1)
-; RV64ZVE32F-NEXT: andi t2, a6, 32
+; RV64ZVE32F-NEXT: andi t2, a7, 32
; RV64ZVE32F-NEXT: beqz t2, .LBB57_6
; RV64ZVE32F-NEXT: .LBB57_14: # %cond.load13
; RV64ZVE32F-NEXT: ld t2, 40(a2)
; RV64ZVE32F-NEXT: slli t2, t2, 3
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: ld t2, 0(t2)
-; RV64ZVE32F-NEXT: andi t3, a6, 64
+; RV64ZVE32F-NEXT: andi t3, a7, 64
; RV64ZVE32F-NEXT: beqz t3, .LBB57_7
; RV64ZVE32F-NEXT: .LBB57_15: # %cond.load16
; RV64ZVE32F-NEXT: ld t3, 48(a2)
; RV64ZVE32F-NEXT: slli t3, t3, 3
; RV64ZVE32F-NEXT: add t3, a1, t3
; RV64ZVE32F-NEXT: ld t3, 0(t3)
-; RV64ZVE32F-NEXT: andi a6, a6, -128
-; RV64ZVE32F-NEXT: beqz a6, .LBB57_8
+; RV64ZVE32F-NEXT: andi a7, a7, -128
+; RV64ZVE32F-NEXT: beqz a7, .LBB57_8
; RV64ZVE32F-NEXT: .LBB57_16: # %cond.load19
; RV64ZVE32F-NEXT: ld a2, 56(a2)
; RV64ZVE32F-NEXT: slli a2, a2, 3
@@ -6904,7 +6904,7 @@ define <8 x i64> @mgather_baseidx_v8i64(ptr %base, <8 x i64> %idxs, <8 x i1> %m,
; RV64ZVE32F-NEXT: .LBB57_17: # %else20
; RV64ZVE32F-NEXT: sd a4, 0(a0)
; RV64ZVE32F-NEXT: sd a5, 8(a0)
-; RV64ZVE32F-NEXT: sd a7, 16(a0)
+; RV64ZVE32F-NEXT: sd a6, 16(a0)
; RV64ZVE32F-NEXT: sd t0, 24(a0)
; RV64ZVE32F-NEXT: sd t1, 32(a0)
; RV64ZVE32F-NEXT: sd t2, 40(a0)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll
index 3fab9ce6367868..c47b79a2df92c5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll
@@ -135,9 +135,9 @@ declare <16 x half> @llvm.vp.nearbyint.v16f16(<16 x half>, <16 x i1>, i32)
define <16 x half> @vp_nearbyint_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_nearbyint_v16f16:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: lui a1, %hi(.LCPI6_0)
; CHECK-NEXT: flh fa5, %lo(.LCPI6_0)(a1)
-; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
@@ -393,9 +393,9 @@ declare <4 x double> @llvm.vp.nearbyint.v4f64(<4 x double>, <4 x i1>, i32)
define <4 x double> @vp_nearbyint_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_nearbyint_v4f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: lui a1, %hi(.LCPI18_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a1)
-; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
@@ -437,9 +437,9 @@ declare <8 x double> @llvm.vp.nearbyint.v8f64(<8 x double>, <8 x i1>, i32)
define <8 x double> @vp_nearbyint_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_nearbyint_v8f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: lui a1, %hi(.LCPI20_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a1)
-; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
@@ -481,9 +481,9 @@ declare <15 x double> @llvm.vp.nearbyint.v15f64(<15 x double>, <15 x i1>, i32)
define <15 x double> @vp_nearbyint_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_nearbyint_v15f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI22_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a1)
-; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
@@ -525,9 +525,9 @@ declare <16 x double> @llvm.vp.nearbyint.v16f64(<16 x double>, <16 x i1>, i32)
define <16 x double> @vp_nearbyint_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_nearbyint_v16f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI24_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1)
-; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
@@ -578,9 +578,9 @@ define <32 x double> @vp_nearbyint_v32f64(<32 x double> %va, <32 x i1> %m, i32 z
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: .LBB26_2:
+; CHECK-NEXT: vmv1r.v v0, v6
; CHECK-NEXT: lui a2, %hi(.LCPI26_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a2)
-; CHECK-NEXT: vmv1r.v v0, v6
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
index 487234674befe0..4b30725f973c7b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
@@ -2459,9 +2459,9 @@ define double @vreduce_fminimum_v64f64_nonans(ptr %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
+; CHECK-NEXT: addi a1, a0, 384
+; CHECK-NEXT: vle64.v v16, (a1)
; CHECK-NEXT: addi a1, a0, 256
-; CHECK-NEXT: addi a2, a0, 384
-; CHECK-NEXT: vle64.v v16, (a2)
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle64.v v24, (a0)
; CHECK-NEXT: vle64.v v0, (a1)
@@ -3217,9 +3217,9 @@ define double @vreduce_fmaximum_v64f64_nonans(ptr %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
+; CHECK-NEXT: addi a1, a0, 384
+; CHECK-NEXT: vle64.v v16, (a1)
; CHECK-NEXT: addi a1, a0, 256
-; CHECK-NEXT: addi a2, a0, 384
-; CHECK-NEXT: vle64.v v16, (a2)
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle64.v v24, (a0)
; CHECK-NEXT: vle64.v v0, (a1)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll
index 0a3c4874c5e8ba..71dc75dcc96c2a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll
@@ -1789,24 +1789,24 @@ define signext i8 @vpreduce_mul_v64i8(i8 signext %s, <64 x i8> %v, <64 x i1> %m,
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
-; RV32-NEXT: li a3, 32
-; RV32-NEXT: lui a2, %hi(.LCPI72_0)
-; RV32-NEXT: addi a2, a2, %lo(.LCPI72_0)
-; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma
-; RV32-NEXT: vle8.v v12, (a2)
; RV32-NEXT: mv a2, a0
+; RV32-NEXT: li a0, 32
+; RV32-NEXT: lui a3, %hi(.LCPI72_0)
+; RV32-NEXT: addi a3, a3, %lo(.LCPI72_0)
+; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; RV32-NEXT: vle8.v v12, (a3)
; RV32-NEXT: vid.v v16
; RV32-NEXT: vmsltu.vx v14, v16, a1
; RV32-NEXT: vsext.vf4 v16, v12
; RV32-NEXT: vmsltu.vx v12, v16, a1
; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT: vslideup.vi v14, v12, 4
-; RV32-NEXT: li a0, 64
-; RV32-NEXT: vsetvli zero, a0, e8, m4, ta, ma
+; RV32-NEXT: li a1, 64
+; RV32-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; RV32-NEXT: vmand.mm v0, v14, v0
; RV32-NEXT: vmv.v.i v12, 1
; RV32-NEXT: vmerge.vvm v8, v12, v8, v0
-; RV32-NEXT: vslidedown.vx v12, v8, a3
+; RV32-NEXT: vslidedown.vx v12, v8, a0
; RV32-NEXT: vmul.vv v8, v8, v12
; RV32-NEXT: vslidedown.vi v12, v8, 16
; RV32-NEXT: vmul.vv v8, v8, v12
@@ -1835,24 +1835,24 @@ define signext i8 @vpreduce_mul_v64i8(i8 signext %s, <64 x i8> %v, <64 x i1> %m,
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
-; RV64-NEXT: li a3, 32
-; RV64-NEXT: lui a2, %hi(.LCPI72_0)
-; RV64-NEXT: addi a2, a2, %lo(.LCPI72_0)
-; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
-; RV64-NEXT: vle8.v v12, (a2)
; RV64-NEXT: mv a2, a0
+; RV64-NEXT: li a0, 32
+; RV64-NEXT: lui a3, %hi(.LCPI72_0)
+; RV64-NEXT: addi a3, a3, %lo(.LCPI72_0)
+; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; RV64-NEXT: vle8.v v12, (a3)
; RV64-NEXT: vid.v v16
; RV64-NEXT: vmsltu.vx v14, v16, a1
; RV64-NEXT: vsext.vf4 v16, v12
; RV64-NEXT: vmsltu.vx v12, v16, a1
; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64-NEXT: vslideup.vi v14, v12, 4
-; RV64-NEXT: li a0, 64
-; RV64-NEXT: vsetvli zero, a0, e8, m4, ta, ma
+; RV64-NEXT: li a1, 64
+; RV64-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; RV64-NEXT: vmand.mm v0, v14, v0
; RV64-NEXT: vmv.v.i v12, 1
; RV64-NEXT: vmerge.vvm v8, v12, v8, v0
-; RV64-NEXT: vslidedown.vx v12, v8, a3
+; RV64-NEXT: vslidedown.vx v12, v8, a0
; RV64-NEXT: vmul.vv v8, v8, v12
; RV64-NEXT: vslidedown.vi v12, v8, 16
; RV64-NEXT: vmul.vv v8, v8, v12
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll
index a4a104abd2ef88..9f674ea6dfd607 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll
@@ -2324,9 +2324,9 @@ define i64 @vreduce_and_v64i64(ptr %x) nounwind {
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vle64.v v8, (a0)
+; RV64-NEXT: addi a1, a0, 384
+; RV64-NEXT: vle64.v v16, (a1)
; RV64-NEXT: addi a1, a0, 256
-; RV64-NEXT: addi a2, a0, 384
-; RV64-NEXT: vle64.v v16, (a2)
; RV64-NEXT: addi a0, a0, 128
; RV64-NEXT: vle64.v v24, (a0)
; RV64-NEXT: vle64.v v0, (a1)
@@ -2925,9 +2925,9 @@ define i64 @vreduce_or_v64i64(ptr %x) nounwind {
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vle64.v v8, (a0)
+; RV64-NEXT: addi a1, a0, 384
+; RV64-NEXT: vle64.v v16, (a1)
; RV64-NEXT: addi a1, a0, 256
-; RV64-NEXT: addi a2, a0, 384
-; RV64-NEXT: vle64.v v16, (a2)
; RV64-NEXT: addi a0, a0, 128
; RV64-NEXT: vle64.v v24, (a0)
; RV64-NEXT: vle64.v v0, (a1)
@@ -4161,9 +4161,9 @@ define i64 @vreduce_smin_v64i64(ptr %x) nounwind {
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vle64.v v8, (a0)
+; RV64-NEXT: addi a1, a0, 384
+; RV64-NEXT: vle64.v v16, (a1)
; RV64-NEXT: addi a1, a0, 256
-; RV64-NEXT: addi a2, a0, 384
-; RV64-NEXT: vle64.v v16, (a2)
; RV64-NEXT: addi a0, a0, 128
; RV64-NEXT: vle64.v v24, (a0)
; RV64-NEXT: vle64.v v0, (a1)
@@ -4763,9 +4763,9 @@ define i64 @vreduce_smax_v64i64(ptr %x) nounwind {
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vle64.v v8, (a0)
+; RV64-NEXT: addi a1, a0, 384
+; RV64-NEXT: vle64.v v16, (a1)
; RV64-NEXT: addi a1, a0, 256
-; RV64-NEXT: addi a2, a0, 384
-; RV64-NEXT: vle64.v v16, (a2)
; RV64-NEXT: addi a0, a0, 128
; RV64-NEXT: vle64.v v24, (a0)
; RV64-NEXT: vle64.v v0, (a1)
@@ -5365,9 +5365,9 @@ define i64 @vreduce_umin_v64i64(ptr %x) nounwind {
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vle64.v v8, (a0)
+; RV64-NEXT: addi a1, a0, 384
+; RV64-NEXT: vle64.v v16, (a1)
; RV64-NEXT: addi a1, a0, 256
-; RV64-NEXT: addi a2, a0, 384
-; RV64-NEXT: vle64.v v16, (a2)
; RV64-NEXT: addi a0, a0, 128
; RV64-NEXT: vle64.v v24, (a0)
; RV64-NEXT: vle64.v v0, (a1)
@@ -5966,9 +5966,9 @@ define i64 @vreduce_umax_v64i64(ptr %x) nounwind {
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vle64.v v8, (a0)
+; RV64-NEXT: addi a1, a0, 384
+; RV64-NEXT: vle64.v v16, (a1)
; RV64-NEXT: addi a1, a0, 256
-; RV64-NEXT: addi a2, a0, 384
-; RV64-NEXT: vle64.v v16, (a2)
; RV64-NEXT: addi a0, a0, 128
; RV64-NEXT: vle64.v v24, (a0)
; RV64-NEXT: vle64.v v0, (a1)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-mask-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-mask-vp.ll
index dc0f4e74305550..ad358d73202402 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-mask-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-mask-vp.ll
@@ -231,16 +231,18 @@ declare i1 @llvm.vp.reduce.and.v256i1(i1, <256 x i1>, <256 x i1>, i32)
define zeroext i1 @vpreduce_and_v256i1(i1 zeroext %s, <256 x i1> %v, <256 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_and_v256i1:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v11, v9
+; CHECK-NEXT: vmv1r.v v9, v0
; CHECK-NEXT: li a3, 128
; CHECK-NEXT: mv a2, a1
; CHECK-NEXT: bltu a1, a3, .LBB14_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a2, 128
; CHECK-NEXT: .LBB14_2:
+; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
-; CHECK-NEXT: vmnot.m v11, v0
-; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vcpop.m a2, v11, v0.t
+; CHECK-NEXT: vmnot.m v9, v9
+; CHECK-NEXT: vcpop.m a2, v9, v0.t
; CHECK-NEXT: seqz a2, a2
; CHECK-NEXT: and a0, a2, a0
; CHECK-NEXT: addi a2, a1, -128
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll
index b27492b43cfbbd..483fad54203f99 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll
@@ -123,9 +123,9 @@ declare <16 x half> @llvm.vp.rint.v16f16(<16 x half>, <16 x i1>, i32)
define <16 x half> @vp_rint_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_v16f16:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: lui a1, %hi(.LCPI6_0)
; CHECK-NEXT: flh fa5, %lo(.LCPI6_0)(a1)
-; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
@@ -357,9 +357,9 @@ declare <4 x double> @llvm.vp.rint.v4f64(<4 x double>, <4 x i1>, i32)
define <4 x double> @vp_rint_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_v4f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: lui a1, %hi(.LCPI18_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a1)
-; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
@@ -397,9 +397,9 @@ declare <8 x double> @llvm.vp.rint.v8f64(<8 x double>, <8 x i1>, i32)
define <8 x double> @vp_rint_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_v8f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: lui a1, %hi(.LCPI20_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a1)
-; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
@@ -437,9 +437,9 @@ declare <15 x double> @llvm.vp.rint.v15f64(<15 x double>, <15 x i1>, i32)
define <15 x double> @vp_rint_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_v15f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI22_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a1)
-; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
@@ -477,9 +477,9 @@ declare <16 x double> @llvm.vp.rint.v16f64(<16 x double>, <16 x i1>, i32)
define <16 x double> @vp_rint_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_v16f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI24_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1)
-; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
@@ -532,9 +532,9 @@ define <32 x double> @vp_rint_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroex
; CHECK-NEXT: slli a2, a2, 3
; CHECK-NEXT: sub sp, sp, a2
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: vmv1r.v v0, v6
; CHECK-NEXT: lui a2, %hi(.LCPI26_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a2)
-; CHECK-NEXT: vmv1r.v v0, v6
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: addi a1, sp, 16
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll
index 315fe257626fd8..a2dc0181c56c4d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll
@@ -261,9 +261,9 @@ declare <16 x half> @llvm.vp.round.v16f16(<16 x half>, <16 x i1>, i32)
define <16 x half> @vp_round_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_round_v16f16:
; ZVFH: # %bb.0:
+; ZVFH-NEXT: vmv1r.v v10, v0
; ZVFH-NEXT: lui a1, %hi(.LCPI6_0)
; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a1)
-; ZVFH-NEXT: vmv1r.v v10, v0
; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT: vfabs.v v12, v8, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu
@@ -561,9 +561,9 @@ declare <4 x double> @llvm.vp.round.v4f64(<4 x double>, <4 x i1>, i32)
define <4 x double> @vp_round_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_v4f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: lui a1, %hi(.LCPI18_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a1)
-; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
@@ -605,9 +605,9 @@ declare <8 x double> @llvm.vp.round.v8f64(<8 x double>, <8 x i1>, i32)
define <8 x double> @vp_round_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_v8f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: lui a1, %hi(.LCPI20_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a1)
-; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
@@ -649,9 +649,9 @@ declare <15 x double> @llvm.vp.round.v15f64(<15 x double>, <15 x i1>, i32)
define <15 x double> @vp_round_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_v15f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI22_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a1)
-; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
@@ -693,9 +693,9 @@ declare <16 x double> @llvm.vp.round.v16f64(<16 x double>, <16 x i1>, i32)
define <16 x double> @vp_round_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_v16f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI24_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1)
-; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
@@ -752,9 +752,9 @@ define <32 x double> @vp_round_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroe
; CHECK-NEXT: slli a2, a2, 3
; CHECK-NEXT: sub sp, sp, a2
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: vmv1r.v v0, v6
; CHECK-NEXT: lui a2, %hi(.LCPI26_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a2)
-; CHECK-NEXT: vmv1r.v v0, v6
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll
index b510532408cb8c..83c4723d00cb76 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll
@@ -261,9 +261,9 @@ declare <16 x half> @llvm.vp.roundeven.v16f16(<16 x half>, <16 x i1>, i32)
define <16 x half> @vp_roundeven_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_roundeven_v16f16:
; ZVFH: # %bb.0:
+; ZVFH-NEXT: vmv1r.v v10, v0
; ZVFH-NEXT: lui a1, %hi(.LCPI6_0)
; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a1)
-; ZVFH-NEXT: vmv1r.v v10, v0
; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT: vfabs.v v12, v8, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu
@@ -561,9 +561,9 @@ declare <4 x double> @llvm.vp.roundeven.v4f64(<4 x double>, <4 x i1>, i32)
define <4 x double> @vp_roundeven_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_v4f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: lui a1, %hi(.LCPI18_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a1)
-; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
@@ -605,9 +605,9 @@ declare <8 x double> @llvm.vp.roundeven.v8f64(<8 x double>, <8 x i1>, i32)
define <8 x double> @vp_roundeven_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_v8f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: lui a1, %hi(.LCPI20_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a1)
-; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
@@ -649,9 +649,9 @@ declare <15 x double> @llvm.vp.roundeven.v15f64(<15 x double>, <15 x i1>, i32)
define <15 x double> @vp_roundeven_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_v15f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI22_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a1)
-; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
@@ -693,9 +693,9 @@ declare <16 x double> @llvm.vp.roundeven.v16f64(<16 x double>, <16 x i1>, i32)
define <16 x double> @vp_roundeven_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_v16f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI24_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1)
-; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
@@ -752,9 +752,9 @@ define <32 x double> @vp_roundeven_v32f64(<32 x double> %va, <32 x i1> %m, i32 z
; CHECK-NEXT: slli a2, a2, 3
; CHECK-NEXT: sub sp, sp, a2
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: vmv1r.v v0, v6
; CHECK-NEXT: lui a2, %hi(.LCPI26_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a2)
-; CHECK-NEXT: vmv1r.v v0, v6
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll
index d0a41a2bb968ca..35d1e59a5a3798 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll
@@ -261,9 +261,9 @@ declare <16 x half> @llvm.vp.roundtozero.v16f16(<16 x half>, <16 x i1>, i32)
define <16 x half> @vp_roundtozero_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_roundtozero_v16f16:
; ZVFH: # %bb.0:
+; ZVFH-NEXT: vmv1r.v v10, v0
; ZVFH-NEXT: lui a1, %hi(.LCPI6_0)
; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a1)
-; ZVFH-NEXT: vmv1r.v v10, v0
; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT: vfabs.v v12, v8, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu
@@ -561,9 +561,9 @@ declare <4 x double> @llvm.vp.roundtozero.v4f64(<4 x double>, <4 x i1>, i32)
define <4 x double> @vp_roundtozero_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_v4f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: lui a1, %hi(.LCPI18_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a1)
-; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
@@ -605,9 +605,9 @@ declare <8 x double> @llvm.vp.roundtozero.v8f64(<8 x double>, <8 x i1>, i32)
define <8 x double> @vp_roundtozero_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_v8f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: lui a1, %hi(.LCPI20_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a1)
-; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
@@ -649,9 +649,9 @@ declare <15 x double> @llvm.vp.roundtozero.v15f64(<15 x double>, <15 x i1>, i32)
define <15 x double> @vp_roundtozero_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_v15f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI22_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a1)
-; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
@@ -693,9 +693,9 @@ declare <16 x double> @llvm.vp.roundtozero.v16f64(<16 x double>, <16 x i1>, i32)
define <16 x double> @vp_roundtozero_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_v16f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI24_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1)
-; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
@@ -752,9 +752,9 @@ define <32 x double> @vp_roundtozero_v32f64(<32 x double> %va, <32 x i1> %m, i32
; CHECK-NEXT: slli a2, a2, 3
; CHECK-NEXT: sub sp, sp, a2
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: vmv1r.v v0, v6
; CHECK-NEXT: lui a2, %hi(.LCPI26_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a2)
-; CHECK-NEXT: vmv1r.v v0, v6
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll
index 5c8be062649f51..62ef8cbdb19163 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll
@@ -1066,38 +1066,38 @@ define <128 x i1> @fcmp_oeq_vv_v128f16(<128 x half> %va, <128 x half> %vb, <128
; ZVFH-NEXT: slli a1, a1, 4
; ZVFH-NEXT: sub sp, sp, a1
; ZVFH-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
-; ZVFH-NEXT: addi a1, a0, 128
-; ZVFH-NEXT: li a3, 64
-; ZVFH-NEXT: vsetvli zero, a3, e16, m8, ta, ma
-; ZVFH-NEXT: vle16.v v24, (a1)
; ZVFH-NEXT: csrr a1, vlenb
; ZVFH-NEXT: slli a1, a1, 3
; ZVFH-NEXT: add a1, sp, a1
; ZVFH-NEXT: addi a1, a1, 16
-; ZVFH-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
-; ZVFH-NEXT: vle16.v v24, (a0)
-; ZVFH-NEXT: addi a0, sp, 16
-; ZVFH-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
-; ZVFH-NEXT: vsetivli zero, 8, e8, m1, ta, ma
-; ZVFH-NEXT: vslidedown.vi v6, v0, 8
+; ZVFH-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; ZVFH-NEXT: addi a1, a0, 128
+; ZVFH-NEXT: li a3, 64
+; ZVFH-NEXT: vsetvli zero, a3, e16, m8, ta, ma
+; ZVFH-NEXT: vle16.v v16, (a1)
+; ZVFH-NEXT: addi a1, sp, 16
+; ZVFH-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; ZVFH-NEXT: vle16.v v16, (a0)
; ZVFH-NEXT: mv a0, a2
+; ZVFH-NEXT: vsetivli zero, 8, e8, m1, ta, ma
+; ZVFH-NEXT: vslidedown.vi v24, v0, 8
; ZVFH-NEXT: bltu a2, a3, .LBB43_2
; ZVFH-NEXT: # %bb.1:
; ZVFH-NEXT: li a0, 64
; ZVFH-NEXT: .LBB43_2:
-; ZVFH-NEXT: addi a1, sp, 16
-; ZVFH-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; ZVFH-NEXT: vmfeq.vv v7, v8, v24, v0.t
+; ZVFH-NEXT: vmfeq.vv v7, v8, v16, v0.t
; ZVFH-NEXT: addi a0, a2, -64
; ZVFH-NEXT: sltu a1, a2, a0
; ZVFH-NEXT: addi a1, a1, -1
; ZVFH-NEXT: and a0, a1, a0
-; ZVFH-NEXT: vmv1r.v v0, v6
+; ZVFH-NEXT: vmv1r.v v0, v24
; ZVFH-NEXT: csrr a1, vlenb
; ZVFH-NEXT: slli a1, a1, 3
; ZVFH-NEXT: add a1, sp, a1
; ZVFH-NEXT: addi a1, a1, 16
+; ZVFH-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; ZVFH-NEXT: addi a1, sp, 16
; ZVFH-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; ZVFH-NEXT: vmfeq.vv v8, v16, v24, v0.t
@@ -3435,38 +3435,38 @@ define <32 x i1> @fcmp_oeq_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 x
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
-; CHECK-NEXT: addi a1, a0, 128
-; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT: vle64.v v24, (a1)
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: vle64.v v24, (a0)
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, a0, 128
+; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; CHECK-NEXT: vle64.v v16, (a1)
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vle64.v v16, (a0)
; CHECK-NEXT: li a1, 16
-; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT: vslidedown.vi v6, v0, 2
; CHECK-NEXT: mv a0, a2
+; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; CHECK-NEXT: vslidedown.vi v24, v0, 2
; CHECK-NEXT: bltu a2, a1, .LBB87_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a0, 16
; CHECK-NEXT: .LBB87_2:
-; CHECK-NEXT: addi a1, sp, 16
-; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT: vmfeq.vv v7, v8, v24, v0.t
+; CHECK-NEXT: vmfeq.vv v7, v8, v16, v0.t
; CHECK-NEXT: addi a0, a2, -16
; CHECK-NEXT: sltu a1, a2, a0
; CHECK-NEXT: addi a1, a1, -1
; CHECK-NEXT: and a0, a1, a0
-; CHECK-NEXT: vmv1r.v v0, v6
+; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vmfeq.vv v8, v16, v24, v0.t
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll
index 4afa75e87c8f86..c4ef8e059a5860 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll
@@ -1253,38 +1253,38 @@ define <64 x i1> @icmp_eq_vv_v64i32(<64 x i32> %va, <64 x i32> %vb, <64 x i1> %m
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
-; CHECK-NEXT: addi a1, a0, 128
-; CHECK-NEXT: li a3, 32
-; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
-; CHECK-NEXT: vle32.v v24, (a1)
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: vle32.v v24, (a0)
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vi v6, v0, 4
+; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, a0, 128
+; CHECK-NEXT: li a3, 32
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT: vle32.v v16, (a1)
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vle32.v v16, (a0)
; CHECK-NEXT: mv a0, a2
+; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
+; CHECK-NEXT: vslidedown.vi v24, v0, 4
; CHECK-NEXT: bltu a2, a3, .LBB99_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a0, 32
; CHECK-NEXT: .LBB99_2:
-; CHECK-NEXT: addi a1, sp, 16
-; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-NEXT: vmseq.vv v7, v8, v24, v0.t
+; CHECK-NEXT: vmseq.vv v7, v8, v16, v0.t
; CHECK-NEXT: addi a0, a2, -32
; CHECK-NEXT: sltu a1, a2, a0
; CHECK-NEXT: addi a1, a1, -1
; CHECK-NEXT: and a0, a1, a0
-; CHECK-NEXT: vmv1r.v v0, v6
+; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; CHECK-NEXT: vmseq.vv v8, v16, v24, v0.t
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-concat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-concat.ll
index 925366e8b1d500..e4ea64c0bf955d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-concat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-concat.ll
@@ -23,11 +23,12 @@ define <8 x i32> @concat_2xv4i32(<4 x i32> %a, <4 x i32> %b) {
define <8 x i32> @concat_4xv2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x i32> %d) {
; VLA-LABEL: concat_4xv2i32:
; VLA: # %bb.0:
+; VLA-NEXT: vmv1r.v v12, v10
; VLA-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; VLA-NEXT: vslideup.vi v10, v11, 2
+; VLA-NEXT: vslideup.vi v12, v11, 2
; VLA-NEXT: vslideup.vi v8, v9, 2
; VLA-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; VLA-NEXT: vslideup.vi v8, v10, 4
+; VLA-NEXT: vslideup.vi v8, v12, 4
; VLA-NEXT: ret
;
; VLS-LABEL: concat_4xv2i32:
@@ -48,26 +49,27 @@ define <8 x i32> @concat_4xv2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x
define <8 x i32> @concat_8xv1i32(<1 x i32> %a, <1 x i32> %b, <1 x i32> %c, <1 x i32> %d, <1 x i32> %e, <1 x i32> %f, <1 x i32> %g, <1 x i32> %h) {
; VLA-LABEL: concat_8xv1i32:
; VLA: # %bb.0:
+; VLA-NEXT: vmv1r.v v16, v12
; VLA-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; VLA-NEXT: vslideup.vi v14, v15, 1
-; VLA-NEXT: vslideup.vi v12, v13, 1
+; VLA-NEXT: vslideup.vi v16, v13, 1
; VLA-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; VLA-NEXT: vslideup.vi v12, v14, 2
+; VLA-NEXT: vslideup.vi v16, v14, 2
; VLA-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; VLA-NEXT: vslideup.vi v10, v11, 1
; VLA-NEXT: vslideup.vi v8, v9, 1
; VLA-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; VLA-NEXT: vslideup.vi v8, v10, 2
; VLA-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; VLA-NEXT: vslideup.vi v8, v12, 4
+; VLA-NEXT: vslideup.vi v8, v16, 4
; VLA-NEXT: ret
;
; VLS-LABEL: concat_8xv1i32:
; VLS: # %bb.0:
; VLS-NEXT: vmv1r.v v17, v12
+; VLS-NEXT: vmv1r.v v16, v8
; VLS-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; VLS-NEXT: vslideup.vi v14, v15, 1
-; VLS-NEXT: vmv1r.v v16, v8
; VLS-NEXT: vslideup.vi v17, v13, 1
; VLS-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; VLS-NEXT: vslideup.vi v17, v14, 2
@@ -128,11 +130,12 @@ define <16 x i32> @concat_4xv4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x
define <16 x i32> @concat_8xv2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x i32> %d, <2 x i32> %e, <2 x i32> %f, <2 x i32> %g, <2 x i32> %h) {
; VLA-LABEL: concat_8xv2i32:
; VLA: # %bb.0:
+; VLA-NEXT: vmv1r.v v16, v14
; VLA-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; VLA-NEXT: vslideup.vi v14, v15, 2
+; VLA-NEXT: vslideup.vi v16, v15, 2
; VLA-NEXT: vslideup.vi v12, v13, 2
; VLA-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; VLA-NEXT: vslideup.vi v12, v14, 4
+; VLA-NEXT: vslideup.vi v12, v16, 4
; VLA-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; VLA-NEXT: vslideup.vi v10, v11, 2
; VLA-NEXT: vslideup.vi v8, v9, 2
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll
index f5c45ba9ea5817..b70aff413aec53 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll
@@ -1058,22 +1058,22 @@ define <16 x i16> @reverse_v16i16_2(<8 x i16> %a, <8 x i16> %b) {
define <32 x i16> @reverse_v32i16_2(<16 x i16> %a, <16 x i16> %b) {
; CHECK-LABEL: reverse_v32i16_2:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv2r.v v12, v10
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a1, a0, 1
; CHECK-NEXT: addi a1, a1, -1
; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
-; CHECK-NEXT: vid.v v12
-; CHECK-NEXT: vrsub.vx v12, v12, a1
-; CHECK-NEXT: vrgather.vv v19, v8, v12
-; CHECK-NEXT: vrgather.vv v18, v9, v12
-; CHECK-NEXT: vrgather.vv v16, v8, v12
-; CHECK-NEXT: vmv2r.v v12, v10
-; CHECK-NEXT: vmv.v.v v17, v16
+; CHECK-NEXT: vid.v v10
+; CHECK-NEXT: vrsub.vx v14, v10, a1
+; CHECK-NEXT: vrgather.vv v11, v8, v14
+; CHECK-NEXT: vrgather.vv v8, v10, v14
+; CHECK-NEXT: vrgather.vv v10, v9, v14
+; CHECK-NEXT: vmv.v.v v9, v8
; CHECK-NEXT: slli a0, a0, 1
; CHECK-NEXT: addi a0, a0, -32
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT: vslidedown.vx v8, v16, a0
+; CHECK-NEXT: vslidedown.vx v8, v8, a0
; CHECK-NEXT: vid.v v16
; CHECK-NEXT: vrsub.vi v16, v16, 15
; CHECK-NEXT: lui a0, 16
@@ -1141,16 +1141,16 @@ define <8 x i32> @reverse_v8i32_2(<4 x i32> %a, <4 x i32> %b) {
define <16 x i32> @reverse_v16i32_2(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: reverse_v16i32_2:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv2r.v v12, v10
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a1, a0, 2
; CHECK-NEXT: addi a1, a1, -1
; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma
-; CHECK-NEXT: vid.v v12
-; CHECK-NEXT: vrsub.vx v12, v12, a1
-; CHECK-NEXT: vrgather.vv v19, v8, v12
-; CHECK-NEXT: vrgather.vv v18, v9, v12
-; CHECK-NEXT: vrgather.vv v16, v8, v12
-; CHECK-NEXT: vmv2r.v v12, v10
+; CHECK-NEXT: vid.v v10
+; CHECK-NEXT: vrsub.vx v10, v10, a1
+; CHECK-NEXT: vrgather.vv v19, v8, v10
+; CHECK-NEXT: vrgather.vv v16, v8, v10
+; CHECK-NEXT: vrgather.vv v18, v9, v10
; CHECK-NEXT: vmv.v.v v17, v16
; CHECK-NEXT: addi a0, a0, -16
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
@@ -1170,33 +1170,33 @@ define <16 x i32> @reverse_v16i32_2(<8 x i32> %a, <8 x i32> %b) {
define <32 x i32> @reverse_v32i32_2(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: reverse_v32i32_2:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv4r.v v16, v12
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a1, a0, 2
; CHECK-NEXT: addi a1, a1, -1
; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma
-; CHECK-NEXT: vid.v v16
-; CHECK-NEXT: vrsub.vx v17, v16, a1
-; CHECK-NEXT: vrgather.vv v23, v8, v17
-; CHECK-NEXT: vrgather.vv v22, v9, v17
-; CHECK-NEXT: vrgather.vv v21, v10, v17
-; CHECK-NEXT: vrgather.vv v20, v11, v17
-; CHECK-NEXT: vrgather.vv v16, v8, v17
-; CHECK-NEXT: vmv.v.v v17, v16
-; CHECK-NEXT: vmv4r.v v24, v12
-; CHECK-NEXT: vmv2r.v v18, v16
+; CHECK-NEXT: vid.v v12
+; CHECK-NEXT: vrsub.vx v20, v12, a1
+; CHECK-NEXT: vrgather.vv v15, v8, v20
+; CHECK-NEXT: vrgather.vv v14, v9, v20
+; CHECK-NEXT: vrgather.vv v13, v10, v20
+; CHECK-NEXT: vrgather.vv v8, v9, v20
+; CHECK-NEXT: vrgather.vv v12, v11, v20
+; CHECK-NEXT: vmv.v.v v9, v8
+; CHECK-NEXT: vmv2r.v v10, v8
; CHECK-NEXT: slli a0, a0, 1
; CHECK-NEXT: addi a0, a0, -32
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT: vslidedown.vx v8, v16, a0
+; CHECK-NEXT: vslidedown.vx v8, v8, a0
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; CHECK-NEXT: vid.v v16
-; CHECK-NEXT: vrsub.vi v16, v16, 15
+; CHECK-NEXT: vid.v v20
+; CHECK-NEXT: vrsub.vi v24, v20, 15
; CHECK-NEXT: lui a0, 16
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT: vmv.s.x v0, a0
-; CHECK-NEXT: vrgatherei16.vv v8, v24, v16, v0.t
+; CHECK-NEXT: vrgatherei16.vv v8, v16, v24, v0.t
; CHECK-NEXT: ret
%res = shufflevector <16 x i32> %a, <16 x i32> %b, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
ret <32 x i32> %res
@@ -1220,16 +1220,16 @@ define <4 x i64> @reverse_v4i64_2(<2 x i64> %a, < 2 x i64> %b) {
define <8 x i64> @reverse_v8i64_2(<4 x i64> %a, <4 x i64> %b) {
; CHECK-LABEL: reverse_v8i64_2:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv2r.v v12, v10
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a1, a0, 3
; CHECK-NEXT: addi a1, a1, -1
; CHECK-NEXT: vsetvli a2, zero, e64, m1, ta, ma
-; CHECK-NEXT: vid.v v12
-; CHECK-NEXT: vrsub.vx v12, v12, a1
-; CHECK-NEXT: vrgather.vv v19, v8, v12
-; CHECK-NEXT: vrgather.vv v18, v9, v12
-; CHECK-NEXT: vrgather.vv v16, v8, v12
-; CHECK-NEXT: vmv2r.v v12, v10
+; CHECK-NEXT: vid.v v10
+; CHECK-NEXT: vrsub.vx v10, v10, a1
+; CHECK-NEXT: vrgather.vv v19, v8, v10
+; CHECK-NEXT: vrgather.vv v16, v8, v10
+; CHECK-NEXT: vrgather.vv v18, v9, v10
; CHECK-NEXT: vmv.v.v v17, v16
; CHECK-NEXT: srli a0, a0, 1
; CHECK-NEXT: addi a0, a0, -8
@@ -1319,16 +1319,16 @@ define <32 x half> @reverse_v32f16_2(<16 x half> %a) {
; CHECK-NEXT: addi a1, a1, -1
; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
; CHECK-NEXT: vid.v v10
-; CHECK-NEXT: vrsub.vx v10, v10, a1
-; CHECK-NEXT: vrgather.vv v15, v8, v10
-; CHECK-NEXT: vrgather.vv v14, v9, v10
-; CHECK-NEXT: vrgather.vv v12, v8, v10
-; CHECK-NEXT: vmv.v.v v13, v12
+; CHECK-NEXT: vrsub.vx v12, v10, a1
+; CHECK-NEXT: vrgather.vv v11, v8, v12
+; CHECK-NEXT: vrgather.vv v8, v10, v12
+; CHECK-NEXT: vrgather.vv v10, v9, v12
+; CHECK-NEXT: vmv.v.v v9, v8
; CHECK-NEXT: slli a0, a0, 1
; CHECK-NEXT: addi a0, a0, -32
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT: vslidedown.vx v8, v12, a0
+; CHECK-NEXT: vslidedown.vx v8, v8, a0
; CHECK-NEXT: ret
%res = shufflevector <16 x half> %a, <16 x half> poison, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
ret <32 x half> %res
@@ -1388,16 +1388,16 @@ define <8 x float> @reverse_v8f32_2(<4 x float> %a, <4 x float> %b) {
define <16 x float> @reverse_v16f32_2(<8 x float> %a, <8 x float> %b) {
; CHECK-LABEL: reverse_v16f32_2:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv2r.v v12, v10
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a1, a0, 2
; CHECK-NEXT: addi a1, a1, -1
; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, ma
-; CHECK-NEXT: vid.v v12
-; CHECK-NEXT: vrsub.vx v12, v12, a1
-; CHECK-NEXT: vrgather.vv v19, v8, v12
-; CHECK-NEXT: vrgather.vv v18, v9, v12
-; CHECK-NEXT: vrgather.vv v16, v8, v12
-; CHECK-NEXT: vmv2r.v v12, v10
+; CHECK-NEXT: vid.v v10
+; CHECK-NEXT: vrsub.vx v10, v10, a1
+; CHECK-NEXT: vrgather.vv v19, v8, v10
+; CHECK-NEXT: vrgather.vv v16, v8, v10
+; CHECK-NEXT: vrgather.vv v18, v9, v10
; CHECK-NEXT: vmv.v.v v17, v16
; CHECK-NEXT: addi a0, a0, -16
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
@@ -1432,16 +1432,16 @@ define <4 x double> @reverse_v4f64_2(<2 x double> %a, < 2 x double> %b) {
define <8 x double> @reverse_v8f64_2(<4 x double> %a, <4 x double> %b) {
; CHECK-LABEL: reverse_v8f64_2:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv2r.v v12, v10
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a1, a0, 3
; CHECK-NEXT: addi a1, a1, -1
; CHECK-NEXT: vsetvli a2, zero, e64, m1, ta, ma
-; CHECK-NEXT: vid.v v12
-; CHECK-NEXT: vrsub.vx v12, v12, a1
-; CHECK-NEXT: vrgather.vv v19, v8, v12
-; CHECK-NEXT: vrgather.vv v18, v9, v12
-; CHECK-NEXT: vrgather.vv v16, v8, v12
-; CHECK-NEXT: vmv2r.v v12, v10
+; CHECK-NEXT: vid.v v10
+; CHECK-NEXT: vrsub.vx v10, v10, a1
+; CHECK-NEXT: vrgather.vv v19, v8, v10
+; CHECK-NEXT: vrgather.vv v16, v8, v10
+; CHECK-NEXT: vrgather.vv v18, v9, v10
; CHECK-NEXT: vmv.v.v v17, v16
; CHECK-NEXT: srli a0, a0, 1
; CHECK-NEXT: addi a0, a0, -8
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll
index ed72883e9d0523..ddde1e94abbde9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll
@@ -74,9 +74,9 @@ define void @widen_4xv4i16_unaligned(ptr %x, ptr %z) {
; CHECK-NO-MISALIGN: # %bb.0:
; CHECK-NO-MISALIGN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NO-MISALIGN-NEXT: vle8.v v8, (a0)
+; CHECK-NO-MISALIGN-NEXT: addi a2, a0, 16
+; CHECK-NO-MISALIGN-NEXT: vle8.v v10, (a2)
; CHECK-NO-MISALIGN-NEXT: addi a2, a0, 8
-; CHECK-NO-MISALIGN-NEXT: addi a3, a0, 16
-; CHECK-NO-MISALIGN-NEXT: vle8.v v10, (a3)
; CHECK-NO-MISALIGN-NEXT: addi a0, a0, 24
; CHECK-NO-MISALIGN-NEXT: vle8.v v9, (a0)
; CHECK-NO-MISALIGN-NEXT: vle8.v v11, (a2)
@@ -185,9 +185,9 @@ define void @strided_constant_mismatch_4xv4i16(ptr %x, ptr %z) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: addi a2, a0, 6
+; CHECK-NEXT: vle16.v v10, (a2)
; CHECK-NEXT: addi a2, a0, 2
-; CHECK-NEXT: addi a3, a0, 6
-; CHECK-NEXT: vle16.v v10, (a3)
; CHECK-NEXT: addi a0, a0, 8
; CHECK-NEXT: vle16.v v9, (a0)
; CHECK-NEXT: vle16.v v11, (a2)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll
index 47efa058df641a..98c0020e500046 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll
@@ -542,8 +542,8 @@ declare <3 x double> @llvm.experimental.vp.strided.load.v3f64.p0.i32(ptr, i32, <
define <32 x double> @strided_vpload_v32f64(ptr %ptr, i32 signext %stride, <32 x i1> %m, i32 zeroext %evl) nounwind {
; CHECK-LABEL: strided_vpload_v32f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a4, 16
; CHECK-NEXT: vmv1r.v v9, v0
+; CHECK-NEXT: li a4, 16
; CHECK-NEXT: mv a3, a2
; CHECK-NEXT: bltu a2, a4, .LBB45_2
; CHECK-NEXT: # %bb.1:
@@ -598,8 +598,8 @@ declare <32 x double> @llvm.experimental.vp.strided.load.v32f64.p0.i32(ptr, i32,
define <33 x double> @strided_load_v33f64(ptr %ptr, i64 %stride, <33 x i1> %mask, i32 zeroext %evl) {
; CHECK-RV32-LABEL: strided_load_v33f64:
; CHECK-RV32: # %bb.0:
-; CHECK-RV32-NEXT: li a5, 32
; CHECK-RV32-NEXT: vmv1r.v v8, v0
+; CHECK-RV32-NEXT: li a5, 32
; CHECK-RV32-NEXT: mv a3, a4
; CHECK-RV32-NEXT: bltu a4, a5, .LBB47_2
; CHECK-RV32-NEXT: # %bb.1:
@@ -648,8 +648,8 @@ define <33 x double> @strided_load_v33f64(ptr %ptr, i64 %stride, <33 x i1> %mask
;
; CHECK-RV64-LABEL: strided_load_v33f64:
; CHECK-RV64: # %bb.0:
-; CHECK-RV64-NEXT: li a5, 32
; CHECK-RV64-NEXT: vmv1r.v v8, v0
+; CHECK-RV64-NEXT: li a5, 32
; CHECK-RV64-NEXT: mv a4, a3
; CHECK-RV64-NEXT: bltu a3, a5, .LBB47_2
; CHECK-RV64-NEXT: # %bb.1:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll
index ad55f276a74c8a..0e950c054c924e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll
@@ -227,9 +227,10 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 6
+; CHECK-NEXT: li a3, 72
+; CHECK-NEXT: mul a2, a2, a3
; CHECK-NEXT: sub sp, sp, a2
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xc0, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 64 * vlenb
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xc8, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 72 * vlenb
; CHECK-NEXT: vmv1r.v v7, v0
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 5
@@ -282,23 +283,22 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a3, 16
; CHECK-NEXT: .LBB16_2:
+; CHECK-NEXT: vmv1r.v v0, v27
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vle64.v v16, (a5)
; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: li a6, 48
+; CHECK-NEXT: li a6, 56
; CHECK-NEXT: mul a5, a5, a6
; CHECK-NEXT: add a5, sp, a5
; CHECK-NEXT: addi a5, a5, 16
; CHECK-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT: vslidedown.vi v28, v26, 2
+; CHECK-NEXT: vslidedown.vi v27, v26, 2
; CHECK-NEXT: li a5, 64
-; CHECK-NEXT: vmv1r.v v0, v27
; CHECK-NEXT: vsetvli zero, a3, e32, m4, ta, ma
; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t
; CHECK-NEXT: csrr a3, vlenb
-; CHECK-NEXT: li a6, 56
-; CHECK-NEXT: mul a3, a3, a6
+; CHECK-NEXT: slli a3, a3, 6
; CHECK-NEXT: add a3, sp, a3
; CHECK-NEXT: addi a3, a3, 16
; CHECK-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
@@ -307,12 +307,14 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: li a6, 64
; CHECK-NEXT: .LBB16_4:
+; CHECK-NEXT: vmv1r.v v0, v27
; CHECK-NEXT: addi a5, a1, 384
; CHECK-NEXT: li a3, 32
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vle64.v v8, (a1)
; CHECK-NEXT: csrr t0, vlenb
-; CHECK-NEXT: slli t0, t0, 3
+; CHECK-NEXT: li t1, 48
+; CHECK-NEXT: mul t0, t0, t1
; CHECK-NEXT: add t0, sp, t0
; CHECK-NEXT: addi t0, t0, 16
; CHECK-NEXT: vs8r.v v8, (t0) # Unknown-size Folded Spill
@@ -324,9 +326,8 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze
; CHECK-NEXT: sltu t1, a6, t0
; CHECK-NEXT: addi t1, t1, -1
; CHECK-NEXT: and t0, t1, t0
-; CHECK-NEXT: vmv1r.v v0, v28
; CHECK-NEXT: csrr t1, vlenb
-; CHECK-NEXT: li t2, 48
+; CHECK-NEXT: li t2, 56
; CHECK-NEXT: mul t1, t1, t2
; CHECK-NEXT: add t1, sp, t1
; CHECK-NEXT: addi t1, t1, 16
@@ -342,27 +343,28 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze
; CHECK-NEXT: # %bb.5:
; CHECK-NEXT: li a6, 16
; CHECK-NEXT: .LBB16_6:
+; CHECK-NEXT: vmv1r.v v0, v26
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vle64.v v8, (a5)
; CHECK-NEXT: addi a5, sp, 16
; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill
; CHECK-NEXT: addi a1, a1, 256
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT: vslidedown.vi v27, v25, 2
-; CHECK-NEXT: vmv1r.v v0, v26
+; CHECK-NEXT: vslidedown.vi v26, v25, 2
; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: slli a5, a5, 3
+; CHECK-NEXT: li t0, 48
+; CHECK-NEXT: mul a5, a5, t0
; CHECK-NEXT: add a5, sp, a5
; CHECK-NEXT: addi a5, a5, 16
-; CHECK-NEXT: vl8r.v v8, (a5) # Unknown-size Folded Reload
+; CHECK-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a6, e32, m4, ta, ma
-; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t
+; CHECK-NEXT: vnsrl.wi v8, v16, 0, v0.t
; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: li a6, 48
+; CHECK-NEXT: li a6, 56
; CHECK-NEXT: mul a5, a5, a6
; CHECK-NEXT: add a5, sp, a5
; CHECK-NEXT: addi a5, a5, 16
-; CHECK-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill
+; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill
; CHECK-NEXT: mv a5, a4
; CHECK-NEXT: bltu a4, a3, .LBB16_8
; CHECK-NEXT: # %bb.7:
@@ -379,84 +381,92 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze
; CHECK-NEXT: sltu a5, a5, a1
; CHECK-NEXT: addi a5, a5, -1
; CHECK-NEXT: and a1, a5, a1
-; CHECK-NEXT: vmv1r.v v0, v27
+; CHECK-NEXT: vmv1r.v v0, v26
; CHECK-NEXT: addi a5, sp, 16
-; CHECK-NEXT: vl8r.v v8, (a5) # Unknown-size Folded Reload
+; CHECK-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t
+; CHECK-NEXT: vnsrl.wi v8, v16, 0, v0.t
; CHECK-NEXT: bltu a4, a2, .LBB16_10
; CHECK-NEXT: # %bb.9:
; CHECK-NEXT: li a4, 16
; CHECK-NEXT: .LBB16_10:
-; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT: vslidedown.vi v6, v7, 2
; CHECK-NEXT: vmv1r.v v0, v25
+; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; CHECK-NEXT: vslidedown.vi v12, v7, 2
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a4, e32, m4, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v24, 0, v0.t
-; CHECK-NEXT: vmv.v.v v0, v8
+; CHECK-NEXT: vnsrl.wi v16, v24, 0, v0.t
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a4, 48
+; CHECK-NEXT: mul a1, a1, a4
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: mv a1, a7
; CHECK-NEXT: bltu a7, a3, .LBB16_12
; CHECK-NEXT: # %bb.11:
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: .LBB16_12:
+; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: csrr a4, vlenb
; CHECK-NEXT: li a5, 24
; CHECK-NEXT: mul a4, a4, a5
; CHECK-NEXT: add a4, sp, a4
; CHECK-NEXT: addi a4, a4, 16
-; CHECK-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload
-; CHECK-NEXT: vmv4r.v v24, v8
+; CHECK-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload
+; CHECK-NEXT: vmv4r.v v24, v16
; CHECK-NEXT: csrr a4, vlenb
-; CHECK-NEXT: li a5, 56
-; CHECK-NEXT: mul a4, a4, a5
+; CHECK-NEXT: slli a4, a4, 6
; CHECK-NEXT: add a4, sp, a4
; CHECK-NEXT: addi a4, a4, 16
-; CHECK-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload
+; CHECK-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
-; CHECK-NEXT: vslideup.vi v8, v24, 16
+; CHECK-NEXT: vslideup.vi v16, v24, 16
; CHECK-NEXT: csrr a4, vlenb
-; CHECK-NEXT: li a5, 56
-; CHECK-NEXT: mul a4, a4, a5
+; CHECK-NEXT: slli a4, a4, 6
; CHECK-NEXT: add a4, sp, a4
; CHECK-NEXT: addi a4, a4, 16
-; CHECK-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill
+; CHECK-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a4, vlenb
; CHECK-NEXT: slli a4, a4, 4
; CHECK-NEXT: add a4, sp, a4
; CHECK-NEXT: addi a4, a4, 16
-; CHECK-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload
-; CHECK-NEXT: vmv4r.v v24, v8
+; CHECK-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload
+; CHECK-NEXT: vmv4r.v v24, v16
; CHECK-NEXT: csrr a4, vlenb
-; CHECK-NEXT: li a5, 48
+; CHECK-NEXT: li a5, 56
; CHECK-NEXT: mul a4, a4, a5
; CHECK-NEXT: add a4, sp, a4
; CHECK-NEXT: addi a4, a4, 16
-; CHECK-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload
-; CHECK-NEXT: vslideup.vi v8, v24, 16
+; CHECK-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload
+; CHECK-NEXT: vslideup.vi v16, v24, 16
+; CHECK-NEXT: csrr a4, vlenb
+; CHECK-NEXT: li a5, 56
+; CHECK-NEXT: mul a4, a4, a5
+; CHECK-NEXT: add a4, sp, a4
+; CHECK-NEXT: addi a4, a4, 16
+; CHECK-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a4, vlenb
; CHECK-NEXT: li a5, 48
; CHECK-NEXT: mul a4, a4, a5
; CHECK-NEXT: add a4, sp, a4
; CHECK-NEXT: addi a4, a4, 16
-; CHECK-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill
-; CHECK-NEXT: vmv4r.v v8, v0
-; CHECK-NEXT: vslideup.vi v8, v16, 16
+; CHECK-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload
+; CHECK-NEXT: vslideup.vi v16, v8, 16
; CHECK-NEXT: csrr a4, vlenb
-; CHECK-NEXT: li a5, 24
+; CHECK-NEXT: li a5, 48
; CHECK-NEXT: mul a4, a4, a5
; CHECK-NEXT: add a4, sp, a4
; CHECK-NEXT: addi a4, a4, 16
-; CHECK-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill
+; CHECK-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
; CHECK-NEXT: addi a4, a1, -16
; CHECK-NEXT: sltu a1, a1, a4
; CHECK-NEXT: addi a1, a1, -1
; CHECK-NEXT: and a1, a1, a4
-; CHECK-NEXT: vmv1r.v v0, v6
; CHECK-NEXT: csrr a4, vlenb
; CHECK-NEXT: slli a4, a4, 5
; CHECK-NEXT: add a4, sp, a4
@@ -474,15 +484,15 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze
; CHECK-NEXT: mul a1, a1, a2
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a7, e32, m4, ta, ma
-; CHECK-NEXT: vnsrl.wi v16, v24, 0, v0.t
+; CHECK-NEXT: vnsrl.wi v24, v16, 0, v0.t
; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
-; CHECK-NEXT: vslideup.vi v16, v8, 16
-; CHECK-NEXT: vse32.v v16, (a0)
+; CHECK-NEXT: vslideup.vi v24, v8, 16
+; CHECK-NEXT: vse32.v v24, (a0)
; CHECK-NEXT: addi a1, a0, 256
; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: li a3, 24
+; CHECK-NEXT: li a3, 48
; CHECK-NEXT: mul a2, a2, a3
; CHECK-NEXT: add a2, sp, a2
; CHECK-NEXT: addi a2, a2, 16
@@ -490,7 +500,7 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze
; CHECK-NEXT: vse32.v v8, (a1)
; CHECK-NEXT: addi a1, a0, 128
; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: li a3, 48
+; CHECK-NEXT: li a3, 56
; CHECK-NEXT: mul a2, a2, a3
; CHECK-NEXT: add a2, sp, a2
; CHECK-NEXT: addi a2, a2, 16
@@ -498,14 +508,14 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze
; CHECK-NEXT: vse32.v v8, (a1)
; CHECK-NEXT: addi a0, a0, 384
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a2, 56
-; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: slli a1, a1, 6
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 6
+; CHECK-NEXT: li a1, 72
+; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: .cfi_def_cfa sp, 16
; CHECK-NEXT: addi sp, sp, 16
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll
index 4477ce73a9ffc0..fa82065f3b4131 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll
@@ -301,38 +301,38 @@ define <32 x double> @vfsgnj_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
-; CHECK-NEXT: addi a1, a0, 128
-; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT: vle64.v v24, (a1)
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: vle64.v v24, (a0)
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, a0, 128
+; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; CHECK-NEXT: vle64.v v16, (a1)
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vle64.v v16, (a0)
; CHECK-NEXT: li a1, 16
-; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT: vslidedown.vi v7, v0, 2
; CHECK-NEXT: mv a0, a2
+; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; CHECK-NEXT: vslidedown.vi v24, v0, 2
; CHECK-NEXT: bltu a2, a1, .LBB26_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a0, 16
; CHECK-NEXT: .LBB26_2:
-; CHECK-NEXT: addi a1, sp, 16
-; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT: vfsgnj.vv v8, v8, v24, v0.t
+; CHECK-NEXT: vfsgnj.vv v8, v8, v16, v0.t
; CHECK-NEXT: addi a0, a2, -16
; CHECK-NEXT: sltu a1, a2, a0
; CHECK-NEXT: addi a1, a1, -1
; CHECK-NEXT: and a0, a1, a0
-; CHECK-NEXT: vmv1r.v v0, v7
+; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfsgnj.vv v16, v16, v24, v0.t
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll
index f03f1ec639eb65..35ce42ec841dc8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll
@@ -822,31 +822,30 @@ define <32 x double> @vfma_vv_v32f64(<32 x double> %va, <32 x double> %b, <32 x
; CHECK-NEXT: slli a1, a1, 5
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
-; CHECK-NEXT: addi a1, a2, 128
-; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT: vle64.v v24, (a1)
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: li a3, 24
; CHECK-NEXT: mul a1, a1, a3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: addi a1, a0, 128
-; CHECK-NEXT: vle64.v v24, (a1)
+; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vmv8r.v v24, v8
+; CHECK-NEXT: addi a1, a2, 128
+; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; CHECK-NEXT: vle64.v v8, (a1)
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: vle64.v v24, (a2)
-; CHECK-NEXT: addi a1, sp, 16
-; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: vle64.v v24, (a0)
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, a0, 128
+; CHECK-NEXT: vle64.v v8, (a1)
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vle64.v v16, (a2)
+; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: mv a0, a4
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
@@ -855,12 +854,10 @@ define <32 x double> @vfma_vv_v32f64(<32 x double> %va, <32 x double> %b, <32 x
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a0, 16
; CHECK-NEXT: .LBB50_2:
-; CHECK-NEXT: addi a1, sp, 16
-; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT: vfmadd.vv v24, v8, v16, v0.t
+; CHECK-NEXT: vfmadd.vv v8, v24, v16, v0.t
; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: addi a0, a4, -16
; CHECK-NEXT: sltu a1, a4, a0
; CHECK-NEXT: addi a1, a1, -1
@@ -871,19 +868,20 @@ define <32 x double> @vfma_vv_v32f64(<32 x double> %va, <32 x double> %b, <32 x
; CHECK-NEXT: mul a1, a1, a2
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT: vfmadd.vv v16, v24, v8, v0.t
+; CHECK-NEXT: vfmadd.vv v8, v16, v24, v0.t
+; CHECK-NEXT: vmv.v.v v16, v8
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
@@ -903,60 +901,48 @@ define <32 x double> @vfma_vv_v32f64_unmasked(<32 x double> %va, <32 x double> %
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a3, 24
-; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
-; CHECK-NEXT: addi a1, a2, 128
-; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT: vle64.v v24, (a1)
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vmv8r.v v0, v8
+; CHECK-NEXT: addi a1, a2, 128
+; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; CHECK-NEXT: vle64.v v8, (a1)
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: addi a1, a0, 128
; CHECK-NEXT: vle64.v v24, (a1)
-; CHECK-NEXT: addi a1, sp, 16
-; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: vle64.v v24, (a2)
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 3
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: vle64.v v0, (a0)
+; CHECK-NEXT: vle64.v v16, (a2)
+; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: mv a0, a4
; CHECK-NEXT: bltu a4, a1, .LBB51_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a0, 16
; CHECK-NEXT: .LBB51_2:
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 3
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT: vfmadd.vv v0, v8, v24
+; CHECK-NEXT: vfmadd.vv v8, v0, v16
; CHECK-NEXT: addi a0, a4, -16
; CHECK-NEXT: sltu a1, a4, a0
; CHECK-NEXT: addi a1, a1, -1
; CHECK-NEXT: and a0, a1, a0
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: addi a1, sp, 16
-; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT: vfmadd.vv v24, v16, v8
-; CHECK-NEXT: vmv8r.v v8, v0
+; CHECK-NEXT: vfmadd.vv v24, v16, v0
; CHECK-NEXT: vmv.v.v v16, v24
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a1, 24
-; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: .cfi_def_cfa sp, 16
; CHECK-NEXT: addi sp, sp, 16
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax-vp.ll
index 3be992b7e0c3b5..cad7adbc19f3c8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax-vp.ll
@@ -393,38 +393,38 @@ define <32 x double> @vfmax_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
-; CHECK-NEXT: addi a1, a0, 128
-; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT: vle64.v v24, (a1)
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: vle64.v v24, (a0)
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, a0, 128
+; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; CHECK-NEXT: vle64.v v16, (a1)
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vle64.v v16, (a0)
; CHECK-NEXT: li a1, 16
-; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT: vslidedown.vi v7, v0, 2
; CHECK-NEXT: mv a0, a2
+; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; CHECK-NEXT: vslidedown.vi v24, v0, 2
; CHECK-NEXT: bltu a2, a1, .LBB26_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a0, 16
; CHECK-NEXT: .LBB26_2:
-; CHECK-NEXT: addi a1, sp, 16
-; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT: vfmax.vv v8, v8, v24, v0.t
+; CHECK-NEXT: vfmax.vv v8, v8, v16, v0.t
; CHECK-NEXT: addi a0, a2, -16
; CHECK-NEXT: sltu a1, a2, a0
; CHECK-NEXT: addi a1, a1, -1
; CHECK-NEXT: and a0, a1, a0
-; CHECK-NEXT: vmv1r.v v0, v7
+; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfmax.vv v16, v16, v24, v0.t
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin-vp.ll
index 49a774c29e432e..d8ee7a7044b49c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin-vp.ll
@@ -393,38 +393,38 @@ define <32 x double> @vfmin_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
-; CHECK-NEXT: addi a1, a0, 128
-; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT: vle64.v v24, (a1)
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: vle64.v v24, (a0)
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, a0, 128
+; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; CHECK-NEXT: vle64.v v16, (a1)
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vle64.v v16, (a0)
; CHECK-NEXT: li a1, 16
-; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT: vslidedown.vi v7, v0, 2
; CHECK-NEXT: mv a0, a2
+; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; CHECK-NEXT: vslidedown.vi v24, v0, 2
; CHECK-NEXT: bltu a2, a1, .LBB26_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a0, 16
; CHECK-NEXT: .LBB26_2:
-; CHECK-NEXT: addi a1, sp, 16
-; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT: vfmin.vv v8, v8, v24, v0.t
+; CHECK-NEXT: vfmin.vv v8, v8, v16, v0.t
; CHECK-NEXT: addi a0, a2, -16
; CHECK-NEXT: sltu a1, a2, a0
; CHECK-NEXT: addi a1, a1, -1
; CHECK-NEXT: and a0, a1, a0
-; CHECK-NEXT: vmv1r.v v0, v7
+; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfmin.vv v16, v16, v24, v0.t
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmuladd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmuladd-vp.ll
index e2189b55c8bf2c..18abded9ea8b93 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmuladd-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmuladd-vp.ll
@@ -610,31 +610,30 @@ define <32 x double> @vfma_vv_v32f64(<32 x double> %va, <32 x double> %b, <32 x
; CHECK-NEXT: slli a1, a1, 5
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
-; CHECK-NEXT: addi a1, a2, 128
-; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT: vle64.v v24, (a1)
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: li a3, 24
; CHECK-NEXT: mul a1, a1, a3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: addi a1, a0, 128
-; CHECK-NEXT: vle64.v v24, (a1)
+; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vmv8r.v v24, v8
+; CHECK-NEXT: addi a1, a2, 128
+; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; CHECK-NEXT: vle64.v v8, (a1)
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: vle64.v v24, (a2)
-; CHECK-NEXT: addi a1, sp, 16
-; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: vle64.v v24, (a0)
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, a0, 128
+; CHECK-NEXT: vle64.v v8, (a1)
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vle64.v v16, (a2)
+; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: mv a0, a4
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
@@ -643,12 +642,10 @@ define <32 x double> @vfma_vv_v32f64(<32 x double> %va, <32 x double> %b, <32 x
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a0, 16
; CHECK-NEXT: .LBB50_2:
-; CHECK-NEXT: addi a1, sp, 16
-; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT: vfmadd.vv v24, v8, v16, v0.t
+; CHECK-NEXT: vfmadd.vv v8, v24, v16, v0.t
; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: addi a0, a4, -16
; CHECK-NEXT: sltu a1, a4, a0
; CHECK-NEXT: addi a1, a1, -1
@@ -659,19 +656,20 @@ define <32 x double> @vfma_vv_v32f64(<32 x double> %va, <32 x double> %b, <32 x
; CHECK-NEXT: mul a1, a1, a2
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT: vfmadd.vv v16, v24, v8, v0.t
+; CHECK-NEXT: vfmadd.vv v8, v16, v24, v0.t
+; CHECK-NEXT: vmv.v.v v16, v8
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
@@ -691,60 +689,48 @@ define <32 x double> @vfma_vv_v32f64_unmasked(<32 x double> %va, <32 x double> %
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a3, 24
-; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
-; CHECK-NEXT: addi a1, a2, 128
-; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT: vle64.v v24, (a1)
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vmv8r.v v0, v8
+; CHECK-NEXT: addi a1, a2, 128
+; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; CHECK-NEXT: vle64.v v8, (a1)
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: addi a1, a0, 128
; CHECK-NEXT: vle64.v v24, (a1)
-; CHECK-NEXT: addi a1, sp, 16
-; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: vle64.v v24, (a2)
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 3
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: vle64.v v0, (a0)
+; CHECK-NEXT: vle64.v v16, (a2)
+; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: mv a0, a4
; CHECK-NEXT: bltu a4, a1, .LBB51_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a0, 16
; CHECK-NEXT: .LBB51_2:
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 3
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT: vfmadd.vv v0, v8, v24
+; CHECK-NEXT: vfmadd.vv v8, v0, v16
; CHECK-NEXT: addi a0, a4, -16
; CHECK-NEXT: sltu a1, a4, a0
; CHECK-NEXT: addi a1, a1, -1
; CHECK-NEXT: and a0, a1, a0
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: addi a1, sp, 16
-; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT: vfmadd.vv v24, v16, v8
-; CHECK-NEXT: vmv8r.v v8, v0
+; CHECK-NEXT: vfmadd.vv v24, v16, v0
; CHECK-NEXT: vmv.v.v v16, v24
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a1, 24
-; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: .cfi_def_cfa sp, 16
; CHECK-NEXT: addi sp, sp, 16
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll
index 351d7d4cd9b09b..a3cd46e485c7cb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll
@@ -394,8 +394,8 @@ declare <33 x double> @llvm.vp.load.v33f64.p0(ptr, <33 x i1>, i32)
define <33 x double> @vpload_v33f64(ptr %ptr, <33 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v33f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a4, 32
; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: li a4, 32
; CHECK-NEXT: mv a3, a2
; CHECK-NEXT: bltu a2, a4, .LBB32_2
; CHECK-NEXT: # %bb.1:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
index bc42b42208bc3d..a11c2b6bca12ec 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
@@ -1176,46 +1176,37 @@ define <32 x double> @vpmerge_vv_v32f64(<32 x double> %va, <32 x double> %vb, <3
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vmv8r.v v16, v8
; CHECK-NEXT: addi a1, a0, 128
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vle64.v v24, (a1)
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 3
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: vle64.v v24, (a0)
+; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: li a1, 16
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: mv a0, a2
; CHECK-NEXT: bltu a2, a1, .LBB79_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a0, 16
; CHECK-NEXT: .LBB79_2:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, ma
-; CHECK-NEXT: vmerge.vvm v24, v24, v8, v0
+; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0
; CHECK-NEXT: addi a0, a2, -16
; CHECK-NEXT: sltu a1, a2, a0
; CHECK-NEXT: addi a1, a1, -1
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vi v0, v0, 2
; CHECK-NEXT: and a0, a1, a0
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 3
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: addi a1, sp, 16
-; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, ma
-; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vmerge.vvm v24, v24, v16, v0
+; CHECK-NEXT: vmv8r.v v16, v24
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: .cfi_def_cfa sp, 16
; CHECK-NEXT: addi sp, sp, 16
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll
index 9e37780f56e1c5..ad13603ee13ec7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll
@@ -1775,11 +1775,11 @@ define void @vpscatter_v32f64(<32 x double> %val, <32 x ptr> %ptrs, <32 x i1> %m
; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: sub sp, sp, a1
; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV64-NEXT: addi a1, sp, 16
+; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV64-NEXT: addi a1, a0, 128
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT: vle64.v v24, (a1)
-; RV64-NEXT: addi a1, sp, 16
-; RV64-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: vle64.v v16, (a1)
; RV64-NEXT: vle64.v v24, (a0)
; RV64-NEXT: li a1, 16
; RV64-NEXT: mv a0, a2
@@ -1798,7 +1798,7 @@ define void @vpscatter_v32f64(<32 x double> %val, <32 x ptr> %ptrs, <32 x i1> %m
; RV64-NEXT: addi a1, sp, 16
; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT: vsoxei64.v v16, (zero), v8, v0.t
+; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a0, a0, 3
; RV64-NEXT: add sp, sp, a0
@@ -1842,18 +1842,21 @@ define void @vpscatter_baseidx_v32i32_v32f64(<32 x double> %val, ptr %base, <32
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: csrr a3, vlenb
-; RV64-NEXT: slli a3, a3, 3
+; RV64-NEXT: slli a4, a3, 3
+; RV64-NEXT: add a3, a4, a3
; RV64-NEXT: sub sp, sp, a3
-; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x09, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 9 * vlenb
+; RV64-NEXT: addi a3, sp, 16
+; RV64-NEXT: vs1r.v v0, (a3) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a3, vlenb
+; RV64-NEXT: add a3, sp, a3
+; RV64-NEXT: addi a3, a3, 16
+; RV64-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV64-NEXT: li a3, 32
; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
-; RV64-NEXT: vle32.v v24, (a1)
-; RV64-NEXT: vmv1r.v v7, v0
-; RV64-NEXT: addi a1, sp, 16
-; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: vle32.v v0, (a1)
; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma
-; RV64-NEXT: vslidedown.vi v16, v24, 16
-; RV64-NEXT: vmv4r.v v0, v24
+; RV64-NEXT: vslidedown.vi v16, v0, 16
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vsext.vf2 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
@@ -1865,21 +1868,25 @@ define void @vpscatter_baseidx_v32i32_v32f64(<32 x double> %val, ptr %base, <32
; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a1, 16
; RV64-NEXT: .LBB84_2:
-; RV64-NEXT: vmv1r.v v0, v7
+; RV64-NEXT: addi a3, sp, 16
+; RV64-NEXT: vl1r.v v0, (a3) # Unknown-size Folded Reload
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t
; RV64-NEXT: addi a1, a2, -16
; RV64-NEXT: sltu a2, a2, a1
; RV64-NEXT: addi a2, a2, -1
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64-NEXT: vslidedown.vi v0, v7, 2
+; RV64-NEXT: vslidedown.vi v0, v0, 2
; RV64-NEXT: and a1, a2, a1
-; RV64-NEXT: addi a2, sp, 16
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
; RV64-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: slli a1, a0, 3
+; RV64-NEXT: add a0, a1, a0
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: .cfi_def_cfa sp, 16
; RV64-NEXT: addi sp, sp, 16
@@ -1926,22 +1933,22 @@ define void @vpscatter_baseidx_sext_v32i32_v32f64(<32 x double> %val, ptr %base,
; RV64-NEXT: add a3, a4, a3
; RV64-NEXT: sub sp, sp, a3
; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x09, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 9 * vlenb
+; RV64-NEXT: addi a3, sp, 16
+; RV64-NEXT: vs1r.v v0, (a3) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a3, vlenb
+; RV64-NEXT: add a3, sp, a3
+; RV64-NEXT: addi a3, a3, 16
+; RV64-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV64-NEXT: li a3, 32
; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
-; RV64-NEXT: vle32.v v24, (a1)
-; RV64-NEXT: addi a1, sp, 16
-; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: add a1, sp, a1
-; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: vle32.v v16, (a1)
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT: vsext.vf2 v0, v24
+; RV64-NEXT: vsext.vf2 v0, v16
; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma
-; RV64-NEXT: vslidedown.vi v24, v24, 16
+; RV64-NEXT: vslidedown.vi v16, v16, 16
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT: vsext.vf2 v16, v24
-; RV64-NEXT: vsll.vi v16, v16, 3
+; RV64-NEXT: vsext.vf2 v24, v16
+; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: li a3, 16
; RV64-NEXT: vsll.vi v24, v0, 3
; RV64-NEXT: mv a1, a2
@@ -2015,22 +2022,22 @@ define void @vpscatter_baseidx_zext_v32i32_v32f64(<32 x double> %val, ptr %base,
; RV64-NEXT: add a3, a4, a3
; RV64-NEXT: sub sp, sp, a3
; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x09, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 9 * vlenb
+; RV64-NEXT: addi a3, sp, 16
+; RV64-NEXT: vs1r.v v0, (a3) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a3, vlenb
+; RV64-NEXT: add a3, sp, a3
+; RV64-NEXT: addi a3, a3, 16
+; RV64-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV64-NEXT: li a3, 32
; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
-; RV64-NEXT: vle32.v v24, (a1)
-; RV64-NEXT: addi a1, sp, 16
-; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: add a1, sp, a1
-; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: vle32.v v16, (a1)
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT: vzext.vf2 v0, v24
+; RV64-NEXT: vzext.vf2 v0, v16
; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma
-; RV64-NEXT: vslidedown.vi v24, v24, 16
+; RV64-NEXT: vslidedown.vi v16, v16, 16
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT: vzext.vf2 v16, v24
-; RV64-NEXT: vsll.vi v16, v16, 3
+; RV64-NEXT: vzext.vf2 v24, v16
+; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: li a3, 16
; RV64-NEXT: vsll.vi v24, v0, 3
; RV64-NEXT: mv a1, a2
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll
index 18251eeef0f236..add32b61a02628 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll
@@ -219,43 +219,43 @@ define <256 x i8> @select_evl_v256i8(<256 x i1> %a, <256 x i8> %b, <256 x i8> %c
; CHECK-NEXT: mul a2, a2, a3
; CHECK-NEXT: sub sp, sp, a2
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 4
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: li a2, 128
; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
-; CHECK-NEXT: vle8.v v24, (a0)
+; CHECK-NEXT: vle8.v v16, (a0)
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: addi a0, a1, 128
; CHECK-NEXT: vle8.v v24, (a0)
+; CHECK-NEXT: vle8.v v16, (a1)
; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT: vle8.v v24, (a1)
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT: vmv1r.v v7, v0
+; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vmv1r.v v9, v0
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma
-; CHECK-NEXT: vmerge.vvm v24, v8, v24, v0
-; CHECK-NEXT: vmv1r.v v0, v7
+; CHECK-NEXT: vmerge.vvm v24, v24, v16, v0
+; CHECK-NEXT: vmv1r.v v0, v9
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
-; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0
+; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0
; CHECK-NEXT: vmv8r.v v16, v24
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: li a1, 24
@@ -424,11 +424,11 @@ define <32 x i64> @select_v32i64(<32 x i1> %a, <32 x i64> %b, <32 x i64> %c, i32
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: addi a1, a0, 128
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT: vle64.v v24, (a1)
-; CHECK-NEXT: addi a1, sp, 16
-; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vle64.v v16, (a1)
; CHECK-NEXT: vle64.v v24, (a0)
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: mv a0, a2
@@ -447,7 +447,7 @@ define <32 x i64> @select_v32i64(<32 x i1> %a, <32 x i64> %b, <32 x i64> %c, i32
; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0
+; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add sp, sp, a0
@@ -465,36 +465,48 @@ define <32 x i64> @select_evl_v32i64(<32 x i1> %a, <32 x i64> %b, <32 x i64> %c)
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: li a2, 24
+; CHECK-NEXT: mul a1, a1, a2
; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: addi a1, a0, 128
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT: vle64.v v24, (a0)
+; CHECK-NEXT: vle64.v v8, (a0)
+; CHECK-NEXT: vle64.v v16, (a1)
; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT: vle64.v v24, (a1)
+; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; CHECK-NEXT: vslidedown.vi v24, v0, 2
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT: vslidedown.vi v7, v0, 2
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0
-; CHECK-NEXT: vmv1r.v v0, v7
+; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0
+; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vsetivli zero, 1, e64, m8, ta, ma
-; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0
+; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: li a1, 24
+; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: .cfi_def_cfa sp, 16
; CHECK-NEXT: addi sp, sp, 16
@@ -611,12 +623,12 @@ define <64 x float> @select_v64f32(<64 x i1> %a, <64 x float> %b, <64 x float> %
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: addi a1, a0, 128
; CHECK-NEXT: li a3, 32
; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
-; CHECK-NEXT: vle32.v v24, (a1)
-; CHECK-NEXT: addi a1, sp, 16
-; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vle32.v v16, (a1)
; CHECK-NEXT: vle32.v v24, (a0)
; CHECK-NEXT: mv a0, a2
; CHECK-NEXT: bltu a2, a3, .LBB35_2
@@ -634,7 +646,7 @@ define <64 x float> @select_v64f32(<64 x i1> %a, <64 x float> %b, <64 x float> %
; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0
+; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add sp, sp, a0
diff --git a/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll
index d9028b293e60b6..808962e0344c6e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll
@@ -673,9 +673,9 @@ declare <vscale x 8 x half> @llvm.vp.floor.nxv8f16(<vscale x 8 x half>, <vscale
define <vscale x 8 x half> @vp_floor_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_floor_nxv8f16:
; ZVFH: # %bb.0:
+; ZVFH-NEXT: vmv1r.v v10, v0
; ZVFH-NEXT: lui a1, %hi(.LCPI18_0)
; ZVFH-NEXT: flh fa5, %lo(.LCPI18_0)(a1)
-; ZVFH-NEXT: vmv1r.v v10, v0
; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT: vfabs.v v12, v8, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu
@@ -759,9 +759,9 @@ declare <vscale x 16 x half> @llvm.vp.floor.nxv16f16(<vscale x 16 x half>, <vsca
define <vscale x 16 x half> @vp_floor_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_floor_nxv16f16:
; ZVFH: # %bb.0:
+; ZVFH-NEXT: vmv1r.v v12, v0
; ZVFH-NEXT: lui a1, %hi(.LCPI20_0)
; ZVFH-NEXT: flh fa5, %lo(.LCPI20_0)(a1)
-; ZVFH-NEXT: vmv1r.v v12, v0
; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; ZVFH-NEXT: vfabs.v v16, v8, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu
@@ -845,9 +845,9 @@ declare <vscale x 32 x half> @llvm.vp.floor.nxv32f16(<vscale x 32 x half>, <vsca
define <vscale x 32 x half> @vp_floor_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_floor_nxv32f16:
; ZVFH: # %bb.0:
+; ZVFH-NEXT: vmv1r.v v16, v0
; ZVFH-NEXT: lui a1, %hi(.LCPI22_0)
; ZVFH-NEXT: flh fa5, %lo(.LCPI22_0)(a1)
-; ZVFH-NEXT: vmv1r.v v16, v0
; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; ZVFH-NEXT: vfabs.v v24, v8, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu
@@ -1290,9 +1290,9 @@ declare <vscale x 2 x double> @llvm.vp.floor.nxv2f64(<vscale x 2 x double>, <vsc
define <vscale x 2 x double> @vp_floor_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_nxv2f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: lui a1, %hi(.LCPI36_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI36_0)(a1)
-; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
@@ -1334,9 +1334,9 @@ declare <vscale x 4 x double> @llvm.vp.floor.nxv4f64(<vscale x 4 x double>, <vsc
define <vscale x 4 x double> @vp_floor_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_nxv4f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: lui a1, %hi(.LCPI38_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI38_0)(a1)
-; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
@@ -1378,9 +1378,9 @@ declare <vscale x 7 x double> @llvm.vp.floor.nxv7f64(<vscale x 7 x double>, <vsc
define <vscale x 7 x double> @vp_floor_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_nxv7f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI40_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI40_0)(a1)
-; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
@@ -1422,9 +1422,9 @@ declare <vscale x 8 x double> @llvm.vp.floor.nxv8f64(<vscale x 8 x double>, <vsc
define <vscale x 8 x double> @vp_floor_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_nxv8f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI42_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI42_0)(a1)
-; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
diff --git a/llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll
index 61fa2339570512..f85be11fc60f8d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll
@@ -1545,99 +1545,61 @@ define <vscale x 16 x double> @vfmax_vv_nxv16f64(<vscale x 16 x double> %va, <vs
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a3, 35
-; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: slli a1, a1, 5
; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x23, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 35 * vlenb
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
+; CHECK-NEXT: vmv1r.v v7, v0
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 24
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a3, a1, 3
; CHECK-NEXT: add a3, a0, a3
-; CHECK-NEXT: vl8re64.v v24, (a3)
-; CHECK-NEXT: csrr a3, vlenb
-; CHECK-NEXT: li a4, 27
-; CHECK-NEXT: mul a3, a3, a4
-; CHECK-NEXT: add a3, sp, a3
-; CHECK-NEXT: addi a3, a3, 16
-; CHECK-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: vl8re64.v v8, (a3)
; CHECK-NEXT: srli a3, a1, 3
-; CHECK-NEXT: csrr a4, vlenb
-; CHECK-NEXT: li a5, 18
-; CHECK-NEXT: mul a4, a4, a5
-; CHECK-NEXT: add a4, sp, a4
-; CHECK-NEXT: addi a4, a4, 16
-; CHECK-NEXT: vs1r.v v0, (a4) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
-; CHECK-NEXT: vslidedown.vx v7, v0, a3
-; CHECK-NEXT: csrr a3, vlenb
-; CHECK-NEXT: add a3, sp, a3
-; CHECK-NEXT: addi a3, a3, 16
-; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: vslidedown.vx v6, v0, a3
; CHECK-NEXT: sub a3, a2, a1
; CHECK-NEXT: sltu a4, a2, a3
; CHECK-NEXT: addi a4, a4, -1
; CHECK-NEXT: and a3, a4, a3
-; CHECK-NEXT: vmv1r.v v0, v7
-; CHECK-NEXT: csrr a4, vlenb
-; CHECK-NEXT: slli a4, a4, 1
-; CHECK-NEXT: add a4, sp, a4
-; CHECK-NEXT: addi a4, a4, 16
-; CHECK-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
+; CHECK-NEXT: vmv1r.v v0, v6
; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; CHECK-NEXT: vmfeq.vv v26, v16, v16, v0.t
; CHECK-NEXT: vmv1r.v v0, v26
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: vmv8r.v v24, v8
+; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0
; CHECK-NEXT: csrr a3, vlenb
-; CHECK-NEXT: li a4, 27
-; CHECK-NEXT: mul a3, a3, a4
-; CHECK-NEXT: add a3, sp, a3
-; CHECK-NEXT: addi a3, a3, 16
-; CHECK-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload
-; CHECK-NEXT: vmerge.vvm v24, v16, v24, v0
-; CHECK-NEXT: csrr a3, vlenb
-; CHECK-NEXT: li a4, 10
-; CHECK-NEXT: mul a3, a3, a4
-; CHECK-NEXT: add a3, sp, a3
-; CHECK-NEXT: addi a3, a3, 16
-; CHECK-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
-; CHECK-NEXT: vmv1r.v v0, v7
-; CHECK-NEXT: csrr a3, vlenb
-; CHECK-NEXT: li a4, 27
-; CHECK-NEXT: mul a3, a3, a4
+; CHECK-NEXT: slli a3, a3, 4
; CHECK-NEXT: add a3, sp, a3
; CHECK-NEXT: addi a3, a3, 16
-; CHECK-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload
+; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: vmv1r.v v0, v6
; CHECK-NEXT: vmfeq.vv v16, v24, v24, v0.t
-; CHECK-NEXT: addi a3, sp, 16
-; CHECK-NEXT: vs1r.v v16, (a3) # Unknown-size Folded Spill
-; CHECK-NEXT: vl8re64.v v0, (a0)
+; CHECK-NEXT: vl8re64.v v8, (a0)
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a3, 19
-; CHECK-NEXT: mul a0, a0, a3
+; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT: vmv8r.v v16, v8
+; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 1
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0
+; CHECK-NEXT: vmerge.vvm v16, v24, v8, v0
+; CHECK-NEXT: vmv1r.v v0, v6
; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a3, 10
-; CHECK-NEXT: mul a0, a0, a3
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vfmax.vv v8, v8, v24, v0.t
+; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfmax.vv v8, v16, v8, v0.t
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a3, 10
-; CHECK-NEXT: mul a0, a0, a3
+; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
@@ -1645,66 +1607,38 @@ define <vscale x 16 x double> @vfmax_vv_nxv16f64(<vscale x 16 x double> %va, <vs
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a2, a1
; CHECK-NEXT: .LBB40_2:
+; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a1, 18
-; CHECK-NEXT: mul a0, a0, a1
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vmv1r.v v0, v24
-; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
-; CHECK-NEXT: vmfeq.vv v25, v16, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v25
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a1, 19
+; CHECK-NEXT: li a1, 24
; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a1, 27
-; CHECK-NEXT: mul a0, a0, a1
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a1, 27
-; CHECK-NEXT: mul a0, a0, a1
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 1
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT: vmfeq.vv v25, v8, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a1, 27
-; CHECK-NEXT: mul a0, a0, a1
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0
-; CHECK-NEXT: vmv1r.v v0, v24
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vfmax.vv v8, v8, v16, v0.t
+; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vmv1r.v v0, v7
+; CHECK-NEXT: vmfeq.vv v25, v16, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v25
+; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0
+; CHECK-NEXT: vmv1r.v v0, v7
+; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfmax.vv v8, v16, v8, v0.t
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a1, 10
-; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a1, 35
-; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: slli a0, a0, 5
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: .cfi_def_cfa sp, 16
; CHECK-NEXT: addi sp, sp, 16
diff --git a/llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll
index fea3eafaad5d02..0f9ab5985c9dbc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll
@@ -1545,99 +1545,61 @@ define <vscale x 16 x double> @vfmin_vv_nxv16f64(<vscale x 16 x double> %va, <vs
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a3, 35
-; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: slli a1, a1, 5
; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x23, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 35 * vlenb
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
+; CHECK-NEXT: vmv1r.v v7, v0
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 24
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a3, a1, 3
; CHECK-NEXT: add a3, a0, a3
-; CHECK-NEXT: vl8re64.v v24, (a3)
-; CHECK-NEXT: csrr a3, vlenb
-; CHECK-NEXT: li a4, 27
-; CHECK-NEXT: mul a3, a3, a4
-; CHECK-NEXT: add a3, sp, a3
-; CHECK-NEXT: addi a3, a3, 16
-; CHECK-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: vl8re64.v v8, (a3)
; CHECK-NEXT: srli a3, a1, 3
-; CHECK-NEXT: csrr a4, vlenb
-; CHECK-NEXT: li a5, 18
-; CHECK-NEXT: mul a4, a4, a5
-; CHECK-NEXT: add a4, sp, a4
-; CHECK-NEXT: addi a4, a4, 16
-; CHECK-NEXT: vs1r.v v0, (a4) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
-; CHECK-NEXT: vslidedown.vx v7, v0, a3
-; CHECK-NEXT: csrr a3, vlenb
-; CHECK-NEXT: add a3, sp, a3
-; CHECK-NEXT: addi a3, a3, 16
-; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: vslidedown.vx v6, v0, a3
; CHECK-NEXT: sub a3, a2, a1
; CHECK-NEXT: sltu a4, a2, a3
; CHECK-NEXT: addi a4, a4, -1
; CHECK-NEXT: and a3, a4, a3
-; CHECK-NEXT: vmv1r.v v0, v7
-; CHECK-NEXT: csrr a4, vlenb
-; CHECK-NEXT: slli a4, a4, 1
-; CHECK-NEXT: add a4, sp, a4
-; CHECK-NEXT: addi a4, a4, 16
-; CHECK-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
+; CHECK-NEXT: vmv1r.v v0, v6
; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; CHECK-NEXT: vmfeq.vv v26, v16, v16, v0.t
; CHECK-NEXT: vmv1r.v v0, v26
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: vmv8r.v v24, v8
+; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0
; CHECK-NEXT: csrr a3, vlenb
-; CHECK-NEXT: li a4, 27
-; CHECK-NEXT: mul a3, a3, a4
-; CHECK-NEXT: add a3, sp, a3
-; CHECK-NEXT: addi a3, a3, 16
-; CHECK-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload
-; CHECK-NEXT: vmerge.vvm v24, v16, v24, v0
-; CHECK-NEXT: csrr a3, vlenb
-; CHECK-NEXT: li a4, 10
-; CHECK-NEXT: mul a3, a3, a4
-; CHECK-NEXT: add a3, sp, a3
-; CHECK-NEXT: addi a3, a3, 16
-; CHECK-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
-; CHECK-NEXT: vmv1r.v v0, v7
-; CHECK-NEXT: csrr a3, vlenb
-; CHECK-NEXT: li a4, 27
-; CHECK-NEXT: mul a3, a3, a4
+; CHECK-NEXT: slli a3, a3, 4
; CHECK-NEXT: add a3, sp, a3
; CHECK-NEXT: addi a3, a3, 16
-; CHECK-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload
+; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: vmv1r.v v0, v6
; CHECK-NEXT: vmfeq.vv v16, v24, v24, v0.t
-; CHECK-NEXT: addi a3, sp, 16
-; CHECK-NEXT: vs1r.v v16, (a3) # Unknown-size Folded Spill
-; CHECK-NEXT: vl8re64.v v0, (a0)
+; CHECK-NEXT: vl8re64.v v8, (a0)
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a3, 19
-; CHECK-NEXT: mul a0, a0, a3
+; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT: vmv8r.v v16, v8
+; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 1
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0
+; CHECK-NEXT: vmerge.vvm v16, v24, v8, v0
+; CHECK-NEXT: vmv1r.v v0, v6
; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a3, 10
-; CHECK-NEXT: mul a0, a0, a3
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vfmin.vv v8, v8, v24, v0.t
+; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfmin.vv v8, v16, v8, v0.t
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a3, 10
-; CHECK-NEXT: mul a0, a0, a3
+; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
@@ -1645,66 +1607,38 @@ define <vscale x 16 x double> @vfmin_vv_nxv16f64(<vscale x 16 x double> %va, <vs
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a2, a1
; CHECK-NEXT: .LBB40_2:
+; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a1, 18
-; CHECK-NEXT: mul a0, a0, a1
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vmv1r.v v0, v24
-; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
-; CHECK-NEXT: vmfeq.vv v25, v16, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v25
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a1, 19
+; CHECK-NEXT: li a1, 24
; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a1, 27
-; CHECK-NEXT: mul a0, a0, a1
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a1, 27
-; CHECK-NEXT: mul a0, a0, a1
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 1
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT: vmfeq.vv v25, v8, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a1, 27
-; CHECK-NEXT: mul a0, a0, a1
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0
-; CHECK-NEXT: vmv1r.v v0, v24
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vfmin.vv v8, v8, v16, v0.t
+; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vmv1r.v v0, v7
+; CHECK-NEXT: vmfeq.vv v25, v16, v16, v0.t
+; CHECK-NEXT: vmv1r.v v0, v25
+; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0
+; CHECK-NEXT: vmv1r.v v0, v7
+; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfmin.vv v8, v16, v8, v0.t
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a1, 10
-; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a1, 35
-; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: slli a0, a0, 5
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: .cfi_def_cfa sp, 16
; CHECK-NEXT: addi sp, sp, 16
diff --git a/llvm/test/CodeGen/RISCV/rvv/fold-scalar-load-crash.ll b/llvm/test/CodeGen/RISCV/rvv/fold-scalar-load-crash.ll
index 4aa26d6b79ca46..025874a1a74e2e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fold-scalar-load-crash.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fold-scalar-load-crash.ll
@@ -7,10 +7,10 @@
define i32 @test(i32 %size, ptr %add.ptr, i64 %const) {
; RV32-LABEL: test:
; RV32: # %bb.0: # %entry
-; RV32-NEXT: th.lbib a3, (a1), -1, 0
-; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT: vmv.v.x v8, a3
; RV32-NEXT: addi a3, a2, 1
+; RV32-NEXT: th.lbib a4, (a1), -1, 0
+; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT: vmv.v.x v8, a4
; RV32-NEXT: vmv.s.x v9, zero
; RV32-NEXT: vsetvli zero, a3, e8, mf2, tu, ma
; RV32-NEXT: vslideup.vx v8, v9, a2
@@ -33,10 +33,10 @@ define i32 @test(i32 %size, ptr %add.ptr, i64 %const) {
;
; RV64-LABEL: test:
; RV64: # %bb.0: # %entry
-; RV64-NEXT: th.lbib a3, (a1), -1, 0
-; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV64-NEXT: vmv.v.x v8, a3
; RV64-NEXT: addi a3, a2, 1
+; RV64-NEXT: th.lbib a4, (a1), -1, 0
+; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT: vmv.v.x v8, a4
; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vsetvli zero, a3, e8, mf2, tu, ma
; RV64-NEXT: vslideup.vx v8, v9, a2
diff --git a/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll
index cf35e9c40b8a73..48b8bba236e1b8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll
@@ -216,15 +216,16 @@ define <vscale x 64 x i8> @fshr_v64i8(<vscale x 64 x i8> %a, <vscale x 64 x i8>
; CHECK-NEXT: slli a2, a2, 3
; CHECK-NEXT: sub sp, sp, a2
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vl8r.v v24, (a0)
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vsll.vi v16, v8, 1, v0.t
; CHECK-NEXT: vnot.v v8, v24, v0.t
; CHECK-NEXT: vand.vi v8, v8, 7, v0.t
; CHECK-NEXT: vsll.vv v8, v16, v8, v0.t
; CHECK-NEXT: vand.vi v16, v24, 7, v0.t
+; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vsrl.vv v16, v24, v16, v0.t
; CHECK-NEXT: vor.vv v8, v8, v16, v0.t
@@ -249,15 +250,16 @@ define <vscale x 64 x i8> @fshl_v64i8(<vscale x 64 x i8> %a, <vscale x 64 x i8>
; CHECK-NEXT: slli a2, a2, 3
; CHECK-NEXT: sub sp, sp, a2
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vl8r.v v24, (a0)
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vsrl.vi v16, v16, 1, v0.t
; CHECK-NEXT: vnot.v v8, v24, v0.t
; CHECK-NEXT: vand.vi v8, v8, 7, v0.t
; CHECK-NEXT: vsrl.vv v8, v16, v8, v0.t
; CHECK-NEXT: vand.vi v16, v24, 7, v0.t
+; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vsll.vv v16, v24, v16, v0.t
; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
@@ -452,15 +454,16 @@ define <vscale x 32 x i16> @fshr_v32i16(<vscale x 32 x i16> %a, <vscale x 32 x i
; CHECK-NEXT: slli a2, a2, 3
; CHECK-NEXT: sub sp, sp, a2
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vl8re16.v v24, (a0)
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT: vsll.vi v16, v8, 1, v0.t
; CHECK-NEXT: vnot.v v8, v24, v0.t
; CHECK-NEXT: vand.vi v8, v8, 15, v0.t
; CHECK-NEXT: vsll.vv v8, v16, v8, v0.t
; CHECK-NEXT: vand.vi v16, v24, 15, v0.t
+; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vsrl.vv v16, v24, v16, v0.t
; CHECK-NEXT: vor.vv v8, v8, v16, v0.t
@@ -485,15 +488,16 @@ define <vscale x 32 x i16> @fshl_v32i16(<vscale x 32 x i16> %a, <vscale x 32 x i
; CHECK-NEXT: slli a2, a2, 3
; CHECK-NEXT: sub sp, sp, a2
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vl8re16.v v24, (a0)
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT: vsrl.vi v16, v16, 1, v0.t
; CHECK-NEXT: vnot.v v8, v24, v0.t
; CHECK-NEXT: vand.vi v8, v8, 15, v0.t
; CHECK-NEXT: vsrl.vv v8, v16, v8, v0.t
; CHECK-NEXT: vand.vi v16, v24, 15, v0.t
+; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vsll.vv v16, v24, v16, v0.t
; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
@@ -662,9 +666,9 @@ define <vscale x 16 x i32> @fshr_v16i32(<vscale x 16 x i32> %a, <vscale x 16 x i
; CHECK-NEXT: slli a2, a2, 3
; CHECK-NEXT: sub sp, sp, a2
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vl8re32.v v24, (a0)
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: li a0, 31
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vand.vx v8, v24, a0, v0.t
@@ -697,10 +701,10 @@ define <vscale x 16 x i32> @fshl_v16i32(<vscale x 16 x i32> %a, <vscale x 16 x i
; CHECK-NEXT: slli a2, a2, 3
; CHECK-NEXT: sub sp, sp, a2
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; CHECK-NEXT: vl8re32.v v24, (a0)
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vmv8r.v v16, v8
+; CHECK-NEXT: vl8re32.v v24, (a0)
; CHECK-NEXT: li a0, 31
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vand.vx v8, v24, a0, v0.t
@@ -841,9 +845,9 @@ define <vscale x 7 x i64> @fshr_v7i64(<vscale x 7 x i64> %a, <vscale x 7 x i64>
; CHECK-NEXT: slli a2, a2, 3
; CHECK-NEXT: sub sp, sp, a2
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vl8re64.v v24, (a0)
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: li a0, 63
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: vand.vx v8, v24, a0, v0.t
@@ -876,10 +880,10 @@ define <vscale x 7 x i64> @fshl_v7i64(<vscale x 7 x i64> %a, <vscale x 7 x i64>
; CHECK-NEXT: slli a2, a2, 3
; CHECK-NEXT: sub sp, sp, a2
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; CHECK-NEXT: vl8re64.v v24, (a0)
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vmv8r.v v16, v8
+; CHECK-NEXT: vl8re64.v v24, (a0)
; CHECK-NEXT: li a0, 63
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: vand.vx v8, v24, a0, v0.t
@@ -912,9 +916,9 @@ define <vscale x 8 x i64> @fshr_v8i64(<vscale x 8 x i64> %a, <vscale x 8 x i64>
; CHECK-NEXT: slli a2, a2, 3
; CHECK-NEXT: sub sp, sp, a2
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vl8re64.v v24, (a0)
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: li a0, 63
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: vand.vx v8, v24, a0, v0.t
@@ -947,10 +951,10 @@ define <vscale x 8 x i64> @fshl_v8i64(<vscale x 8 x i64> %a, <vscale x 8 x i64>
; CHECK-NEXT: slli a2, a2, 3
; CHECK-NEXT: sub sp, sp, a2
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; CHECK-NEXT: vl8re64.v v24, (a0)
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vmv8r.v v16, v8
+; CHECK-NEXT: vl8re64.v v24, (a0)
; CHECK-NEXT: li a0, 63
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: vand.vx v8, v24, a0, v0.t
diff --git a/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll
index c7c7dbafd630a1..72b0401eb602a4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll
@@ -1894,36 +1894,57 @@ define void @mscatter_nxv16f64(<vscale x 8 x double> %val0, <vscale x 8 x double
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: csrr a2, vlenb
-; RV64-NEXT: slli a2, a2, 4
+; RV64-NEXT: slli a2, a2, 5
; RV64-NEXT: sub sp, sp, a2
-; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
; RV64-NEXT: csrr a2, vlenb
-; RV64-NEXT: slli a2, a2, 3
+; RV64-NEXT: li a3, 24
+; RV64-NEXT: mul a2, a2, a3
; RV64-NEXT: add a2, sp, a2
; RV64-NEXT: addi a2, a2, 16
; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
-; RV64-NEXT: vmv8r.v v16, v8
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 4
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: vl8re64.v v8, (a0)
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: vl8re64.v v8, (a1)
; RV64-NEXT: addi a1, sp, 16
; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; RV64-NEXT: vl8re64.v v8, (a0)
-; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: srli a0, a0, 3
; RV64-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; RV64-NEXT: vslidedown.vx v24, v0, a0
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 4
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v16, (zero), v8, v0.t
; RV64-NEXT: vmv1r.v v0, v24
; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: li a1, 24
+; RV64-NEXT: mul a0, a0, a1
; RV64-NEXT: add a0, sp, a0
; RV64-NEXT: addi a0, a0, 16
-; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
-; RV64-NEXT: addi a0, sp, 16
; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t
+; RV64-NEXT: addi a0, sp, 16
+; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: vsoxei64.v v16, (zero), v8, v0.t
; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: slli a0, a0, 4
+; RV64-NEXT: slli a0, a0, 5
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: .cfi_def_cfa sp, 16
; RV64-NEXT: addi sp, sp, 16
diff --git a/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll
index 8da064529f1b3d..5061752bb2b8a0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll
@@ -1911,15 +1911,14 @@ define <vscale x 3 x i64> @reverse_nxv3i64(<vscale x 3 x i64> %a) {
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; CHECK-NEXT: vid.v v12
-; CHECK-NEXT: vrsub.vx v14, v12, a0
-; CHECK-NEXT: vrgather.vv v13, v10, v14
-; CHECK-NEXT: vrgather.vv v10, v9, v14
-; CHECK-NEXT: vmv.v.v v12, v13
-; CHECK-NEXT: vrgather.vv v15, v8, v14
-; CHECK-NEXT: vmv.v.v v13, v10
-; CHECK-NEXT: vrgather.vv v8, v11, v14
-; CHECK-NEXT: vmv.v.v v14, v15
-; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: vrsub.vx v12, v12, a0
+; CHECK-NEXT: vrgather.vv v15, v8, v12
+; CHECK-NEXT: vrgather.vv v14, v9, v12
+; CHECK-NEXT: vrgather.vv v13, v10, v12
+; CHECK-NEXT: vrgather.vv v8, v11, v12
+; CHECK-NEXT: vmv.v.v v9, v14
+; CHECK-NEXT: vmv.v.v v10, v15
+; CHECK-NEXT: vmv.v.v v8, v13
; CHECK-NEXT: ret
%res = call <vscale x 3 x i64> @llvm.vector.reverse.nxv3i64(<vscale x 3 x i64> %a)
ret <vscale x 3 x i64> %res
@@ -1933,19 +1932,18 @@ define <vscale x 6 x i64> @reverse_nxv6i64(<vscale x 6 x i64> %a) {
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; CHECK-NEXT: vid.v v16
-; CHECK-NEXT: vrsub.vx v22, v16, a0
-; CHECK-NEXT: vrgather.vv v21, v10, v22
-; CHECK-NEXT: vrgather.vv v19, v12, v22
-; CHECK-NEXT: vrgather.vv v18, v13, v22
-; CHECK-NEXT: vrgather.vv v20, v11, v22
-; CHECK-NEXT: vmv2r.v v16, v18
-; CHECK-NEXT: vmv2r.v v18, v20
-; CHECK-NEXT: vrgather.vv v31, v8, v22
-; CHECK-NEXT: vrgather.vv v30, v9, v22
-; CHECK-NEXT: vrgather.vv v9, v14, v22
-; CHECK-NEXT: vrgather.vv v8, v15, v22
-; CHECK-NEXT: vmv2r.v v20, v30
-; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: vrsub.vx v16, v16, a0
+; CHECK-NEXT: vrgather.vv v23, v8, v16
+; CHECK-NEXT: vrgather.vv v22, v9, v16
+; CHECK-NEXT: vrgather.vv v21, v10, v16
+; CHECK-NEXT: vrgather.vv v20, v11, v16
+; CHECK-NEXT: vrgather.vv v9, v14, v16
+; CHECK-NEXT: vrgather.vv v19, v12, v16
+; CHECK-NEXT: vrgather.vv v18, v13, v16
+; CHECK-NEXT: vrgather.vv v8, v15, v16
+; CHECK-NEXT: vmv2r.v v10, v20
+; CHECK-NEXT: vmv2r.v v12, v22
+; CHECK-NEXT: vmv2r.v v8, v18
; CHECK-NEXT: ret
%res = call <vscale x 6 x i64> @llvm.vector.reverse.nxv6i64(<vscale x 6 x i64> %a)
ret <vscale x 6 x i64> %res
diff --git a/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll
index b43655283b9757..6db5c4fe046ce9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll
@@ -665,9 +665,9 @@ declare <vscale x 8 x half> @llvm.vp.nearbyint.nxv8f16(<vscale x 8 x half>, <vsc
define <vscale x 8 x half> @vp_nearbyint_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_nearbyint_nxv8f16:
; ZVFH: # %bb.0:
+; ZVFH-NEXT: vmv1r.v v10, v0
; ZVFH-NEXT: lui a1, %hi(.LCPI18_0)
; ZVFH-NEXT: flh fa5, %lo(.LCPI18_0)(a1)
-; ZVFH-NEXT: vmv1r.v v10, v0
; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT: vfabs.v v12, v8, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu
@@ -751,9 +751,9 @@ declare <vscale x 16 x half> @llvm.vp.nearbyint.nxv16f16(<vscale x 16 x half>, <
define <vscale x 16 x half> @vp_nearbyint_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_nearbyint_nxv16f16:
; ZVFH: # %bb.0:
+; ZVFH-NEXT: vmv1r.v v12, v0
; ZVFH-NEXT: lui a1, %hi(.LCPI20_0)
; ZVFH-NEXT: flh fa5, %lo(.LCPI20_0)(a1)
-; ZVFH-NEXT: vmv1r.v v12, v0
; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; ZVFH-NEXT: vfabs.v v16, v8, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu
@@ -837,9 +837,9 @@ declare <vscale x 32 x half> @llvm.vp.nearbyint.nxv32f16(<vscale x 32 x half>, <
define <vscale x 32 x half> @vp_nearbyint_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_nearbyint_nxv32f16:
; ZVFH: # %bb.0:
+; ZVFH-NEXT: vmv1r.v v16, v0
; ZVFH-NEXT: lui a1, %hi(.LCPI22_0)
; ZVFH-NEXT: flh fa5, %lo(.LCPI22_0)(a1)
-; ZVFH-NEXT: vmv1r.v v16, v0
; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; ZVFH-NEXT: vfabs.v v24, v8, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu
@@ -1274,9 +1274,9 @@ declare <vscale x 2 x double> @llvm.vp.nearbyint.nxv2f64(<vscale x 2 x double>,
define <vscale x 2 x double> @vp_nearbyint_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_nearbyint_nxv2f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: lui a1, %hi(.LCPI36_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI36_0)(a1)
-; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
@@ -1318,9 +1318,9 @@ declare <vscale x 4 x double> @llvm.vp.nearbyint.nxv4f64(<vscale x 4 x double>,
define <vscale x 4 x double> @vp_nearbyint_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_nearbyint_nxv4f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: lui a1, %hi(.LCPI38_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI38_0)(a1)
-; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
@@ -1362,9 +1362,9 @@ declare <vscale x 7 x double> @llvm.vp.nearbyint.nxv7f64(<vscale x 7 x double>,
define <vscale x 7 x double> @vp_nearbyint_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_nearbyint_nxv7f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI40_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI40_0)(a1)
-; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
@@ -1406,9 +1406,9 @@ declare <vscale x 8 x double> @llvm.vp.nearbyint.nxv8f64(<vscale x 8 x double>,
define <vscale x 8 x double> @vp_nearbyint_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_nearbyint_nxv8f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI42_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI42_0)(a1)
-; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
diff --git a/llvm/test/CodeGen/RISCV/rvv/pr88576.ll b/llvm/test/CodeGen/RISCV/rvv/pr88576.ll
index e8a8d9e422ac18..56894184a5c420 100644
--- a/llvm/test/CodeGen/RISCV/rvv/pr88576.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/pr88576.ll
@@ -23,13 +23,13 @@ define i1 @foo(<vscale x 16 x i8> %x, i64 %y) {
; CHECK-NEXT: slli a2, a2, 4
; CHECK-NEXT: sub sp, sp, a2
; CHECK-NEXT: andi sp, sp, -64
+; CHECK-NEXT: vmv1r.v v0, v9
; CHECK-NEXT: addi a2, sp, 64
; CHECK-NEXT: add a0, a2, a0
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, a2, a1
; CHECK-NEXT: vsetvli a3, zero, e8, m8, ta, ma
; CHECK-NEXT: vmv.v.i v16, 0
-; CHECK-NEXT: vmv1r.v v0, v9
; CHECK-NEXT: vmerge.vim v24, v16, 1, v0
; CHECK-NEXT: vs8r.v v24, (a1)
; CHECK-NEXT: vmv1r.v v0, v8
diff --git a/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll
index 859d87d096962b..59bb57daeed724 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll
@@ -618,9 +618,9 @@ declare <vscale x 8 x half> @llvm.vp.rint.nxv8f16(<vscale x 8 x half>, <vscale x
define <vscale x 8 x half> @vp_rint_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_rint_nxv8f16:
; ZVFH: # %bb.0:
+; ZVFH-NEXT: vmv1r.v v10, v0
; ZVFH-NEXT: lui a1, %hi(.LCPI18_0)
; ZVFH-NEXT: flh fa5, %lo(.LCPI18_0)(a1)
-; ZVFH-NEXT: vmv1r.v v10, v0
; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT: vfabs.v v12, v8, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu
@@ -696,9 +696,9 @@ declare <vscale x 16 x half> @llvm.vp.rint.nxv16f16(<vscale x 16 x half>, <vscal
define <vscale x 16 x half> @vp_rint_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_rint_nxv16f16:
; ZVFH: # %bb.0:
+; ZVFH-NEXT: vmv1r.v v12, v0
; ZVFH-NEXT: lui a1, %hi(.LCPI20_0)
; ZVFH-NEXT: flh fa5, %lo(.LCPI20_0)(a1)
-; ZVFH-NEXT: vmv1r.v v12, v0
; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; ZVFH-NEXT: vfabs.v v16, v8, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu
@@ -774,9 +774,9 @@ declare <vscale x 32 x half> @llvm.vp.rint.nxv32f16(<vscale x 32 x half>, <vscal
define <vscale x 32 x half> @vp_rint_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_rint_nxv32f16:
; ZVFH: # %bb.0:
+; ZVFH-NEXT: vmv1r.v v16, v0
; ZVFH-NEXT: lui a1, %hi(.LCPI22_0)
; ZVFH-NEXT: flh fa5, %lo(.LCPI22_0)(a1)
-; ZVFH-NEXT: vmv1r.v v16, v0
; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; ZVFH-NEXT: vfabs.v v24, v8, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu
@@ -1180,9 +1180,9 @@ declare <vscale x 2 x double> @llvm.vp.rint.nxv2f64(<vscale x 2 x double>, <vsca
define <vscale x 2 x double> @vp_rint_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv2f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: lui a1, %hi(.LCPI36_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI36_0)(a1)
-; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
@@ -1220,9 +1220,9 @@ declare <vscale x 4 x double> @llvm.vp.rint.nxv4f64(<vscale x 4 x double>, <vsca
define <vscale x 4 x double> @vp_rint_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv4f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: lui a1, %hi(.LCPI38_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI38_0)(a1)
-; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
@@ -1260,9 +1260,9 @@ declare <vscale x 7 x double> @llvm.vp.rint.nxv7f64(<vscale x 7 x double>, <vsca
define <vscale x 7 x double> @vp_rint_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv7f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI40_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI40_0)(a1)
-; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
@@ -1300,9 +1300,9 @@ declare <vscale x 8 x double> @llvm.vp.rint.nxv8f64(<vscale x 8 x double>, <vsca
define <vscale x 8 x double> @vp_rint_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv8f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI42_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI42_0)(a1)
-; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
diff --git a/llvm/test/CodeGen/RISCV/rvv/round-vp.ll b/llvm/test/CodeGen/RISCV/rvv/round-vp.ll
index 9ed28248e0cc10..eb6044359e24aa 100644
--- a/llvm/test/CodeGen/RISCV/rvv/round-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/round-vp.ll
@@ -673,9 +673,9 @@ declare <vscale x 8 x half> @llvm.vp.round.nxv8f16(<vscale x 8 x half>, <vscale
define <vscale x 8 x half> @vp_round_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_round_nxv8f16:
; ZVFH: # %bb.0:
+; ZVFH-NEXT: vmv1r.v v10, v0
; ZVFH-NEXT: lui a1, %hi(.LCPI18_0)
; ZVFH-NEXT: flh fa5, %lo(.LCPI18_0)(a1)
-; ZVFH-NEXT: vmv1r.v v10, v0
; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT: vfabs.v v12, v8, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu
@@ -759,9 +759,9 @@ declare <vscale x 16 x half> @llvm.vp.round.nxv16f16(<vscale x 16 x half>, <vsca
define <vscale x 16 x half> @vp_round_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_round_nxv16f16:
; ZVFH: # %bb.0:
+; ZVFH-NEXT: vmv1r.v v12, v0
; ZVFH-NEXT: lui a1, %hi(.LCPI20_0)
; ZVFH-NEXT: flh fa5, %lo(.LCPI20_0)(a1)
-; ZVFH-NEXT: vmv1r.v v12, v0
; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; ZVFH-NEXT: vfabs.v v16, v8, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu
@@ -845,9 +845,9 @@ declare <vscale x 32 x half> @llvm.vp.round.nxv32f16(<vscale x 32 x half>, <vsca
define <vscale x 32 x half> @vp_round_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_round_nxv32f16:
; ZVFH: # %bb.0:
+; ZVFH-NEXT: vmv1r.v v16, v0
; ZVFH-NEXT: lui a1, %hi(.LCPI22_0)
; ZVFH-NEXT: flh fa5, %lo(.LCPI22_0)(a1)
-; ZVFH-NEXT: vmv1r.v v16, v0
; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; ZVFH-NEXT: vfabs.v v24, v8, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu
@@ -1290,9 +1290,9 @@ declare <vscale x 2 x double> @llvm.vp.round.nxv2f64(<vscale x 2 x double>, <vsc
define <vscale x 2 x double> @vp_round_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_nxv2f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: lui a1, %hi(.LCPI36_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI36_0)(a1)
-; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
@@ -1334,9 +1334,9 @@ declare <vscale x 4 x double> @llvm.vp.round.nxv4f64(<vscale x 4 x double>, <vsc
define <vscale x 4 x double> @vp_round_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_nxv4f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: lui a1, %hi(.LCPI38_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI38_0)(a1)
-; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
@@ -1378,9 +1378,9 @@ declare <vscale x 7 x double> @llvm.vp.round.nxv7f64(<vscale x 7 x double>, <vsc
define <vscale x 7 x double> @vp_round_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_nxv7f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI40_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI40_0)(a1)
-; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
@@ -1422,9 +1422,9 @@ declare <vscale x 8 x double> @llvm.vp.round.nxv8f64(<vscale x 8 x double>, <vsc
define <vscale x 8 x double> @vp_round_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_nxv8f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI42_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI42_0)(a1)
-; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
diff --git a/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll b/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll
index 3fdb354bff94bf..060b00836b2f45 100644
--- a/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll
@@ -673,9 +673,9 @@ declare <vscale x 8 x half> @llvm.vp.roundeven.nxv8f16(<vscale x 8 x half>, <vsc
define <vscale x 8 x half> @vp_roundeven_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_roundeven_nxv8f16:
; ZVFH: # %bb.0:
+; ZVFH-NEXT: vmv1r.v v10, v0
; ZVFH-NEXT: lui a1, %hi(.LCPI18_0)
; ZVFH-NEXT: flh fa5, %lo(.LCPI18_0)(a1)
-; ZVFH-NEXT: vmv1r.v v10, v0
; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT: vfabs.v v12, v8, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu
@@ -759,9 +759,9 @@ declare <vscale x 16 x half> @llvm.vp.roundeven.nxv16f16(<vscale x 16 x half>, <
define <vscale x 16 x half> @vp_roundeven_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_roundeven_nxv16f16:
; ZVFH: # %bb.0:
+; ZVFH-NEXT: vmv1r.v v12, v0
; ZVFH-NEXT: lui a1, %hi(.LCPI20_0)
; ZVFH-NEXT: flh fa5, %lo(.LCPI20_0)(a1)
-; ZVFH-NEXT: vmv1r.v v12, v0
; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; ZVFH-NEXT: vfabs.v v16, v8, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu
@@ -845,9 +845,9 @@ declare <vscale x 32 x half> @llvm.vp.roundeven.nxv32f16(<vscale x 32 x half>, <
define <vscale x 32 x half> @vp_roundeven_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_roundeven_nxv32f16:
; ZVFH: # %bb.0:
+; ZVFH-NEXT: vmv1r.v v16, v0
; ZVFH-NEXT: lui a1, %hi(.LCPI22_0)
; ZVFH-NEXT: flh fa5, %lo(.LCPI22_0)(a1)
-; ZVFH-NEXT: vmv1r.v v16, v0
; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; ZVFH-NEXT: vfabs.v v24, v8, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu
@@ -1290,9 +1290,9 @@ declare <vscale x 2 x double> @llvm.vp.roundeven.nxv2f64(<vscale x 2 x double>,
define <vscale x 2 x double> @vp_roundeven_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_nxv2f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: lui a1, %hi(.LCPI36_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI36_0)(a1)
-; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
@@ -1334,9 +1334,9 @@ declare <vscale x 4 x double> @llvm.vp.roundeven.nxv4f64(<vscale x 4 x double>,
define <vscale x 4 x double> @vp_roundeven_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_nxv4f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: lui a1, %hi(.LCPI38_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI38_0)(a1)
-; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
@@ -1378,9 +1378,9 @@ declare <vscale x 7 x double> @llvm.vp.roundeven.nxv7f64(<vscale x 7 x double>,
define <vscale x 7 x double> @vp_roundeven_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_nxv7f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI40_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI40_0)(a1)
-; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
@@ -1422,9 +1422,9 @@ declare <vscale x 8 x double> @llvm.vp.roundeven.nxv8f64(<vscale x 8 x double>,
define <vscale x 8 x double> @vp_roundeven_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_nxv8f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI42_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI42_0)(a1)
-; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
diff --git a/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll b/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll
index d999f65af4e346..e0f0ed8ac259eb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll
@@ -673,9 +673,9 @@ declare <vscale x 8 x half> @llvm.vp.roundtozero.nxv8f16(<vscale x 8 x half>, <v
define <vscale x 8 x half> @vp_roundtozero_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_roundtozero_nxv8f16:
; ZVFH: # %bb.0:
+; ZVFH-NEXT: vmv1r.v v10, v0
; ZVFH-NEXT: lui a1, %hi(.LCPI18_0)
; ZVFH-NEXT: flh fa5, %lo(.LCPI18_0)(a1)
-; ZVFH-NEXT: vmv1r.v v10, v0
; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT: vfabs.v v12, v8, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu
@@ -759,9 +759,9 @@ declare <vscale x 16 x half> @llvm.vp.roundtozero.nxv16f16(<vscale x 16 x half>,
define <vscale x 16 x half> @vp_roundtozero_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_roundtozero_nxv16f16:
; ZVFH: # %bb.0:
+; ZVFH-NEXT: vmv1r.v v12, v0
; ZVFH-NEXT: lui a1, %hi(.LCPI20_0)
; ZVFH-NEXT: flh fa5, %lo(.LCPI20_0)(a1)
-; ZVFH-NEXT: vmv1r.v v12, v0
; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; ZVFH-NEXT: vfabs.v v16, v8, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu
@@ -845,9 +845,9 @@ declare <vscale x 32 x half> @llvm.vp.roundtozero.nxv32f16(<vscale x 32 x half>,
define <vscale x 32 x half> @vp_roundtozero_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vp_roundtozero_nxv32f16:
; ZVFH: # %bb.0:
+; ZVFH-NEXT: vmv1r.v v16, v0
; ZVFH-NEXT: lui a1, %hi(.LCPI22_0)
; ZVFH-NEXT: flh fa5, %lo(.LCPI22_0)(a1)
-; ZVFH-NEXT: vmv1r.v v16, v0
; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; ZVFH-NEXT: vfabs.v v24, v8, v0.t
; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu
@@ -1290,9 +1290,9 @@ declare <vscale x 2 x double> @llvm.vp.roundtozero.nxv2f64(<vscale x 2 x double>
define <vscale x 2 x double> @vp_roundtozero_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_nxv2f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: lui a1, %hi(.LCPI36_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI36_0)(a1)
-; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
@@ -1334,9 +1334,9 @@ declare <vscale x 4 x double> @llvm.vp.roundtozero.nxv4f64(<vscale x 4 x double>
define <vscale x 4 x double> @vp_roundtozero_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_nxv4f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: lui a1, %hi(.LCPI38_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI38_0)(a1)
-; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
@@ -1378,9 +1378,9 @@ declare <vscale x 7 x double> @llvm.vp.roundtozero.nxv7f64(<vscale x 7 x double>
define <vscale x 7 x double> @vp_roundtozero_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_nxv7f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI40_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI40_0)(a1)
-; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
@@ -1422,9 +1422,9 @@ declare <vscale x 8 x double> @llvm.vp.roundtozero.nxv8f64(<vscale x 8 x double>
define <vscale x 8 x double> @vp_roundtozero_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundtozero_nxv8f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI42_0)
; CHECK-NEXT: fld fa5, %lo(.LCPI42_0)(a1)
-; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll
index 24b86b28e9a2c2..c33d11d871c4f0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll
@@ -1470,20 +1470,25 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64bf16(<vscale x 64 x bfloat> %va, <vs
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: mv a3, a1
-; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: slli a1, a1, 2
; CHECK-NEXT: add a1, a1, a3
; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x22, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 34 * vlenb
-; CHECK-NEXT: vmv8r.v v24, v16
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb
+; CHECK-NEXT: vmv1r.v v7, v0
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 1
-; CHECK-NEXT: mv a3, a1
; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: mv a3, a1
+; CHECK-NEXT: slli a1, a1, 1
; CHECK-NEXT: add a1, a1, a3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a3, vlenb
; CHECK-NEXT: slli a1, a3, 3
@@ -1500,158 +1505,131 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64bf16(<vscale x 64 x bfloat> %va, <vs
; CHECK-NEXT: addi a7, a7, -1
; CHECK-NEXT: and a7, a7, a1
; CHECK-NEXT: srli a1, a3, 1
-; CHECK-NEXT: csrr t0, vlenb
-; CHECK-NEXT: add t0, sp, t0
-; CHECK-NEXT: addi t0, t0, 16
-; CHECK-NEXT: vs1r.v v0, (t0) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli t0, zero, e8, m1, ta, ma
-; CHECK-NEXT: vslidedown.vx v8, v0, a1
+; CHECK-NEXT: vslidedown.vx v25, v0, a1
; CHECK-NEXT: srli a3, a3, 2
-; CHECK-NEXT: addi t0, sp, 16
-; CHECK-NEXT: vs1r.v v8, (t0) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli t0, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vx v0, v8, a3
+; CHECK-NEXT: vslidedown.vx v0, v25, a3
; CHECK-NEXT: vl8re16.v v8, (a0)
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 1
-; CHECK-NEXT: mv t0, a0
-; CHECK-NEXT: slli a0, a0, 2
-; CHECK-NEXT: add t0, t0, a0
-; CHECK-NEXT: slli a0, a0, 1
-; CHECK-NEXT: add a0, a0, t0
+; CHECK-NEXT: slli a0, a0, 5
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v20
-; CHECK-NEXT: vmv4r.v v16, v24
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: mv t0, a0
-; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: slli a0, a0, 1
; CHECK-NEXT: add a0, a0, t0
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v28
+; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a7, e32, m8, ta, ma
-; CHECK-NEXT: vmfeq.vv v26, v16, v8, v0.t
+; CHECK-NEXT: vmfeq.vv v6, v16, v8, v0.t
; CHECK-NEXT: bltu a6, a4, .LBB85_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a6, a4
; CHECK-NEXT: .LBB85_2:
+; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v16
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: mv a7, a0
-; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: slli a0, a0, 1
; CHECK-NEXT: add a0, a0, a7
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v0
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16
; CHECK-NEXT: vsetvli zero, a6, e32, m8, ta, ma
-; CHECK-NEXT: vmfeq.vv v6, v16, v8, v0.t
+; CHECK-NEXT: vmfeq.vv v5, v24, v8, v0.t
; CHECK-NEXT: add a0, a3, a3
; CHECK-NEXT: bltu a2, a5, .LBB85_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: mv a2, a5
; CHECK-NEXT: .LBB85_4:
-; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
-; CHECK-NEXT: vslideup.vx v6, v26, a3
; CHECK-NEXT: sub a5, a2, a4
; CHECK-NEXT: sltu a6, a2, a5
; CHECK-NEXT: addi a6, a6, -1
; CHECK-NEXT: and a5, a6, a5
-; CHECK-NEXT: csrr a6, vlenb
-; CHECK-NEXT: add a6, sp, a6
-; CHECK-NEXT: addi a6, a6, 16
-; CHECK-NEXT: vl1r.v v8, (a6) # Unknown-size Folded Reload
-; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vsetvli a6, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vx v0, v8, a3
+; CHECK-NEXT: vslidedown.vx v0, v7, a3
; CHECK-NEXT: csrr a6, vlenb
-; CHECK-NEXT: slli a6, a6, 1
-; CHECK-NEXT: mv a7, a6
-; CHECK-NEXT: slli a6, a6, 3
-; CHECK-NEXT: add a6, a6, a7
+; CHECK-NEXT: slli a6, a6, 4
; CHECK-NEXT: add a6, sp, a6
; CHECK-NEXT: addi a6, a6, 16
-; CHECK-NEXT: vl8r.v v24, (a6) # Unknown-size Folded Reload
+; CHECK-NEXT: vl8r.v v16, (a6) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli a6, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v28
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v20
; CHECK-NEXT: csrr a6, vlenb
-; CHECK-NEXT: slli a6, a6, 1
+; CHECK-NEXT: slli a6, a6, 3
; CHECK-NEXT: mv a7, a6
-; CHECK-NEXT: slli a6, a6, 2
+; CHECK-NEXT: slli a6, a6, 1
; CHECK-NEXT: add a6, a6, a7
; CHECK-NEXT: add a6, sp, a6
; CHECK-NEXT: addi a6, a6, 16
; CHECK-NEXT: vs8r.v v8, (a6) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a6, vlenb
-; CHECK-NEXT: slli a6, a6, 1
-; CHECK-NEXT: mv a7, a6
-; CHECK-NEXT: slli a6, a6, 2
-; CHECK-NEXT: add a7, a7, a6
-; CHECK-NEXT: slli a6, a6, 1
-; CHECK-NEXT: add a6, a6, a7
+; CHECK-NEXT: slli a6, a6, 5
; CHECK-NEXT: add a6, sp, a6
; CHECK-NEXT: addi a6, a6, 16
-; CHECK-NEXT: vl8r.v v16, (a6) # Unknown-size Folded Reload
-; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v20
+; CHECK-NEXT: vl8r.v v24, (a6) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v28
; CHECK-NEXT: csrr a6, vlenb
-; CHECK-NEXT: slli a6, a6, 1
+; CHECK-NEXT: slli a6, a6, 3
; CHECK-NEXT: mv a7, a6
-; CHECK-NEXT: slli a6, a6, 2
+; CHECK-NEXT: slli a6, a6, 1
; CHECK-NEXT: add a6, a6, a7
; CHECK-NEXT: add a6, sp, a6
; CHECK-NEXT: addi a6, a6, 16
-; CHECK-NEXT: vl8r.v v16, (a6) # Unknown-size Folded Reload
+; CHECK-NEXT: vl8r.v v24, (a6) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a5, e32, m8, ta, ma
-; CHECK-NEXT: vmfeq.vv v5, v16, v8, v0.t
+; CHECK-NEXT: vmfeq.vv v4, v24, v8, v0.t
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vslideup.vx v5, v6, a3
; CHECK-NEXT: bltu a2, a4, .LBB85_6
; CHECK-NEXT: # %bb.5:
; CHECK-NEXT: mv a2, a4
; CHECK-NEXT: .LBB85_6:
+; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: vsetvli a4, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v24
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16
; CHECK-NEXT: csrr a4, vlenb
-; CHECK-NEXT: slli a4, a4, 1
-; CHECK-NEXT: mv a5, a4
-; CHECK-NEXT: slli a4, a4, 2
-; CHECK-NEXT: add a5, a5, a4
-; CHECK-NEXT: slli a4, a4, 1
-; CHECK-NEXT: add a4, a4, a5
+; CHECK-NEXT: slli a4, a4, 5
; CHECK-NEXT: add a4, sp, a4
; CHECK-NEXT: addi a4, a4, 16
; CHECK-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload
-; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v8
-; CHECK-NEXT: vmv1r.v v0, v7
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
-; CHECK-NEXT: vmfeq.vv v8, v16, v24, v0.t
+; CHECK-NEXT: vmfeq.vv v8, v24, v16, v0.t
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
-; CHECK-NEXT: vslideup.vx v8, v5, a3
+; CHECK-NEXT: vslideup.vx v8, v4, a3
; CHECK-NEXT: add a0, a1, a1
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT: vslideup.vx v8, v6, a1
+; CHECK-NEXT: vslideup.vx v8, v5, a1
; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: mv a1, a0
-; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: slli a0, a0, 2
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: .cfi_def_cfa sp, 16
@@ -3793,20 +3771,25 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
; ZVFHMIN-NEXT: addi sp, sp, -16
; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: slli a1, a1, 1
+; ZVFHMIN-NEXT: slli a1, a1, 3
; ZVFHMIN-NEXT: mv a3, a1
-; ZVFHMIN-NEXT: slli a1, a1, 4
+; ZVFHMIN-NEXT: slli a1, a1, 2
; ZVFHMIN-NEXT: add a1, a1, a3
; ZVFHMIN-NEXT: sub sp, sp, a1
-; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x22, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 34 * vlenb
-; ZVFHMIN-NEXT: vmv8r.v v24, v16
+; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb
+; ZVFHMIN-NEXT: vmv1r.v v7, v0
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: slli a1, a1, 1
-; ZVFHMIN-NEXT: mv a3, a1
; ZVFHMIN-NEXT: slli a1, a1, 3
+; ZVFHMIN-NEXT: mv a3, a1
+; ZVFHMIN-NEXT: slli a1, a1, 1
; ZVFHMIN-NEXT: add a1, a1, a3
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
+; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: csrr a1, vlenb
+; ZVFHMIN-NEXT: slli a1, a1, 4
+; ZVFHMIN-NEXT: add a1, sp, a1
+; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: csrr a3, vlenb
; ZVFHMIN-NEXT: slli a1, a3, 3
@@ -3823,158 +3806,131 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal
; ZVFHMIN-NEXT: addi a7, a7, -1
; ZVFHMIN-NEXT: and a7, a7, a1
; ZVFHMIN-NEXT: srli a1, a3, 1
-; ZVFHMIN-NEXT: csrr t0, vlenb
-; ZVFHMIN-NEXT: add t0, sp, t0
-; ZVFHMIN-NEXT: addi t0, t0, 16
-; ZVFHMIN-NEXT: vs1r.v v0, (t0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vsetvli t0, zero, e8, m1, ta, ma
-; ZVFHMIN-NEXT: vslidedown.vx v8, v0, a1
+; ZVFHMIN-NEXT: vslidedown.vx v25, v0, a1
; ZVFHMIN-NEXT: srli a3, a3, 2
-; ZVFHMIN-NEXT: addi t0, sp, 16
-; ZVFHMIN-NEXT: vs1r.v v8, (t0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vsetvli t0, zero, e8, mf2, ta, ma
-; ZVFHMIN-NEXT: vslidedown.vx v0, v8, a3
+; ZVFHMIN-NEXT: vslidedown.vx v0, v25, a3
; ZVFHMIN-NEXT: vl8re16.v v8, (a0)
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 1
-; ZVFHMIN-NEXT: mv t0, a0
-; ZVFHMIN-NEXT: slli a0, a0, 2
-; ZVFHMIN-NEXT: add t0, t0, a0
-; ZVFHMIN-NEXT: slli a0, a0, 1
-; ZVFHMIN-NEXT: add a0, a0, t0
+; ZVFHMIN-NEXT: slli a0, a0, 5
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 1
+; ZVFHMIN-NEXT: slli a0, a0, 3
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20
-; ZVFHMIN-NEXT: vmv4r.v v16, v24
+; ZVFHMIN-NEXT: addi a0, sp, 16
+; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 1
+; ZVFHMIN-NEXT: slli a0, a0, 3
; ZVFHMIN-NEXT: mv t0, a0
-; ZVFHMIN-NEXT: slli a0, a0, 2
+; ZVFHMIN-NEXT: slli a0, a0, 1
; ZVFHMIN-NEXT: add a0, a0, t0
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28
+; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT: addi a0, sp, 16
+; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, a7, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vmfeq.vv v26, v16, v8, v0.t
+; ZVFHMIN-NEXT: vmfeq.vv v6, v16, v8, v0.t
; ZVFHMIN-NEXT: bltu a6, a4, .LBB171_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a6, a4
; ZVFHMIN-NEXT: .LBB171_2:
+; ZVFHMIN-NEXT: vmv1r.v v0, v25
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 1
+; ZVFHMIN-NEXT: slli a0, a0, 3
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 1
+; ZVFHMIN-NEXT: slli a0, a0, 3
; ZVFHMIN-NEXT: mv a7, a0
-; ZVFHMIN-NEXT: slli a0, a0, 2
+; ZVFHMIN-NEXT: slli a0, a0, 1
; ZVFHMIN-NEXT: add a0, a0, a7
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v0
-; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
; ZVFHMIN-NEXT: vsetvli zero, a6, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vmfeq.vv v6, v16, v8, v0.t
+; ZVFHMIN-NEXT: vmfeq.vv v5, v24, v8, v0.t
; ZVFHMIN-NEXT: add a0, a3, a3
; ZVFHMIN-NEXT: bltu a2, a5, .LBB171_4
; ZVFHMIN-NEXT: # %bb.3:
; ZVFHMIN-NEXT: mv a2, a5
; ZVFHMIN-NEXT: .LBB171_4:
-; ZVFHMIN-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
-; ZVFHMIN-NEXT: vslideup.vx v6, v26, a3
; ZVFHMIN-NEXT: sub a5, a2, a4
; ZVFHMIN-NEXT: sltu a6, a2, a5
; ZVFHMIN-NEXT: addi a6, a6, -1
; ZVFHMIN-NEXT: and a5, a6, a5
-; ZVFHMIN-NEXT: csrr a6, vlenb
-; ZVFHMIN-NEXT: add a6, sp, a6
-; ZVFHMIN-NEXT: addi a6, a6, 16
-; ZVFHMIN-NEXT: vl1r.v v8, (a6) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vmv1r.v v7, v8
; ZVFHMIN-NEXT: vsetvli a6, zero, e8, mf2, ta, ma
-; ZVFHMIN-NEXT: vslidedown.vx v0, v8, a3
+; ZVFHMIN-NEXT: vslidedown.vx v0, v7, a3
; ZVFHMIN-NEXT: csrr a6, vlenb
-; ZVFHMIN-NEXT: slli a6, a6, 1
-; ZVFHMIN-NEXT: mv a7, a6
-; ZVFHMIN-NEXT: slli a6, a6, 3
-; ZVFHMIN-NEXT: add a6, a6, a7
+; ZVFHMIN-NEXT: slli a6, a6, 4
; ZVFHMIN-NEXT: add a6, sp, a6
; ZVFHMIN-NEXT: addi a6, a6, 16
-; ZVFHMIN-NEXT: vl8r.v v24, (a6) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vl8r.v v16, (a6) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli a6, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20
; ZVFHMIN-NEXT: csrr a6, vlenb
-; ZVFHMIN-NEXT: slli a6, a6, 1
+; ZVFHMIN-NEXT: slli a6, a6, 3
; ZVFHMIN-NEXT: mv a7, a6
-; ZVFHMIN-NEXT: slli a6, a6, 2
+; ZVFHMIN-NEXT: slli a6, a6, 1
; ZVFHMIN-NEXT: add a6, a6, a7
; ZVFHMIN-NEXT: add a6, sp, a6
; ZVFHMIN-NEXT: addi a6, a6, 16
; ZVFHMIN-NEXT: vs8r.v v8, (a6) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: csrr a6, vlenb
-; ZVFHMIN-NEXT: slli a6, a6, 1
-; ZVFHMIN-NEXT: mv a7, a6
-; ZVFHMIN-NEXT: slli a6, a6, 2
-; ZVFHMIN-NEXT: add a7, a7, a6
-; ZVFHMIN-NEXT: slli a6, a6, 1
-; ZVFHMIN-NEXT: add a6, a6, a7
+; ZVFHMIN-NEXT: slli a6, a6, 5
; ZVFHMIN-NEXT: add a6, sp, a6
; ZVFHMIN-NEXT: addi a6, a6, 16
-; ZVFHMIN-NEXT: vl8r.v v16, (a6) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20
+; ZVFHMIN-NEXT: vl8r.v v24, (a6) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28
; ZVFHMIN-NEXT: csrr a6, vlenb
-; ZVFHMIN-NEXT: slli a6, a6, 1
+; ZVFHMIN-NEXT: slli a6, a6, 3
; ZVFHMIN-NEXT: mv a7, a6
-; ZVFHMIN-NEXT: slli a6, a6, 2
+; ZVFHMIN-NEXT: slli a6, a6, 1
; ZVFHMIN-NEXT: add a6, a6, a7
; ZVFHMIN-NEXT: add a6, sp, a6
; ZVFHMIN-NEXT: addi a6, a6, 16
-; ZVFHMIN-NEXT: vl8r.v v16, (a6) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vl8r.v v24, (a6) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, a5, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vmfeq.vv v5, v16, v8, v0.t
+; ZVFHMIN-NEXT: vmfeq.vv v4, v24, v8, v0.t
+; ZVFHMIN-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; ZVFHMIN-NEXT: vslideup.vx v5, v6, a3
; ZVFHMIN-NEXT: bltu a2, a4, .LBB171_6
; ZVFHMIN-NEXT: # %bb.5:
; ZVFHMIN-NEXT: mv a2, a4
; ZVFHMIN-NEXT: .LBB171_6:
+; ZVFHMIN-NEXT: vmv1r.v v0, v7
; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
; ZVFHMIN-NEXT: csrr a4, vlenb
-; ZVFHMIN-NEXT: slli a4, a4, 1
-; ZVFHMIN-NEXT: mv a5, a4
-; ZVFHMIN-NEXT: slli a4, a4, 2
-; ZVFHMIN-NEXT: add a5, a5, a4
-; ZVFHMIN-NEXT: slli a4, a4, 1
-; ZVFHMIN-NEXT: add a4, a4, a5
+; ZVFHMIN-NEXT: slli a4, a4, 5
; ZVFHMIN-NEXT: add a4, sp, a4
; ZVFHMIN-NEXT: addi a4, a4, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
-; ZVFHMIN-NEXT: vmv1r.v v0, v7
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT: vsetvli zero, a2, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v24, v0.t
+; ZVFHMIN-NEXT: vmfeq.vv v8, v24, v16, v0.t
; ZVFHMIN-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
-; ZVFHMIN-NEXT: vslideup.vx v8, v5, a3
+; ZVFHMIN-NEXT: vslideup.vx v8, v4, a3
; ZVFHMIN-NEXT: add a0, a1, a1
; ZVFHMIN-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; ZVFHMIN-NEXT: vslideup.vx v8, v6, a1
+; ZVFHMIN-NEXT: vslideup.vx v8, v5, a1
; ZVFHMIN-NEXT: vmv.v.v v0, v8
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 1
+; ZVFHMIN-NEXT: slli a0, a0, 3
; ZVFHMIN-NEXT: mv a1, a0
-; ZVFHMIN-NEXT: slli a0, a0, 4
+; ZVFHMIN-NEXT: slli a0, a0, 2
; ZVFHMIN-NEXT: add a0, a0, a1
; ZVFHMIN-NEXT: add sp, sp, a0
; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16
diff --git a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
index abf89361cdea53..3f3bbd638f9fc8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
@@ -244,32 +244,32 @@ define void @sink_splat_mul_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_mul_scalable:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: srli a2, a5, 1
-; CHECK-NEXT: li a3, 1024
-; CHECK-NEXT: bgeu a3, a2, .LBB7_2
+; CHECK-NEXT: srli a3, a5, 1
+; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: bgeu a2, a3, .LBB7_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a3, 0
+; CHECK-NEXT: li a2, 0
; CHECK-NEXT: j .LBB7_5
; CHECK-NEXT: .LBB7_2: # %vector.ph
-; CHECK-NEXT: addi a3, a2, -1
-; CHECK-NEXT: andi a4, a3, 1024
-; CHECK-NEXT: xori a3, a4, 1024
+; CHECK-NEXT: addi a2, a3, -1
+; CHECK-NEXT: andi a4, a2, 1024
+; CHECK-NEXT: xori a2, a4, 1024
; CHECK-NEXT: slli a5, a5, 1
; CHECK-NEXT: mv a6, a0
-; CHECK-NEXT: mv a7, a3
+; CHECK-NEXT: mv a7, a2
; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma
; CHECK-NEXT: .LBB7_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl2re32.v v8, (a6)
; CHECK-NEXT: vmul.vx v8, v8, a1
; CHECK-NEXT: vs2r.v v8, (a6)
-; CHECK-NEXT: sub a7, a7, a2
+; CHECK-NEXT: sub a7, a7, a3
; CHECK-NEXT: add a6, a6, a5
; CHECK-NEXT: bnez a7, .LBB7_3
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a4, .LBB7_7
; CHECK-NEXT: .LBB7_5: # %for.body.preheader
-; CHECK-NEXT: slli a2, a3, 2
+; CHECK-NEXT: slli a2, a2, 2
; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: lui a3, 1
; CHECK-NEXT: add a0, a0, a3
@@ -335,32 +335,32 @@ define void @sink_splat_add_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_add_scalable:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: srli a2, a5, 1
-; CHECK-NEXT: li a3, 1024
-; CHECK-NEXT: bgeu a3, a2, .LBB8_2
+; CHECK-NEXT: srli a3, a5, 1
+; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: bgeu a2, a3, .LBB8_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a3, 0
+; CHECK-NEXT: li a2, 0
; CHECK-NEXT: j .LBB8_5
; CHECK-NEXT: .LBB8_2: # %vector.ph
-; CHECK-NEXT: addi a3, a2, -1
-; CHECK-NEXT: andi a4, a3, 1024
-; CHECK-NEXT: xori a3, a4, 1024
+; CHECK-NEXT: addi a2, a3, -1
+; CHECK-NEXT: andi a4, a2, 1024
+; CHECK-NEXT: xori a2, a4, 1024
; CHECK-NEXT: slli a5, a5, 1
; CHECK-NEXT: mv a6, a0
-; CHECK-NEXT: mv a7, a3
+; CHECK-NEXT: mv a7, a2
; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma
; CHECK-NEXT: .LBB8_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl2re32.v v8, (a6)
; CHECK-NEXT: vadd.vx v8, v8, a1
; CHECK-NEXT: vs2r.v v8, (a6)
-; CHECK-NEXT: sub a7, a7, a2
+; CHECK-NEXT: sub a7, a7, a3
; CHECK-NEXT: add a6, a6, a5
; CHECK-NEXT: bnez a7, .LBB8_3
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a4, .LBB8_7
; CHECK-NEXT: .LBB8_5: # %for.body.preheader
-; CHECK-NEXT: slli a2, a3, 2
+; CHECK-NEXT: slli a2, a2, 2
; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: lui a3, 1
; CHECK-NEXT: add a0, a0, a3
@@ -426,32 +426,32 @@ define void @sink_splat_sub_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_sub_scalable:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: srli a2, a5, 1
-; CHECK-NEXT: li a3, 1024
-; CHECK-NEXT: bgeu a3, a2, .LBB9_2
+; CHECK-NEXT: srli a3, a5, 1
+; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: bgeu a2, a3, .LBB9_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a3, 0
+; CHECK-NEXT: li a2, 0
; CHECK-NEXT: j .LBB9_5
; CHECK-NEXT: .LBB9_2: # %vector.ph
-; CHECK-NEXT: addi a3, a2, -1
-; CHECK-NEXT: andi a4, a3, 1024
-; CHECK-NEXT: xori a3, a4, 1024
+; CHECK-NEXT: addi a2, a3, -1
+; CHECK-NEXT: andi a4, a2, 1024
+; CHECK-NEXT: xori a2, a4, 1024
; CHECK-NEXT: slli a5, a5, 1
; CHECK-NEXT: mv a6, a0
-; CHECK-NEXT: mv a7, a3
+; CHECK-NEXT: mv a7, a2
; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma
; CHECK-NEXT: .LBB9_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl2re32.v v8, (a6)
; CHECK-NEXT: vsub.vx v8, v8, a1
; CHECK-NEXT: vs2r.v v8, (a6)
-; CHECK-NEXT: sub a7, a7, a2
+; CHECK-NEXT: sub a7, a7, a3
; CHECK-NEXT: add a6, a6, a5
; CHECK-NEXT: bnez a7, .LBB9_3
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a4, .LBB9_7
; CHECK-NEXT: .LBB9_5: # %for.body.preheader
-; CHECK-NEXT: slli a2, a3, 2
+; CHECK-NEXT: slli a2, a2, 2
; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: lui a3, 1
; CHECK-NEXT: add a0, a0, a3
@@ -517,32 +517,32 @@ define void @sink_splat_rsub_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_rsub_scalable:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: srli a2, a5, 1
-; CHECK-NEXT: li a3, 1024
-; CHECK-NEXT: bgeu a3, a2, .LBB10_2
+; CHECK-NEXT: srli a3, a5, 1
+; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: bgeu a2, a3, .LBB10_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a3, 0
+; CHECK-NEXT: li a2, 0
; CHECK-NEXT: j .LBB10_5
; CHECK-NEXT: .LBB10_2: # %vector.ph
-; CHECK-NEXT: addi a3, a2, -1
-; CHECK-NEXT: andi a4, a3, 1024
-; CHECK-NEXT: xori a3, a4, 1024
+; CHECK-NEXT: addi a2, a3, -1
+; CHECK-NEXT: andi a4, a2, 1024
+; CHECK-NEXT: xori a2, a4, 1024
; CHECK-NEXT: slli a5, a5, 1
; CHECK-NEXT: mv a6, a0
-; CHECK-NEXT: mv a7, a3
+; CHECK-NEXT: mv a7, a2
; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma
; CHECK-NEXT: .LBB10_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl2re32.v v8, (a6)
; CHECK-NEXT: vrsub.vx v8, v8, a1
; CHECK-NEXT: vs2r.v v8, (a6)
-; CHECK-NEXT: sub a7, a7, a2
+; CHECK-NEXT: sub a7, a7, a3
; CHECK-NEXT: add a6, a6, a5
; CHECK-NEXT: bnez a7, .LBB10_3
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a4, .LBB10_7
; CHECK-NEXT: .LBB10_5: # %for.body.preheader
-; CHECK-NEXT: slli a2, a3, 2
+; CHECK-NEXT: slli a2, a2, 2
; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: lui a3, 1
; CHECK-NEXT: add a0, a0, a3
@@ -608,32 +608,32 @@ define void @sink_splat_and_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_and_scalable:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: srli a2, a5, 1
-; CHECK-NEXT: li a3, 1024
-; CHECK-NEXT: bgeu a3, a2, .LBB11_2
+; CHECK-NEXT: srli a3, a5, 1
+; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: bgeu a2, a3, .LBB11_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a3, 0
+; CHECK-NEXT: li a2, 0
; CHECK-NEXT: j .LBB11_5
; CHECK-NEXT: .LBB11_2: # %vector.ph
-; CHECK-NEXT: addi a3, a2, -1
-; CHECK-NEXT: andi a4, a3, 1024
-; CHECK-NEXT: xori a3, a4, 1024
+; CHECK-NEXT: addi a2, a3, -1
+; CHECK-NEXT: andi a4, a2, 1024
+; CHECK-NEXT: xori a2, a4, 1024
; CHECK-NEXT: slli a5, a5, 1
; CHECK-NEXT: mv a6, a0
-; CHECK-NEXT: mv a7, a3
+; CHECK-NEXT: mv a7, a2
; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma
; CHECK-NEXT: .LBB11_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl2re32.v v8, (a6)
; CHECK-NEXT: vand.vx v8, v8, a1
; CHECK-NEXT: vs2r.v v8, (a6)
-; CHECK-NEXT: sub a7, a7, a2
+; CHECK-NEXT: sub a7, a7, a3
; CHECK-NEXT: add a6, a6, a5
; CHECK-NEXT: bnez a7, .LBB11_3
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a4, .LBB11_7
; CHECK-NEXT: .LBB11_5: # %for.body.preheader
-; CHECK-NEXT: slli a2, a3, 2
+; CHECK-NEXT: slli a2, a2, 2
; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: lui a3, 1
; CHECK-NEXT: add a0, a0, a3
@@ -699,32 +699,32 @@ define void @sink_splat_or_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_or_scalable:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: srli a2, a5, 1
-; CHECK-NEXT: li a3, 1024
-; CHECK-NEXT: bgeu a3, a2, .LBB12_2
+; CHECK-NEXT: srli a3, a5, 1
+; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: bgeu a2, a3, .LBB12_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a3, 0
+; CHECK-NEXT: li a2, 0
; CHECK-NEXT: j .LBB12_5
; CHECK-NEXT: .LBB12_2: # %vector.ph
-; CHECK-NEXT: addi a3, a2, -1
-; CHECK-NEXT: andi a4, a3, 1024
-; CHECK-NEXT: xori a3, a4, 1024
+; CHECK-NEXT: addi a2, a3, -1
+; CHECK-NEXT: andi a4, a2, 1024
+; CHECK-NEXT: xori a2, a4, 1024
; CHECK-NEXT: slli a5, a5, 1
; CHECK-NEXT: mv a6, a0
-; CHECK-NEXT: mv a7, a3
+; CHECK-NEXT: mv a7, a2
; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma
; CHECK-NEXT: .LBB12_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl2re32.v v8, (a6)
; CHECK-NEXT: vor.vx v8, v8, a1
; CHECK-NEXT: vs2r.v v8, (a6)
-; CHECK-NEXT: sub a7, a7, a2
+; CHECK-NEXT: sub a7, a7, a3
; CHECK-NEXT: add a6, a6, a5
; CHECK-NEXT: bnez a7, .LBB12_3
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a4, .LBB12_7
; CHECK-NEXT: .LBB12_5: # %for.body.preheader
-; CHECK-NEXT: slli a2, a3, 2
+; CHECK-NEXT: slli a2, a2, 2
; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: lui a3, 1
; CHECK-NEXT: add a0, a0, a3
@@ -790,32 +790,32 @@ define void @sink_splat_xor_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_xor_scalable:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: srli a2, a5, 1
-; CHECK-NEXT: li a3, 1024
-; CHECK-NEXT: bgeu a3, a2, .LBB13_2
+; CHECK-NEXT: srli a3, a5, 1
+; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: bgeu a2, a3, .LBB13_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a3, 0
+; CHECK-NEXT: li a2, 0
; CHECK-NEXT: j .LBB13_5
; CHECK-NEXT: .LBB13_2: # %vector.ph
-; CHECK-NEXT: addi a3, a2, -1
-; CHECK-NEXT: andi a4, a3, 1024
-; CHECK-NEXT: xori a3, a4, 1024
+; CHECK-NEXT: addi a2, a3, -1
+; CHECK-NEXT: andi a4, a2, 1024
+; CHECK-NEXT: xori a2, a4, 1024
; CHECK-NEXT: slli a5, a5, 1
; CHECK-NEXT: mv a6, a0
-; CHECK-NEXT: mv a7, a3
+; CHECK-NEXT: mv a7, a2
; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma
; CHECK-NEXT: .LBB13_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl2re32.v v8, (a6)
; CHECK-NEXT: vxor.vx v8, v8, a1
; CHECK-NEXT: vs2r.v v8, (a6)
-; CHECK-NEXT: sub a7, a7, a2
+; CHECK-NEXT: sub a7, a7, a3
; CHECK-NEXT: add a6, a6, a5
; CHECK-NEXT: bnez a7, .LBB13_3
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a4, .LBB13_7
; CHECK-NEXT: .LBB13_5: # %for.body.preheader
-; CHECK-NEXT: slli a2, a3, 2
+; CHECK-NEXT: slli a2, a2, 2
; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: lui a3, 1
; CHECK-NEXT: add a0, a0, a3
@@ -983,32 +983,32 @@ define void @sink_splat_shl_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_shl_scalable:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: srli a2, a5, 1
-; CHECK-NEXT: li a3, 1024
-; CHECK-NEXT: bgeu a3, a2, .LBB17_2
+; CHECK-NEXT: srli a3, a5, 1
+; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: bgeu a2, a3, .LBB17_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a3, 0
+; CHECK-NEXT: li a2, 0
; CHECK-NEXT: j .LBB17_5
; CHECK-NEXT: .LBB17_2: # %vector.ph
-; CHECK-NEXT: addi a3, a2, -1
-; CHECK-NEXT: andi a4, a3, 1024
-; CHECK-NEXT: xori a3, a4, 1024
+; CHECK-NEXT: addi a2, a3, -1
+; CHECK-NEXT: andi a4, a2, 1024
+; CHECK-NEXT: xori a2, a4, 1024
; CHECK-NEXT: slli a5, a5, 1
; CHECK-NEXT: mv a6, a0
-; CHECK-NEXT: mv a7, a3
+; CHECK-NEXT: mv a7, a2
; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma
; CHECK-NEXT: .LBB17_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl2re32.v v8, (a6)
; CHECK-NEXT: vsll.vx v8, v8, a1
; CHECK-NEXT: vs2r.v v8, (a6)
-; CHECK-NEXT: sub a7, a7, a2
+; CHECK-NEXT: sub a7, a7, a3
; CHECK-NEXT: add a6, a6, a5
; CHECK-NEXT: bnez a7, .LBB17_3
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a4, .LBB17_7
; CHECK-NEXT: .LBB17_5: # %for.body.preheader
-; CHECK-NEXT: slli a2, a3, 2
+; CHECK-NEXT: slli a2, a2, 2
; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: lui a3, 1
; CHECK-NEXT: add a0, a0, a3
@@ -1074,32 +1074,32 @@ define void @sink_splat_lshr_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_lshr_scalable:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: srli a2, a5, 1
-; CHECK-NEXT: li a3, 1024
-; CHECK-NEXT: bgeu a3, a2, .LBB18_2
+; CHECK-NEXT: srli a3, a5, 1
+; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: bgeu a2, a3, .LBB18_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a3, 0
+; CHECK-NEXT: li a2, 0
; CHECK-NEXT: j .LBB18_5
; CHECK-NEXT: .LBB18_2: # %vector.ph
-; CHECK-NEXT: addi a3, a2, -1
-; CHECK-NEXT: andi a4, a3, 1024
-; CHECK-NEXT: xori a3, a4, 1024
+; CHECK-NEXT: addi a2, a3, -1
+; CHECK-NEXT: andi a4, a2, 1024
+; CHECK-NEXT: xori a2, a4, 1024
; CHECK-NEXT: slli a5, a5, 1
; CHECK-NEXT: mv a6, a0
-; CHECK-NEXT: mv a7, a3
+; CHECK-NEXT: mv a7, a2
; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma
; CHECK-NEXT: .LBB18_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl2re32.v v8, (a6)
; CHECK-NEXT: vsrl.vx v8, v8, a1
; CHECK-NEXT: vs2r.v v8, (a6)
-; CHECK-NEXT: sub a7, a7, a2
+; CHECK-NEXT: sub a7, a7, a3
; CHECK-NEXT: add a6, a6, a5
; CHECK-NEXT: bnez a7, .LBB18_3
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a4, .LBB18_7
; CHECK-NEXT: .LBB18_5: # %for.body.preheader
-; CHECK-NEXT: slli a2, a3, 2
+; CHECK-NEXT: slli a2, a2, 2
; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: lui a3, 1
; CHECK-NEXT: add a0, a0, a3
@@ -1458,31 +1458,31 @@ define void @sink_splat_fmul_scalable(ptr nocapture %a, float %x) {
; CHECK-LABEL: sink_splat_fmul_scalable:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a2, a1, 2
-; CHECK-NEXT: li a3, 1024
-; CHECK-NEXT: bgeu a3, a2, .LBB26_2
+; CHECK-NEXT: srli a3, a1, 2
+; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: bgeu a2, a3, .LBB26_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a3, 0
+; CHECK-NEXT: li a2, 0
; CHECK-NEXT: j .LBB26_5
; CHECK-NEXT: .LBB26_2: # %vector.ph
-; CHECK-NEXT: addi a3, a2, -1
-; CHECK-NEXT: andi a4, a3, 1024
-; CHECK-NEXT: xori a3, a4, 1024
+; CHECK-NEXT: addi a2, a3, -1
+; CHECK-NEXT: andi a4, a2, 1024
+; CHECK-NEXT: xori a2, a4, 1024
; CHECK-NEXT: mv a5, a0
-; CHECK-NEXT: mv a6, a3
+; CHECK-NEXT: mv a6, a2
; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, ma
; CHECK-NEXT: .LBB26_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl1re32.v v8, (a5)
; CHECK-NEXT: vfmul.vf v8, v8, fa0
; CHECK-NEXT: vs1r.v v8, (a5)
-; CHECK-NEXT: sub a6, a6, a2
+; CHECK-NEXT: sub a6, a6, a3
; CHECK-NEXT: add a5, a5, a1
; CHECK-NEXT: bnez a6, .LBB26_3
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a4, .LBB26_7
; CHECK-NEXT: .LBB26_5: # %for.body.preheader
-; CHECK-NEXT: slli a1, a3, 2
+; CHECK-NEXT: slli a1, a2, 2
; CHECK-NEXT: add a1, a0, a1
; CHECK-NEXT: lui a2, 1
; CHECK-NEXT: add a0, a0, a2
@@ -1548,31 +1548,31 @@ define void @sink_splat_fdiv_scalable(ptr nocapture %a, float %x) {
; CHECK-LABEL: sink_splat_fdiv_scalable:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a2, a1, 2
-; CHECK-NEXT: li a3, 1024
-; CHECK-NEXT: bgeu a3, a2, .LBB27_2
+; CHECK-NEXT: srli a3, a1, 2
+; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: bgeu a2, a3, .LBB27_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a3, 0
+; CHECK-NEXT: li a2, 0
; CHECK-NEXT: j .LBB27_5
; CHECK-NEXT: .LBB27_2: # %vector.ph
-; CHECK-NEXT: addi a3, a2, -1
-; CHECK-NEXT: andi a4, a3, 1024
-; CHECK-NEXT: xori a3, a4, 1024
+; CHECK-NEXT: addi a2, a3, -1
+; CHECK-NEXT: andi a4, a2, 1024
+; CHECK-NEXT: xori a2, a4, 1024
; CHECK-NEXT: mv a5, a0
-; CHECK-NEXT: mv a6, a3
+; CHECK-NEXT: mv a6, a2
; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, ma
; CHECK-NEXT: .LBB27_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl1re32.v v8, (a5)
; CHECK-NEXT: vfdiv.vf v8, v8, fa0
; CHECK-NEXT: vs1r.v v8, (a5)
-; CHECK-NEXT: sub a6, a6, a2
+; CHECK-NEXT: sub a6, a6, a3
; CHECK-NEXT: add a5, a5, a1
; CHECK-NEXT: bnez a6, .LBB27_3
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a4, .LBB27_7
; CHECK-NEXT: .LBB27_5: # %for.body.preheader
-; CHECK-NEXT: slli a1, a3, 2
+; CHECK-NEXT: slli a1, a2, 2
; CHECK-NEXT: add a1, a0, a1
; CHECK-NEXT: lui a2, 1
; CHECK-NEXT: add a0, a0, a2
@@ -1638,31 +1638,31 @@ define void @sink_splat_frdiv_scalable(ptr nocapture %a, float %x) {
; CHECK-LABEL: sink_splat_frdiv_scalable:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a2, a1, 2
-; CHECK-NEXT: li a3, 1024
-; CHECK-NEXT: bgeu a3, a2, .LBB28_2
+; CHECK-NEXT: srli a3, a1, 2
+; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: bgeu a2, a3, .LBB28_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a3, 0
+; CHECK-NEXT: li a2, 0
; CHECK-NEXT: j .LBB28_5
; CHECK-NEXT: .LBB28_2: # %vector.ph
-; CHECK-NEXT: addi a3, a2, -1
-; CHECK-NEXT: andi a4, a3, 1024
-; CHECK-NEXT: xori a3, a4, 1024
+; CHECK-NEXT: addi a2, a3, -1
+; CHECK-NEXT: andi a4, a2, 1024
+; CHECK-NEXT: xori a2, a4, 1024
; CHECK-NEXT: mv a5, a0
-; CHECK-NEXT: mv a6, a3
+; CHECK-NEXT: mv a6, a2
; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, ma
; CHECK-NEXT: .LBB28_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl1re32.v v8, (a5)
; CHECK-NEXT: vfrdiv.vf v8, v8, fa0
; CHECK-NEXT: vs1r.v v8, (a5)
-; CHECK-NEXT: sub a6, a6, a2
+; CHECK-NEXT: sub a6, a6, a3
; CHECK-NEXT: add a5, a5, a1
; CHECK-NEXT: bnez a6, .LBB28_3
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a4, .LBB28_7
; CHECK-NEXT: .LBB28_5: # %for.body.preheader
-; CHECK-NEXT: slli a1, a3, 2
+; CHECK-NEXT: slli a1, a2, 2
; CHECK-NEXT: add a1, a0, a1
; CHECK-NEXT: lui a2, 1
; CHECK-NEXT: add a0, a0, a2
@@ -1728,31 +1728,31 @@ define void @sink_splat_fadd_scalable(ptr nocapture %a, float %x) {
; CHECK-LABEL: sink_splat_fadd_scalable:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a2, a1, 2
-; CHECK-NEXT: li a3, 1024
-; CHECK-NEXT: bgeu a3, a2, .LBB29_2
+; CHECK-NEXT: srli a3, a1, 2
+; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: bgeu a2, a3, .LBB29_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a3, 0
+; CHECK-NEXT: li a2, 0
; CHECK-NEXT: j .LBB29_5
; CHECK-NEXT: .LBB29_2: # %vector.ph
-; CHECK-NEXT: addi a3, a2, -1
-; CHECK-NEXT: andi a4, a3, 1024
-; CHECK-NEXT: xori a3, a4, 1024
+; CHECK-NEXT: addi a2, a3, -1
+; CHECK-NEXT: andi a4, a2, 1024
+; CHECK-NEXT: xori a2, a4, 1024
; CHECK-NEXT: mv a5, a0
-; CHECK-NEXT: mv a6, a3
+; CHECK-NEXT: mv a6, a2
; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, ma
; CHECK-NEXT: .LBB29_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl1re32.v v8, (a5)
; CHECK-NEXT: vfadd.vf v8, v8, fa0
; CHECK-NEXT: vs1r.v v8, (a5)
-; CHECK-NEXT: sub a6, a6, a2
+; CHECK-NEXT: sub a6, a6, a3
; CHECK-NEXT: add a5, a5, a1
; CHECK-NEXT: bnez a6, .LBB29_3
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a4, .LBB29_7
; CHECK-NEXT: .LBB29_5: # %for.body.preheader
-; CHECK-NEXT: slli a1, a3, 2
+; CHECK-NEXT: slli a1, a2, 2
; CHECK-NEXT: add a1, a0, a1
; CHECK-NEXT: lui a2, 1
; CHECK-NEXT: add a0, a0, a2
@@ -1818,31 +1818,31 @@ define void @sink_splat_fsub_scalable(ptr nocapture %a, float %x) {
; CHECK-LABEL: sink_splat_fsub_scalable:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a2, a1, 2
-; CHECK-NEXT: li a3, 1024
-; CHECK-NEXT: bgeu a3, a2, .LBB30_2
+; CHECK-NEXT: srli a3, a1, 2
+; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: bgeu a2, a3, .LBB30_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a3, 0
+; CHECK-NEXT: li a2, 0
; CHECK-NEXT: j .LBB30_5
; CHECK-NEXT: .LBB30_2: # %vector.ph
-; CHECK-NEXT: addi a3, a2, -1
-; CHECK-NEXT: andi a4, a3, 1024
-; CHECK-NEXT: xori a3, a4, 1024
+; CHECK-NEXT: addi a2, a3, -1
+; CHECK-NEXT: andi a4, a2, 1024
+; CHECK-NEXT: xori a2, a4, 1024
; CHECK-NEXT: mv a5, a0
-; CHECK-NEXT: mv a6, a3
+; CHECK-NEXT: mv a6, a2
; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, ma
; CHECK-NEXT: .LBB30_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl1re32.v v8, (a5)
; CHECK-NEXT: vfsub.vf v8, v8, fa0
; CHECK-NEXT: vs1r.v v8, (a5)
-; CHECK-NEXT: sub a6, a6, a2
+; CHECK-NEXT: sub a6, a6, a3
; CHECK-NEXT: add a5, a5, a1
; CHECK-NEXT: bnez a6, .LBB30_3
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a4, .LBB30_7
; CHECK-NEXT: .LBB30_5: # %for.body.preheader
-; CHECK-NEXT: slli a1, a3, 2
+; CHECK-NEXT: slli a1, a2, 2
; CHECK-NEXT: add a1, a0, a1
; CHECK-NEXT: lui a2, 1
; CHECK-NEXT: add a0, a0, a2
@@ -1908,31 +1908,31 @@ define void @sink_splat_frsub_scalable(ptr nocapture %a, float %x) {
; CHECK-LABEL: sink_splat_frsub_scalable:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: srli a2, a1, 2
-; CHECK-NEXT: li a3, 1024
-; CHECK-NEXT: bgeu a3, a2, .LBB31_2
+; CHECK-NEXT: srli a3, a1, 2
+; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: bgeu a2, a3, .LBB31_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a3, 0
+; CHECK-NEXT: li a2, 0
; CHECK-NEXT: j .LBB31_5
; CHECK-NEXT: .LBB31_2: # %vector.ph
-; CHECK-NEXT: addi a3, a2, -1
-; CHECK-NEXT: andi a4, a3, 1024
-; CHECK-NEXT: xori a3, a4, 1024
+; CHECK-NEXT: addi a2, a3, -1
+; CHECK-NEXT: andi a4, a2, 1024
+; CHECK-NEXT: xori a2, a4, 1024
; CHECK-NEXT: mv a5, a0
-; CHECK-NEXT: mv a6, a3
+; CHECK-NEXT: mv a6, a2
; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, ma
; CHECK-NEXT: .LBB31_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl1re32.v v8, (a5)
; CHECK-NEXT: vfrsub.vf v8, v8, fa0
; CHECK-NEXT: vs1r.v v8, (a5)
-; CHECK-NEXT: sub a6, a6, a2
+; CHECK-NEXT: sub a6, a6, a3
; CHECK-NEXT: add a5, a5, a1
; CHECK-NEXT: bnez a6, .LBB31_3
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a4, .LBB31_7
; CHECK-NEXT: .LBB31_5: # %for.body.preheader
-; CHECK-NEXT: slli a1, a3, 2
+; CHECK-NEXT: slli a1, a2, 2
; CHECK-NEXT: add a1, a0, a1
; CHECK-NEXT: lui a2, 1
; CHECK-NEXT: add a0, a0, a2
@@ -2074,19 +2074,19 @@ define void @sink_splat_fma_scalable(ptr noalias nocapture %a, ptr noalias nocap
; CHECK-LABEL: sink_splat_fma_scalable:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: srli a3, a2, 2
-; CHECK-NEXT: li a4, 1024
-; CHECK-NEXT: bgeu a4, a3, .LBB34_2
+; CHECK-NEXT: srli a4, a2, 2
+; CHECK-NEXT: li a3, 1024
+; CHECK-NEXT: bgeu a3, a4, .LBB34_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a4, 0
+; CHECK-NEXT: li a3, 0
; CHECK-NEXT: j .LBB34_5
; CHECK-NEXT: .LBB34_2: # %vector.ph
-; CHECK-NEXT: addi a4, a3, -1
-; CHECK-NEXT: andi a5, a4, 1024
-; CHECK-NEXT: xori a4, a5, 1024
+; CHECK-NEXT: addi a3, a4, -1
+; CHECK-NEXT: andi a5, a3, 1024
+; CHECK-NEXT: xori a3, a5, 1024
; CHECK-NEXT: mv a6, a0
; CHECK-NEXT: mv a7, a1
-; CHECK-NEXT: mv t0, a4
+; CHECK-NEXT: mv t0, a3
; CHECK-NEXT: vsetvli t1, zero, e32, m1, ta, ma
; CHECK-NEXT: .LBB34_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
@@ -2094,16 +2094,16 @@ define void @sink_splat_fma_scalable(ptr noalias nocapture %a, ptr noalias nocap
; CHECK-NEXT: vl1re32.v v9, (a7)
; CHECK-NEXT: vfmacc.vf v9, fa0, v8
; CHECK-NEXT: vs1r.v v9, (a6)
-; CHECK-NEXT: sub t0, t0, a3
+; CHECK-NEXT: sub t0, t0, a4
; CHECK-NEXT: add a7, a7, a2
; CHECK-NEXT: add a6, a6, a2
; CHECK-NEXT: bnez t0, .LBB34_3
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a5, .LBB34_7
; CHECK-NEXT: .LBB34_5: # %for.body.preheader
-; CHECK-NEXT: slli a4, a4, 2
-; CHECK-NEXT: add a2, a1, a4
-; CHECK-NEXT: add a0, a0, a4
+; CHECK-NEXT: slli a3, a3, 2
+; CHECK-NEXT: add a2, a1, a3
+; CHECK-NEXT: add a0, a0, a3
; CHECK-NEXT: lui a3, 1
; CHECK-NEXT: add a1, a1, a3
; CHECK-NEXT: .LBB34_6: # %for.body
@@ -2174,19 +2174,19 @@ define void @sink_splat_fma_commute_scalable(ptr noalias nocapture %a, ptr noali
; CHECK-LABEL: sink_splat_fma_commute_scalable:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: srli a3, a2, 2
-; CHECK-NEXT: li a4, 1024
-; CHECK-NEXT: bgeu a4, a3, .LBB35_2
+; CHECK-NEXT: srli a4, a2, 2
+; CHECK-NEXT: li a3, 1024
+; CHECK-NEXT: bgeu a3, a4, .LBB35_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a4, 0
+; CHECK-NEXT: li a3, 0
; CHECK-NEXT: j .LBB35_5
; CHECK-NEXT: .LBB35_2: # %vector.ph
-; CHECK-NEXT: addi a4, a3, -1
-; CHECK-NEXT: andi a5, a4, 1024
-; CHECK-NEXT: xori a4, a5, 1024
+; CHECK-NEXT: addi a3, a4, -1
+; CHECK-NEXT: andi a5, a3, 1024
+; CHECK-NEXT: xori a3, a5, 1024
; CHECK-NEXT: mv a6, a0
; CHECK-NEXT: mv a7, a1
-; CHECK-NEXT: mv t0, a4
+; CHECK-NEXT: mv t0, a3
; CHECK-NEXT: vsetvli t1, zero, e32, m1, ta, ma
; CHECK-NEXT: .LBB35_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
@@ -2194,16 +2194,16 @@ define void @sink_splat_fma_commute_scalable(ptr noalias nocapture %a, ptr noali
; CHECK-NEXT: vl1re32.v v9, (a7)
; CHECK-NEXT: vfmacc.vf v9, fa0, v8
; CHECK-NEXT: vs1r.v v9, (a6)
-; CHECK-NEXT: sub t0, t0, a3
+; CHECK-NEXT: sub t0, t0, a4
; CHECK-NEXT: add a7, a7, a2
; CHECK-NEXT: add a6, a6, a2
; CHECK-NEXT: bnez t0, .LBB35_3
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a5, .LBB35_7
; CHECK-NEXT: .LBB35_5: # %for.body.preheader
-; CHECK-NEXT: slli a4, a4, 2
-; CHECK-NEXT: add a2, a1, a4
-; CHECK-NEXT: add a0, a0, a4
+; CHECK-NEXT: slli a3, a3, 2
+; CHECK-NEXT: add a2, a1, a3
+; CHECK-NEXT: add a0, a0, a3
; CHECK-NEXT: lui a3, 1
; CHECK-NEXT: add a1, a1, a3
; CHECK-NEXT: .LBB35_6: # %for.body
@@ -2487,32 +2487,32 @@ define void @sink_splat_udiv_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_udiv_scalable:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: srli a2, a5, 1
-; CHECK-NEXT: li a3, 1024
-; CHECK-NEXT: bgeu a3, a2, .LBB42_2
+; CHECK-NEXT: srli a3, a5, 1
+; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: bgeu a2, a3, .LBB42_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a3, 0
+; CHECK-NEXT: li a2, 0
; CHECK-NEXT: j .LBB42_5
; CHECK-NEXT: .LBB42_2: # %vector.ph
-; CHECK-NEXT: addi a3, a2, -1
-; CHECK-NEXT: andi a4, a3, 1024
-; CHECK-NEXT: xori a3, a4, 1024
+; CHECK-NEXT: addi a2, a3, -1
+; CHECK-NEXT: andi a4, a2, 1024
+; CHECK-NEXT: xori a2, a4, 1024
; CHECK-NEXT: slli a5, a5, 1
; CHECK-NEXT: mv a6, a0
-; CHECK-NEXT: mv a7, a3
+; CHECK-NEXT: mv a7, a2
; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma
; CHECK-NEXT: .LBB42_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl2re32.v v8, (a6)
; CHECK-NEXT: vdivu.vx v8, v8, a1
; CHECK-NEXT: vs2r.v v8, (a6)
-; CHECK-NEXT: sub a7, a7, a2
+; CHECK-NEXT: sub a7, a7, a3
; CHECK-NEXT: add a6, a6, a5
; CHECK-NEXT: bnez a7, .LBB42_3
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a4, .LBB42_7
; CHECK-NEXT: .LBB42_5: # %for.body.preheader
-; CHECK-NEXT: slli a2, a3, 2
+; CHECK-NEXT: slli a2, a2, 2
; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: lui a3, 1
; CHECK-NEXT: add a0, a0, a3
@@ -2578,32 +2578,32 @@ define void @sink_splat_sdiv_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_sdiv_scalable:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: srli a2, a5, 1
-; CHECK-NEXT: li a3, 1024
-; CHECK-NEXT: bgeu a3, a2, .LBB43_2
+; CHECK-NEXT: srli a3, a5, 1
+; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: bgeu a2, a3, .LBB43_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a3, 0
+; CHECK-NEXT: li a2, 0
; CHECK-NEXT: j .LBB43_5
; CHECK-NEXT: .LBB43_2: # %vector.ph
-; CHECK-NEXT: addi a3, a2, -1
-; CHECK-NEXT: andi a4, a3, 1024
-; CHECK-NEXT: xori a3, a4, 1024
+; CHECK-NEXT: addi a2, a3, -1
+; CHECK-NEXT: andi a4, a2, 1024
+; CHECK-NEXT: xori a2, a4, 1024
; CHECK-NEXT: slli a5, a5, 1
; CHECK-NEXT: mv a6, a0
-; CHECK-NEXT: mv a7, a3
+; CHECK-NEXT: mv a7, a2
; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma
; CHECK-NEXT: .LBB43_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl2re32.v v8, (a6)
; CHECK-NEXT: vdiv.vx v8, v8, a1
; CHECK-NEXT: vs2r.v v8, (a6)
-; CHECK-NEXT: sub a7, a7, a2
+; CHECK-NEXT: sub a7, a7, a3
; CHECK-NEXT: add a6, a6, a5
; CHECK-NEXT: bnez a7, .LBB43_3
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a4, .LBB43_7
; CHECK-NEXT: .LBB43_5: # %for.body.preheader
-; CHECK-NEXT: slli a2, a3, 2
+; CHECK-NEXT: slli a2, a2, 2
; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: lui a3, 1
; CHECK-NEXT: add a0, a0, a3
@@ -2669,32 +2669,32 @@ define void @sink_splat_urem_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_urem_scalable:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: srli a2, a5, 1
-; CHECK-NEXT: li a3, 1024
-; CHECK-NEXT: bgeu a3, a2, .LBB44_2
+; CHECK-NEXT: srli a3, a5, 1
+; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: bgeu a2, a3, .LBB44_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a3, 0
+; CHECK-NEXT: li a2, 0
; CHECK-NEXT: j .LBB44_5
; CHECK-NEXT: .LBB44_2: # %vector.ph
-; CHECK-NEXT: addi a3, a2, -1
-; CHECK-NEXT: andi a4, a3, 1024
-; CHECK-NEXT: xori a3, a4, 1024
+; CHECK-NEXT: addi a2, a3, -1
+; CHECK-NEXT: andi a4, a2, 1024
+; CHECK-NEXT: xori a2, a4, 1024
; CHECK-NEXT: slli a5, a5, 1
; CHECK-NEXT: mv a6, a0
-; CHECK-NEXT: mv a7, a3
+; CHECK-NEXT: mv a7, a2
; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma
; CHECK-NEXT: .LBB44_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl2re32.v v8, (a6)
; CHECK-NEXT: vremu.vx v8, v8, a1
; CHECK-NEXT: vs2r.v v8, (a6)
-; CHECK-NEXT: sub a7, a7, a2
+; CHECK-NEXT: sub a7, a7, a3
; CHECK-NEXT: add a6, a6, a5
; CHECK-NEXT: bnez a7, .LBB44_3
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a4, .LBB44_7
; CHECK-NEXT: .LBB44_5: # %for.body.preheader
-; CHECK-NEXT: slli a2, a3, 2
+; CHECK-NEXT: slli a2, a2, 2
; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: lui a3, 1
; CHECK-NEXT: add a0, a0, a3
@@ -2760,32 +2760,32 @@ define void @sink_splat_srem_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_srem_scalable:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: srli a2, a5, 1
-; CHECK-NEXT: li a3, 1024
-; CHECK-NEXT: bgeu a3, a2, .LBB45_2
+; CHECK-NEXT: srli a3, a5, 1
+; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: bgeu a2, a3, .LBB45_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: li a3, 0
+; CHECK-NEXT: li a2, 0
; CHECK-NEXT: j .LBB45_5
; CHECK-NEXT: .LBB45_2: # %vector.ph
-; CHECK-NEXT: addi a3, a2, -1
-; CHECK-NEXT: andi a4, a3, 1024
-; CHECK-NEXT: xori a3, a4, 1024
+; CHECK-NEXT: addi a2, a3, -1
+; CHECK-NEXT: andi a4, a2, 1024
+; CHECK-NEXT: xori a2, a4, 1024
; CHECK-NEXT: slli a5, a5, 1
; CHECK-NEXT: mv a6, a0
-; CHECK-NEXT: mv a7, a3
+; CHECK-NEXT: mv a7, a2
; CHECK-NEXT: vsetvli t0, zero, e32, m2, ta, ma
; CHECK-NEXT: .LBB45_3: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vl2re32.v v8, (a6)
; CHECK-NEXT: vrem.vx v8, v8, a1
; CHECK-NEXT: vs2r.v v8, (a6)
-; CHECK-NEXT: sub a7, a7, a2
+; CHECK-NEXT: sub a7, a7, a3
; CHECK-NEXT: add a6, a6, a5
; CHECK-NEXT: bnez a7, .LBB45_3
; CHECK-NEXT: # %bb.4: # %middle.block
; CHECK-NEXT: beqz a4, .LBB45_7
; CHECK-NEXT: .LBB45_5: # %for.body.preheader
-; CHECK-NEXT: slli a2, a3, 2
+; CHECK-NEXT: slli a2, a2, 2
; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: lui a3, 1
; CHECK-NEXT: add a0, a0, a3
diff --git a/llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll
index 9d0234d2ec2fbc..b7fe722958bfb8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll
@@ -33,13 +33,13 @@ define <4 x i32> @vec_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
; CHECK-NEXT: vmsle.vi v0, v8, -1
; CHECK-NEXT: lui a0, 524288
; CHECK-NEXT: addi a1, a0, -1
-; CHECK-NEXT: vmv.v.x v10, a1
-; CHECK-NEXT: vsll.vv v11, v8, v9
-; CHECK-NEXT: vsra.vv v9, v11, v9
+; CHECK-NEXT: vsll.vv v10, v8, v9
+; CHECK-NEXT: vsra.vv v9, v10, v9
; CHECK-NEXT: vmsne.vv v8, v8, v9
-; CHECK-NEXT: vmerge.vxm v9, v10, a0, v0
+; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: vmerge.vxm v9, v9, a0, v0
; CHECK-NEXT: vmv.v.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v11, v9, v0
+; CHECK-NEXT: vmerge.vvm v8, v10, v9, v0
; CHECK-NEXT: ret
%tmp = call <4 x i32> @llvm.sshl.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
ret <4 x i32> %tmp
@@ -52,13 +52,13 @@ define <8 x i16> @vec_v8i16(<8 x i16> %x, <8 x i16> %y) nounwind {
; CHECK-NEXT: vmsle.vi v0, v8, -1
; CHECK-NEXT: lui a0, 8
; CHECK-NEXT: addi a1, a0, -1
-; CHECK-NEXT: vmv.v.x v10, a1
-; CHECK-NEXT: vsll.vv v11, v8, v9
-; CHECK-NEXT: vsra.vv v9, v11, v9
+; CHECK-NEXT: vsll.vv v10, v8, v9
+; CHECK-NEXT: vsra.vv v9, v10, v9
; CHECK-NEXT: vmsne.vv v8, v8, v9
-; CHECK-NEXT: vmerge.vxm v9, v10, a0, v0
+; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: vmerge.vxm v9, v9, a0, v0
; CHECK-NEXT: vmv.v.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v11, v9, v0
+; CHECK-NEXT: vmerge.vvm v8, v10, v9, v0
; CHECK-NEXT: ret
%tmp = call <8 x i16> @llvm.sshl.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
ret <8 x i16> %tmp
@@ -70,14 +70,14 @@ define <16 x i8> @vec_v16i8(<16 x i8> %x, <16 x i8> %y) nounwind {
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vmsle.vi v0, v8, -1
; CHECK-NEXT: li a0, 127
-; CHECK-NEXT: vmv.v.x v10, a0
-; CHECK-NEXT: li a0, 128
-; CHECK-NEXT: vsll.vv v11, v8, v9
-; CHECK-NEXT: vsra.vv v9, v11, v9
+; CHECK-NEXT: vsll.vv v10, v8, v9
+; CHECK-NEXT: vsra.vv v9, v10, v9
; CHECK-NEXT: vmsne.vv v8, v8, v9
-; CHECK-NEXT: vmerge.vxm v9, v10, a0, v0
+; CHECK-NEXT: vmv.v.x v9, a0
+; CHECK-NEXT: li a0, 128
+; CHECK-NEXT: vmerge.vxm v9, v9, a0, v0
; CHECK-NEXT: vmv.v.v v0, v8
-; CHECK-NEXT: vmerge.vvm v8, v11, v9, v0
+; CHECK-NEXT: vmerge.vvm v8, v10, v9, v0
; CHECK-NEXT: ret
%tmp = call <16 x i8> @llvm.sshl.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
ret <16 x i8> %tmp
@@ -115,13 +115,13 @@ define <vscale x 4 x i32> @vec_nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32>
; CHECK-NEXT: vmsle.vi v0, v8, -1
; CHECK-NEXT: lui a0, 524288
; CHECK-NEXT: addi a1, a0, -1
-; CHECK-NEXT: vmv.v.x v12, a1
-; CHECK-NEXT: vsll.vv v14, v8, v10
-; CHECK-NEXT: vsra.vv v16, v14, v10
-; CHECK-NEXT: vmsne.vv v10, v8, v16
-; CHECK-NEXT: vmerge.vxm v8, v12, a0, v0
+; CHECK-NEXT: vsll.vv v12, v8, v10
+; CHECK-NEXT: vsra.vv v14, v12, v10
+; CHECK-NEXT: vmsne.vv v10, v8, v14
+; CHECK-NEXT: vmv.v.x v8, a1
+; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0
; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vmerge.vvm v8, v14, v8, v0
+; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0
; CHECK-NEXT: ret
%tmp = call <vscale x 4 x i32> @llvm.sshl.sat.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %y)
ret <vscale x 4 x i32> %tmp
@@ -134,13 +134,13 @@ define <vscale x 8 x i16> @vec_nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16>
; CHECK-NEXT: vmsle.vi v0, v8, -1
; CHECK-NEXT: lui a0, 8
; CHECK-NEXT: addi a1, a0, -1
-; CHECK-NEXT: vmv.v.x v12, a1
-; CHECK-NEXT: vsll.vv v14, v8, v10
-; CHECK-NEXT: vsra.vv v16, v14, v10
-; CHECK-NEXT: vmsne.vv v10, v8, v16
-; CHECK-NEXT: vmerge.vxm v8, v12, a0, v0
+; CHECK-NEXT: vsll.vv v12, v8, v10
+; CHECK-NEXT: vsra.vv v14, v12, v10
+; CHECK-NEXT: vmsne.vv v10, v8, v14
+; CHECK-NEXT: vmv.v.x v8, a1
+; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0
; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vmerge.vvm v8, v14, v8, v0
+; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0
; CHECK-NEXT: ret
%tmp = call <vscale x 8 x i16> @llvm.sshl.sat.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> %y)
ret <vscale x 8 x i16> %tmp
@@ -152,14 +152,14 @@ define <vscale x 16 x i8> @vec_nxv16i8(<vscale x 16 x i8> %x, <vscale x 16 x i8>
; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
; CHECK-NEXT: vmsle.vi v0, v8, -1
; CHECK-NEXT: li a0, 127
-; CHECK-NEXT: vmv.v.x v12, a0
+; CHECK-NEXT: vsll.vv v12, v8, v10
+; CHECK-NEXT: vsra.vv v14, v12, v10
+; CHECK-NEXT: vmsne.vv v10, v8, v14
+; CHECK-NEXT: vmv.v.x v8, a0
; CHECK-NEXT: li a0, 128
-; CHECK-NEXT: vsll.vv v14, v8, v10
-; CHECK-NEXT: vsra.vv v16, v14, v10
-; CHECK-NEXT: vmsne.vv v10, v8, v16
-; CHECK-NEXT: vmerge.vxm v8, v12, a0, v0
+; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0
; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vmerge.vvm v8, v14, v8, v0
+; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0
; CHECK-NEXT: ret
%tmp = call <vscale x 16 x i8> @llvm.sshl.sat.nxv16i8(<vscale x 16 x i8> %x, <vscale x 16 x i8> %y)
ret <vscale x 16 x i8> %tmp
diff --git a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll
index 0e76518f67e16a..8dee496b6b00bb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll
@@ -765,9 +765,9 @@ declare <vscale x 16 x double> @llvm.experimental.vp.strided.load.nxv16f64.p0.i6
define <vscale x 16 x double> @strided_load_nxv17f64(ptr %ptr, i64 %stride, <vscale x 17 x i1> %mask, i32 zeroext %evl, ptr %hi_ptr) {
; CHECK-RV32-LABEL: strided_load_nxv17f64:
; CHECK-RV32: # %bb.0:
+; CHECK-RV32-NEXT: vmv1r.v v8, v0
; CHECK-RV32-NEXT: csrr a2, vlenb
; CHECK-RV32-NEXT: slli a7, a2, 1
-; CHECK-RV32-NEXT: vmv1r.v v8, v0
; CHECK-RV32-NEXT: mv a6, a3
; CHECK-RV32-NEXT: bltu a3, a7, .LBB57_2
; CHECK-RV32-NEXT: # %bb.1:
@@ -812,9 +812,9 @@ define <vscale x 16 x double> @strided_load_nxv17f64(ptr %ptr, i64 %stride, <vsc
;
; CHECK-RV64-LABEL: strided_load_nxv17f64:
; CHECK-RV64: # %bb.0:
+; CHECK-RV64-NEXT: vmv1r.v v8, v0
; CHECK-RV64-NEXT: csrr a4, vlenb
; CHECK-RV64-NEXT: slli a7, a4, 1
-; CHECK-RV64-NEXT: vmv1r.v v8, v0
; CHECK-RV64-NEXT: mv a6, a2
; CHECK-RV64-NEXT: bltu a2, a7, .LBB57_2
; CHECK-RV64-NEXT: # %bb.1:
diff --git a/llvm/test/CodeGen/RISCV/rvv/strided-vpstore.ll b/llvm/test/CodeGen/RISCV/rvv/strided-vpstore.ll
index 7f0ef443eeffc4..45eabec070ed16 100644
--- a/llvm/test/CodeGen/RISCV/rvv/strided-vpstore.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/strided-vpstore.ll
@@ -582,19 +582,19 @@ define void @strided_store_nxv16f64(<vscale x 16 x double> %v, ptr %ptr, i32 sig
define void @strided_store_nxv16f64_allones_mask(<vscale x 16 x double> %v, ptr %ptr, i32 signext %stride, i32 zeroext %evl) {
; CHECK-LABEL: strided_store_nxv16f64_allones_mask:
; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a3, vlenb
-; CHECK-NEXT: mv a4, a2
-; CHECK-NEXT: bltu a2, a3, .LBB47_2
+; CHECK-NEXT: csrr a4, vlenb
+; CHECK-NEXT: mv a3, a2
+; CHECK-NEXT: bltu a2, a4, .LBB47_2
; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: mv a4, a3
+; CHECK-NEXT: mv a3, a4
; CHECK-NEXT: .LBB47_2:
-; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma
+; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; CHECK-NEXT: vsse64.v v8, (a0), a1
-; CHECK-NEXT: sub a3, a2, a3
-; CHECK-NEXT: sltu a2, a2, a3
+; CHECK-NEXT: sub a4, a2, a4
+; CHECK-NEXT: sltu a2, a2, a4
; CHECK-NEXT: addi a2, a2, -1
-; CHECK-NEXT: and a2, a2, a3
-; CHECK-NEXT: mul a3, a4, a1
+; CHECK-NEXT: and a2, a2, a4
+; CHECK-NEXT: mul a3, a3, a1
; CHECK-NEXT: add a0, a0, a3
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT: vsse64.v v16, (a0), a1
@@ -609,9 +609,17 @@ declare void @llvm.experimental.vp.strided.store.nxv16f64.p0.i32(<vscale x 16 x
define void @strided_store_nxv17f64(<vscale x 17 x double> %v, ptr %ptr, i32 signext %stride, <vscale x 17 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: strided_store_nxv17f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a4, vlenb
-; CHECK-NEXT: slli a6, a4, 1
+; CHECK-NEXT: slli a4, a4, 3
+; CHECK-NEXT: sub sp, sp, a4
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; CHECK-NEXT: vmv1r.v v24, v0
+; CHECK-NEXT: addi a4, sp, 16
+; CHECK-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a4, vlenb
+; CHECK-NEXT: slli a6, a4, 1
; CHECK-NEXT: mv a5, a3
; CHECK-NEXT: bltu a3, a6, .LBB48_2
; CHECK-NEXT: # %bb.1:
@@ -622,16 +630,8 @@ define void @strided_store_nxv17f64(<vscale x 17 x double> %v, ptr %ptr, i32 sig
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: mv a7, a4
; CHECK-NEXT: .LBB48_4:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: csrr t0, vlenb
-; CHECK-NEXT: slli t0, t0, 3
-; CHECK-NEXT: sub sp, sp, t0
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; CHECK-NEXT: vl8re64.v v0, (a0)
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: vl8re64.v v16, (a0)
; CHECK-NEXT: vsetvli zero, a7, e64, m8, ta, ma
; CHECK-NEXT: vsse64.v v8, (a1), a2, v0.t
; CHECK-NEXT: sub a0, a5, a4
@@ -647,8 +647,10 @@ define void @strided_store_nxv17f64(<vscale x 17 x double> %v, ptr %ptr, i32 sig
; CHECK-NEXT: sltu a3, a3, a0
; CHECK-NEXT: addi a3, a3, -1
; CHECK-NEXT: and a0, a3, a0
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, t0, e64, m8, ta, ma
-; CHECK-NEXT: vsse64.v v16, (a7), a2, v0.t
+; CHECK-NEXT: vsse64.v v8, (a7), a2, v0.t
; CHECK-NEXT: bltu a0, a4, .LBB48_6
; CHECK-NEXT: # %bb.5:
; CHECK-NEXT: mv a0, a4
@@ -658,10 +660,8 @@ define void @strided_store_nxv17f64(<vscale x 17 x double> %v, ptr %ptr, i32 sig
; CHECK-NEXT: vsetvli a5, zero, e8, mf2, ta, ma
; CHECK-NEXT: vslidedown.vx v0, v24, a4
; CHECK-NEXT: add a1, a1, a3
-; CHECK-NEXT: addi a3, sp, 16
-; CHECK-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT: vsse64.v v8, (a1), a2, v0.t
+; CHECK-NEXT: vsse64.v v16, (a1), a2, v0.t
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add sp, sp, a0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll
index 201cfaa931b417..b60f5add52e6c1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll
@@ -109,40 +109,24 @@ declare {<vscale x 2 x i64>, <vscale x 2 x i64>} @llvm.vector.deinterleave2.nxv4
define {<vscale x 64 x i1>, <vscale x 64 x i1>} @vector_deinterleave_nxv64i1_nxv128i1(<vscale x 128 x i1> %vec) {
; CHECK-LABEL: vector_deinterleave_nxv64i1_nxv128i1:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; CHECK-NEXT: vmv1r.v v12, v8
; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma
-; CHECK-NEXT: vmv.v.i v24, 0
-; CHECK-NEXT: vmerge.vim v16, v24, 1, v0
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vmerge.vim v16, v16, 1, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v16, 0
-; CHECK-NEXT: vmv1r.v v0, v12
+; CHECK-NEXT: vnsrl.wi v24, v16, 0
; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma
-; CHECK-NEXT: vmerge.vim v24, v24, 1, v0
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma
-; CHECK-NEXT: vnsrl.wi v12, v24, 0
+; CHECK-NEXT: vnsrl.wi v28, v8, 0
; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma
-; CHECK-NEXT: vmsne.vi v0, v8, 0
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vmsne.vi v0, v24, 0
; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma
-; CHECK-NEXT: vnsrl.wi v16, v8, 8
-; CHECK-NEXT: vnsrl.wi v20, v24, 8
+; CHECK-NEXT: vnsrl.wi v24, v16, 8
+; CHECK-NEXT: vnsrl.wi v28, v8, 8
; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma
-; CHECK-NEXT: vmsne.vi v8, v16, 0
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add sp, sp, a0
-; CHECK-NEXT: .cfi_def_cfa sp, 16
-; CHECK-NEXT: addi sp, sp, 16
-; CHECK-NEXT: .cfi_def_cfa_offset 0
+; CHECK-NEXT: vmsne.vi v8, v24, 0
; CHECK-NEXT: ret
%retval = call {<vscale x 64 x i1>, <vscale x 64 x i1>} @llvm.vector.deinterleave2.nxv128i1(<vscale x 128 x i1> %vec)
ret {<vscale x 64 x i1>, <vscale x 64 x i1>} %retval
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll
index e730ae230d5a0d..a170511937ec68 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll
@@ -9,9 +9,9 @@
define <32 x i1> @vector_interleave_v32i1_v16i1(<16 x i1> %a, <16 x i1> %b) {
; CHECK-LABEL: vector_interleave_v32i1_v16i1:
; CHECK: # %bb.0:
+; CHECK-NEXT: li a0, 32
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vslideup.vi v0, v8, 2
-; CHECK-NEXT: li a0, 32
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
@@ -27,9 +27,9 @@ define <32 x i1> @vector_interleave_v32i1_v16i1(<16 x i1> %a, <16 x i1> %b) {
;
; ZVBB-LABEL: vector_interleave_v32i1_v16i1:
; ZVBB: # %bb.0:
+; ZVBB-NEXT: li a0, 32
; ZVBB-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; ZVBB-NEXT: vslideup.vi v0, v8, 2
-; ZVBB-NEXT: li a0, 32
; ZVBB-NEXT: vsetvli zero, a0, e8, m2, ta, ma
; ZVBB-NEXT: vmv.v.i v8, 0
; ZVBB-NEXT: vmerge.vim v8, v8, 1, v0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll
index 16ce25f86462e3..9812673e231c8b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll
@@ -10,9 +10,9 @@ define void @vector_interleave_store_nxv32i1_nxv16i1(<vscale x 16 x i1> %a, <vsc
; CHECK-LABEL: vector_interleave_store_nxv32i1_nxv16i1:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v9, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma
; CHECK-NEXT: vmv.v.i v10, 0
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmerge.vim v12, v10, 1, v0
; CHECK-NEXT: vmv1r.v v0, v9
; CHECK-NEXT: vmerge.vim v8, v10, 1, v0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
index 4d14d0013236f2..b13756e45506af 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
@@ -12,9 +12,9 @@ define <vscale x 32 x i1> @vector_interleave_nxv32i1_nxv16i1(<vscale x 16 x i1>
; CHECK-LABEL: vector_interleave_nxv32i1_nxv16i1:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v9, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
; CHECK-NEXT: vmv.v.i v10, 0
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmerge.vim v12, v10, 1, v0
; CHECK-NEXT: vmv1r.v v0, v9
; CHECK-NEXT: vmerge.vim v8, v10, 1, v0
@@ -33,9 +33,9 @@ define <vscale x 32 x i1> @vector_interleave_nxv32i1_nxv16i1(<vscale x 16 x i1>
; ZVBB-LABEL: vector_interleave_nxv32i1_nxv16i1:
; ZVBB: # %bb.0:
; ZVBB-NEXT: vmv1r.v v9, v0
+; ZVBB-NEXT: vmv1r.v v0, v8
; ZVBB-NEXT: vsetvli a0, zero, e8, m2, ta, mu
; ZVBB-NEXT: vmv.v.i v10, 0
-; ZVBB-NEXT: vmv1r.v v0, v8
; ZVBB-NEXT: vmerge.vim v10, v10, 1, v0
; ZVBB-NEXT: vwsll.vi v12, v10, 8
; ZVBB-NEXT: li a0, 1
@@ -161,9 +161,9 @@ define <vscale x 128 x i1> @vector_interleave_nxv128i1_nxv64i1(<vscale x 64 x i1
; CHECK-LABEL: vector_interleave_nxv128i1_nxv64i1:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v9, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma
; CHECK-NEXT: vmv.v.i v24, 0
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmerge.vim v16, v24, 1, v0
; CHECK-NEXT: vmv1r.v v0, v9
; CHECK-NEXT: vmerge.vim v8, v24, 1, v0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-splice.ll b/llvm/test/CodeGen/RISCV/rvv/vector-splice.ll
index 5460caea196cf8..d91854a84fe8f6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-splice.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-splice.ll
@@ -12,18 +12,18 @@ define <vscale x 1 x i1> @splice_nxv1i1_offset_negone(<vscale x 1 x i1> %a, <vsc
; CHECK-LABEL: splice_nxv1i1_offset_negone:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v9, v0
-; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vim v8, v10, 1, v0
+; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vmerge.vim v10, v8, 1, v0
; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmerge.vim v9, v10, 1, v0
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 3
; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vslidedown.vx v9, v9, a0
-; CHECK-NEXT: vslideup.vi v9, v8, 1
-; CHECK-NEXT: vand.vi v8, v9, 1
+; CHECK-NEXT: vslidedown.vx v8, v8, a0
+; CHECK-NEXT: vslideup.vi v8, v10, 1
+; CHECK-NEXT: vand.vi v8, v8, 1
; CHECK-NEXT: vmsne.vi v0, v8, 0
; CHECK-NEXT: ret
%res = call <vscale x 1 x i1> @llvm.vector.splice.nxv1i1(<vscale x 1 x i1> %a, <vscale x 1 x i1> %b, i32 -1)
@@ -34,20 +34,20 @@ define <vscale x 1 x i1> @splice_nxv1i1_offset_max(<vscale x 1 x i1> %a, <vscale
; CHECK-LABEL: splice_nxv1i1_offset_max:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v9, v0
-; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vim v8, v10, 1, v0
+; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vmerge.vim v10, v8, 1, v0
; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmerge.vim v9, v10, 1, v0
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 3
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
-; CHECK-NEXT: vslidedown.vi v9, v9, 1
+; CHECK-NEXT: vslidedown.vi v8, v8, 1
; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
-; CHECK-NEXT: vslideup.vx v9, v8, a0
-; CHECK-NEXT: vand.vi v8, v9, 1
+; CHECK-NEXT: vslideup.vx v8, v10, a0
+; CHECK-NEXT: vand.vi v8, v8, 1
; CHECK-NEXT: vmsne.vi v0, v8, 0
; CHECK-NEXT: ret
%res = call <vscale x 1 x i1> @llvm.vector.splice.nxv1i1(<vscale x 1 x i1> %a, <vscale x 1 x i1> %b, i32 1)
@@ -60,18 +60,18 @@ define <vscale x 2 x i1> @splice_nxv2i1_offset_negone(<vscale x 2 x i1> %a, <vsc
; CHECK-LABEL: splice_nxv2i1_offset_negone:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v9, v0
-; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
-; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vim v8, v10, 1, v0
+; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vmerge.vim v10, v8, 1, v0
; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmerge.vim v9, v10, 1, v0
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 2
; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vslidedown.vx v9, v9, a0
-; CHECK-NEXT: vslideup.vi v9, v8, 1
-; CHECK-NEXT: vand.vi v8, v9, 1
+; CHECK-NEXT: vslidedown.vx v8, v8, a0
+; CHECK-NEXT: vslideup.vi v8, v10, 1
+; CHECK-NEXT: vand.vi v8, v8, 1
; CHECK-NEXT: vmsne.vi v0, v8, 0
; CHECK-NEXT: ret
%res = call <vscale x 2 x i1> @llvm.vector.splice.nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b, i32 -1)
@@ -82,20 +82,20 @@ define <vscale x 2 x i1> @splice_nxv2i1_offset_max(<vscale x 2 x i1> %a, <vscale
; CHECK-LABEL: splice_nxv2i1_offset_max:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v9, v0
-; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
-; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vim v8, v10, 1, v0
+; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vmerge.vim v10, v8, 1, v0
; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmerge.vim v9, v10, 1, v0
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 2
; CHECK-NEXT: addi a0, a0, -3
; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
-; CHECK-NEXT: vslidedown.vi v9, v9, 3
+; CHECK-NEXT: vslidedown.vi v8, v8, 3
; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
-; CHECK-NEXT: vslideup.vx v9, v8, a0
-; CHECK-NEXT: vand.vi v8, v9, 1
+; CHECK-NEXT: vslideup.vx v8, v10, a0
+; CHECK-NEXT: vand.vi v8, v8, 1
; CHECK-NEXT: vmsne.vi v0, v8, 0
; CHECK-NEXT: ret
%res = call <vscale x 2 x i1> @llvm.vector.splice.nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b, i32 3)
@@ -108,18 +108,18 @@ define <vscale x 4 x i1> @splice_nxv4i1_offset_negone(<vscale x 4 x i1> %a, <vsc
; CHECK-LABEL: splice_nxv4i1_offset_negone:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v9, v0
-; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vim v8, v10, 1, v0
+; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vmerge.vim v10, v8, 1, v0
; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmerge.vim v9, v10, 1, v0
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 1
; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vslidedown.vx v9, v9, a0
-; CHECK-NEXT: vslideup.vi v9, v8, 1
-; CHECK-NEXT: vand.vi v8, v9, 1
+; CHECK-NEXT: vslidedown.vx v8, v8, a0
+; CHECK-NEXT: vslideup.vi v8, v10, 1
+; CHECK-NEXT: vand.vi v8, v8, 1
; CHECK-NEXT: vmsne.vi v0, v8, 0
; CHECK-NEXT: ret
%res = call <vscale x 4 x i1> @llvm.vector.splice.nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b, i32 -1)
@@ -130,20 +130,20 @@ define <vscale x 4 x i1> @splice_nxv4i1_offset_max(<vscale x 4 x i1> %a, <vscale
; CHECK-LABEL: splice_nxv4i1_offset_max:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v9, v0
-; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vim v8, v10, 1, v0
+; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vmerge.vim v10, v8, 1, v0
; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmerge.vim v9, v10, 1, v0
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 1
; CHECK-NEXT: addi a0, a0, -7
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vi v9, v9, 7
+; CHECK-NEXT: vslidedown.vi v8, v8, 7
; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vslideup.vx v9, v8, a0
-; CHECK-NEXT: vand.vi v8, v9, 1
+; CHECK-NEXT: vslideup.vx v8, v10, a0
+; CHECK-NEXT: vand.vi v8, v8, 1
; CHECK-NEXT: vmsne.vi v0, v8, 0
; CHECK-NEXT: ret
%res = call <vscale x 4 x i1> @llvm.vector.splice.nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b, i32 7)
@@ -156,17 +156,17 @@ define <vscale x 8 x i1> @splice_nxv8i1_offset_negone(<vscale x 8 x i1> %a, <vsc
; CHECK-LABEL: splice_nxv8i1_offset_negone:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v9, v0
-; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vim v8, v10, 1, v0
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vmerge.vim v10, v8, 1, v0
; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmerge.vim v9, v10, 1, v0
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vslidedown.vx v9, v9, a0
-; CHECK-NEXT: vslideup.vi v9, v8, 1
-; CHECK-NEXT: vand.vi v8, v9, 1
+; CHECK-NEXT: vslidedown.vx v8, v8, a0
+; CHECK-NEXT: vslideup.vi v8, v10, 1
+; CHECK-NEXT: vand.vi v8, v8, 1
; CHECK-NEXT: vmsne.vi v0, v8, 0
; CHECK-NEXT: ret
%res = call <vscale x 8 x i1> @llvm.vector.splice.nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b, i32 -1)
@@ -177,19 +177,19 @@ define <vscale x 8 x i1> @splice_nxv8i1_offset_max(<vscale x 8 x i1> %a, <vscale
; CHECK-LABEL: splice_nxv8i1_offset_max:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v9, v0
-; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vim v8, v10, 1, v0
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vmerge.vim v10, v8, 1, v0
; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmerge.vim v9, v10, 1, v0
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: addi a0, a0, -15
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v9, v9, 15
+; CHECK-NEXT: vslidedown.vi v8, v8, 15
; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
-; CHECK-NEXT: vslideup.vx v9, v8, a0
-; CHECK-NEXT: vand.vi v8, v9, 1
+; CHECK-NEXT: vslideup.vx v8, v10, a0
+; CHECK-NEXT: vand.vi v8, v8, 1
; CHECK-NEXT: vmsne.vi v0, v8, 0
; CHECK-NEXT: ret
%res = call <vscale x 8 x i1> @llvm.vector.splice.nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b, i32 15)
@@ -202,9 +202,9 @@ define <vscale x 16 x i1> @splice_nxv16i1_offset_negone(<vscale x 16 x i1> %a, <
; CHECK-LABEL: splice_nxv16i1_offset_negone:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v9, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
; CHECK-NEXT: vmv.v.i v10, 0
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmerge.vim v12, v10, 1, v0
; CHECK-NEXT: vmv1r.v v0, v9
; CHECK-NEXT: vmerge.vim v8, v10, 1, v0
@@ -224,9 +224,9 @@ define <vscale x 16 x i1> @splice_nxv16i1_offset_max(<vscale x 16 x i1> %a, <vsc
; CHECK-LABEL: splice_nxv16i1_offset_max:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v9, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
; CHECK-NEXT: vmv.v.i v10, 0
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmerge.vim v12, v10, 1, v0
; CHECK-NEXT: vmv1r.v v0, v9
; CHECK-NEXT: vmerge.vim v8, v10, 1, v0
@@ -250,9 +250,9 @@ define <vscale x 32 x i1> @splice_nxv32i1_offset_negone(<vscale x 32 x i1> %a, <
; CHECK-LABEL: splice_nxv32i1_offset_negone:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v9, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma
; CHECK-NEXT: vmv.v.i v12, 0
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmerge.vim v16, v12, 1, v0
; CHECK-NEXT: vmv1r.v v0, v9
; CHECK-NEXT: vmerge.vim v8, v12, 1, v0
@@ -297,9 +297,9 @@ define <vscale x 64 x i1> @splice_nxv64i1_offset_negone(<vscale x 64 x i1> %a, <
; CHECK-LABEL: splice_nxv64i1_offset_negone:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v9, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma
; CHECK-NEXT: vmv.v.i v24, 0
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmerge.vim v16, v24, 1, v0
; CHECK-NEXT: vmv1r.v v0, v9
; CHECK-NEXT: vmerge.vim v8, v24, 1, v0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfirst-byte-compare-index.ll b/llvm/test/CodeGen/RISCV/rvv/vfirst-byte-compare-index.ll
index 3107d4e044cae1..61df5336883c81 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfirst-byte-compare-index.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfirst-byte-compare-index.ll
@@ -6,35 +6,35 @@
define i32 @compare_bytes_simple(ptr %a, ptr %b, i32 signext %len, i32 signext %n) {
; CHECK-LABEL: compare_bytes_simple:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addiw a4, a2, 1
-; CHECK-NEXT: bltu a3, a4, .LBB0_7
+; CHECK-NEXT: addiw a5, a2, 1
+; CHECK-NEXT: bltu a3, a5, .LBB0_7
; CHECK-NEXT: # %bb.1: # %mismatch_mem_check
-; CHECK-NEXT: slli a2, a4, 32
+; CHECK-NEXT: slli a2, a5, 32
; CHECK-NEXT: srli a2, a2, 32
-; CHECK-NEXT: slli a5, a3, 32
-; CHECK-NEXT: srli a5, a5, 32
+; CHECK-NEXT: slli a4, a3, 32
+; CHECK-NEXT: srli a4, a4, 32
; CHECK-NEXT: add a6, a0, a2
-; CHECK-NEXT: add a7, a0, a5
+; CHECK-NEXT: add a7, a0, a4
; CHECK-NEXT: srli a6, a6, 12
; CHECK-NEXT: srli a7, a7, 12
; CHECK-NEXT: bne a6, a7, .LBB0_7
; CHECK-NEXT: # %bb.2: # %mismatch_mem_check
; CHECK-NEXT: add a6, a1, a2
-; CHECK-NEXT: add a7, a1, a5
+; CHECK-NEXT: add a7, a1, a4
; CHECK-NEXT: srli a6, a6, 12
; CHECK-NEXT: srli a7, a7, 12
; CHECK-NEXT: bne a6, a7, .LBB0_7
; CHECK-NEXT: .LBB0_3: # %mismatch_vec_loop
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: sub a4, a5, a2
-; CHECK-NEXT: vsetvli a4, a4, e8, m2, ta, ma
+; CHECK-NEXT: sub a5, a4, a2
+; CHECK-NEXT: vsetvli a5, a5, e8, m2, ta, ma
; CHECK-NEXT: add a6, a0, a2
; CHECK-NEXT: vle8.v v8, (a6)
; CHECK-NEXT: add a6, a1, a2
; CHECK-NEXT: vle8.v v10, (a6)
; CHECK-NEXT: vmsne.vv v12, v8, v10
; CHECK-NEXT: vfirst.m a7, v12
-; CHECK-NEXT: mv a6, a4
+; CHECK-NEXT: mv a6, a5
; CHECK-NEXT: bltz a7, .LBB0_5
; CHECK-NEXT: # %bb.4: # %mismatch_vec_loop
; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1
@@ -42,30 +42,30 @@ define i32 @compare_bytes_simple(ptr %a, ptr %b, i32 signext %len, i32 signext %
; CHECK-NEXT: .LBB0_5: # %mismatch_vec_loop
; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT: sext.w a7, a6
-; CHECK-NEXT: bne a7, a4, .LBB0_11
+; CHECK-NEXT: bne a7, a5, .LBB0_11
; CHECK-NEXT: # %bb.6: # %mismatch_vec_loop_inc
; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1
-; CHECK-NEXT: add a2, a2, a4
-; CHECK-NEXT: bne a2, a5, .LBB0_3
+; CHECK-NEXT: add a2, a2, a5
+; CHECK-NEXT: bne a2, a4, .LBB0_3
; CHECK-NEXT: j .LBB0_9
; CHECK-NEXT: .LBB0_7: # %mismatch_loop
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: slli a2, a4, 32
+; CHECK-NEXT: slli a2, a5, 32
; CHECK-NEXT: srli a2, a2, 32
-; CHECK-NEXT: add a5, a0, a2
-; CHECK-NEXT: lbu a5, 0(a5)
+; CHECK-NEXT: add a4, a0, a2
+; CHECK-NEXT: lbu a4, 0(a4)
; CHECK-NEXT: add a2, a1, a2
; CHECK-NEXT: lbu a2, 0(a2)
-; CHECK-NEXT: bne a5, a2, .LBB0_10
+; CHECK-NEXT: bne a4, a2, .LBB0_10
; CHECK-NEXT: # %bb.8: # %mismatch_loop_inc
; CHECK-NEXT: # in Loop: Header=BB0_7 Depth=1
-; CHECK-NEXT: addiw a4, a4, 1
-; CHECK-NEXT: bne a3, a4, .LBB0_7
+; CHECK-NEXT: addiw a5, a5, 1
+; CHECK-NEXT: bne a3, a5, .LBB0_7
; CHECK-NEXT: .LBB0_9: # %while.end
; CHECK-NEXT: mv a0, a3
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB0_10:
-; CHECK-NEXT: mv a0, a4
+; CHECK-NEXT: mv a0, a5
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB0_11: # %mismatch_vec_loop_found
; CHECK-NEXT: slli a6, a6, 32
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll
index 83f59f973d4659..35f507602284ee 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll
@@ -8715,96 +8715,83 @@ define <vscale x 32 x half> @vfmsub_vv_nxv32f16(<vscale x 32 x half> %va, <vscal
; ZVFHMIN-NEXT: addi sp, sp, -16
; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
; ZVFHMIN-NEXT: csrr a2, vlenb
-; ZVFHMIN-NEXT: mv a3, a2
; ZVFHMIN-NEXT: slli a2, a2, 3
-; ZVFHMIN-NEXT: add a3, a3, a2
+; ZVFHMIN-NEXT: mv a3, a2
; ZVFHMIN-NEXT: slli a2, a2, 2
; ZVFHMIN-NEXT: add a2, a2, a3
; ZVFHMIN-NEXT: sub sp, sp, a2
-; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x29, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 41 * vlenb
-; ZVFHMIN-NEXT: vl8re16.v v24, (a0)
-; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a2, a0, 5
-; ZVFHMIN-NEXT: add a0, a2, a0
-; ZVFHMIN-NEXT: add a0, sp, a0
-; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb
; ZVFHMIN-NEXT: csrr a2, vlenb
-; ZVFHMIN-NEXT: slli a2, a2, 4
+; ZVFHMIN-NEXT: slli a2, a2, 5
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
-; ZVFHMIN-NEXT: vs1r.v v0, (a2) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vl8re16.v v16, (a0)
+; ZVFHMIN-NEXT: lui a0, 8
; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m8, ta, ma
-; ZVFHMIN-NEXT: vxor.vx v16, v24, a0, v0.t
+; ZVFHMIN-NEXT: vxor.vx v16, v16, a0, v0.t
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a2, a0, 1
-; ZVFHMIN-NEXT: csrr a3, vlenb
-; ZVFHMIN-NEXT: mv a4, a3
-; ZVFHMIN-NEXT: slli a3, a3, 3
-; ZVFHMIN-NEXT: add a4, a4, a3
-; ZVFHMIN-NEXT: slli a3, a3, 1
-; ZVFHMIN-NEXT: add a3, a3, a4
-; ZVFHMIN-NEXT: add a3, sp, a3
-; ZVFHMIN-NEXT: addi a3, a3, 16
-; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
-; ZVFHMIN-NEXT: csrr a3, vlenb
-; ZVFHMIN-NEXT: slli a4, a3, 4
-; ZVFHMIN-NEXT: add a3, a4, a3
-; ZVFHMIN-NEXT: add a3, sp, a3
-; ZVFHMIN-NEXT: addi a3, a3, 16
-; ZVFHMIN-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: mv a3, a1
+; ZVFHMIN-NEXT: vmv4r.v v4, v20
+; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
+; ZVFHMIN-NEXT: csrr a4, vlenb
+; ZVFHMIN-NEXT: slli a4, a4, 3
+; ZVFHMIN-NEXT: mv a5, a4
+; ZVFHMIN-NEXT: slli a4, a4, 1
+; ZVFHMIN-NEXT: add a4, a4, a5
+; ZVFHMIN-NEXT: add a4, sp, a4
+; ZVFHMIN-NEXT: addi a4, a4, 16
+; ZVFHMIN-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: bltu a1, a2, .LBB280_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a3, a2
; ZVFHMIN-NEXT: .LBB280_2:
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vmv8r.v v16, v8
; ZVFHMIN-NEXT: csrr a4, vlenb
; ZVFHMIN-NEXT: slli a4, a4, 3
; ZVFHMIN-NEXT: add a4, sp, a4
; ZVFHMIN-NEXT: addi a4, a4, 16
; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
; ZVFHMIN-NEXT: csrr a4, vlenb
-; ZVFHMIN-NEXT: slli a5, a4, 5
-; ZVFHMIN-NEXT: add a4, a5, a4
+; ZVFHMIN-NEXT: slli a4, a4, 4
; ZVFHMIN-NEXT: add a4, sp, a4
; ZVFHMIN-NEXT: addi a4, a4, 16
-; ZVFHMIN-NEXT: vl8r.v v0, (a4) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0
+; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: csrr a4, vlenb
-; ZVFHMIN-NEXT: slli a4, a4, 4
+; ZVFHMIN-NEXT: slli a4, a4, 5
; ZVFHMIN-NEXT: add a4, sp, a4
; ZVFHMIN-NEXT: addi a4, a4, 16
-; ZVFHMIN-NEXT: vl1r.v v0, (a4) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
; ZVFHMIN-NEXT: csrr a4, vlenb
-; ZVFHMIN-NEXT: slli a5, a4, 4
-; ZVFHMIN-NEXT: add a4, a5, a4
+; ZVFHMIN-NEXT: slli a4, a4, 3
+; ZVFHMIN-NEXT: mv a5, a4
+; ZVFHMIN-NEXT: slli a4, a4, 1
+; ZVFHMIN-NEXT: add a4, a4, a5
; ZVFHMIN-NEXT: add a4, sp, a4
; ZVFHMIN-NEXT: addi a4, a4, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: csrr a4, vlenb
+; ZVFHMIN-NEXT: slli a4, a4, 4
+; ZVFHMIN-NEXT: add a4, sp, a4
+; ZVFHMIN-NEXT: addi a4, a4, 16
+; ZVFHMIN-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v8, v0.t
; ZVFHMIN-NEXT: addi a3, sp, 16
; ZVFHMIN-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v4
; ZVFHMIN-NEXT: csrr a3, vlenb
-; ZVFHMIN-NEXT: mv a4, a3
; ZVFHMIN-NEXT: slli a3, a3, 3
-; ZVFHMIN-NEXT: add a4, a4, a3
+; ZVFHMIN-NEXT: mv a4, a3
; ZVFHMIN-NEXT: slli a3, a3, 1
; ZVFHMIN-NEXT: add a3, a3, a4
; ZVFHMIN-NEXT: add a3, sp, a3
; ZVFHMIN-NEXT: addi a3, a3, 16
-; ZVFHMIN-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20
-; ZVFHMIN-NEXT: csrr a3, vlenb
-; ZVFHMIN-NEXT: slli a4, a3, 4
-; ZVFHMIN-NEXT: add a3, a4, a3
-; ZVFHMIN-NEXT: add a3, sp, a3
-; ZVFHMIN-NEXT: addi a3, a3, 16
; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: sub a2, a1, a2
; ZVFHMIN-NEXT: sltu a1, a1, a2
@@ -8821,29 +8808,28 @@ define <vscale x 32 x half> @vfmsub_vv_nxv32f16(<vscale x 32 x half> %va, <vscal
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: mv a2, a0
-; ZVFHMIN-NEXT: slli a0, a0, 3
-; ZVFHMIN-NEXT: add a2, a2, a0
-; ZVFHMIN-NEXT: slli a0, a0, 1
-; ZVFHMIN-NEXT: add a0, a0, a2
+; ZVFHMIN-NEXT: slli a0, a0, 4
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v4
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a2, a0, 4
-; ZVFHMIN-NEXT: add a0, a2, a0
+; ZVFHMIN-NEXT: slli a0, a0, 5
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: mv a2, a0
; ZVFHMIN-NEXT: slli a0, a0, 3
-; ZVFHMIN-NEXT: add a2, a2, a0
+; ZVFHMIN-NEXT: mv a2, a0
; ZVFHMIN-NEXT: slli a0, a0, 1
; ZVFHMIN-NEXT: add a0, a0, a2
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 4
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v8, v0.t
@@ -8853,9 +8839,8 @@ define <vscale x 32 x half> @vfmsub_vv_nxv32f16(<vscale x 32 x half> %va, <vscal
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: mv a1, a0
; ZVFHMIN-NEXT: slli a0, a0, 3
-; ZVFHMIN-NEXT: add a1, a1, a0
+; ZVFHMIN-NEXT: mv a1, a0
; ZVFHMIN-NEXT: slli a0, a0, 2
; ZVFHMIN-NEXT: add a0, a0, a1
; ZVFHMIN-NEXT: add sp, sp, a0
@@ -8881,23 +8866,14 @@ define <vscale x 32 x half> @vfmsub_vv_nxv32f16_unmasked(<vscale x 32 x half> %v
; ZVFHMIN-NEXT: addi sp, sp, -16
; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
; ZVFHMIN-NEXT: csrr a2, vlenb
-; ZVFHMIN-NEXT: slli a2, a2, 3
-; ZVFHMIN-NEXT: mv a3, a2
-; ZVFHMIN-NEXT: slli a2, a2, 2
-; ZVFHMIN-NEXT: add a2, a2, a3
+; ZVFHMIN-NEXT: slli a2, a2, 5
; ZVFHMIN-NEXT: sub sp, sp, a2
-; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb
+; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
; ZVFHMIN-NEXT: vl8re16.v v24, (a0)
; ZVFHMIN-NEXT: lui a0, 8
; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; ZVFHMIN-NEXT: vxor.vx v0, v24, a0
-; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 3
-; ZVFHMIN-NEXT: mv a2, a0
-; ZVFHMIN-NEXT: slli a0, a0, 1
-; ZVFHMIN-NEXT: add a0, a0, a2
-; ZVFHMIN-NEXT: add a0, sp, a0
-; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: addi a0, sp, 16
; ZVFHMIN-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vsetvli a0, zero, e8, m4, ta, ma
; ZVFHMIN-NEXT: vmset.m v24
@@ -8913,81 +8889,74 @@ define <vscale x 32 x half> @vfmsub_vv_nxv32f16_unmasked(<vscale x 32 x half> %v
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4
; ZVFHMIN-NEXT: csrr a2, vlenb
-; ZVFHMIN-NEXT: slli a2, a2, 4
+; ZVFHMIN-NEXT: slli a2, a2, 3
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: csrr a2, vlenb
-; ZVFHMIN-NEXT: slli a2, a2, 5
+; ZVFHMIN-NEXT: slli a2, a2, 4
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT: vmv4r.v v8, v16
; ZVFHMIN-NEXT: csrr a2, vlenb
; ZVFHMIN-NEXT: slli a2, a2, 3
+; ZVFHMIN-NEXT: mv a4, a2
+; ZVFHMIN-NEXT: slli a2, a2, 1
+; ZVFHMIN-NEXT: add a2, a2, a4
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
-; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vmv8r.v v8, v16
-; ZVFHMIN-NEXT: addi a2, sp, 16
-; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT: csrr a2, vlenb
-; ZVFHMIN-NEXT: slli a2, a2, 4
-; ZVFHMIN-NEXT: add a2, sp, a2
-; ZVFHMIN-NEXT: addi a2, a2, 16
-; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20
; ZVFHMIN-NEXT: csrr a2, vlenb
; ZVFHMIN-NEXT: slli a2, a2, 3
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
-; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v24, v0.t
+; ZVFHMIN-NEXT: vfmadd.vv v8, v24, v16, v0.t
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v16
+; ZVFHMIN-NEXT: vfncvt.f.f.w v20, v8
+; ZVFHMIN-NEXT: addi a2, sp, 16
+; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24
; ZVFHMIN-NEXT: csrr a2, vlenb
; ZVFHMIN-NEXT: slli a2, a2, 3
-; ZVFHMIN-NEXT: mv a3, a2
-; ZVFHMIN-NEXT: slli a2, a2, 1
-; ZVFHMIN-NEXT: add a2, a2, a3
-; ZVFHMIN-NEXT: add a2, sp, a2
-; ZVFHMIN-NEXT: addi a2, a2, 16
-; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: csrr a2, vlenb
-; ZVFHMIN-NEXT: slli a2, a2, 4
; ZVFHMIN-NEXT: add a2, sp, a2
; ZVFHMIN-NEXT: addi a2, a2, 16
-; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: bltu a1, a0, .LBB281_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a1, a0
; ZVFHMIN-NEXT: .LBB281_2:
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 5
+; ZVFHMIN-NEXT: slli a0, a0, 4
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24
-; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 4
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: mv a2, a0
+; ZVFHMIN-NEXT: slli a0, a0, 1
+; ZVFHMIN-NEXT: add a0, a0, a2
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, a1, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v24, v8, v16
+; ZVFHMIN-NEXT: vfmadd.vv v24, v8, v0
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v0, v24
-; ZVFHMIN-NEXT: vmv8r.v v8, v0
+; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v24
+; ZVFHMIN-NEXT: vmv8r.v v8, v16
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 3
-; ZVFHMIN-NEXT: mv a1, a0
-; ZVFHMIN-NEXT: slli a0, a0, 2
-; ZVFHMIN-NEXT: add a0, a0, a1
+; ZVFHMIN-NEXT: slli a0, a0, 5
; ZVFHMIN-NEXT: add sp, sp, a0
; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16
; ZVFHMIN-NEXT: addi sp, sp, 16
@@ -9029,19 +8998,19 @@ define <vscale x 32 x half> @vfmsub_vf_nxv32f16(<vscale x 32 x half> %va, half %
; ZVFHMIN-NEXT: vxor.vx v16, v16, a1, v0.t
; ZVFHMIN-NEXT: csrr a1, vlenb
; ZVFHMIN-NEXT: slli a2, a1, 1
-; ZVFHMIN-NEXT: csrr a3, vlenb
-; ZVFHMIN-NEXT: slli a3, a3, 4
-; ZVFHMIN-NEXT: add a3, sp, a3
-; ZVFHMIN-NEXT: addi a3, a3, 16
-; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
-; ZVFHMIN-NEXT: csrr a3, vlenb
-; ZVFHMIN-NEXT: slli a3, a3, 3
-; ZVFHMIN-NEXT: add a3, sp, a3
-; ZVFHMIN-NEXT: addi a3, a3, 16
-; ZVFHMIN-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: mv a3, a0
+; ZVFHMIN-NEXT: csrr a4, vlenb
+; ZVFHMIN-NEXT: slli a4, a4, 4
+; ZVFHMIN-NEXT: add a4, sp, a4
+; ZVFHMIN-NEXT: addi a4, a4, 16
+; ZVFHMIN-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
+; ZVFHMIN-NEXT: csrr a4, vlenb
+; ZVFHMIN-NEXT: slli a4, a4, 3
+; ZVFHMIN-NEXT: add a4, sp, a4
+; ZVFHMIN-NEXT: addi a4, a4, 16
+; ZVFHMIN-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: bltu a0, a2, .LBB282_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a3, a2
@@ -9163,24 +9132,24 @@ define <vscale x 32 x half> @vfmsub_vf_nxv32f16_commute(<vscale x 32 x half> %va
; ZVFHMIN-NEXT: vxor.vx v16, v16, a1, v0.t
; ZVFHMIN-NEXT: csrr a1, vlenb
; ZVFHMIN-NEXT: slli a2, a1, 1
-; ZVFHMIN-NEXT: csrr a3, vlenb
-; ZVFHMIN-NEXT: slli a3, a3, 4
-; ZVFHMIN-NEXT: add a3, sp, a3
-; ZVFHMIN-NEXT: addi a3, a3, 16
-; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
-; ZVFHMIN-NEXT: csrr a3, vlenb
-; ZVFHMIN-NEXT: slli a3, a3, 3
-; ZVFHMIN-NEXT: add a3, sp, a3
-; ZVFHMIN-NEXT: addi a3, a3, 16
-; ZVFHMIN-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: mv a3, a0
+; ZVFHMIN-NEXT: csrr a4, vlenb
+; ZVFHMIN-NEXT: slli a4, a4, 4
+; ZVFHMIN-NEXT: add a4, sp, a4
+; ZVFHMIN-NEXT: addi a4, a4, 16
+; ZVFHMIN-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
+; ZVFHMIN-NEXT: csrr a4, vlenb
+; ZVFHMIN-NEXT: slli a4, a4, 3
+; ZVFHMIN-NEXT: add a4, sp, a4
+; ZVFHMIN-NEXT: addi a4, a4, 16
+; ZVFHMIN-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: bltu a0, a2, .LBB283_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a3, a2
; ZVFHMIN-NEXT: .LBB283_2:
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
; ZVFHMIN-NEXT: vmv4r.v v4, v12
; ZVFHMIN-NEXT: csrr a4, vlenb
; ZVFHMIN-NEXT: slli a4, a4, 3
@@ -9189,29 +9158,29 @@ define <vscale x 32 x half> @vfmsub_vf_nxv32f16_commute(<vscale x 32 x half> %va
; ZVFHMIN-NEXT: add a4, a4, a5
; ZVFHMIN-NEXT: add a4, sp, a4
; ZVFHMIN-NEXT: addi a4, a4, 16
-; ZVFHMIN-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24
+; ZVFHMIN-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
; ZVFHMIN-NEXT: csrr a4, vlenb
; ZVFHMIN-NEXT: slli a4, a4, 3
; ZVFHMIN-NEXT: add a4, sp, a4
; ZVFHMIN-NEXT: addi a4, a4, 16
-; ZVFHMIN-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v24, v0.t
+; ZVFHMIN-NEXT: vfmadd.vv v24, v8, v16, v0.t
; ZVFHMIN-NEXT: addi a3, sp, 16
-; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: csrr a3, vlenb
; ZVFHMIN-NEXT: slli a3, a3, 4
; ZVFHMIN-NEXT: add a3, sp, a3
; ZVFHMIN-NEXT: addi a3, a3, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
; ZVFHMIN-NEXT: csrr a3, vlenb
; ZVFHMIN-NEXT: slli a3, a3, 3
; ZVFHMIN-NEXT: add a3, sp, a3
; ZVFHMIN-NEXT: addi a3, a3, 16
-; ZVFHMIN-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: sub a2, a0, a2
; ZVFHMIN-NEXT: sltu a0, a0, a2
; ZVFHMIN-NEXT: addi a0, a0, -1
@@ -9220,7 +9189,7 @@ define <vscale x 32 x half> @vfmsub_vf_nxv32f16_commute(<vscale x 32 x half> %va
; ZVFHMIN-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a1
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v4
; ZVFHMIN-NEXT: csrr a1, vlenb
; ZVFHMIN-NEXT: slli a1, a1, 3
; ZVFHMIN-NEXT: mv a2, a1
@@ -9229,12 +9198,12 @@ define <vscale x 32 x half> @vfmsub_vf_nxv32f16_commute(<vscale x 32 x half> %va
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
; ZVFHMIN-NEXT: csrr a1, vlenb
; ZVFHMIN-NEXT: slli a1, a1, 4
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
-; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: csrr a1, vlenb
; ZVFHMIN-NEXT: slli a1, a1, 3
; ZVFHMIN-NEXT: add a1, sp, a1
@@ -9244,14 +9213,14 @@ define <vscale x 32 x half> @vfmsub_vf_nxv32f16_commute(<vscale x 32 x half> %va
; ZVFHMIN-NEXT: slli a1, a1, 4
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
-; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v8, v0.t
+; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v8, v0.t
; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
+; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a0, a0, 5
; ZVFHMIN-NEXT: add sp, sp, a0
@@ -9533,36 +9502,37 @@ define <vscale x 32 x half> @vfnmadd_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
; ZVFHMIN-NEXT: slli a2, a2, 5
; ZVFHMIN-NEXT: sub sp, sp, a2
; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
+; ZVFHMIN-NEXT: csrr a2, vlenb
+; ZVFHMIN-NEXT: slli a2, a2, 3
+; ZVFHMIN-NEXT: mv a3, a2
+; ZVFHMIN-NEXT: slli a2, a2, 1
+; ZVFHMIN-NEXT: add a2, a2, a3
+; ZVFHMIN-NEXT: add a2, sp, a2
+; ZVFHMIN-NEXT: addi a2, a2, 16
+; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vl8re16.v v24, (a0)
-; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 3
-; ZVFHMIN-NEXT: mv a2, a0
-; ZVFHMIN-NEXT: slli a0, a0, 1
-; ZVFHMIN-NEXT: add a0, a0, a2
-; ZVFHMIN-NEXT: add a0, sp, a0
-; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: lui a0, 8
; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m8, ta, ma
-; ZVFHMIN-NEXT: vxor.vx v8, v16, a0, v0.t
-; ZVFHMIN-NEXT: vxor.vx v24, v24, a0, v0.t
+; ZVFHMIN-NEXT: vxor.vx v16, v16, a0, v0.t
+; ZVFHMIN-NEXT: vxor.vx v8, v24, a0, v0.t
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a2, a0, 1
; ZVFHMIN-NEXT: mv a3, a1
-; ZVFHMIN-NEXT: vmv4r.v v4, v12
+; ZVFHMIN-NEXT: vmv4r.v v4, v20
; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
+; ZVFHMIN-NEXT: vmv8r.v v16, v24
; ZVFHMIN-NEXT: csrr a4, vlenb
; ZVFHMIN-NEXT: slli a4, a4, 4
; ZVFHMIN-NEXT: add a4, sp, a4
; ZVFHMIN-NEXT: addi a4, a4, 16
-; ZVFHMIN-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24
+; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
; ZVFHMIN-NEXT: csrr a4, vlenb
; ZVFHMIN-NEXT: slli a4, a4, 3
; ZVFHMIN-NEXT: add a4, sp, a4
; ZVFHMIN-NEXT: addi a4, a4, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: bltu a1, a2, .LBB286_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a3, a2
@@ -9586,18 +9556,18 @@ define <vscale x 32 x half> @vfnmadd_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
; ZVFHMIN-NEXT: addi a3, sp, 16
; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v4
; ZVFHMIN-NEXT: csrr a3, vlenb
; ZVFHMIN-NEXT: slli a3, a3, 4
; ZVFHMIN-NEXT: add a3, sp, a3
; ZVFHMIN-NEXT: addi a3, a3, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
; ZVFHMIN-NEXT: csrr a3, vlenb
; ZVFHMIN-NEXT: slli a3, a3, 3
; ZVFHMIN-NEXT: add a3, sp, a3
; ZVFHMIN-NEXT: addi a3, a3, 16
-; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: sub a2, a1, a2
; ZVFHMIN-NEXT: sltu a1, a1, a2
; ZVFHMIN-NEXT: addi a1, a1, -1
@@ -9614,19 +9584,19 @@ define <vscale x 32 x half> @vfnmadd_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a0, a0, 3
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, a1, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v8, v0.t
+; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v8, v0.t
; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
+; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a0, a0, 5
; ZVFHMIN-NEXT: add sp, sp, a0
@@ -9659,12 +9629,12 @@ define <vscale x 32 x half> @vfnmadd_vv_nxv32f16_commuted(<vscale x 32 x half> %
; ZVFHMIN-NEXT: add a2, a2, a3
; ZVFHMIN-NEXT: sub sp, sp, a2
; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb
+; ZVFHMIN-NEXT: csrr a2, vlenb
+; ZVFHMIN-NEXT: slli a2, a2, 5
+; ZVFHMIN-NEXT: add a2, sp, a2
+; ZVFHMIN-NEXT: addi a2, a2, 16
+; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vl8re16.v v24, (a0)
-; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 5
-; ZVFHMIN-NEXT: add a0, sp, a0
-; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: lui a0, 8
; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; ZVFHMIN-NEXT: vxor.vx v8, v16, a0, v0.t
@@ -10052,17 +10022,17 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16(<vscale x 32 x half> %va, half
; ZVFHMIN-NEXT: addi sp, sp, -16
; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: slli a1, a1, 3
-; ZVFHMIN-NEXT: mv a2, a1
-; ZVFHMIN-NEXT: slli a1, a1, 2
-; ZVFHMIN-NEXT: add a1, a1, a2
+; ZVFHMIN-NEXT: slli a1, a1, 5
; ZVFHMIN-NEXT: sub sp, sp, a1
-; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb
+; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m8, ta, ma
; ZVFHMIN-NEXT: vmv.v.x v24, a1
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: slli a1, a1, 5
+; ZVFHMIN-NEXT: slli a1, a1, 3
+; ZVFHMIN-NEXT: mv a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 1
+; ZVFHMIN-NEXT: add a1, a1, a2
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
@@ -10078,68 +10048,57 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16(<vscale x 32 x half> %va, half
; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
; ZVFHMIN-NEXT: csrr a4, vlenb
-; ZVFHMIN-NEXT: slli a4, a4, 3
-; ZVFHMIN-NEXT: mv a5, a4
-; ZVFHMIN-NEXT: slli a4, a4, 1
-; ZVFHMIN-NEXT: add a4, a4, a5
+; ZVFHMIN-NEXT: slli a4, a4, 4
; ZVFHMIN-NEXT: add a4, sp, a4
; ZVFHMIN-NEXT: addi a4, a4, 16
; ZVFHMIN-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vmv4r.v v4, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT: csrr a4, vlenb
-; ZVFHMIN-NEXT: slli a4, a4, 4
+; ZVFHMIN-NEXT: slli a4, a4, 3
; ZVFHMIN-NEXT: add a4, sp, a4
; ZVFHMIN-NEXT: addi a4, a4, 16
-; ZVFHMIN-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: bltu a0, a2, .LBB290_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a3, a2
; ZVFHMIN-NEXT: .LBB290_2:
; ZVFHMIN-NEXT: csrr a4, vlenb
-; ZVFHMIN-NEXT: slli a4, a4, 5
-; ZVFHMIN-NEXT: add a4, sp, a4
-; ZVFHMIN-NEXT: addi a4, a4, 16
-; ZVFHMIN-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
-; ZVFHMIN-NEXT: csrr a4, vlenb
; ZVFHMIN-NEXT: slli a4, a4, 3
; ZVFHMIN-NEXT: mv a5, a4
; ZVFHMIN-NEXT: slli a4, a4, 1
; ZVFHMIN-NEXT: add a4, a4, a5
; ZVFHMIN-NEXT: add a4, sp, a4
; ZVFHMIN-NEXT: addi a4, a4, 16
-; ZVFHMIN-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
; ZVFHMIN-NEXT: csrr a4, vlenb
; ZVFHMIN-NEXT: slli a4, a4, 4
; ZVFHMIN-NEXT: add a4, sp, a4
; ZVFHMIN-NEXT: addi a4, a4, 16
+; ZVFHMIN-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: csrr a4, vlenb
+; ZVFHMIN-NEXT: slli a4, a4, 3
+; ZVFHMIN-NEXT: add a4, sp, a4
+; ZVFHMIN-NEXT: addi a4, a4, 16
; ZVFHMIN-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v8, v0.t
+; ZVFHMIN-NEXT: vfmadd.vv v8, v16, v24, v0.t
; ZVFHMIN-NEXT: csrr a3, vlenb
; ZVFHMIN-NEXT: slli a3, a3, 3
; ZVFHMIN-NEXT: add a3, sp, a3
; ZVFHMIN-NEXT: addi a3, a3, 16
-; ZVFHMIN-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: addi a3, sp, 16
-; ZVFHMIN-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20
-; ZVFHMIN-NEXT: csrr a3, vlenb
-; ZVFHMIN-NEXT: slli a3, a3, 3
-; ZVFHMIN-NEXT: mv a4, a3
-; ZVFHMIN-NEXT: slli a3, a3, 1
-; ZVFHMIN-NEXT: add a3, a3, a4
-; ZVFHMIN-NEXT: add a3, sp, a3
-; ZVFHMIN-NEXT: addi a3, a3, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v4
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28
; ZVFHMIN-NEXT: csrr a3, vlenb
; ZVFHMIN-NEXT: slli a3, a3, 4
; ZVFHMIN-NEXT: add a3, sp, a3
; ZVFHMIN-NEXT: addi a3, a3, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4
; ZVFHMIN-NEXT: sub a2, a0, a2
; ZVFHMIN-NEXT: sltu a0, a0, a2
; ZVFHMIN-NEXT: addi a0, a0, -1
@@ -10148,13 +10107,6 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16(<vscale x 32 x half> %va, half
; ZVFHMIN-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a1
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: slli a1, a1, 5
-; ZVFHMIN-NEXT: add a1, sp, a1
-; ZVFHMIN-NEXT: addi a1, a1, 16
-; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
-; ZVFHMIN-NEXT: csrr a1, vlenb
; ZVFHMIN-NEXT: slli a1, a1, 3
; ZVFHMIN-NEXT: mv a2, a1
; ZVFHMIN-NEXT: slli a1, a1, 1
@@ -10162,11 +10114,13 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16(<vscale x 32 x half> %va, half
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
; ZVFHMIN-NEXT: csrr a1, vlenb
; ZVFHMIN-NEXT: slli a1, a1, 4
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
-; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v8, v0.t
; ZVFHMIN-NEXT: csrr a0, vlenb
@@ -10178,10 +10132,7 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16(<vscale x 32 x half> %va, half
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 3
-; ZVFHMIN-NEXT: mv a1, a0
-; ZVFHMIN-NEXT: slli a0, a0, 2
-; ZVFHMIN-NEXT: add a0, a0, a1
+; ZVFHMIN-NEXT: slli a0, a0, 5
; ZVFHMIN-NEXT: add sp, sp, a0
; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16
; ZVFHMIN-NEXT: addi sp, sp, 16
@@ -10646,15 +10597,11 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_neg_splat(<vscale x 32 x half>
; ZVFHMIN-NEXT: addi a4, a4, 16
; ZVFHMIN-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24
-; ZVFHMIN-NEXT: addi a4, sp, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: csrr a4, vlenb
; ZVFHMIN-NEXT: slli a4, a4, 3
; ZVFHMIN-NEXT: add a4, sp, a4
; ZVFHMIN-NEXT: addi a4, a4, 16
; ZVFHMIN-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: addi a4, sp, 16
-; ZVFHMIN-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v24, v0.t
; ZVFHMIN-NEXT: addi a3, sp, 16
@@ -10663,14 +10610,14 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_neg_splat(<vscale x 32 x half>
; ZVFHMIN-NEXT: slli a3, a3, 4
; ZVFHMIN-NEXT: add a3, sp, a3
; ZVFHMIN-NEXT: addi a3, a3, 16
-; ZVFHMIN-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
; ZVFHMIN-NEXT: csrr a3, vlenb
; ZVFHMIN-NEXT: slli a3, a3, 3
; ZVFHMIN-NEXT: add a3, sp, a3
; ZVFHMIN-NEXT: addi a3, a3, 16
-; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4
; ZVFHMIN-NEXT: sub a2, a0, a2
; ZVFHMIN-NEXT: sltu a0, a0, a2
@@ -10690,10 +10637,20 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_neg_splat(<vscale x 32 x half>
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
; ZVFHMIN-NEXT: csrr a1, vlenb
+; ZVFHMIN-NEXT: slli a1, a1, 4
+; ZVFHMIN-NEXT: add a1, sp, a1
+; ZVFHMIN-NEXT: addi a1, a1, 16
+; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: csrr a1, vlenb
; ZVFHMIN-NEXT: slli a1, a1, 3
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: csrr a1, vlenb
+; ZVFHMIN-NEXT: slli a1, a1, 4
+; ZVFHMIN-NEXT: add a1, sp, a1
+; ZVFHMIN-NEXT: addi a1, a1, 16
+; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v8, v0.t
; ZVFHMIN-NEXT: addi a0, sp, 16
@@ -10777,24 +10734,24 @@ define <vscale x 32 x half> @vfnmadd_vf_nxv32f16_neg_splat_commute(<vscale x 32
; ZVFHMIN-NEXT: add a4, sp, a4
; ZVFHMIN-NEXT: addi a4, a4, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
; ZVFHMIN-NEXT: csrr a4, vlenb
; ZVFHMIN-NEXT: slli a4, a4, 4
; ZVFHMIN-NEXT: add a4, sp, a4
; ZVFHMIN-NEXT: addi a4, a4, 16
-; ZVFHMIN-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: csrr a4, vlenb
; ZVFHMIN-NEXT: slli a4, a4, 3
; ZVFHMIN-NEXT: add a4, sp, a4
; ZVFHMIN-NEXT: addi a4, a4, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v24, v0.t
+; ZVFHMIN-NEXT: vfmadd.vv v24, v8, v16, v0.t
; ZVFHMIN-NEXT: csrr a3, vlenb
; ZVFHMIN-NEXT: slli a3, a3, 3
; ZVFHMIN-NEXT: add a3, sp, a3
; ZVFHMIN-NEXT: addi a3, a3, 16
-; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: addi a3, sp, 16
; ZVFHMIN-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m4, ta, ma
@@ -11124,36 +11081,37 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
; ZVFHMIN-NEXT: slli a2, a2, 5
; ZVFHMIN-NEXT: sub sp, sp, a2
; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
+; ZVFHMIN-NEXT: csrr a2, vlenb
+; ZVFHMIN-NEXT: slli a2, a2, 3
+; ZVFHMIN-NEXT: mv a3, a2
+; ZVFHMIN-NEXT: slli a2, a2, 1
+; ZVFHMIN-NEXT: add a2, a2, a3
+; ZVFHMIN-NEXT: add a2, sp, a2
+; ZVFHMIN-NEXT: addi a2, a2, 16
+; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vl8re16.v v24, (a0)
-; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 3
-; ZVFHMIN-NEXT: mv a2, a0
-; ZVFHMIN-NEXT: slli a0, a0, 1
-; ZVFHMIN-NEXT: add a0, a0, a2
-; ZVFHMIN-NEXT: add a0, sp, a0
-; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: lui a0, 8
; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m8, ta, ma
-; ZVFHMIN-NEXT: vxor.vx v8, v16, a0, v0.t
-; ZVFHMIN-NEXT: vxor.vx v24, v24, a0, v0.t
+; ZVFHMIN-NEXT: vxor.vx v16, v16, a0, v0.t
+; ZVFHMIN-NEXT: vxor.vx v8, v24, a0, v0.t
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a2, a0, 1
; ZVFHMIN-NEXT: mv a3, a1
-; ZVFHMIN-NEXT: vmv4r.v v4, v12
+; ZVFHMIN-NEXT: vmv4r.v v4, v20
; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
+; ZVFHMIN-NEXT: vmv8r.v v16, v24
; ZVFHMIN-NEXT: csrr a4, vlenb
; ZVFHMIN-NEXT: slli a4, a4, 4
; ZVFHMIN-NEXT: add a4, sp, a4
; ZVFHMIN-NEXT: addi a4, a4, 16
-; ZVFHMIN-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24
+; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
; ZVFHMIN-NEXT: csrr a4, vlenb
; ZVFHMIN-NEXT: slli a4, a4, 3
; ZVFHMIN-NEXT: add a4, sp, a4
; ZVFHMIN-NEXT: addi a4, a4, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: bltu a1, a2, .LBB298_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a3, a2
@@ -11177,18 +11135,18 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
; ZVFHMIN-NEXT: addi a3, sp, 16
; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v4
; ZVFHMIN-NEXT: csrr a3, vlenb
; ZVFHMIN-NEXT: slli a3, a3, 4
; ZVFHMIN-NEXT: add a3, sp, a3
; ZVFHMIN-NEXT: addi a3, a3, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
; ZVFHMIN-NEXT: csrr a3, vlenb
; ZVFHMIN-NEXT: slli a3, a3, 3
; ZVFHMIN-NEXT: add a3, sp, a3
; ZVFHMIN-NEXT: addi a3, a3, 16
-; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: sub a2, a1, a2
; ZVFHMIN-NEXT: sltu a1, a1, a2
; ZVFHMIN-NEXT: addi a1, a1, -1
@@ -11205,19 +11163,19 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a0, a0, 3
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, a1, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v8, v0.t
+; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v8, v0.t
; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
+; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a0, a0, 5
; ZVFHMIN-NEXT: add sp, sp, a0
@@ -11250,12 +11208,12 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16_commuted(<vscale x 32 x half> %
; ZVFHMIN-NEXT: add a2, a2, a3
; ZVFHMIN-NEXT: sub sp, sp, a2
; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb
+; ZVFHMIN-NEXT: csrr a2, vlenb
+; ZVFHMIN-NEXT: slli a2, a2, 5
+; ZVFHMIN-NEXT: add a2, sp, a2
+; ZVFHMIN-NEXT: addi a2, a2, 16
+; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vl8re16.v v24, (a0)
-; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 5
-; ZVFHMIN-NEXT: add a0, sp, a0
-; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: lui a0, 8
; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; ZVFHMIN-NEXT: vxor.vx v8, v16, a0, v0.t
@@ -11662,15 +11620,15 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16(<vscale x 32 x half> %va, half
; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: csrr a1, vlenb
; ZVFHMIN-NEXT: slli a2, a1, 1
+; ZVFHMIN-NEXT: mv a3, a0
; ZVFHMIN-NEXT: vmv4r.v v4, v12
-; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
-; ZVFHMIN-NEXT: csrr a3, vlenb
-; ZVFHMIN-NEXT: slli a3, a3, 4
-; ZVFHMIN-NEXT: add a3, sp, a3
-; ZVFHMIN-NEXT: addi a3, a3, 16
-; ZVFHMIN-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: mv a3, a0
+; ZVFHMIN-NEXT: csrr a4, vlenb
+; ZVFHMIN-NEXT: slli a4, a4, 4
+; ZVFHMIN-NEXT: add a4, sp, a4
+; ZVFHMIN-NEXT: addi a4, a4, 16
+; ZVFHMIN-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: bltu a0, a2, .LBB302_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a3, a2
@@ -11784,7 +11742,7 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_commute(<vscale x 32 x half> %v
; ZVFHMIN-NEXT: sub sp, sp, a1
; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: slli a1, a1, 4
+; ZVFHMIN-NEXT: slli a1, a1, 3
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
@@ -11792,10 +11750,7 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_commute(<vscale x 32 x half> %v
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m8, ta, ma
; ZVFHMIN-NEXT: vmv.v.x v24, a1
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: slli a1, a1, 3
-; ZVFHMIN-NEXT: mv a2, a1
-; ZVFHMIN-NEXT: slli a1, a1, 1
-; ZVFHMIN-NEXT: add a1, a1, a2
+; ZVFHMIN-NEXT: slli a1, a1, 4
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
@@ -11804,25 +11759,34 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_commute(<vscale x 32 x half> %v
; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: csrr a1, vlenb
; ZVFHMIN-NEXT: slli a2, a1, 1
+; ZVFHMIN-NEXT: mv a3, a0
; ZVFHMIN-NEXT: vmv4r.v v4, v12
-; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
-; ZVFHMIN-NEXT: mv a3, a0
+; ZVFHMIN-NEXT: csrr a4, vlenb
+; ZVFHMIN-NEXT: slli a4, a4, 3
+; ZVFHMIN-NEXT: mv a5, a4
+; ZVFHMIN-NEXT: slli a4, a4, 1
+; ZVFHMIN-NEXT: add a4, a4, a5
+; ZVFHMIN-NEXT: add a4, sp, a4
+; ZVFHMIN-NEXT: addi a4, a4, 16
+; ZVFHMIN-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: bltu a0, a2, .LBB303_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a3, a2
; ZVFHMIN-NEXT: .LBB303_2:
; ZVFHMIN-NEXT: csrr a4, vlenb
-; ZVFHMIN-NEXT: slli a4, a4, 4
+; ZVFHMIN-NEXT: slli a4, a4, 3
; ZVFHMIN-NEXT: add a4, sp, a4
; ZVFHMIN-NEXT: addi a4, a4, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
; ZVFHMIN-NEXT: csrr a4, vlenb
-; ZVFHMIN-NEXT: slli a4, a4, 3
+; ZVFHMIN-NEXT: slli a4, a4, 4
; ZVFHMIN-NEXT: add a4, sp, a4
; ZVFHMIN-NEXT: addi a4, a4, 16
-; ZVFHMIN-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
; ZVFHMIN-NEXT: csrr a4, vlenb
; ZVFHMIN-NEXT: slli a4, a4, 3
; ZVFHMIN-NEXT: mv a5, a4
@@ -11830,19 +11794,19 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_commute(<vscale x 32 x half> %v
; ZVFHMIN-NEXT: add a4, a4, a5
; ZVFHMIN-NEXT: add a4, sp, a4
; ZVFHMIN-NEXT: addi a4, a4, 16
-; ZVFHMIN-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
-; ZVFHMIN-NEXT: csrr a4, vlenb
-; ZVFHMIN-NEXT: slli a4, a4, 3
-; ZVFHMIN-NEXT: add a4, sp, a4
-; ZVFHMIN-NEXT: addi a4, a4, 16
-; ZVFHMIN-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v8, v0.t
-; ZVFHMIN-NEXT: addi a3, sp, 16
+; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v24, v0.t
+; ZVFHMIN-NEXT: csrr a3, vlenb
+; ZVFHMIN-NEXT: slli a3, a3, 3
+; ZVFHMIN-NEXT: mv a4, a3
+; ZVFHMIN-NEXT: slli a3, a3, 1
+; ZVFHMIN-NEXT: add a3, a3, a4
+; ZVFHMIN-NEXT: add a3, sp, a3
+; ZVFHMIN-NEXT: addi a3, a3, 16
; ZVFHMIN-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v4
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4
; ZVFHMIN-NEXT: sub a2, a0, a2
; ZVFHMIN-NEXT: sltu a0, a0, a2
; ZVFHMIN-NEXT: addi a0, a0, -1
@@ -11851,38 +11815,35 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_commute(<vscale x 32 x half> %v
; ZVFHMIN-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a1
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: slli a1, a1, 4
+; ZVFHMIN-NEXT: slli a1, a1, 3
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
-; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: slli a1, a1, 3
-; ZVFHMIN-NEXT: add a1, sp, a1
-; ZVFHMIN-NEXT: addi a1, a1, 16
-; ZVFHMIN-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT: addi a1, sp, 16
+; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: slli a1, a1, 3
-; ZVFHMIN-NEXT: mv a2, a1
-; ZVFHMIN-NEXT: slli a1, a1, 1
-; ZVFHMIN-NEXT: add a1, a1, a2
+; ZVFHMIN-NEXT: slli a1, a1, 4
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
-; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: slli a1, a1, 3
-; ZVFHMIN-NEXT: add a1, sp, a1
-; ZVFHMIN-NEXT: addi a1, a1, 16
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT: addi a1, sp, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v8, v0.t
-; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v8, v0.t
+; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: mv a1, a0
+; ZVFHMIN-NEXT: slli a0, a0, 1
+; ZVFHMIN-NEXT: add a0, a0, a1
+; ZVFHMIN-NEXT: add a0, sp, a0
+; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a0, a0, 5
; ZVFHMIN-NEXT: add sp, sp, a0
@@ -12169,28 +12130,23 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat(<vscale x 32 x half>
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vmv8r.v v16, v8
+; ZVFHMIN-NEXT: csrr a1, vlenb
+; ZVFHMIN-NEXT: slli a1, a1, 4
+; ZVFHMIN-NEXT: add a1, sp, a1
+; ZVFHMIN-NEXT: addi a1, a1, 16
+; ZVFHMIN-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m8, ta, ma
-; ZVFHMIN-NEXT: vmv.v.x v8, a1
+; ZVFHMIN-NEXT: vmv.v.x v24, a1
; ZVFHMIN-NEXT: lui a1, 8
-; ZVFHMIN-NEXT: csrr a2, vlenb
-; ZVFHMIN-NEXT: slli a2, a2, 4
-; ZVFHMIN-NEXT: add a2, sp, a2
-; ZVFHMIN-NEXT: addi a2, a2, 16
-; ZVFHMIN-NEXT: vs1r.v v0, (a2) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t
+; ZVFHMIN-NEXT: vxor.vx v16, v24, a1, v0.t
; ZVFHMIN-NEXT: csrr a1, vlenb
; ZVFHMIN-NEXT: slli a2, a1, 1
-; ZVFHMIN-NEXT: csrr a3, vlenb
-; ZVFHMIN-NEXT: slli a3, a3, 3
-; ZVFHMIN-NEXT: add a3, sp, a3
-; ZVFHMIN-NEXT: addi a3, a3, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
; ZVFHMIN-NEXT: mv a3, a0
+; ZVFHMIN-NEXT: vmv4r.v v4, v20
+; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
; ZVFHMIN-NEXT: bltu a0, a2, .LBB306_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a3, a2
@@ -12203,29 +12159,29 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat(<vscale x 32 x half>
; ZVFHMIN-NEXT: add a4, sp, a4
; ZVFHMIN-NEXT: addi a4, a4, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8
-; ZVFHMIN-NEXT: addi a4, sp, 16
-; ZVFHMIN-NEXT: vs8r.v v0, (a4) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vmv8r.v v0, v16
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v0
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: csrr a4, vlenb
+; ZVFHMIN-NEXT: slli a4, a4, 3
+; ZVFHMIN-NEXT: add a4, sp, a4
+; ZVFHMIN-NEXT: addi a4, a4, 16
+; ZVFHMIN-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: csrr a4, vlenb
; ZVFHMIN-NEXT: slli a4, a4, 4
; ZVFHMIN-NEXT: add a4, sp, a4
; ZVFHMIN-NEXT: addi a4, a4, 16
-; ZVFHMIN-NEXT: vl1r.v v0, (a4) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: addi a4, sp, 16
-; ZVFHMIN-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
+; ZVFHMIN-NEXT: csrr a4, vlenb
+; ZVFHMIN-NEXT: slli a4, a4, 3
+; ZVFHMIN-NEXT: add a4, sp, a4
+; ZVFHMIN-NEXT: addi a4, a4, 16
+; ZVFHMIN-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v8, v0.t
+; ZVFHMIN-NEXT: vfmadd.vv v24, v8, v16, v0.t
; ZVFHMIN-NEXT: addi a3, sp, 16
; ZVFHMIN-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: csrr a3, vlenb
-; ZVFHMIN-NEXT: slli a3, a3, 3
-; ZVFHMIN-NEXT: add a3, sp, a3
-; ZVFHMIN-NEXT: addi a3, a3, 16
-; ZVFHMIN-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4
; ZVFHMIN-NEXT: sub a2, a0, a2
; ZVFHMIN-NEXT: sltu a0, a0, a2
; ZVFHMIN-NEXT: addi a0, a0, -1
@@ -12242,25 +12198,30 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat(<vscale x 32 x half>
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: slli a1, a1, 4
+; ZVFHMIN-NEXT: slli a1, a1, 3
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
-; ZVFHMIN-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4
+; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: csrr a1, vlenb
; ZVFHMIN-NEXT: slli a1, a1, 4
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT: csrr a1, vlenb
+; ZVFHMIN-NEXT: slli a1, a1, 3
+; ZVFHMIN-NEXT: add a1, sp, a1
+; ZVFHMIN-NEXT: addi a1, a1, 16
+; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v8, v0.t
+; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v8, v0.t
; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
-; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a0, a0, 5
; ZVFHMIN-NEXT: add sp, sp, a0
@@ -12287,16 +12248,14 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat_commute(<vscale x 32
; ZVFHMIN-NEXT: addi sp, sp, -16
; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: slli a2, a1, 5
-; ZVFHMIN-NEXT: add a1, a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 3
+; ZVFHMIN-NEXT: mv a2, a1
+; ZVFHMIN-NEXT: slli a1, a1, 2
+; ZVFHMIN-NEXT: add a1, a1, a2
; ZVFHMIN-NEXT: sub sp, sp, a1
-; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x21, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 33 * vlenb
+; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: mv a2, a1
; ZVFHMIN-NEXT: slli a1, a1, 3
-; ZVFHMIN-NEXT: add a2, a2, a1
-; ZVFHMIN-NEXT: slli a1, a1, 1
-; ZVFHMIN-NEXT: add a1, a1, a2
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
@@ -12304,77 +12263,78 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat_commute(<vscale x 32
; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m8, ta, ma
; ZVFHMIN-NEXT: vmv.v.x v24, a1
; ZVFHMIN-NEXT: lui a1, 8
-; ZVFHMIN-NEXT: addi a2, sp, 16
-; ZVFHMIN-NEXT: vs1r.v v0, (a2) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; ZVFHMIN-NEXT: vxor.vx v16, v24, a1, v0.t
; ZVFHMIN-NEXT: csrr a1, vlenb
; ZVFHMIN-NEXT: slli a2, a1, 1
+; ZVFHMIN-NEXT: mv a3, a0
; ZVFHMIN-NEXT: vmv4r.v v28, v20
-; ZVFHMIN-NEXT: csrr a3, vlenb
-; ZVFHMIN-NEXT: slli a4, a3, 4
-; ZVFHMIN-NEXT: add a3, a4, a3
-; ZVFHMIN-NEXT: add a3, sp, a3
-; ZVFHMIN-NEXT: addi a3, a3, 16
-; ZVFHMIN-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: csrr a4, vlenb
+; ZVFHMIN-NEXT: slli a4, a4, 5
+; ZVFHMIN-NEXT: add a4, sp, a4
+; ZVFHMIN-NEXT: addi a4, a4, 16
+; ZVFHMIN-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
-; ZVFHMIN-NEXT: csrr a3, vlenb
-; ZVFHMIN-NEXT: slli a4, a3, 3
-; ZVFHMIN-NEXT: add a3, a4, a3
-; ZVFHMIN-NEXT: add a3, sp, a3
-; ZVFHMIN-NEXT: addi a3, a3, 16
-; ZVFHMIN-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: mv a3, a0
+; ZVFHMIN-NEXT: csrr a4, vlenb
+; ZVFHMIN-NEXT: slli a4, a4, 3
+; ZVFHMIN-NEXT: mv a5, a4
+; ZVFHMIN-NEXT: slli a4, a4, 1
+; ZVFHMIN-NEXT: add a4, a4, a5
+; ZVFHMIN-NEXT: add a4, sp, a4
+; ZVFHMIN-NEXT: addi a4, a4, 16
+; ZVFHMIN-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: bltu a0, a2, .LBB307_2
; ZVFHMIN-NEXT: # %bb.1:
; ZVFHMIN-NEXT: mv a3, a2
; ZVFHMIN-NEXT: .LBB307_2:
; ZVFHMIN-NEXT: csrr a4, vlenb
-; ZVFHMIN-NEXT: mv a5, a4
; ZVFHMIN-NEXT: slli a4, a4, 3
-; ZVFHMIN-NEXT: add a5, a5, a4
-; ZVFHMIN-NEXT: slli a4, a4, 1
-; ZVFHMIN-NEXT: add a4, a4, a5
; ZVFHMIN-NEXT: add a4, sp, a4
; ZVFHMIN-NEXT: addi a4, a4, 16
; ZVFHMIN-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
; ZVFHMIN-NEXT: csrr a4, vlenb
+; ZVFHMIN-NEXT: slli a4, a4, 4
; ZVFHMIN-NEXT: add a4, sp, a4
; ZVFHMIN-NEXT: addi a4, a4, 16
; ZVFHMIN-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vmv8r.v v0, v8
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v0
+; ZVFHMIN-NEXT: vmv4r.v v4, v28
; ZVFHMIN-NEXT: addi a4, sp, 16
-; ZVFHMIN-NEXT: vl1r.v v0, (a4) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
; ZVFHMIN-NEXT: csrr a4, vlenb
-; ZVFHMIN-NEXT: slli a5, a4, 3
-; ZVFHMIN-NEXT: add a4, a5, a4
+; ZVFHMIN-NEXT: slli a4, a4, 3
+; ZVFHMIN-NEXT: mv a5, a4
+; ZVFHMIN-NEXT: slli a4, a4, 1
+; ZVFHMIN-NEXT: add a4, a4, a5
; ZVFHMIN-NEXT: add a4, sp, a4
; ZVFHMIN-NEXT: addi a4, a4, 16
; ZVFHMIN-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: csrr a4, vlenb
+; ZVFHMIN-NEXT: slli a4, a4, 4
; ZVFHMIN-NEXT: add a4, sp, a4
; ZVFHMIN-NEXT: addi a4, a4, 16
-; ZVFHMIN-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v8, v16, v24, v0.t
+; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v8, v0.t
; ZVFHMIN-NEXT: csrr a3, vlenb
+; ZVFHMIN-NEXT: slli a3, a3, 4
; ZVFHMIN-NEXT: add a3, sp, a3
; ZVFHMIN-NEXT: addi a3, a3, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: csrr a3, vlenb
-; ZVFHMIN-NEXT: slli a4, a3, 4
-; ZVFHMIN-NEXT: add a3, a4, a3
+; ZVFHMIN-NEXT: slli a3, a3, 5
; ZVFHMIN-NEXT: add a3, sp, a3
; ZVFHMIN-NEXT: addi a3, a3, 16
; ZVFHMIN-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli a3, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20
; ZVFHMIN-NEXT: csrr a3, vlenb
-; ZVFHMIN-NEXT: slli a4, a3, 3
-; ZVFHMIN-NEXT: add a3, a4, a3
+; ZVFHMIN-NEXT: slli a3, a3, 3
+; ZVFHMIN-NEXT: mv a4, a3
+; ZVFHMIN-NEXT: slli a3, a3, 1
+; ZVFHMIN-NEXT: add a3, a3, a4
; ZVFHMIN-NEXT: add a3, sp, a3
; ZVFHMIN-NEXT: addi a3, a3, 16
; ZVFHMIN-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
@@ -12385,27 +12345,23 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat_commute(<vscale x 32
; ZVFHMIN-NEXT: srli a1, a1, 2
; ZVFHMIN-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a1
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4
+; ZVFHMIN-NEXT: addi a1, sp, 16
+; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: mv a2, a1
; ZVFHMIN-NEXT: slli a1, a1, 3
-; ZVFHMIN-NEXT: add a2, a2, a1
+; ZVFHMIN-NEXT: mv a2, a1
; ZVFHMIN-NEXT: slli a1, a1, 1
; ZVFHMIN-NEXT: add a1, a1, a2
; ZVFHMIN-NEXT: add a1, sp, a1
; ZVFHMIN-NEXT: addi a1, a1, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v4
-; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: slli a2, a1, 3
-; ZVFHMIN-NEXT: add a1, a2, a1
-; ZVFHMIN-NEXT: add a1, sp, a1
-; ZVFHMIN-NEXT: addi a1, a1, 16
-; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v24, v0.t
; ZVFHMIN-NEXT: csrr a0, vlenb
+; ZVFHMIN-NEXT: slli a0, a0, 4
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
@@ -12413,8 +12369,10 @@ define <vscale x 32 x half> @vfnmsub_vf_nxv32f16_neg_splat_commute(<vscale x 32
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a1, a0, 5
-; ZVFHMIN-NEXT: add a0, a1, a0
+; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: mv a1, a0
+; ZVFHMIN-NEXT: slli a0, a0, 2
+; ZVFHMIN-NEXT: add a0, a0, a1
; ZVFHMIN-NEXT: add sp, sp, a0
; ZVFHMIN-NEXT: .cfi_def_cfa sp, 16
; ZVFHMIN-NEXT: addi sp, sp, 16
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmadd-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmadd-constrained-sdnode.ll
index 7c79706419fcad..d6261d7dcd808e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmadd-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmadd-constrained-sdnode.ll
@@ -242,31 +242,30 @@ define <vscale x 32 x bfloat> @vfmadd_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <
; CHECK-NEXT: slli a1, a1, 5
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
-; CHECK-NEXT: vl8re16.v v0, (a0)
-; CHECK-NEXT: vmv8r.v v24, v16
+; CHECK-NEXT: vmv8r.v v0, v16
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vmv8r.v v16, v8
+; CHECK-NEXT: vl8re16.v v8, (a0)
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT: vmv8r.v v16, v8
-; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v16
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: li a1, 24
; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v24
+; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT: vmv8r.v v8, v0
+; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vfwcvtbf16.f.f.v v0, v8
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: li a1, 24
@@ -278,41 +277,31 @@ define <vscale x 32 x bfloat> @vfmadd_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; CHECK-NEXT: vfmadd.vv v0, v16, v24
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfmadd.vv v0, v8, v24
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: li a1, 24
+; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v20
+; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v20
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a1, 24
-; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12
+; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v28
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: li a1, 24
; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT: vfmadd.vv v16, v8, v24
@@ -704,31 +693,30 @@ define <vscale x 32 x half> @vfmadd_vv_nxv32f16(<vscale x 32 x half> %va, <vscal
; ZVFHMIN-NEXT: slli a1, a1, 5
; ZVFHMIN-NEXT: sub sp, sp, a1
; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
-; ZVFHMIN-NEXT: vl8re16.v v0, (a0)
-; ZVFHMIN-NEXT: vmv8r.v v24, v16
+; ZVFHMIN-NEXT: vmv8r.v v0, v16
+; ZVFHMIN-NEXT: addi a1, sp, 16
+; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vmv8r.v v16, v8
+; ZVFHMIN-NEXT: vl8re16.v v8, (a0)
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a0, a0, 4
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vmv8r.v v16, v8
-; ZVFHMIN-NEXT: addi a0, sp, 16
; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: li a1, 24
; ZVFHMIN-NEXT: mul a0, a0, a1
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24
+; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a0, a0, 3
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vmv8r.v v8, v0
+; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: li a1, 24
@@ -740,41 +728,31 @@ define <vscale x 32 x half> @vfmadd_vv_nxv32f16(<vscale x 32 x half> %va, <vscal
; ZVFHMIN-NEXT: slli a0, a0, 3
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v0, v16, v24
-; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vfmadd.vv v0, v8, v24
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: li a1, 24
+; ZVFHMIN-NEXT: mul a0, a0, a1
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 4
-; ZVFHMIN-NEXT: add a0, sp, a0
-; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: addi a0, sp, 16
; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a1, 24
-; ZVFHMIN-NEXT: mul a0, a0, a1
+; ZVFHMIN-NEXT: slli a0, a0, 4
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: li a1, 24
; ZVFHMIN-NEXT: mul a0, a0, a1
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 3
-; ZVFHMIN-NEXT: add a0, sp, a0
-; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v24
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmadd-sdnode.ll
index 088e3a04984fa0..3604ff2f9f2a49 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmadd-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmadd-sdnode.ll
@@ -241,18 +241,18 @@ define <vscale x 32 x bfloat> @vfmadd_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <
; ZVFH-NEXT: slli a1, a1, 5
; ZVFH-NEXT: sub sp, sp, a1
; ZVFH-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
-; ZVFH-NEXT: vl8re16.v v0, (a0)
-; ZVFH-NEXT: vmv8r.v v24, v16
+; ZVFH-NEXT: vmv8r.v v0, v16
+; ZVFH-NEXT: addi a1, sp, 16
+; ZVFH-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; ZVFH-NEXT: vmv8r.v v16, v8
+; ZVFH-NEXT: vl8re16.v v8, (a0)
; ZVFH-NEXT: csrr a0, vlenb
; ZVFH-NEXT: slli a0, a0, 4
; ZVFH-NEXT: add a0, sp, a0
; ZVFH-NEXT: addi a0, a0, 16
-; ZVFH-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
-; ZVFH-NEXT: vmv8r.v v16, v8
-; ZVFH-NEXT: addi a0, sp, 16
; ZVFH-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; ZVFH-NEXT: vfwcvtbf16.f.f.v v8, v16
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v24, v16
; ZVFH-NEXT: csrr a0, vlenb
; ZVFH-NEXT: slli a0, a0, 3
; ZVFH-NEXT: mv a1, a0
@@ -260,14 +260,13 @@ define <vscale x 32 x bfloat> @vfmadd_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <
; ZVFH-NEXT: add a0, a0, a1
; ZVFH-NEXT: add a0, sp, a0
; ZVFH-NEXT: addi a0, a0, 16
-; ZVFH-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
-; ZVFH-NEXT: vfwcvtbf16.f.f.v v8, v24
+; ZVFH-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v24, v0
; ZVFH-NEXT: csrr a0, vlenb
; ZVFH-NEXT: slli a0, a0, 3
; ZVFH-NEXT: add a0, sp, a0
; ZVFH-NEXT: addi a0, a0, 16
-; ZVFH-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
-; ZVFH-NEXT: vmv8r.v v8, v0
+; ZVFH-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; ZVFH-NEXT: vfwcvtbf16.f.f.v v0, v8
; ZVFH-NEXT: csrr a0, vlenb
; ZVFH-NEXT: slli a0, a0, 3
@@ -281,33 +280,28 @@ define <vscale x 32 x bfloat> @vfmadd_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <
; ZVFH-NEXT: slli a0, a0, 3
; ZVFH-NEXT: add a0, sp, a0
; ZVFH-NEXT: addi a0, a0, 16
-; ZVFH-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; ZVFH-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; ZVFH-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFH-NEXT: vfmadd.vv v0, v16, v24
-; ZVFH-NEXT: addi a0, sp, 16
-; ZVFH-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; ZVFH-NEXT: vfmadd.vv v0, v8, v24
; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFH-NEXT: vfwcvtbf16.f.f.v v24, v20
; ZVFH-NEXT: csrr a0, vlenb
; ZVFH-NEXT: slli a0, a0, 3
+; ZVFH-NEXT: mv a1, a0
+; ZVFH-NEXT: slli a0, a0, 1
+; ZVFH-NEXT: add a0, a0, a1
; ZVFH-NEXT: add a0, sp, a0
; ZVFH-NEXT: addi a0, a0, 16
; ZVFH-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
-; ZVFH-NEXT: csrr a0, vlenb
-; ZVFH-NEXT: slli a0, a0, 4
-; ZVFH-NEXT: add a0, sp, a0
-; ZVFH-NEXT: addi a0, a0, 16
+; ZVFH-NEXT: addi a0, sp, 16
; ZVFH-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; ZVFH-NEXT: vfwcvtbf16.f.f.v v24, v20
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v8, v20
; ZVFH-NEXT: csrr a0, vlenb
-; ZVFH-NEXT: slli a0, a0, 3
-; ZVFH-NEXT: mv a1, a0
-; ZVFH-NEXT: slli a0, a0, 1
-; ZVFH-NEXT: add a0, a0, a1
+; ZVFH-NEXT: slli a0, a0, 4
; ZVFH-NEXT: add a0, sp, a0
; ZVFH-NEXT: addi a0, a0, 16
-; ZVFH-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
-; ZVFH-NEXT: vfwcvtbf16.f.f.v v16, v12
+; ZVFH-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; ZVFH-NEXT: vfwcvtbf16.f.f.v v16, v28
; ZVFH-NEXT: csrr a0, vlenb
; ZVFH-NEXT: slli a0, a0, 3
; ZVFH-NEXT: mv a1, a0
@@ -315,11 +309,6 @@ define <vscale x 32 x bfloat> @vfmadd_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <
; ZVFH-NEXT: add a0, a0, a1
; ZVFH-NEXT: add a0, sp, a0
; ZVFH-NEXT: addi a0, a0, 16
-; ZVFH-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
-; ZVFH-NEXT: csrr a0, vlenb
-; ZVFH-NEXT: slli a0, a0, 3
-; ZVFH-NEXT: add a0, sp, a0
-; ZVFH-NEXT: addi a0, a0, 16
; ZVFH-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; ZVFH-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; ZVFH-NEXT: vfmadd.vv v16, v8, v24
@@ -342,31 +331,30 @@ define <vscale x 32 x bfloat> @vfmadd_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <
; ZVFHMIN-NEXT: slli a1, a1, 5
; ZVFHMIN-NEXT: sub sp, sp, a1
; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
-; ZVFHMIN-NEXT: vl8re16.v v0, (a0)
-; ZVFHMIN-NEXT: vmv8r.v v24, v16
+; ZVFHMIN-NEXT: vmv8r.v v0, v16
+; ZVFHMIN-NEXT: addi a1, sp, 16
+; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vmv8r.v v16, v8
+; ZVFHMIN-NEXT: vl8re16.v v8, (a0)
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a0, a0, 4
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vmv8r.v v16, v8
-; ZVFHMIN-NEXT: addi a0, sp, 16
; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v8, v16
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v24, v16
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: li a1, 24
; ZVFHMIN-NEXT: mul a0, a0, a1
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v8, v24
+; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v24, v0
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a0, a0, 3
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vmv8r.v v8, v0
+; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v0, v8
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: li a1, 24
@@ -378,41 +366,31 @@ define <vscale x 32 x bfloat> @vfmadd_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <
; ZVFHMIN-NEXT: slli a0, a0, 3
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v0, v16, v24
-; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vfmadd.vv v0, v8, v24
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v24, v20
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: li a1, 24
+; ZVFHMIN-NEXT: mul a0, a0, a1
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 4
-; ZVFHMIN-NEXT: add a0, sp, a0
-; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: addi a0, sp, 16
; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v24, v20
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v8, v20
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a1, 24
-; ZVFHMIN-NEXT: mul a0, a0, a1
+; ZVFHMIN-NEXT: slli a0, a0, 4
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v16, v12
+; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vfwcvtbf16.f.f.v v16, v28
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: li a1, 24
; ZVFHMIN-NEXT: mul a0, a0, a1
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 3
-; ZVFHMIN-NEXT: add a0, sp, a0
-; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v24
@@ -883,31 +861,30 @@ define <vscale x 32 x half> @vfmadd_vv_nxv32f16(<vscale x 32 x half> %va, <vscal
; ZVFHMIN-NEXT: slli a1, a1, 5
; ZVFHMIN-NEXT: sub sp, sp, a1
; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
-; ZVFHMIN-NEXT: vl8re16.v v0, (a0)
-; ZVFHMIN-NEXT: vmv8r.v v24, v16
+; ZVFHMIN-NEXT: vmv8r.v v0, v16
+; ZVFHMIN-NEXT: addi a1, sp, 16
+; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vmv8r.v v16, v8
+; ZVFHMIN-NEXT: vl8re16.v v8, (a0)
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a0, a0, 4
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vmv8r.v v16, v8
-; ZVFHMIN-NEXT: addi a0, sp, 16
; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: li a1, 24
; ZVFHMIN-NEXT: mul a0, a0, a1
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24
+; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a0, a0, 3
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vmv8r.v v8, v0
+; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: li a1, 24
@@ -919,41 +896,31 @@ define <vscale x 32 x half> @vfmadd_vv_nxv32f16(<vscale x 32 x half> %va, <vscal
; ZVFHMIN-NEXT: slli a0, a0, 3
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v0, v16, v24
-; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vfmadd.vv v0, v8, v24
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 3
+; ZVFHMIN-NEXT: li a1, 24
+; ZVFHMIN-NEXT: mul a0, a0, a1
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 4
-; ZVFHMIN-NEXT: add a0, sp, a0
-; ZVFHMIN-NEXT: addi a0, a0, 16
+; ZVFHMIN-NEXT: addi a0, sp, 16
; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a1, 24
-; ZVFHMIN-NEXT: mul a0, a0, a1
+; ZVFHMIN-NEXT: slli a0, a0, 4
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v28
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: li a1, 24
; ZVFHMIN-NEXT: mul a0, a0, a1
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 3
-; ZVFHMIN-NEXT: add a0, sp, a0
-; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v16, v8, v24
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmadd-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmadd-constrained-sdnode.ll
index 73adbba3df9216..e36812ae37f00a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfnmadd-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfnmadd-constrained-sdnode.ll
@@ -329,23 +329,23 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
; ZVFHMIN-NEXT: slli a1, a1, 5
; ZVFHMIN-NEXT: sub sp, sp, a1
; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
+; ZVFHMIN-NEXT: csrr a1, vlenb
+; ZVFHMIN-NEXT: li a2, 24
+; ZVFHMIN-NEXT: mul a1, a1, a2
+; ZVFHMIN-NEXT: add a1, sp, a1
+; ZVFHMIN-NEXT: addi a1, a1, 16
+; ZVFHMIN-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vl8re16.v v24, (a0)
; ZVFHMIN-NEXT: lui a0, 8
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma
; ZVFHMIN-NEXT: vxor.vx v24, v24, a0
-; ZVFHMIN-NEXT: csrr a1, vlenb
-; ZVFHMIN-NEXT: slli a1, a1, 4
-; ZVFHMIN-NEXT: add a1, sp, a1
-; ZVFHMIN-NEXT: addi a1, a1, 16
+; ZVFHMIN-NEXT: addi a1, sp, 16
; ZVFHMIN-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vxor.vx v0, v16, a0
-; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v0
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a1, 24
-; ZVFHMIN-NEXT: mul a0, a0, a1
+; ZVFHMIN-NEXT: slli a0, a0, 4
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
@@ -357,38 +357,36 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a1, 24
-; ZVFHMIN-NEXT: mul a0, a0, a1
+; ZVFHMIN-NEXT: slli a0, a0, 4
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a0, a0, 3
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v16, v0, v24
-; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vfmadd.vv v16, v24, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v4
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a1, 24
-; ZVFHMIN-NEXT: mul a0, a0, a1
+; ZVFHMIN-NEXT: slli a0, a0, 4
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: addi a0, sp, 16
+; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v12
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 4
+; ZVFHMIN-NEXT: li a1, 24
+; ZVFHMIN-NEXT: mul a0, a0, a1
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v28
+; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a1, 24
-; ZVFHMIN-NEXT: mul a0, a0, a1
+; ZVFHMIN-NEXT: slli a0, a0, 4
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmsub-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmsub-constrained-sdnode.ll
index 0d6d24d247af4b..3490a85de29010 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfnmsub-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfnmsub-constrained-sdnode.ll
@@ -309,66 +309,63 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16(<vscale x 32 x half> %va, <vsca
; ZVFHMIN-NEXT: slli a1, a1, 5
; ZVFHMIN-NEXT: sub sp, sp, a1
; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
+; ZVFHMIN-NEXT: csrr a1, vlenb
+; ZVFHMIN-NEXT: li a2, 24
+; ZVFHMIN-NEXT: mul a1, a1, a2
+; ZVFHMIN-NEXT: add a1, sp, a1
+; ZVFHMIN-NEXT: addi a1, a1, 16
+; ZVFHMIN-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vl8re16.v v24, (a0)
-; ZVFHMIN-NEXT: vmv8r.v v0, v8
-; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 4
-; ZVFHMIN-NEXT: add a0, sp, a0
-; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: lui a0, 8
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma
-; ZVFHMIN-NEXT: vxor.vx v8, v24, a0
+; ZVFHMIN-NEXT: vxor.vx v0, v24, a0
; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a1, 24
-; ZVFHMIN-NEXT: mul a0, a0, a1
+; ZVFHMIN-NEXT: slli a0, a0, 4
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a0, a0, 3
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0
+; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a1, 24
-; ZVFHMIN-NEXT: mul a0, a0, a1
+; ZVFHMIN-NEXT: slli a0, a0, 4
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a0, a0, 3
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVFHMIN-NEXT: vfmadd.vv v24, v0, v8
+; ZVFHMIN-NEXT: vfmadd.vv v24, v8, v0
; ZVFHMIN-NEXT: addi a0, sp, 16
-; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v4
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a1, 24
-; ZVFHMIN-NEXT: mul a0, a0, a1
+; ZVFHMIN-NEXT: slli a0, a0, 4
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v20
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 4
+; ZVFHMIN-NEXT: li a1, 24
+; ZVFHMIN-NEXT: mul a0, a0, a1
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a1, 24
-; ZVFHMIN-NEXT: mul a0, a0, a1
+; ZVFHMIN-NEXT: slli a0, a0, 4
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll
index b2bfb10d39df30..ba1721a29dc818 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll
@@ -162,7 +162,7 @@ define <vscale x 32 x float> @vfptrunc_nxv32f32_nxv32f64(<vscale x 32 x double>
; CHECK-NEXT: vslidedown.vx v16, v0, a4
; CHECK-NEXT: slli a4, a1, 3
; CHECK-NEXT: add a4, a0, a4
-; CHECK-NEXT: vl8re64.v v8, (a4)
+; CHECK-NEXT: vl8re64.v v24, (a4)
; CHECK-NEXT: slli a4, a1, 1
; CHECK-NEXT: sub a5, a2, a4
; CHECK-NEXT: sltu a6, a2, a5
@@ -171,21 +171,21 @@ define <vscale x 32 x float> @vfptrunc_nxv32f32_nxv32f64(<vscale x 32 x double>
; CHECK-NEXT: sub a6, a5, a1
; CHECK-NEXT: sltu a7, a5, a6
; CHECK-NEXT: addi a7, a7, -1
-; CHECK-NEXT: vl8re64.v v24, (a0)
+; CHECK-NEXT: vl8re64.v v8, (a0)
; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vx v0, v16, a3
; CHECK-NEXT: and a0, a7, a6
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT: vfncvt.f.f.w v20, v8, v0.t
+; CHECK-NEXT: vfncvt.f.f.w v20, v24, v0.t
; CHECK-NEXT: bltu a5, a1, .LBB8_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a5, a1
; CHECK-NEXT: .LBB8_2:
-; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
-; CHECK-NEXT: vslidedown.vx v6, v7, a3
; CHECK-NEXT: vmv1r.v v0, v16
+; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vslidedown.vx v25, v7, a3
; CHECK-NEXT: vsetvli zero, a5, e32, m4, ta, ma
-; CHECK-NEXT: vfncvt.f.f.w v16, v24, v0.t
+; CHECK-NEXT: vfncvt.f.f.w v16, v8, v0.t
; CHECK-NEXT: bltu a2, a4, .LBB8_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: mv a2, a4
@@ -194,7 +194,7 @@ define <vscale x 32 x float> @vfptrunc_nxv32f32_nxv32f64(<vscale x 32 x double>
; CHECK-NEXT: sltu a3, a2, a0
; CHECK-NEXT: addi a3, a3, -1
; CHECK-NEXT: and a0, a3, a0
-; CHECK-NEXT: vmv1r.v v0, v6
+; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: addi a3, sp, 16
; CHECK-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-fixed-vectors.ll b/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-fixed-vectors.ll
index 9496cd82947d4b..2a137099bcb0f4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-fixed-vectors.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-fixed-vectors.ll
@@ -11,10 +11,10 @@ define <2 x i1> @test_vp_splice_v2i1(<2 x i1> %va, <2 x i1> %vb, i32 zeroext %ev
; CHECK-LABEL: test_vp_splice_v2i1:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v9, v0
-; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vim v8, v10, 1, v0
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: vmv1r.v v0, v9
@@ -35,10 +35,10 @@ define <2 x i1> @test_vp_splice_v2i1_negative_offset(<2 x i1> %va, <2 x i1> %vb,
; CHECK-LABEL: test_vp_splice_v2i1_negative_offset:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v9, v0
-; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vim v8, v10, 1, v0
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: vmv1r.v v0, v9
@@ -59,10 +59,10 @@ define <2 x i1> @test_vp_splice_v2i1_masked(<2 x i1> %va, <2 x i1> %vb, <2 x i1>
; CHECK-LABEL: test_vp_splice_v2i1_masked:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.v.i v11, 0
; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vim v8, v11, 1, v0
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: vmv.v.i v11, 0
; CHECK-NEXT: vmv1r.v v0, v10
@@ -84,10 +84,10 @@ define <4 x i1> @test_vp_splice_v4i1(<4 x i1> %va, <4 x i1> %vb, i32 zeroext %ev
; CHECK-LABEL: test_vp_splice_v4i1:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v9, v0
-; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vim v8, v10, 1, v0
+; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: vmv1r.v v0, v9
@@ -108,10 +108,10 @@ define <4 x i1> @test_vp_splice_v4i1_negative_offset(<4 x i1> %va, <4 x i1> %vb,
; CHECK-LABEL: test_vp_splice_v4i1_negative_offset:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v9, v0
-; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vim v8, v10, 1, v0
+; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: vmv1r.v v0, v9
@@ -132,10 +132,10 @@ define <4 x i1> @test_vp_splice_v4i1_masked(<4 x i1> %va, <4 x i1> %vb, <4 x i1>
; CHECK-LABEL: test_vp_splice_v4i1_masked:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vmv.v.i v11, 0
; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vim v8, v11, 1, v0
+; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
; CHECK-NEXT: vmv.v.i v11, 0
; CHECK-NEXT: vmv1r.v v0, v10
@@ -157,10 +157,10 @@ define <8 x i1> @test_vp_splice_v8i1(<8 x i1> %va, <8 x i1> %vb, i32 zeroext %ev
; CHECK-LABEL: test_vp_splice_v8i1:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v9, v0
-; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vim v8, v10, 1, v0
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: vmv1r.v v0, v9
@@ -181,10 +181,10 @@ define <8 x i1> @test_vp_splice_v8i1_negative_offset(<8 x i1> %va, <8 x i1> %vb,
; CHECK-LABEL: test_vp_splice_v8i1_negative_offset:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v9, v0
-; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vim v8, v10, 1, v0
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: vmv1r.v v0, v9
@@ -205,10 +205,10 @@ define <8 x i1> @test_vp_splice_v8i1_masked(<8 x i1> %va, <8 x i1> %vb, <8 x i1>
; CHECK-LABEL: test_vp_splice_v8i1_masked:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vmv.v.i v11, 0
; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vim v8, v11, 1, v0
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT: vmv.v.i v11, 0
; CHECK-NEXT: vmv1r.v v0, v10
@@ -230,10 +230,10 @@ define <16 x i1> @test_vp_splice_v16i1(<16 x i1> %va, <16 x i1> %vb, i32 zeroext
; CHECK-LABEL: test_vp_splice_v16i1:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v9, v0
-; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vim v8, v10, 1, v0
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: vmv1r.v v0, v9
@@ -254,10 +254,10 @@ define <16 x i1> @test_vp_splice_v16i1_negative_offset(<16 x i1> %va, <16 x i1>
; CHECK-LABEL: test_vp_splice_v16i1_negative_offset:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v9, v0
-; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vim v8, v10, 1, v0
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: vmv1r.v v0, v9
@@ -278,10 +278,10 @@ define <16 x i1> @test_vp_splice_v16i1_masked(<16 x i1> %va, <16 x i1> %vb, <16
; CHECK-LABEL: test_vp_splice_v16i1_masked:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v11, 0
; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vim v8, v11, 1, v0
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.i v11, 0
; CHECK-NEXT: vmv1r.v v0, v10
diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-vectors.ll b/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-vectors.ll
index 90276308252271..fc446d0a3a88ac 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-vectors.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-vectors.ll
@@ -14,10 +14,10 @@ define <vscale x 1 x i1> @test_vp_splice_nxv1i1(<vscale x 1 x i1> %va, <vscale x
; CHECK-LABEL: test_vp_splice_nxv1i1:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v9, v0
-; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vim v8, v10, 1, v0
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: vmv1r.v v0, v9
@@ -38,10 +38,10 @@ define <vscale x 1 x i1> @test_vp_splice_nxv1i1_negative_offset(<vscale x 1 x i1
; CHECK-LABEL: test_vp_splice_nxv1i1_negative_offset:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v9, v0
-; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vim v8, v10, 1, v0
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: vmv1r.v v0, v9
@@ -62,10 +62,10 @@ define <vscale x 1 x i1> @test_vp_splice_nxv1i1_masked(<vscale x 1 x i1> %va, <v
; CHECK-LABEL: test_vp_splice_nxv1i1_masked:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.v.i v11, 0
; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vim v8, v11, 1, v0
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: vmv.v.i v11, 0
; CHECK-NEXT: vmv1r.v v0, v10
@@ -87,10 +87,10 @@ define <vscale x 2 x i1> @test_vp_splice_nxv2i1(<vscale x 2 x i1> %va, <vscale x
; CHECK-LABEL: test_vp_splice_nxv2i1:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v9, v0
-; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vim v8, v10, 1, v0
+; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: vmv1r.v v0, v9
@@ -111,10 +111,10 @@ define <vscale x 2 x i1> @test_vp_splice_nxv2i1_negative_offset(<vscale x 2 x i1
; CHECK-LABEL: test_vp_splice_nxv2i1_negative_offset:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v9, v0
-; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vim v8, v10, 1, v0
+; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: vmv1r.v v0, v9
@@ -135,10 +135,10 @@ define <vscale x 2 x i1> @test_vp_splice_nxv2i1_masked(<vscale x 2 x i1> %va, <v
; CHECK-LABEL: test_vp_splice_nxv2i1_masked:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vmv.v.i v11, 0
; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vim v8, v11, 1, v0
+; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
; CHECK-NEXT: vmv.v.i v11, 0
; CHECK-NEXT: vmv1r.v v0, v10
@@ -160,10 +160,10 @@ define <vscale x 4 x i1> @test_vp_splice_nxv4i1(<vscale x 4 x i1> %va, <vscale x
; CHECK-LABEL: test_vp_splice_nxv4i1:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v9, v0
-; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vim v8, v10, 1, v0
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: vmv1r.v v0, v9
@@ -184,10 +184,10 @@ define <vscale x 4 x i1> @test_vp_splice_nxv4i1_negative_offset(<vscale x 4 x i1
; CHECK-LABEL: test_vp_splice_nxv4i1_negative_offset:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v9, v0
-; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vim v8, v10, 1, v0
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: vmv1r.v v0, v9
@@ -208,10 +208,10 @@ define <vscale x 4 x i1> @test_vp_splice_nxv4i1_masked(<vscale x 4 x i1> %va, <v
; CHECK-LABEL: test_vp_splice_nxv4i1_masked:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vmv.v.i v11, 0
; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vim v8, v11, 1, v0
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT: vmv.v.i v11, 0
; CHECK-NEXT: vmv1r.v v0, v10
@@ -233,10 +233,10 @@ define <vscale x 8 x i1> @test_vp_splice_nxv8i1(<vscale x 8 x i1> %va, <vscale x
; CHECK-LABEL: test_vp_splice_nxv8i1:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v9, v0
-; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vim v8, v10, 1, v0
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: vmv1r.v v0, v9
@@ -257,10 +257,10 @@ define <vscale x 8 x i1> @test_vp_splice_nxv8i1_negative_offset(<vscale x 8 x i1
; CHECK-LABEL: test_vp_splice_nxv8i1_negative_offset:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v9, v0
-; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vim v8, v10, 1, v0
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: vmv1r.v v0, v9
@@ -281,10 +281,10 @@ define <vscale x 8 x i1> @test_vp_splice_nxv8i1_masked(<vscale x 8 x i1> %va, <v
; CHECK-LABEL: test_vp_splice_nxv8i1_masked:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v11, 0
; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmerge.vim v8, v11, 1, v0
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.i v11, 0
; CHECK-NEXT: vmv1r.v v0, v10
@@ -306,9 +306,9 @@ define <vscale x 16 x i1> @test_vp_splice_nxv16i1(<vscale x 16 x i1> %va, <vscal
; CHECK-LABEL: test_vp_splice_nxv16i1:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v9, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT: vmv.v.i v10, 0
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmerge.vim v10, v10, 1, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
; CHECK-NEXT: vmv.v.i v12, 0
@@ -330,9 +330,9 @@ define <vscale x 16 x i1> @test_vp_splice_nxv16i1_negative_offset(<vscale x 16 x
; CHECK-LABEL: test_vp_splice_nxv16i1_negative_offset:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v9, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT: vmv.v.i v10, 0
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmerge.vim v10, v10, 1, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
; CHECK-NEXT: vmv.v.i v12, 0
@@ -354,9 +354,9 @@ define <vscale x 16 x i1> @test_vp_splice_nxv16i1_masked(<vscale x 16 x i1> %va,
; CHECK-LABEL: test_vp_splice_nxv16i1_masked:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v10, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT: vmv.v.i v12, 0
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmerge.vim v12, v12, 1, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
; CHECK-NEXT: vmv.v.i v14, 0
@@ -380,9 +380,9 @@ define <vscale x 32 x i1> @test_vp_splice_nxv32i1(<vscale x 32 x i1> %va, <vscal
; CHECK-LABEL: test_vp_splice_nxv32i1:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v9, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; CHECK-NEXT: vmv.v.i v12, 0
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmerge.vim v12, v12, 1, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; CHECK-NEXT: vmv.v.i v16, 0
@@ -404,9 +404,9 @@ define <vscale x 32 x i1> @test_vp_splice_nxv32i1_negative_offset(<vscale x 32 x
; CHECK-LABEL: test_vp_splice_nxv32i1_negative_offset:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v9, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; CHECK-NEXT: vmv.v.i v12, 0
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmerge.vim v12, v12, 1, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; CHECK-NEXT: vmv.v.i v16, 0
@@ -428,9 +428,9 @@ define <vscale x 32 x i1> @test_vp_splice_nxv32i1_masked(<vscale x 32 x i1> %va,
; CHECK-LABEL: test_vp_splice_nxv32i1_masked:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v10, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; CHECK-NEXT: vmv.v.i v12, 0
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmerge.vim v12, v12, 1, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; CHECK-NEXT: vmv.v.i v16, 0
@@ -454,9 +454,9 @@ define <vscale x 64 x i1> @test_vp_splice_nxv64i1(<vscale x 64 x i1> %va, <vscal
; CHECK-LABEL: test_vp_splice_nxv64i1:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v9, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vmv.v.i v16, 0
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmerge.vim v16, v16, 1, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
; CHECK-NEXT: vmv.v.i v24, 0
@@ -478,9 +478,9 @@ define <vscale x 64 x i1> @test_vp_splice_nxv64i1_negative_offset(<vscale x 64 x
; CHECK-LABEL: test_vp_splice_nxv64i1_negative_offset:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v9, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vmv.v.i v16, 0
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmerge.vim v16, v16, 1, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
; CHECK-NEXT: vmv.v.i v24, 0
@@ -502,9 +502,9 @@ define <vscale x 64 x i1> @test_vp_splice_nxv64i1_masked(<vscale x 64 x i1> %va,
; CHECK-LABEL: test_vp_splice_nxv64i1_masked:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v10, v0
+; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
; CHECK-NEXT: vmv.v.i v16, 0
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmerge.vim v16, v16, 1, v0
; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
; CHECK-NEXT: vmv.v.i v24, 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll
index 34f8f35ee98c0c..4f6b05e32e4a48 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll
@@ -2460,11 +2460,11 @@ define <vscale x 16 x double> @vpgather_nxv16f64(<vscale x 16 x ptr> %ptrs, <vsc
; RV32-NEXT: vmv1r.v v24, v0
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: sub a2, a0, a1
+; RV32-NEXT: srli a3, a1, 3
+; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
+; RV32-NEXT: vslidedown.vx v0, v0, a3
; RV32-NEXT: sltu a3, a0, a2
; RV32-NEXT: addi a3, a3, -1
-; RV32-NEXT: srli a4, a1, 3
-; RV32-NEXT: vsetvli a5, zero, e8, mf4, ta, ma
-; RV32-NEXT: vslidedown.vx v0, v0, a4
; RV32-NEXT: and a2, a3, a2
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v16, (zero), v12, v0.t
@@ -2483,11 +2483,11 @@ define <vscale x 16 x double> @vpgather_nxv16f64(<vscale x 16 x ptr> %ptrs, <vsc
; RV64-NEXT: vmv1r.v v24, v0
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: sub a2, a0, a1
+; RV64-NEXT: srli a3, a1, 3
+; RV64-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
+; RV64-NEXT: vslidedown.vx v0, v0, a3
; RV64-NEXT: sltu a3, a0, a2
; RV64-NEXT: addi a3, a3, -1
-; RV64-NEXT: srli a4, a1, 3
-; RV64-NEXT: vsetvli a5, zero, e8, mf4, ta, ma
-; RV64-NEXT: vslidedown.vx v0, v0, a4
; RV64-NEXT: and a2, a3, a2
; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV64-NEXT: vluxei64.v v16, (zero), v16, v0.t
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpload.ll b/llvm/test/CodeGen/RISCV/rvv/vpload.ll
index 5683a7b7588546..59f143170cd5df 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpload.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpload.ll
@@ -549,9 +549,9 @@ declare <vscale x 16 x double> @llvm.vector.extract.nxv16f64(<vscale x 17 x doub
define <vscale x 16 x double> @vpload_nxv17f64(ptr %ptr, ptr %out, <vscale x 17 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_nxv17f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: csrr a3, vlenb
; CHECK-NEXT: slli a5, a3, 1
-; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: mv a4, a2
; CHECK-NEXT: bltu a2, a5, .LBB44_2
; CHECK-NEXT: # %bb.1:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll
index 35cffc0eb3718f..961f7b12d34ec5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll
@@ -2323,12 +2323,12 @@ define void @vpscatter_nxv16f64(<vscale x 16 x double> %val, <vscale x 16 x ptr>
; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: sub sp, sp, a1
; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV64-NEXT: addi a1, sp, 16
+; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a3, a1, 3
; RV64-NEXT: add a3, a0, a3
-; RV64-NEXT: vl8re64.v v24, (a3)
-; RV64-NEXT: addi a3, sp, 16
-; RV64-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
+; RV64-NEXT: vl8re64.v v16, (a3)
; RV64-NEXT: vl8re64.v v24, (a0)
; RV64-NEXT: mv a0, a2
; RV64-NEXT: bltu a2, a1, .LBB108_2
@@ -2347,7 +2347,7 @@ define void @vpscatter_nxv16f64(<vscale x 16 x double> %val, <vscale x 16 x ptr>
; RV64-NEXT: addi a1, sp, 16
; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT: vsoxei64.v v16, (zero), v8, v0.t
+; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a0, a0, 3
; RV64-NEXT: add sp, sp, a0
@@ -2362,16 +2362,25 @@ define void @vpscatter_nxv16f64(<vscale x 16 x double> %val, <vscale x 16 x ptr>
define void @vpscatter_baseidx_nxv16i16_nxv16f64(<vscale x 16 x double> %val, ptr %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv16i16_nxv16f64:
; RV32: # %bb.0:
-; RV32-NEXT: vl4re16.v v4, (a1)
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: sub sp, sp, a3
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
+; RV32-NEXT: addi a3, sp, 16
+; RV32-NEXT: vs1r.v v0, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: vl4re16.v v24, (a1)
; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
-; RV32-NEXT: vsext.vf2 v24, v4
+; RV32-NEXT: vsext.vf2 v0, v24
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: vsll.vi v24, v24, 3
+; RV32-NEXT: vsll.vi v24, v0, 3
; RV32-NEXT: mv a3, a2
; RV32-NEXT: bltu a2, a1, .LBB109_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a3, a1
; RV32-NEXT: .LBB109_2:
+; RV32-NEXT: addi a4, sp, 16
+; RV32-NEXT: vl1r.v v0, (a4) # Unknown-size Folded Reload
; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT: sub a3, a2, a1
@@ -2383,6 +2392,11 @@ define void @vpscatter_baseidx_nxv16i16_nxv16f64(<vscale x 16 x double> %val, pt
; RV32-NEXT: and a2, a2, a3
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v16, (a0), v28, v0.t
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: .cfi_def_cfa sp, 16
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_nxv16i16_nxv16f64:
@@ -2393,19 +2407,19 @@ define void @vpscatter_baseidx_nxv16i16_nxv16f64(<vscale x 16 x double> %val, pt
; RV64-NEXT: slli a3, a3, 4
; RV64-NEXT: sub sp, sp, a3
; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; RV64-NEXT: csrr a3, vlenb
+; RV64-NEXT: slli a3, a3, 3
+; RV64-NEXT: add a3, sp, a3
+; RV64-NEXT: addi a3, a3, 16
+; RV64-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV64-NEXT: vl4re16.v v24, (a1)
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 3
-; RV64-NEXT: add a1, sp, a1
-; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v16, v26
; RV64-NEXT: vsll.vi v16, v16, 3
; RV64-NEXT: addi a1, sp, 16
; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
-; RV64-NEXT: vsext.vf4 v16, v24
; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: vsext.vf4 v16, v24
; RV64-NEXT: vsll.vi v24, v16, 3
; RV64-NEXT: mv a3, a2
; RV64-NEXT: bltu a2, a1, .LBB109_2
@@ -2445,16 +2459,25 @@ define void @vpscatter_baseidx_nxv16i16_nxv16f64(<vscale x 16 x double> %val, pt
define void @vpscatter_baseidx_sext_nxv16i16_nxv16f64(<vscale x 16 x double> %val, ptr %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv16i16_nxv16f64:
; RV32: # %bb.0:
-; RV32-NEXT: vl4re16.v v4, (a1)
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: sub sp, sp, a3
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
+; RV32-NEXT: addi a3, sp, 16
+; RV32-NEXT: vs1r.v v0, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: vl4re16.v v24, (a1)
; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
-; RV32-NEXT: vsext.vf2 v24, v4
+; RV32-NEXT: vsext.vf2 v0, v24
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: vsll.vi v24, v24, 3
+; RV32-NEXT: vsll.vi v24, v0, 3
; RV32-NEXT: mv a3, a2
; RV32-NEXT: bltu a2, a1, .LBB110_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a3, a1
; RV32-NEXT: .LBB110_2:
+; RV32-NEXT: addi a4, sp, 16
+; RV32-NEXT: vl1r.v v0, (a4) # Unknown-size Folded Reload
; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT: sub a3, a2, a1
@@ -2466,6 +2489,11 @@ define void @vpscatter_baseidx_sext_nxv16i16_nxv16f64(<vscale x 16 x double> %va
; RV32-NEXT: and a2, a2, a3
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v16, (a0), v28, v0.t
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: .cfi_def_cfa sp, 16
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv16i16_nxv16f64:
@@ -2473,30 +2501,28 @@ define void @vpscatter_baseidx_sext_nxv16i16_nxv16f64(<vscale x 16 x double> %va
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: csrr a3, vlenb
-; RV64-NEXT: slli a4, a3, 3
-; RV64-NEXT: add a3, a4, a3
+; RV64-NEXT: slli a3, a3, 4
; RV64-NEXT: sub sp, sp, a3
-; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x09, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 9 * vlenb
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; RV64-NEXT: csrr a3, vlenb
+; RV64-NEXT: slli a3, a3, 3
+; RV64-NEXT: add a3, sp, a3
+; RV64-NEXT: addi a3, a3, 16
+; RV64-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV64-NEXT: vl4re16.v v24, (a1)
-; RV64-NEXT: addi a1, sp, 16
-; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill
-; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: add a1, sp, a1
-; RV64-NEXT: addi a1, a1, 16
-; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
-; RV64-NEXT: vsext.vf4 v0, v24
; RV64-NEXT: vsext.vf4 v16, v26
; RV64-NEXT: vsll.vi v16, v16, 3
+; RV64-NEXT: addi a1, sp, 16
+; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: vsll.vi v24, v0, 3
+; RV64-NEXT: vsext.vf4 v16, v24
+; RV64-NEXT: vsll.vi v24, v16, 3
; RV64-NEXT: mv a3, a2
; RV64-NEXT: bltu a2, a1, .LBB110_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: mv a3, a1
; RV64-NEXT: .LBB110_2:
-; RV64-NEXT: addi a4, sp, 16
-; RV64-NEXT: vl1r.v v0, (a4) # Unknown-size Folded Reload
; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t
; RV64-NEXT: sub a3, a2, a1
@@ -2507,14 +2533,16 @@ define void @vpscatter_baseidx_sext_nxv16i16_nxv16f64(<vscale x 16 x double> %va
; RV64-NEXT: vslidedown.vx v0, v0, a1
; RV64-NEXT: and a2, a2, a3
; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: addi a1, sp, 16
+; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT: csrr a0, vlenb
-; RV64-NEXT: slli a1, a0, 3
-; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: slli a0, a0, 4
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: .cfi_def_cfa sp, 16
; RV64-NEXT: addi sp, sp, 16
@@ -2529,16 +2557,25 @@ define void @vpscatter_baseidx_sext_nxv16i16_nxv16f64(<vscale x 16 x double> %va
define void @vpscatter_baseidx_zext_nxv16i16_nxv16f64(<vscale x 16 x double> %val, ptr %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv16i16_nxv16f64:
; RV32: # %bb.0:
-; RV32-NEXT: vl4re16.v v4, (a1)
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: sub sp, sp, a3
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
+; RV32-NEXT: addi a3, sp, 16
+; RV32-NEXT: vs1r.v v0, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: vl4re16.v v24, (a1)
; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
-; RV32-NEXT: vzext.vf2 v24, v4
+; RV32-NEXT: vzext.vf2 v0, v24
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: vsll.vi v24, v24, 3
+; RV32-NEXT: vsll.vi v24, v0, 3
; RV32-NEXT: mv a3, a2
; RV32-NEXT: bltu a2, a1, .LBB111_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a3, a1
; RV32-NEXT: .LBB111_2:
+; RV32-NEXT: addi a4, sp, 16
+; RV32-NEXT: vl1r.v v0, (a4) # Unknown-size Folded Reload
; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT: sub a3, a2, a1
@@ -2550,20 +2587,34 @@ define void @vpscatter_baseidx_zext_nxv16i16_nxv16f64(<vscale x 16 x double> %va
; RV32-NEXT: and a2, a2, a3
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v16, (a0), v28, v0.t
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: .cfi_def_cfa sp, 16
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv16i16_nxv16f64:
; RV64: # %bb.0:
-; RV64-NEXT: vl4re16.v v4, (a1)
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: csrr a3, vlenb
+; RV64-NEXT: sub sp, sp, a3
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
+; RV64-NEXT: addi a3, sp, 16
+; RV64-NEXT: vs1r.v v0, (a3) # Unknown-size Folded Spill
+; RV64-NEXT: vl4re16.v v24, (a1)
; RV64-NEXT: vsetvli a1, zero, e32, m8, ta, ma
-; RV64-NEXT: vzext.vf2 v24, v4
+; RV64-NEXT: vzext.vf2 v0, v24
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: vsll.vi v24, v24, 3
+; RV64-NEXT: vsll.vi v24, v0, 3
; RV64-NEXT: mv a3, a2
; RV64-NEXT: bltu a2, a1, .LBB111_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: mv a3, a1
; RV64-NEXT: .LBB111_2:
+; RV64-NEXT: addi a4, sp, 16
+; RV64-NEXT: vl1r.v v0, (a4) # Unknown-size Folded Reload
; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
; RV64-NEXT: vsoxei32.v v8, (a0), v24, v0.t
; RV64-NEXT: sub a3, a2, a1
@@ -2575,6 +2626,11 @@ define void @vpscatter_baseidx_zext_nxv16i16_nxv16f64(<vscale x 16 x double> %va
; RV64-NEXT: and a2, a2, a3
; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV64-NEXT: vsoxei32.v v16, (a0), v28, v0.t
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: add sp, sp, a0
+; RV64-NEXT: .cfi_def_cfa sp, 16
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: .cfi_def_cfa_offset 0
; RV64-NEXT: ret
%eidxs = zext <vscale x 16 x i16> %idxs to <vscale x 16 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 16 x i64> %eidxs
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpstore.ll b/llvm/test/CodeGen/RISCV/rvv/vpstore.ll
index 6d8574c11d1dd6..f13d07fc06a7b4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpstore.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpstore.ll
@@ -450,9 +450,17 @@ declare void @llvm.vp.store.nxv17f64.p0(<vscale x 17 x double>, ptr, <vscale x 1
define void @vpstore_nxv17f64(<vscale x 17 x double> %val, ptr %ptr, <vscale x 17 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpstore_nxv17f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a3, vlenb
-; CHECK-NEXT: slli a4, a3, 1
+; CHECK-NEXT: slli a3, a3, 3
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; CHECK-NEXT: vmv1r.v v24, v0
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a4, a3, 1
; CHECK-NEXT: mv a5, a2
; CHECK-NEXT: bltu a2, a4, .LBB35_2
; CHECK-NEXT: # %bb.1:
@@ -463,16 +471,8 @@ define void @vpstore_nxv17f64(<vscale x 17 x double> %val, ptr %ptr, <vscale x 1
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: mv a6, a3
; CHECK-NEXT: .LBB35_4:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: csrr a7, vlenb
-; CHECK-NEXT: slli a7, a7, 3
-; CHECK-NEXT: sub sp, sp, a7
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; CHECK-NEXT: vl8re64.v v0, (a0)
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: vl8re64.v v16, (a0)
; CHECK-NEXT: vsetvli zero, a6, e64, m8, ta, ma
; CHECK-NEXT: vse64.v v8, (a1), v0.t
; CHECK-NEXT: sub a0, a5, a3
@@ -488,8 +488,10 @@ define void @vpstore_nxv17f64(<vscale x 17 x double> %val, ptr %ptr, <vscale x 1
; CHECK-NEXT: sltu a2, a2, a0
; CHECK-NEXT: addi a2, a2, -1
; CHECK-NEXT: and a0, a2, a0
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a5, e64, m8, ta, ma
-; CHECK-NEXT: vse64.v v16, (a6), v0.t
+; CHECK-NEXT: vse64.v v8, (a6), v0.t
; CHECK-NEXT: bltu a0, a3, .LBB35_6
; CHECK-NEXT: # %bb.5:
; CHECK-NEXT: mv a0, a3
@@ -499,10 +501,8 @@ define void @vpstore_nxv17f64(<vscale x 17 x double> %val, ptr %ptr, <vscale x 1
; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
; CHECK-NEXT: vslidedown.vx v0, v24, a3
; CHECK-NEXT: add a1, a1, a2
-; CHECK-NEXT: addi a2, sp, 16
-; CHECK-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; CHECK-NEXT: vse64.v v8, (a1), v0.t
+; CHECK-NEXT: vse64.v v16, (a1), v0.t
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add sp, sp, a0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll
index 1f1a62f57664f3..5ec350826d5ada 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll
@@ -514,23 +514,38 @@ define <vscale x 16 x double> @vselect_combine_regression(<vscale x 16 x i64> %v
define void @vselect_legalize_regression(<vscale x 16 x double> %a, <vscale x 16 x i1> %ma, <vscale x 16 x i1> %mb, ptr %out) {
; CHECK-LABEL: vselect_legalize_regression:
; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli a2, zero, e8, m2, ta, ma
; CHECK-NEXT: vlm.v v24, (a0)
-; CHECK-NEXT: vmand.mm v7, v0, v24
+; CHECK-NEXT: vmand.mm v24, v0, v24
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a2, a0, 3
; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
-; CHECK-NEXT: vslidedown.vx v0, v7, a2
+; CHECK-NEXT: vslidedown.vx v0, v24, a2
; CHECK-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; CHECK-NEXT: vmv.v.i v24, 0
-; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0
-; CHECK-NEXT: vmv1r.v v0, v7
-; CHECK-NEXT: vmv.v.i v24, 0
-; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0
+; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
+; CHECK-NEXT: vmerge.vvm v8, v8, v24, v0
; CHECK-NEXT: vs8r.v v8, (a1)
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, a1, a0
; CHECK-NEXT: vs8r.v v16, (a0)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: .cfi_def_cfa sp, 16
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: ret
%cond = and <vscale x 16 x i1> %ma, %mb
%sel = select <vscale x 16 x i1> %cond, <vscale x 16 x double> %a, <vscale x 16 x double> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll
index b9697770994ef3..ed95b4be23d5c0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll
@@ -303,7 +303,7 @@ define <vscale x 32 x i32> @vtrunc_nxv32i64_nxv32i32(<vscale x 32 x i64> %a, <vs
; CHECK-NEXT: vslidedown.vx v16, v0, a4
; CHECK-NEXT: slli a4, a1, 3
; CHECK-NEXT: add a4, a0, a4
-; CHECK-NEXT: vl8re64.v v8, (a4)
+; CHECK-NEXT: vl8re64.v v24, (a4)
; CHECK-NEXT: slli a4, a1, 1
; CHECK-NEXT: sub a5, a2, a4
; CHECK-NEXT: sltu a6, a2, a5
@@ -312,21 +312,21 @@ define <vscale x 32 x i32> @vtrunc_nxv32i64_nxv32i32(<vscale x 32 x i64> %a, <vs
; CHECK-NEXT: sub a6, a5, a1
; CHECK-NEXT: sltu a7, a5, a6
; CHECK-NEXT: addi a7, a7, -1
-; CHECK-NEXT: vl8re64.v v24, (a0)
+; CHECK-NEXT: vl8re64.v v8, (a0)
; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vx v0, v16, a3
; CHECK-NEXT: and a0, a7, a6
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
-; CHECK-NEXT: vnsrl.wi v20, v8, 0, v0.t
+; CHECK-NEXT: vnsrl.wi v20, v24, 0, v0.t
; CHECK-NEXT: bltu a5, a1, .LBB17_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a5, a1
; CHECK-NEXT: .LBB17_2:
-; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
-; CHECK-NEXT: vslidedown.vx v6, v7, a3
; CHECK-NEXT: vmv1r.v v0, v16
+; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vslidedown.vx v25, v7, a3
; CHECK-NEXT: vsetvli zero, a5, e32, m4, ta, ma
-; CHECK-NEXT: vnsrl.wi v16, v24, 0, v0.t
+; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t
; CHECK-NEXT: bltu a2, a4, .LBB17_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: mv a2, a4
@@ -335,7 +335,7 @@ define <vscale x 32 x i32> @vtrunc_nxv32i64_nxv32i32(<vscale x 32 x i64> %a, <vs
; CHECK-NEXT: sltu a3, a2, a0
; CHECK-NEXT: addi a3, a3, -1
; CHECK-NEXT: and a0, a3, a0
-; CHECK-NEXT: vmv1r.v v0, v6
+; CHECK-NEXT: vmv1r.v v0, v25
; CHECK-NEXT: addi a3, sp, 16
; CHECK-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll
index 336d86d57f3e62..ddc27f7562cdb1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll
@@ -1377,9 +1377,9 @@ define <vscale x 1 x i64> @i1_zext(<vscale x 1 x i1> %va, <vscale x 1 x i64> %vb
;
; RV64-LABEL: i1_zext:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, mu
-; RV64-NEXT: vadd.vi v8, v8, 1, v0.t
; RV64-NEXT: li a1, 42
+; RV64-NEXT: vsetvli a2, zero, e64, m1, ta, mu
+; RV64-NEXT: vadd.vi v8, v8, 1, v0.t
; RV64-NEXT: sh a1, 0(a0)
; RV64-NEXT: ret
%vc = zext <vscale x 1 x i1> %va to <vscale x 1 x i64>
diff --git a/llvm/test/CodeGen/RISCV/shifts.ll b/llvm/test/CodeGen/RISCV/shifts.ll
index 2bec1fca5c43b4..c761b0d826fae5 100644
--- a/llvm/test/CodeGen/RISCV/shifts.ll
+++ b/llvm/test/CodeGen/RISCV/shifts.ll
@@ -395,56 +395,56 @@ define i128 @fshr128_minsize(i128 %a, i128 %b) minsize nounwind {
; RV32I-LABEL: fshr128_minsize:
; RV32I: # %bb.0:
; RV32I-NEXT: lw a2, 0(a2)
-; RV32I-NEXT: lw t2, 0(a1)
+; RV32I-NEXT: lw t1, 0(a1)
; RV32I-NEXT: lw a7, 4(a1)
-; RV32I-NEXT: lw a3, 8(a1)
+; RV32I-NEXT: lw a4, 8(a1)
; RV32I-NEXT: lw a1, 12(a1)
-; RV32I-NEXT: andi t1, a2, 64
+; RV32I-NEXT: andi t2, a2, 64
; RV32I-NEXT: mv t0, a7
-; RV32I-NEXT: mv a4, t2
-; RV32I-NEXT: beqz t1, .LBB10_2
+; RV32I-NEXT: mv a3, t1
+; RV32I-NEXT: beqz t2, .LBB10_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: mv t0, a1
-; RV32I-NEXT: mv a4, a3
+; RV32I-NEXT: mv a3, a4
; RV32I-NEXT: .LBB10_2:
; RV32I-NEXT: andi a6, a2, 32
-; RV32I-NEXT: mv a5, a4
+; RV32I-NEXT: mv a5, a3
; RV32I-NEXT: bnez a6, .LBB10_13
; RV32I-NEXT: # %bb.3:
-; RV32I-NEXT: bnez t1, .LBB10_14
+; RV32I-NEXT: bnez t2, .LBB10_14
; RV32I-NEXT: .LBB10_4:
; RV32I-NEXT: beqz a6, .LBB10_6
; RV32I-NEXT: .LBB10_5:
-; RV32I-NEXT: mv t0, a3
+; RV32I-NEXT: mv t0, a4
; RV32I-NEXT: .LBB10_6:
; RV32I-NEXT: slli t3, t0, 1
-; RV32I-NEXT: not t2, a2
-; RV32I-NEXT: beqz t1, .LBB10_8
+; RV32I-NEXT: not t1, a2
+; RV32I-NEXT: beqz t2, .LBB10_8
; RV32I-NEXT: # %bb.7:
; RV32I-NEXT: mv a1, a7
; RV32I-NEXT: .LBB10_8:
; RV32I-NEXT: srl a7, a5, a2
-; RV32I-NEXT: sll t1, t3, t2
+; RV32I-NEXT: sll t2, t3, t1
; RV32I-NEXT: srl t0, t0, a2
; RV32I-NEXT: beqz a6, .LBB10_10
; RV32I-NEXT: # %bb.9:
-; RV32I-NEXT: mv a3, a1
+; RV32I-NEXT: mv a4, a1
; RV32I-NEXT: .LBB10_10:
-; RV32I-NEXT: or a7, t1, a7
-; RV32I-NEXT: slli t1, a3, 1
-; RV32I-NEXT: sll t1, t1, t2
-; RV32I-NEXT: or t0, t1, t0
-; RV32I-NEXT: srl a3, a3, a2
+; RV32I-NEXT: or a7, t2, a7
+; RV32I-NEXT: slli t2, a4, 1
+; RV32I-NEXT: sll t2, t2, t1
+; RV32I-NEXT: or t0, t2, t0
+; RV32I-NEXT: srl a4, a4, a2
; RV32I-NEXT: beqz a6, .LBB10_12
; RV32I-NEXT: # %bb.11:
-; RV32I-NEXT: mv a1, a4
+; RV32I-NEXT: mv a1, a3
; RV32I-NEXT: .LBB10_12:
-; RV32I-NEXT: slli a4, a1, 1
-; RV32I-NEXT: sll a4, a4, t2
-; RV32I-NEXT: or a3, a4, a3
+; RV32I-NEXT: slli a3, a1, 1
+; RV32I-NEXT: sll a3, a3, t1
+; RV32I-NEXT: or a3, a3, a4
; RV32I-NEXT: srl a1, a1, a2
; RV32I-NEXT: slli a5, a5, 1
-; RV32I-NEXT: sll a2, a5, t2
+; RV32I-NEXT: sll a2, a5, t1
; RV32I-NEXT: or a1, a2, a1
; RV32I-NEXT: sw a7, 0(a0)
; RV32I-NEXT: sw t0, 4(a0)
@@ -453,9 +453,9 @@ define i128 @fshr128_minsize(i128 %a, i128 %b) minsize nounwind {
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB10_13:
; RV32I-NEXT: mv a5, t0
-; RV32I-NEXT: beqz t1, .LBB10_4
+; RV32I-NEXT: beqz t2, .LBB10_4
; RV32I-NEXT: .LBB10_14:
-; RV32I-NEXT: mv a3, t2
+; RV32I-NEXT: mv a4, t1
; RV32I-NEXT: bnez a6, .LBB10_5
; RV32I-NEXT: j .LBB10_6
;
diff --git a/llvm/test/CodeGen/RISCV/short-forward-branch-opt.ll b/llvm/test/CodeGen/RISCV/short-forward-branch-opt.ll
index c0c11fefafb555..b7b88584f3bdb8 100644
--- a/llvm/test/CodeGen/RISCV/short-forward-branch-opt.ll
+++ b/llvm/test/CodeGen/RISCV/short-forward-branch-opt.ll
@@ -69,39 +69,39 @@ define signext i32 @test3(i32 signext %v, i32 signext %w, i32 signext %x, i32 si
;
; RV64SFB-LABEL: test3:
; RV64SFB: # %bb.0:
-; RV64SFB-NEXT: bnez a4, .LBB2_2
+; RV64SFB-NEXT: beqz a4, .LBB2_2
; RV64SFB-NEXT: # %bb.1:
-; RV64SFB-NEXT: mv a0, a1
+; RV64SFB-NEXT: mv a2, a3
; RV64SFB-NEXT: .LBB2_2:
-; RV64SFB-NEXT: beqz a4, .LBB2_4
+; RV64SFB-NEXT: bnez a4, .LBB2_4
; RV64SFB-NEXT: # %bb.3:
-; RV64SFB-NEXT: mv a2, a3
+; RV64SFB-NEXT: mv a0, a1
; RV64SFB-NEXT: .LBB2_4:
; RV64SFB-NEXT: addw a0, a0, a2
; RV64SFB-NEXT: ret
;
; ZICOND-LABEL: test3:
; ZICOND: # %bb.0:
-; ZICOND-NEXT: bnez a4, .LBB2_2
+; ZICOND-NEXT: beqz a4, .LBB2_2
; ZICOND-NEXT: # %bb.1:
-; ZICOND-NEXT: mv a0, a1
+; ZICOND-NEXT: mv a2, a3
; ZICOND-NEXT: .LBB2_2:
-; ZICOND-NEXT: beqz a4, .LBB2_4
+; ZICOND-NEXT: bnez a4, .LBB2_4
; ZICOND-NEXT: # %bb.3:
-; ZICOND-NEXT: mv a2, a3
+; ZICOND-NEXT: mv a0, a1
; ZICOND-NEXT: .LBB2_4:
; ZICOND-NEXT: addw a0, a0, a2
; ZICOND-NEXT: ret
;
; RV32SFB-LABEL: test3:
; RV32SFB: # %bb.0:
-; RV32SFB-NEXT: bnez a4, .LBB2_2
+; RV32SFB-NEXT: beqz a4, .LBB2_2
; RV32SFB-NEXT: # %bb.1:
-; RV32SFB-NEXT: mv a0, a1
+; RV32SFB-NEXT: mv a2, a3
; RV32SFB-NEXT: .LBB2_2:
-; RV32SFB-NEXT: beqz a4, .LBB2_4
+; RV32SFB-NEXT: bnez a4, .LBB2_4
; RV32SFB-NEXT: # %bb.3:
-; RV32SFB-NEXT: mv a2, a3
+; RV32SFB-NEXT: mv a0, a1
; RV32SFB-NEXT: .LBB2_4:
; RV32SFB-NEXT: add a0, a0, a2
; RV32SFB-NEXT: ret
@@ -566,18 +566,18 @@ define void @sextw_removal_ccaddw(i1 %c, i32 signext %arg, i32 signext %arg1, i3
; RV64SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; RV64SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; RV64SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64SFB-NEXT: mv s0, a1
+; RV64SFB-NEXT: mv s1, a1
; RV64SFB-NEXT: andi a0, a0, 1
-; RV64SFB-NEXT: mv s1, a2
+; RV64SFB-NEXT: mv s0, a2
; RV64SFB-NEXT: beqz a0, .LBB16_4
; RV64SFB-NEXT: # %bb.3: # %bb
-; RV64SFB-NEXT: addw s0, a1, a3
+; RV64SFB-NEXT: addw s1, a1, a3
; RV64SFB-NEXT: .LBB16_4: # %bb
; RV64SFB-NEXT: .LBB16_1: # %bb2
; RV64SFB-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64SFB-NEXT: mv a0, s0
+; RV64SFB-NEXT: mv a0, s1
; RV64SFB-NEXT: call bar
-; RV64SFB-NEXT: sllw s0, s0, s1
+; RV64SFB-NEXT: sllw s1, s1, s0
; RV64SFB-NEXT: bnez a0, .LBB16_1
; RV64SFB-NEXT: # %bb.2: # %bb7
; RV64SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
@@ -592,18 +592,18 @@ define void @sextw_removal_ccaddw(i1 %c, i32 signext %arg, i32 signext %arg1, i3
; ZICOND-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; ZICOND-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; ZICOND-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; ZICOND-NEXT: mv s0, a1
+; ZICOND-NEXT: mv s1, a1
; ZICOND-NEXT: andi a0, a0, 1
-; ZICOND-NEXT: mv s1, a2
+; ZICOND-NEXT: mv s0, a2
; ZICOND-NEXT: beqz a0, .LBB16_4
; ZICOND-NEXT: # %bb.3: # %bb
-; ZICOND-NEXT: addw s0, a1, a3
+; ZICOND-NEXT: addw s1, a1, a3
; ZICOND-NEXT: .LBB16_4: # %bb
; ZICOND-NEXT: .LBB16_1: # %bb2
; ZICOND-NEXT: # =>This Inner Loop Header: Depth=1
-; ZICOND-NEXT: mv a0, s0
+; ZICOND-NEXT: mv a0, s1
; ZICOND-NEXT: call bar
-; ZICOND-NEXT: sllw s0, s0, s1
+; ZICOND-NEXT: sllw s1, s1, s0
; ZICOND-NEXT: bnez a0, .LBB16_1
; ZICOND-NEXT: # %bb.2: # %bb7
; ZICOND-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
@@ -618,18 +618,18 @@ define void @sextw_removal_ccaddw(i1 %c, i32 signext %arg, i32 signext %arg1, i3
; RV32SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32SFB-NEXT: mv s0, a1
+; RV32SFB-NEXT: mv s1, a1
; RV32SFB-NEXT: andi a0, a0, 1
-; RV32SFB-NEXT: mv s1, a2
+; RV32SFB-NEXT: mv s0, a2
; RV32SFB-NEXT: beqz a0, .LBB16_4
; RV32SFB-NEXT: # %bb.3: # %bb
-; RV32SFB-NEXT: add s0, a1, a3
+; RV32SFB-NEXT: add s1, a1, a3
; RV32SFB-NEXT: .LBB16_4: # %bb
; RV32SFB-NEXT: .LBB16_1: # %bb2
; RV32SFB-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32SFB-NEXT: mv a0, s0
+; RV32SFB-NEXT: mv a0, s1
; RV32SFB-NEXT: call bar
-; RV32SFB-NEXT: sll s0, s0, s1
+; RV32SFB-NEXT: sll s1, s1, s0
; RV32SFB-NEXT: bnez a0, .LBB16_1
; RV32SFB-NEXT: # %bb.2: # %bb7
; RV32SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
@@ -1223,14 +1223,14 @@ define i64 @select_slli(i64 %A, i64 %C, i1 zeroext %cond) {
; RV32SFB-LABEL: select_slli:
; RV32SFB: # %bb.0: # %entry
; RV32SFB-NEXT: mv a1, a0
-; RV32SFB-NEXT: mv a0, a2
-; RV32SFB-NEXT: beqz a4, .LBB28_2
+; RV32SFB-NEXT: bnez a4, .LBB28_2
; RV32SFB-NEXT: # %bb.1: # %entry
-; RV32SFB-NEXT: mv a1, a3
+; RV32SFB-NEXT: li a2, 0
; RV32SFB-NEXT: .LBB28_2: # %entry
-; RV32SFB-NEXT: bnez a4, .LBB28_4
+; RV32SFB-NEXT: mv a0, a2
+; RV32SFB-NEXT: beqz a4, .LBB28_4
; RV32SFB-NEXT: # %bb.3: # %entry
-; RV32SFB-NEXT: li a0, 0
+; RV32SFB-NEXT: mv a1, a3
; RV32SFB-NEXT: .LBB28_4: # %entry
; RV32SFB-NEXT: ret
entry:
@@ -1567,11 +1567,11 @@ define i64 @select_andn(i64 %A, i64 %B, i64 %C, i1 zeroext %cond) {
; RV32SFB: # %bb.0: # %entry
; RV32SFB-NEXT: bnez a6, .LBB36_2
; RV32SFB-NEXT: # %bb.1: # %entry
-; RV32SFB-NEXT: andn a4, a0, a2
+; RV32SFB-NEXT: andn a5, a1, a3
; RV32SFB-NEXT: .LBB36_2: # %entry
; RV32SFB-NEXT: bnez a6, .LBB36_4
; RV32SFB-NEXT: # %bb.3: # %entry
-; RV32SFB-NEXT: andn a5, a1, a3
+; RV32SFB-NEXT: andn a4, a0, a2
; RV32SFB-NEXT: .LBB36_4: # %entry
; RV32SFB-NEXT: mv a0, a4
; RV32SFB-NEXT: mv a1, a5
@@ -1615,11 +1615,11 @@ define i64 @select_orn(i64 %A, i64 %B, i64 %C, i1 zeroext %cond) {
; RV32SFB: # %bb.0: # %entry
; RV32SFB-NEXT: bnez a6, .LBB37_2
; RV32SFB-NEXT: # %bb.1: # %entry
-; RV32SFB-NEXT: orn a4, a0, a2
+; RV32SFB-NEXT: orn a5, a1, a3
; RV32SFB-NEXT: .LBB37_2: # %entry
; RV32SFB-NEXT: bnez a6, .LBB37_4
; RV32SFB-NEXT: # %bb.3: # %entry
-; RV32SFB-NEXT: orn a5, a1, a3
+; RV32SFB-NEXT: orn a4, a0, a2
; RV32SFB-NEXT: .LBB37_4: # %entry
; RV32SFB-NEXT: mv a0, a4
; RV32SFB-NEXT: mv a1, a5
@@ -1663,11 +1663,11 @@ define i64 @select_xnor(i64 %A, i64 %B, i64 %C, i1 zeroext %cond) {
; RV32SFB: # %bb.0: # %entry
; RV32SFB-NEXT: bnez a6, .LBB38_2
; RV32SFB-NEXT: # %bb.1: # %entry
-; RV32SFB-NEXT: xnor a4, a0, a2
+; RV32SFB-NEXT: xnor a5, a1, a3
; RV32SFB-NEXT: .LBB38_2: # %entry
; RV32SFB-NEXT: bnez a6, .LBB38_4
; RV32SFB-NEXT: # %bb.3: # %entry
-; RV32SFB-NEXT: xnor a5, a1, a3
+; RV32SFB-NEXT: xnor a4, a0, a2
; RV32SFB-NEXT: .LBB38_4: # %entry
; RV32SFB-NEXT: mv a0, a4
; RV32SFB-NEXT: mv a1, a5
diff --git a/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll b/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll
index c7e57021b90dc1..3f0f9dc0e1e27c 100644
--- a/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll
+++ b/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll
@@ -931,8 +931,8 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) nounwind {
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: lh a2, 4(a1)
; RV32I-NEXT: mv s0, a0
+; RV32I-NEXT: lh a2, 4(a1)
; RV32I-NEXT: lh a0, 8(a1)
; RV32I-NEXT: lh s1, 12(a1)
; RV32I-NEXT: srli a1, a2, 17
@@ -1003,8 +1003,8 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: lh a2, 8(a1)
; RV64I-NEXT: mv s0, a0
+; RV64I-NEXT: lh a2, 8(a1)
; RV64I-NEXT: lh a0, 16(a1)
; RV64I-NEXT: lh s1, 24(a1)
; RV64I-NEXT: srli a1, a2, 49
diff --git a/llvm/test/CodeGen/RISCV/zdinx-boundary-check.ll b/llvm/test/CodeGen/RISCV/zdinx-boundary-check.ll
index a4f56b6d28409c..9a312d9daca8db 100644
--- a/llvm/test/CodeGen/RISCV/zdinx-boundary-check.ll
+++ b/llvm/test/CodeGen/RISCV/zdinx-boundary-check.ll
@@ -39,9 +39,9 @@ define void @foo2(ptr nocapture %p, double %d) nounwind {
; RV32ZDINX-LABEL: foo2:
; RV32ZDINX: # %bb.0: # %entry
; RV32ZDINX-NEXT: mv a3, a2
+; RV32ZDINX-NEXT: addi a0, a0, 2047
; RV32ZDINX-NEXT: mv a2, a1
; RV32ZDINX-NEXT: fadd.d a2, a2, a2
-; RV32ZDINX-NEXT: addi a0, a0, 2047
; RV32ZDINX-NEXT: sw a2, -3(a0)
; RV32ZDINX-NEXT: sw a3, 1(a0)
; RV32ZDINX-NEXT: ret
@@ -49,9 +49,9 @@ define void @foo2(ptr nocapture %p, double %d) nounwind {
; RV32ZDINXUALIGNED-LABEL: foo2:
; RV32ZDINXUALIGNED: # %bb.0: # %entry
; RV32ZDINXUALIGNED-NEXT: mv a3, a2
+; RV32ZDINXUALIGNED-NEXT: addi a0, a0, 2047
; RV32ZDINXUALIGNED-NEXT: mv a2, a1
; RV32ZDINXUALIGNED-NEXT: fadd.d a2, a2, a2
-; RV32ZDINXUALIGNED-NEXT: addi a0, a0, 2047
; RV32ZDINXUALIGNED-NEXT: sw a2, -3(a0)
; RV32ZDINXUALIGNED-NEXT: sw a3, 1(a0)
; RV32ZDINXUALIGNED-NEXT: ret
@@ -183,10 +183,10 @@ entry:
define void @foo6(ptr %p, double %d) nounwind {
; RV32ZDINX-LABEL: foo6:
; RV32ZDINX: # %bb.0: # %entry
-; RV32ZDINX-NEXT: lui a3, %hi(.LCPI5_0)
-; RV32ZDINX-NEXT: lw a4, %lo(.LCPI5_0)(a3)
-; RV32ZDINX-NEXT: lw a5, %lo(.LCPI5_0+4)(a3)
; RV32ZDINX-NEXT: mv a3, a2
+; RV32ZDINX-NEXT: lui a2, %hi(.LCPI5_0)
+; RV32ZDINX-NEXT: lw a4, %lo(.LCPI5_0)(a2)
+; RV32ZDINX-NEXT: lw a5, %lo(.LCPI5_0+4)(a2)
; RV32ZDINX-NEXT: mv a2, a1
; RV32ZDINX-NEXT: fadd.d a2, a2, a4
; RV32ZDINX-NEXT: addi a0, a0, 2047
@@ -196,10 +196,10 @@ define void @foo6(ptr %p, double %d) nounwind {
;
; RV32ZDINXUALIGNED-LABEL: foo6:
; RV32ZDINXUALIGNED: # %bb.0: # %entry
-; RV32ZDINXUALIGNED-NEXT: lui a3, %hi(.LCPI5_0)
-; RV32ZDINXUALIGNED-NEXT: lw a4, %lo(.LCPI5_0)(a3)
-; RV32ZDINXUALIGNED-NEXT: lw a5, %lo(.LCPI5_0+4)(a3)
; RV32ZDINXUALIGNED-NEXT: mv a3, a2
+; RV32ZDINXUALIGNED-NEXT: lui a2, %hi(.LCPI5_0)
+; RV32ZDINXUALIGNED-NEXT: lw a4, %lo(.LCPI5_0)(a2)
+; RV32ZDINXUALIGNED-NEXT: lw a5, %lo(.LCPI5_0+4)(a2)
; RV32ZDINXUALIGNED-NEXT: mv a2, a1
; RV32ZDINXUALIGNED-NEXT: fadd.d a2, a2, a4
; RV32ZDINXUALIGNED-NEXT: addi a0, a0, 2047