[llvm] [RISCV][RFC] Enable store clustering by default (PR #73796)
Alex Bradbury via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 10 07:28:32 PDT 2024
https://github.com/asb updated https://github.com/llvm/llvm-project/pull/73796
From 2832ed348a5d7df0ba36690412b04f27f4fc90f8 Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb at igalia.com>
Date: Thu, 10 Oct 2024 15:25:07 +0100
Subject: [PATCH] [RISCV] Enable store clustering by default
After #73789 enabled load clustering by default, do the same for store clustering.
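
As context, the change amounts to registering a second scheduler DAG mutation alongside the existing load-clustering one. The following is a minimal sketch (not the verbatim method, which in RISCVTargetMachine.cpp is wrapped in an option guard) of how RISCVPassConfig::createMachineScheduler reads with this patch applied; it only uses the existing createLoadClusterDAGMutation / createStoreClusterDAGMutation helpers declared in llvm/CodeGen/MachineScheduler.h:

  // Sketch only: in the real file this lives inside RISCVPassConfig and is
  // guarded by a scheduling option; see the diff below for the exact context.
  ScheduleDAGInstrs *
  createMachineScheduler(MachineSchedContext *C) const override {
    ScheduleDAGMILive *DAG = createGenericSchedLive(C);
    // Existing behaviour (#73789): cluster neighbouring loads.
    DAG->addMutation(createLoadClusterDAGMutation(
        DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true));
    // This patch: cluster neighbouring stores in the same way.
    DAG->addMutation(createStoreClusterDAGMutation(
        DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true));
    return DAG;
  }

Passing ReorderWhileClustering=true lets the mutation reorder the memory operations themselves when that makes clustering possible, matching what is already done for loads. The bulk of the diff below is mechanical check-line churn in existing tests caused by stores now being scheduled adjacently.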
---
llvm/lib/Target/RISCV/RISCVTargetMachine.cpp | 2 +
llvm/test/CodeGen/RISCV/GlobalISel/vararg.ll | 42 +-
llvm/test/CodeGen/RISCV/abds-neg.ll | 16 +-
llvm/test/CodeGen/RISCV/abds.ll | 148 +-
llvm/test/CodeGen/RISCV/abdu-neg.ll | 8 +-
llvm/test/CodeGen/RISCV/add-before-shl.ll | 4 +-
llvm/test/CodeGen/RISCV/alloca.ll | 16 +-
llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll | 40 +-
llvm/test/CodeGen/RISCV/callee-saved-gprs.ll | 112 +-
.../RISCV/calling-conv-ilp32-ilp32f-common.ll | 72 +-
...calling-conv-ilp32-ilp32f-ilp32d-common.ll | 176 +-
.../test/CodeGen/RISCV/calling-conv-ilp32d.ll | 4 +-
.../test/CodeGen/RISCV/calling-conv-ilp32e.ll | 512 +--
.../calling-conv-lp64-lp64f-lp64d-common.ll | 80 +-
llvm/test/CodeGen/RISCV/calling-conv-lp64e.ll | 20 +-
.../CodeGen/RISCV/calling-conv-rv32f-ilp32.ll | 9 +-
.../RISCV/calling-conv-rv32f-ilp32e.ll | 21 +-
.../RISCV/calling-conv-vector-float.ll | 12 +-
llvm/test/CodeGen/RISCV/calls.ll | 24 +-
.../test/CodeGen/RISCV/double-calling-conv.ll | 28 +-
.../CodeGen/RISCV/double-convert-strict.ll | 4 +-
llvm/test/CodeGen/RISCV/double-convert.ll | 4 +-
llvm/test/CodeGen/RISCV/fastcc-bf16.ll | 18 +-
llvm/test/CodeGen/RISCV/fastcc-float.ll | 18 +-
llvm/test/CodeGen/RISCV/fastcc-half.ll | 18 +-
llvm/test/CodeGen/RISCV/fastcc-int.ll | 12 +-
.../CodeGen/RISCV/fastcc-without-f-reg.ll | 808 ++--
.../test/CodeGen/RISCV/fold-addi-loadstore.ll | 4 +-
llvm/test/CodeGen/RISCV/forced-atomics.ll | 58 +-
llvm/test/CodeGen/RISCV/fp128.ll | 68 +-
llvm/test/CodeGen/RISCV/frame.ll | 12 +-
.../CodeGen/RISCV/get-setcc-result-type.ll | 6 +-
.../CodeGen/RISCV/hoist-global-addr-base.ll | 4 +-
.../RISCV/intrinsic-cttz-elts-vscale.ll | 2 +-
llvm/test/CodeGen/RISCV/legalize-fneg.ll | 12 +-
llvm/test/CodeGen/RISCV/llvm.exp10.ll | 54 +-
llvm/test/CodeGen/RISCV/llvm.frexp.ll | 468 +--
llvm/test/CodeGen/RISCV/memcpy.ll | 174 +-
llvm/test/CodeGen/RISCV/memset-inline.ll | 1034 ++---
llvm/test/CodeGen/RISCV/neg-abs.ll | 10 +-
llvm/test/CodeGen/RISCV/nontemporal.ll | 2960 +++++++-------
.../test/CodeGen/RISCV/overflow-intrinsics.ll | 4 +-
llvm/test/CodeGen/RISCV/pr63816.ll | 12 +-
llvm/test/CodeGen/RISCV/push-pop-popret.ll | 54 +-
.../CodeGen/RISCV/riscv-codegenprepare-asm.ll | 2 +-
.../CodeGen/RISCV/riscv-shifted-extend.ll | 8 +-
llvm/test/CodeGen/RISCV/rv32zbb.ll | 8 +-
llvm/test/CodeGen/RISCV/rvv/binop-splats.ll | 12 +-
.../CodeGen/RISCV/rvv/bitreverse-sdnode.ll | 8 +-
llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll | 20 +-
llvm/test/CodeGen/RISCV/rvv/bswap-sdnode.ll | 8 +-
llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll | 22 +-
llvm/test/CodeGen/RISCV/rvv/combine-store.ll | 8 +-
.../RISCV/rvv/fixed-vectors-binop-splats.ll | 8 +-
.../RISCV/rvv/fixed-vectors-bitreverse-vp.ll | 44 +-
.../RISCV/rvv/fixed-vectors-bitreverse.ll | 4 +-
.../RISCV/rvv/fixed-vectors-bswap-vp.ll | 20 +-
.../CodeGen/RISCV/rvv/fixed-vectors-bswap.ll | 4 +-
.../rvv/fixed-vectors-calling-conv-fastcc.ll | 6 +-
.../RISCV/rvv/fixed-vectors-ctlz-vp.ll | 96 +-
.../RISCV/rvv/fixed-vectors-ctpop-vp.ll | 139 +-
.../RISCV/rvv/fixed-vectors-cttz-vp.ll | 96 +-
.../CodeGen/RISCV/rvv/fixed-vectors-elen.ll | 8 +-
.../RISCV/rvv/fixed-vectors-expandload-int.ll | 8 +-
.../RISCV/rvv/fixed-vectors-fp-buildvec.ll | 352 +-
.../CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll | 48 +-
.../CodeGen/RISCV/rvv/fixed-vectors-insert.ll | 12 +-
.../RISCV/rvv/fixed-vectors-int-buildvec.ll | 66 +-
.../RISCV/rvv/fixed-vectors-int-splat.ll | 6 +-
.../CodeGen/RISCV/rvv/fixed-vectors-int.ll | 4 +-
.../CodeGen/RISCV/rvv/fixed-vectors-llrint.ll | 140 +-
.../CodeGen/RISCV/rvv/fixed-vectors-lrint.ll | 128 +-
.../RISCV/rvv/fixed-vectors-masked-gather.ll | 520 +--
.../RISCV/rvv/fixed-vectors-masked-scatter.ll | 132 +-
.../rvv/fixed-vectors-reduction-int-vp.ll | 32 +-
.../RISCV/rvv/fixed-vectors-setcc-int-vp.ll | 36 +-
.../rvv/fixed-vectors-shuffle-vslide1down.ll | 4 +-
.../rvv/fixed-vectors-shuffle-vslide1up.ll | 4 +-
.../rvv/fixed-vectors-shufflevector-vnsrl.ll | 32 +-
.../fixed-vectors-strided-load-store-asm.ll | 12 +-
.../RISCV/rvv/fixed-vectors-unaligned.ll | 48 +-
.../CodeGen/RISCV/rvv/fixed-vectors-vaaddu.ll | 4 +-
.../RISCV/rvv/fixed-vectors-vadd-vp.ll | 16 +-
.../RISCV/rvv/fixed-vectors-vand-vp.ll | 20 +-
.../RISCV/rvv/fixed-vectors-vdiv-vp.ll | 16 +-
.../RISCV/rvv/fixed-vectors-vdivu-vp.ll | 16 +-
.../RISCV/rvv/fixed-vectors-vmacc-vp.ll | 18 +-
.../RISCV/rvv/fixed-vectors-vmax-vp.ll | 16 +-
.../RISCV/rvv/fixed-vectors-vmaxu-vp.ll | 16 +-
.../RISCV/rvv/fixed-vectors-vmin-vp.ll | 16 +-
.../RISCV/rvv/fixed-vectors-vminu-vp.ll | 16 +-
.../RISCV/rvv/fixed-vectors-vmul-vp.ll | 16 +-
.../RISCV/rvv/fixed-vectors-vnmsac-vp.ll | 18 +-
.../CodeGen/RISCV/rvv/fixed-vectors-vor-vp.ll | 16 +-
.../RISCV/rvv/fixed-vectors-vp-splat.ll | 8 +-
.../RISCV/rvv/fixed-vectors-vpmerge.ll | 16 +-
.../RISCV/rvv/fixed-vectors-vrem-vp.ll | 16 +-
.../RISCV/rvv/fixed-vectors-vremu-vp.ll | 16 +-
.../RISCV/rvv/fixed-vectors-vrsub-vp.ll | 16 +-
.../RISCV/rvv/fixed-vectors-vsadd-vp.ll | 16 +-
.../CodeGen/RISCV/rvv/fixed-vectors-vsadd.ll | 8 +-
.../RISCV/rvv/fixed-vectors-vsaddu-vp.ll | 16 +-
.../CodeGen/RISCV/rvv/fixed-vectors-vsaddu.ll | 8 +-
.../RISCV/rvv/fixed-vectors-vssub-vp.ll | 16 +-
.../CodeGen/RISCV/rvv/fixed-vectors-vssub.ll | 8 +-
.../RISCV/rvv/fixed-vectors-vssubu-vp.ll | 16 +-
.../CodeGen/RISCV/rvv/fixed-vectors-vssubu.ll | 8 +-
.../RISCV/rvv/fixed-vectors-vsub-vp.ll | 16 +-
.../CodeGen/RISCV/rvv/fixed-vectors-vwadd.ll | 8 +-
.../CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll | 14 +-
.../CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll | 8 +-
.../RISCV/rvv/fixed-vectors-vwmulsu.ll | 6 +-
.../CodeGen/RISCV/rvv/fixed-vectors-vwsub.ll | 8 +-
.../CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll | 14 +-
.../RISCV/rvv/fixed-vectors-vxor-vp.ll | 16 +-
.../CodeGen/RISCV/rvv/fpclamptosat_vec.ll | 360 +-
llvm/test/CodeGen/RISCV/rvv/masked-tama.ll | 4 +-
llvm/test/CodeGen/RISCV/rvv/memset-inline.ll | 88 +-
.../CodeGen/RISCV/rvv/no-reserved-frame.ll | 8 +-
llvm/test/CodeGen/RISCV/rvv/pr104480.ll | 4 +-
.../test/CodeGen/RISCV/rvv/rvv-args-by-mem.ll | 2 +-
llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll | 72 +-
llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll | 40 +-
.../rvv/splat-vector-split-i64-vl-sdnode.ll | 2 +-
llvm/test/CodeGen/RISCV/rvv/stepvector.ll | 34 +-
llvm/test/CodeGen/RISCV/rvv/unmasked-ta.ll | 10 +-
llvm/test/CodeGen/RISCV/rvv/unmasked-tu.ll | 18 +-
llvm/test/CodeGen/RISCV/rvv/vaadd.ll | 16 +-
llvm/test/CodeGen/RISCV/rvv/vaaddu-sdnode.ll | 4 +-
llvm/test/CodeGen/RISCV/rvv/vaaddu.ll | 16 +-
llvm/test/CodeGen/RISCV/rvv/vadc.ll | 8 +-
llvm/test/CodeGen/RISCV/rvv/vadd-sdnode.ll | 10 +-
llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll | 16 +-
llvm/test/CodeGen/RISCV/rvv/vadd.ll | 16 +-
llvm/test/CodeGen/RISCV/rvv/vand-sdnode.ll | 10 +-
llvm/test/CodeGen/RISCV/rvv/vand-vp.ll | 16 +-
llvm/test/CodeGen/RISCV/rvv/vand.ll | 16 +-
llvm/test/CodeGen/RISCV/rvv/vandn-sdnode.ll | 32 +-
llvm/test/CodeGen/RISCV/rvv/vandn-vp.ll | 16 +-
llvm/test/CodeGen/RISCV/rvv/vandn.ll | 16 +-
llvm/test/CodeGen/RISCV/rvv/vasub.ll | 16 +-
llvm/test/CodeGen/RISCV/rvv/vasubu.ll | 16 +-
llvm/test/CodeGen/RISCV/rvv/vclmul.ll | 16 +-
llvm/test/CodeGen/RISCV/rvv/vclmulh.ll | 16 +-
llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll | 32 +-
llvm/test/CodeGen/RISCV/rvv/vdiv-vp.ll | 16 +-
llvm/test/CodeGen/RISCV/rvv/vdiv.ll | 16 +-
llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode.ll | 24 +-
llvm/test/CodeGen/RISCV/rvv/vdivu-vp.ll | 16 +-
llvm/test/CodeGen/RISCV/rvv/vdivu.ll | 16 +-
.../CodeGen/RISCV/rvv/vec3-setcc-crash.ll | 4 +-
llvm/test/CodeGen/RISCV/rvv/vmacc-vp.ll | 24 +-
llvm/test/CodeGen/RISCV/rvv/vmacc.ll | 12 +-
llvm/test/CodeGen/RISCV/rvv/vmadc.carry.in.ll | 8 +-
llvm/test/CodeGen/RISCV/rvv/vmadc.ll | 8 +-
llvm/test/CodeGen/RISCV/rvv/vmadd-sdnode.ll | 8 +-
llvm/test/CodeGen/RISCV/rvv/vmadd-vp.ll | 24 +-
llvm/test/CodeGen/RISCV/rvv/vmadd.ll | 12 +-
llvm/test/CodeGen/RISCV/rvv/vmax-sdnode.ll | 8 +-
llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll | 16 +-
llvm/test/CodeGen/RISCV/rvv/vmax.ll | 16 +-
llvm/test/CodeGen/RISCV/rvv/vmaxu-sdnode.ll | 8 +-
llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll | 16 +-
llvm/test/CodeGen/RISCV/rvv/vmaxu.ll | 16 +-
llvm/test/CodeGen/RISCV/rvv/vmin-sdnode.ll | 8 +-
llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll | 16 +-
llvm/test/CodeGen/RISCV/rvv/vmin.ll | 16 +-
llvm/test/CodeGen/RISCV/rvv/vminu-sdnode.ll | 8 +-
llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll | 16 +-
llvm/test/CodeGen/RISCV/rvv/vminu.ll | 16 +-
.../test/CodeGen/RISCV/rvv/vmsbc.borrow.in.ll | 8 +-
llvm/test/CodeGen/RISCV/rvv/vmsbc.ll | 8 +-
llvm/test/CodeGen/RISCV/rvv/vmseq.ll | 12 +-
llvm/test/CodeGen/RISCV/rvv/vmsge.ll | 18 +-
llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll | 18 +-
llvm/test/CodeGen/RISCV/rvv/vmsgt.ll | 12 +-
llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll | 12 +-
llvm/test/CodeGen/RISCV/rvv/vmsle.ll | 12 +-
llvm/test/CodeGen/RISCV/rvv/vmsleu.ll | 12 +-
llvm/test/CodeGen/RISCV/rvv/vmslt.ll | 12 +-
llvm/test/CodeGen/RISCV/rvv/vmsltu.ll | 12 +-
llvm/test/CodeGen/RISCV/rvv/vmsne.ll | 12 +-
llvm/test/CodeGen/RISCV/rvv/vmul-sdnode.ll | 14 +-
llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll | 16 +-
llvm/test/CodeGen/RISCV/rvv/vmul.ll | 16 +-
llvm/test/CodeGen/RISCV/rvv/vmulh.ll | 16 +-
llvm/test/CodeGen/RISCV/rvv/vmulhsu.ll | 16 +-
llvm/test/CodeGen/RISCV/rvv/vmulhu.ll | 16 +-
llvm/test/CodeGen/RISCV/rvv/vmv.s.x.ll | 16 +-
llvm/test/CodeGen/RISCV/rvv/vmv.v.x.ll | 8 +-
llvm/test/CodeGen/RISCV/rvv/vnmsac-vp.ll | 24 +-
llvm/test/CodeGen/RISCV/rvv/vnmsac.ll | 12 +-
llvm/test/CodeGen/RISCV/rvv/vnmsub-sdnode.ll | 8 +-
llvm/test/CodeGen/RISCV/rvv/vnmsub.ll | 12 +-
llvm/test/CodeGen/RISCV/rvv/vor-sdnode.ll | 10 +-
llvm/test/CodeGen/RISCV/rvv/vor-vp.ll | 16 +-
llvm/test/CodeGen/RISCV/rvv/vor.ll | 16 +-
llvm/test/CodeGen/RISCV/rvv/vp-splat.ll | 8 +-
llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll | 8 +-
.../CodeGen/RISCV/rvv/vreductions-int-vp.ll | 60 +-
llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll | 32 +-
llvm/test/CodeGen/RISCV/rvv/vrem-vp.ll | 16 +-
llvm/test/CodeGen/RISCV/rvv/vrem.ll | 16 +-
llvm/test/CodeGen/RISCV/rvv/vremu-sdnode.ll | 24 +-
llvm/test/CodeGen/RISCV/rvv/vremu-vp.ll | 16 +-
llvm/test/CodeGen/RISCV/rvv/vremu.ll | 16 +-
llvm/test/CodeGen/RISCV/rvv/vrsub-sdnode.ll | 8 +-
llvm/test/CodeGen/RISCV/rvv/vrsub-vp.ll | 16 +-
llvm/test/CodeGen/RISCV/rvv/vrsub.ll | 16 +-
llvm/test/CodeGen/RISCV/rvv/vsadd-sdnode.ll | 8 +-
llvm/test/CodeGen/RISCV/rvv/vsadd-vp.ll | 16 +-
llvm/test/CodeGen/RISCV/rvv/vsadd.ll | 16 +-
llvm/test/CodeGen/RISCV/rvv/vsaddu-sdnode.ll | 8 +-
llvm/test/CodeGen/RISCV/rvv/vsaddu-vp.ll | 16 +-
llvm/test/CodeGen/RISCV/rvv/vsaddu.ll | 16 +-
llvm/test/CodeGen/RISCV/rvv/vsbc.ll | 8 +-
llvm/test/CodeGen/RISCV/rvv/vselect-int.ll | 8 +-
llvm/test/CodeGen/RISCV/rvv/vsmul.ll | 16 +-
llvm/test/CodeGen/RISCV/rvv/vsplats-i64.ll | 16 +-
llvm/test/CodeGen/RISCV/rvv/vssub-sdnode.ll | 8 +-
llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll | 16 +-
llvm/test/CodeGen/RISCV/rvv/vssub.ll | 16 +-
llvm/test/CodeGen/RISCV/rvv/vssubu-sdnode.ll | 8 +-
llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll | 16 +-
llvm/test/CodeGen/RISCV/rvv/vssubu.ll | 16 +-
llvm/test/CodeGen/RISCV/rvv/vsub-sdnode.ll | 10 +-
llvm/test/CodeGen/RISCV/rvv/vsub-vp.ll | 16 +-
llvm/test/CodeGen/RISCV/rvv/vsub.ll | 16 +-
llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll | 4 +-
llvm/test/CodeGen/RISCV/rvv/vxor-sdnode.ll | 10 +-
llvm/test/CodeGen/RISCV/rvv/vxor-vp.ll | 16 +-
llvm/test/CodeGen/RISCV/rvv/vxor.ll | 16 +-
.../CodeGen/RISCV/select-optimize-multiple.ll | 14 +-
llvm/test/CodeGen/RISCV/shifts.ll | 72 +-
llvm/test/CodeGen/RISCV/split-offsets.ll | 12 +-
llvm/test/CodeGen/RISCV/split-store.ll | 4 +-
.../CodeGen/RISCV/srem-seteq-illegal-types.ll | 96 +-
llvm/test/CodeGen/RISCV/srem-vector-lkk.ll | 230 +-
llvm/test/CodeGen/RISCV/stack-slot-size.ll | 32 +-
llvm/test/CodeGen/RISCV/stack-store-check.ll | 484 +--
llvm/test/CodeGen/RISCV/tail-calls.ll | 32 +-
.../CodeGen/RISCV/unaligned-load-store.ll | 140 +-
.../CodeGen/RISCV/urem-seteq-illegal-types.ll | 36 +-
llvm/test/CodeGen/RISCV/urem-vector-lkk.ll | 216 +-
llvm/test/CodeGen/RISCV/vararg-ilp32e.ll | 12 +-
llvm/test/CodeGen/RISCV/vararg.ll | 1052 ++---
...lar-shift-by-byte-multiple-legalization.ll | 3522 +++++++++--------
.../RISCV/wide-scalar-shift-legalization.ll | 1296 +++---
llvm/test/CodeGen/RISCV/xtheadmempair.ll | 4 +-
249 files changed, 9573 insertions(+), 9586 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index 2dcac1320417c2..0d05bd06eba2e4 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -351,6 +351,8 @@ class RISCVPassConfig : public TargetPassConfig {
DAG = createGenericSchedLive(C);
DAG->addMutation(createLoadClusterDAGMutation(
DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true));
+ DAG->addMutation(createStoreClusterDAGMutation(
+ DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true));
}
return DAG;
}
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/vararg.ll b/llvm/test/CodeGen/RISCV/GlobalISel/vararg.ll
index 3df4aca40ec942..6c0e322a252c72 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/vararg.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/vararg.ll
@@ -69,12 +69,12 @@ define i32 @va1(ptr %fmt, ...) {
; RV64-NEXT: sd a2, 32(sp)
; RV64-NEXT: sd a3, 40(sp)
; RV64-NEXT: sd a4, 48(sp)
-; RV64-NEXT: sd a5, 56(sp)
; RV64-NEXT: addi a0, sp, 8
; RV64-NEXT: addi a1, sp, 24
; RV64-NEXT: sd a1, 8(sp)
; RV64-NEXT: lw a0, 4(a0)
; RV64-NEXT: lwu a1, 8(sp)
+; RV64-NEXT: sd a5, 56(sp)
; RV64-NEXT: sd a6, 64(sp)
; RV64-NEXT: sd a7, 72(sp)
; RV64-NEXT: slli a0, a0, 32
@@ -129,12 +129,12 @@ define i32 @va1(ptr %fmt, ...) {
; RV64-WITHFP-NEXT: sd a2, 16(s0)
; RV64-WITHFP-NEXT: sd a3, 24(s0)
; RV64-WITHFP-NEXT: sd a4, 32(s0)
-; RV64-WITHFP-NEXT: sd a5, 40(s0)
; RV64-WITHFP-NEXT: addi a0, s0, -24
; RV64-WITHFP-NEXT: addi a1, s0, 8
; RV64-WITHFP-NEXT: sd a1, -24(s0)
; RV64-WITHFP-NEXT: lw a0, 4(a0)
; RV64-WITHFP-NEXT: lwu a1, -24(s0)
+; RV64-WITHFP-NEXT: sd a5, 40(s0)
; RV64-WITHFP-NEXT: sd a6, 48(s0)
; RV64-WITHFP-NEXT: sd a7, 56(s0)
; RV64-WITHFP-NEXT: slli a0, a0, 32
@@ -844,11 +844,11 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind {
; ILP32-LABEL: va3:
; ILP32: # %bb.0:
; ILP32-NEXT: addi sp, sp, -32
-; ILP32-NEXT: sw a3, 12(sp)
-; ILP32-NEXT: sw a4, 16(sp)
; ILP32-NEXT: addi a0, sp, 12
; ILP32-NEXT: sw a0, 4(sp)
; ILP32-NEXT: lw a0, 4(sp)
+; ILP32-NEXT: sw a3, 12(sp)
+; ILP32-NEXT: sw a4, 16(sp)
; ILP32-NEXT: sw a5, 20(sp)
; ILP32-NEXT: sw a6, 24(sp)
; ILP32-NEXT: sw a7, 28(sp)
@@ -868,11 +868,11 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind {
; RV32D-ILP32-LABEL: va3:
; RV32D-ILP32: # %bb.0:
; RV32D-ILP32-NEXT: addi sp, sp, -48
-; RV32D-ILP32-NEXT: sw a3, 28(sp)
-; RV32D-ILP32-NEXT: sw a4, 32(sp)
; RV32D-ILP32-NEXT: addi a0, sp, 28
; RV32D-ILP32-NEXT: sw a0, 20(sp)
; RV32D-ILP32-NEXT: lw a0, 20(sp)
+; RV32D-ILP32-NEXT: sw a3, 28(sp)
+; RV32D-ILP32-NEXT: sw a4, 32(sp)
; RV32D-ILP32-NEXT: sw a5, 36(sp)
; RV32D-ILP32-NEXT: sw a6, 40(sp)
; RV32D-ILP32-NEXT: sw a7, 44(sp)
@@ -894,11 +894,11 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind {
; RV32D-ILP32F-LABEL: va3:
; RV32D-ILP32F: # %bb.0:
; RV32D-ILP32F-NEXT: addi sp, sp, -48
-; RV32D-ILP32F-NEXT: sw a3, 28(sp)
-; RV32D-ILP32F-NEXT: sw a4, 32(sp)
; RV32D-ILP32F-NEXT: addi a0, sp, 28
; RV32D-ILP32F-NEXT: sw a0, 20(sp)
; RV32D-ILP32F-NEXT: lw a0, 20(sp)
+; RV32D-ILP32F-NEXT: sw a3, 28(sp)
+; RV32D-ILP32F-NEXT: sw a4, 32(sp)
; RV32D-ILP32F-NEXT: sw a5, 36(sp)
; RV32D-ILP32F-NEXT: sw a6, 40(sp)
; RV32D-ILP32F-NEXT: sw a7, 44(sp)
@@ -920,11 +920,11 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind {
; RV32D-ILP32D-LABEL: va3:
; RV32D-ILP32D: # %bb.0:
; RV32D-ILP32D-NEXT: addi sp, sp, -48
-; RV32D-ILP32D-NEXT: sw a3, 28(sp)
-; RV32D-ILP32D-NEXT: sw a4, 32(sp)
; RV32D-ILP32D-NEXT: addi a0, sp, 28
; RV32D-ILP32D-NEXT: sw a0, 20(sp)
; RV32D-ILP32D-NEXT: lw a0, 20(sp)
+; RV32D-ILP32D-NEXT: sw a3, 28(sp)
+; RV32D-ILP32D-NEXT: sw a4, 32(sp)
; RV32D-ILP32D-NEXT: sw a5, 36(sp)
; RV32D-ILP32D-NEXT: sw a6, 40(sp)
; RV32D-ILP32D-NEXT: sw a7, 44(sp)
@@ -946,12 +946,12 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind {
; RV64-LABEL: va3:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -64
-; RV64-NEXT: sd a2, 16(sp)
-; RV64-NEXT: sd a3, 24(sp)
-; RV64-NEXT: sd a4, 32(sp)
; RV64-NEXT: addi a0, sp, 16
; RV64-NEXT: sd a0, 8(sp)
; RV64-NEXT: ld a0, 8(sp)
+; RV64-NEXT: sd a2, 16(sp)
+; RV64-NEXT: sd a3, 24(sp)
+; RV64-NEXT: sd a4, 32(sp)
; RV64-NEXT: sd a5, 40(sp)
; RV64-NEXT: sd a6, 48(sp)
; RV64-NEXT: sd a7, 56(sp)
@@ -970,11 +970,11 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind {
; RV32-WITHFP-NEXT: sw ra, 20(sp) # 4-byte Folded Spill
; RV32-WITHFP-NEXT: sw s0, 16(sp) # 4-byte Folded Spill
; RV32-WITHFP-NEXT: addi s0, sp, 24
-; RV32-WITHFP-NEXT: sw a3, 4(s0)
-; RV32-WITHFP-NEXT: sw a4, 8(s0)
; RV32-WITHFP-NEXT: addi a0, s0, 4
; RV32-WITHFP-NEXT: sw a0, -12(s0)
; RV32-WITHFP-NEXT: lw a0, -12(s0)
+; RV32-WITHFP-NEXT: sw a3, 4(s0)
+; RV32-WITHFP-NEXT: sw a4, 8(s0)
; RV32-WITHFP-NEXT: sw a5, 12(s0)
; RV32-WITHFP-NEXT: sw a6, 16(s0)
; RV32-WITHFP-NEXT: sw a7, 20(s0)
@@ -999,12 +999,12 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind {
; RV64-WITHFP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; RV64-WITHFP-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; RV64-WITHFP-NEXT: addi s0, sp, 32
-; RV64-WITHFP-NEXT: sd a2, 0(s0)
-; RV64-WITHFP-NEXT: sd a3, 8(s0)
-; RV64-WITHFP-NEXT: sd a4, 16(s0)
; RV64-WITHFP-NEXT: mv a0, s0
; RV64-WITHFP-NEXT: sd a0, -24(s0)
; RV64-WITHFP-NEXT: ld a0, -24(s0)
+; RV64-WITHFP-NEXT: sd a2, 0(s0)
+; RV64-WITHFP-NEXT: sd a3, 8(s0)
+; RV64-WITHFP-NEXT: sd a4, 16(s0)
; RV64-WITHFP-NEXT: sd a5, 24(s0)
; RV64-WITHFP-NEXT: sd a6, 32(s0)
; RV64-WITHFP-NEXT: sd a7, 40(s0)
@@ -1622,9 +1622,6 @@ define i32 @va_large_stack(ptr %fmt, ...) {
; RV64-NEXT: lui a0, 24414
; RV64-NEXT: add a0, sp, a0
; RV64-NEXT: sd a4, 304(a0)
-; RV64-NEXT: lui a0, 24414
-; RV64-NEXT: add a0, sp, a0
-; RV64-NEXT: sd a5, 312(a0)
; RV64-NEXT: addi a0, sp, 8
; RV64-NEXT: lui a1, 24414
; RV64-NEXT: addiw a1, a1, 280
@@ -1634,6 +1631,9 @@ define i32 @va_large_stack(ptr %fmt, ...) {
; RV64-NEXT: lwu a1, 8(sp)
; RV64-NEXT: lui a2, 24414
; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: sd a5, 312(a2)
+; RV64-NEXT: lui a2, 24414
+; RV64-NEXT: add a2, sp, a2
; RV64-NEXT: sd a6, 320(a2)
; RV64-NEXT: lui a2, 24414
; RV64-NEXT: add a2, sp, a2
diff --git a/llvm/test/CodeGen/RISCV/abds-neg.ll b/llvm/test/CodeGen/RISCV/abds-neg.ll
index b6064198935a61..e7fd87bd783876 100644
--- a/llvm/test/CodeGen/RISCV/abds-neg.ll
+++ b/llvm/test/CodeGen/RISCV/abds-neg.ll
@@ -705,8 +705,8 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: sub a4, a4, a3
; RV32I-NEXT: neg a1, a1
; RV32I-NEXT: sw a1, 0(a0)
-; RV32I-NEXT: sw a4, 8(a0)
; RV32I-NEXT: sw a2, 4(a0)
+; RV32I-NEXT: sw a4, 8(a0)
; RV32I-NEXT: sw a5, 12(a0)
; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
@@ -824,8 +824,8 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
; RV32ZBB-NEXT: sub a4, a4, a3
; RV32ZBB-NEXT: neg a1, a1
; RV32ZBB-NEXT: sw a1, 0(a0)
-; RV32ZBB-NEXT: sw a4, 8(a0)
; RV32ZBB-NEXT: sw a2, 4(a0)
+; RV32ZBB-NEXT: sw a4, 8(a0)
; RV32ZBB-NEXT: sw a5, 12(a0)
; RV32ZBB-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZBB-NEXT: addi sp, sp, 16
@@ -952,8 +952,8 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: sub a4, a4, a3
; RV32I-NEXT: neg a1, a1
; RV32I-NEXT: sw a1, 0(a0)
-; RV32I-NEXT: sw a4, 8(a0)
; RV32I-NEXT: sw a2, 4(a0)
+; RV32I-NEXT: sw a4, 8(a0)
; RV32I-NEXT: sw a5, 12(a0)
; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
@@ -1071,8 +1071,8 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
; RV32ZBB-NEXT: sub a4, a4, a3
; RV32ZBB-NEXT: neg a1, a1
; RV32ZBB-NEXT: sw a1, 0(a0)
-; RV32ZBB-NEXT: sw a4, 8(a0)
; RV32ZBB-NEXT: sw a2, 4(a0)
+; RV32ZBB-NEXT: sw a4, 8(a0)
; RV32ZBB-NEXT: sw a5, 12(a0)
; RV32ZBB-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZBB-NEXT: addi sp, sp, 16
@@ -1918,9 +1918,9 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: sub a1, a1, t2
; RV32I-NEXT: sub a2, a2, a3
; RV32I-NEXT: .LBB22_11:
-; RV32I-NEXT: sw a6, 8(a0)
-; RV32I-NEXT: sw a1, 4(a0)
; RV32I-NEXT: sw a2, 0(a0)
+; RV32I-NEXT: sw a1, 4(a0)
+; RV32I-NEXT: sw a6, 8(a0)
; RV32I-NEXT: sw a5, 12(a0)
; RV32I-NEXT: ret
;
@@ -2005,9 +2005,9 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
; RV32ZBB-NEXT: sub a1, a1, t2
; RV32ZBB-NEXT: sub a2, a2, a3
; RV32ZBB-NEXT: .LBB22_11:
-; RV32ZBB-NEXT: sw a6, 8(a0)
-; RV32ZBB-NEXT: sw a1, 4(a0)
; RV32ZBB-NEXT: sw a2, 0(a0)
+; RV32ZBB-NEXT: sw a1, 4(a0)
+; RV32ZBB-NEXT: sw a6, 8(a0)
; RV32ZBB-NEXT: sw a5, 12(a0)
; RV32ZBB-NEXT: ret
;
diff --git a/llvm/test/CodeGen/RISCV/abds.ll b/llvm/test/CodeGen/RISCV/abds.ll
index 91b044902a5201..e639d4b30d4c94 100644
--- a/llvm/test/CodeGen/RISCV/abds.ll
+++ b/llvm/test/CodeGen/RISCV/abds.ll
@@ -599,9 +599,9 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: sub a1, a4, t5
; RV32I-NEXT: sub a4, a6, t6
; RV32I-NEXT: .LBB11_13:
-; RV32I-NEXT: sw a4, 8(a0)
-; RV32I-NEXT: sw a1, 4(a0)
; RV32I-NEXT: sw a3, 0(a0)
+; RV32I-NEXT: sw a1, 4(a0)
+; RV32I-NEXT: sw a4, 8(a0)
; RV32I-NEXT: sw a2, 12(a0)
; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
@@ -695,9 +695,9 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
; RV32ZBB-NEXT: sub a1, a4, t5
; RV32ZBB-NEXT: sub a4, a6, t6
; RV32ZBB-NEXT: .LBB11_13:
-; RV32ZBB-NEXT: sw a4, 8(a0)
-; RV32ZBB-NEXT: sw a1, 4(a0)
; RV32ZBB-NEXT: sw a3, 0(a0)
+; RV32ZBB-NEXT: sw a1, 4(a0)
+; RV32ZBB-NEXT: sw a4, 8(a0)
; RV32ZBB-NEXT: sw a2, 12(a0)
; RV32ZBB-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZBB-NEXT: addi sp, sp, 16
@@ -799,9 +799,9 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: sub a1, a4, t5
; RV32I-NEXT: sub a4, a6, t6
; RV32I-NEXT: .LBB12_13:
-; RV32I-NEXT: sw a4, 8(a0)
-; RV32I-NEXT: sw a1, 4(a0)
; RV32I-NEXT: sw a3, 0(a0)
+; RV32I-NEXT: sw a1, 4(a0)
+; RV32I-NEXT: sw a4, 8(a0)
; RV32I-NEXT: sw a2, 12(a0)
; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
@@ -895,9 +895,9 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
; RV32ZBB-NEXT: sub a1, a4, t5
; RV32ZBB-NEXT: sub a4, a6, t6
; RV32ZBB-NEXT: .LBB12_13:
-; RV32ZBB-NEXT: sw a4, 8(a0)
-; RV32ZBB-NEXT: sw a1, 4(a0)
; RV32ZBB-NEXT: sw a3, 0(a0)
+; RV32ZBB-NEXT: sw a1, 4(a0)
+; RV32ZBB-NEXT: sw a4, 8(a0)
; RV32ZBB-NEXT: sw a2, 12(a0)
; RV32ZBB-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZBB-NEXT: addi sp, sp, 16
@@ -1188,9 +1188,9 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: sub a1, a4, t5
; RV32I-NEXT: sub a4, a6, t6
; RV32I-NEXT: .LBB17_13:
-; RV32I-NEXT: sw a4, 8(a0)
-; RV32I-NEXT: sw a1, 4(a0)
; RV32I-NEXT: sw a3, 0(a0)
+; RV32I-NEXT: sw a1, 4(a0)
+; RV32I-NEXT: sw a4, 8(a0)
; RV32I-NEXT: sw a2, 12(a0)
; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
@@ -1284,9 +1284,9 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind {
; RV32ZBB-NEXT: sub a1, a4, t5
; RV32ZBB-NEXT: sub a4, a6, t6
; RV32ZBB-NEXT: .LBB17_13:
-; RV32ZBB-NEXT: sw a4, 8(a0)
-; RV32ZBB-NEXT: sw a1, 4(a0)
; RV32ZBB-NEXT: sw a3, 0(a0)
+; RV32ZBB-NEXT: sw a1, 4(a0)
+; RV32ZBB-NEXT: sw a4, 8(a0)
; RV32ZBB-NEXT: sw a2, 12(a0)
; RV32ZBB-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZBB-NEXT: addi sp, sp, 16
@@ -1579,9 +1579,9 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: sub a1, a4, t5
; RV32I-NEXT: sub a4, a6, t6
; RV32I-NEXT: .LBB22_13:
-; RV32I-NEXT: sw a4, 8(a0)
-; RV32I-NEXT: sw a1, 4(a0)
; RV32I-NEXT: sw a3, 0(a0)
+; RV32I-NEXT: sw a1, 4(a0)
+; RV32I-NEXT: sw a4, 8(a0)
; RV32I-NEXT: sw a2, 12(a0)
; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
@@ -1675,9 +1675,9 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
; RV32ZBB-NEXT: sub a1, a4, t5
; RV32ZBB-NEXT: sub a4, a6, t6
; RV32ZBB-NEXT: .LBB22_13:
-; RV32ZBB-NEXT: sw a4, 8(a0)
-; RV32ZBB-NEXT: sw a1, 4(a0)
; RV32ZBB-NEXT: sw a3, 0(a0)
+; RV32ZBB-NEXT: sw a1, 4(a0)
+; RV32ZBB-NEXT: sw a4, 8(a0)
; RV32ZBB-NEXT: sw a2, 12(a0)
; RV32ZBB-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZBB-NEXT: addi sp, sp, 16
@@ -2043,8 +2043,8 @@ define i64 @abd_subnsw_i64_undef(i64 %a, i64 %b) nounwind {
define i128 @abd_subnsw_i128(i128 %a, i128 %b) nounwind {
; RV32I-LABEL: abd_subnsw_i128:
; RV32I: # %bb.0:
-; RV32I-NEXT: lw a3, 0(a2)
-; RV32I-NEXT: lw a4, 4(a2)
+; RV32I-NEXT: lw a4, 0(a2)
+; RV32I-NEXT: lw a3, 4(a2)
; RV32I-NEXT: lw a6, 8(a2)
; RV32I-NEXT: lw t0, 12(a2)
; RV32I-NEXT: lw a2, 8(a1)
@@ -2053,24 +2053,24 @@ define i128 @abd_subnsw_i128(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: lw a7, 4(a1)
; RV32I-NEXT: sltu a1, a2, a6
; RV32I-NEXT: sub t1, t1, t0
-; RV32I-NEXT: sltu t0, a5, a3
+; RV32I-NEXT: sltu t0, a5, a4
; RV32I-NEXT: sub a1, t1, a1
; RV32I-NEXT: mv t1, t0
-; RV32I-NEXT: beq a7, a4, .LBB31_2
+; RV32I-NEXT: beq a7, a3, .LBB31_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: sltu t1, a7, a4
+; RV32I-NEXT: sltu t1, a7, a3
; RV32I-NEXT: .LBB31_2:
; RV32I-NEXT: sub a2, a2, a6
; RV32I-NEXT: sltu a6, a2, t1
; RV32I-NEXT: sub a1, a1, a6
; RV32I-NEXT: sub a2, a2, t1
-; RV32I-NEXT: sub a4, a7, a4
-; RV32I-NEXT: sub a4, a4, t0
-; RV32I-NEXT: sub a3, a5, a3
+; RV32I-NEXT: sub a3, a7, a3
+; RV32I-NEXT: sub a3, a3, t0
+; RV32I-NEXT: sub a4, a5, a4
; RV32I-NEXT: bgez a1, .LBB31_4
; RV32I-NEXT: # %bb.3:
-; RV32I-NEXT: snez a5, a4
-; RV32I-NEXT: snez a6, a3
+; RV32I-NEXT: snez a5, a3
+; RV32I-NEXT: snez a6, a4
; RV32I-NEXT: or a5, a6, a5
; RV32I-NEXT: neg a7, a2
; RV32I-NEXT: sltu t0, a7, a5
@@ -2079,12 +2079,12 @@ define i128 @abd_subnsw_i128(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: neg a1, a1
; RV32I-NEXT: sub a1, a1, t0
; RV32I-NEXT: sub a2, a7, a5
-; RV32I-NEXT: neg a3, a3
-; RV32I-NEXT: add a4, a4, a6
; RV32I-NEXT: neg a4, a4
+; RV32I-NEXT: add a3, a3, a6
+; RV32I-NEXT: neg a3, a3
; RV32I-NEXT: .LBB31_4:
-; RV32I-NEXT: sw a4, 4(a0)
-; RV32I-NEXT: sw a3, 0(a0)
+; RV32I-NEXT: sw a4, 0(a0)
+; RV32I-NEXT: sw a3, 4(a0)
; RV32I-NEXT: sw a2, 8(a0)
; RV32I-NEXT: sw a1, 12(a0)
; RV32I-NEXT: ret
@@ -2106,8 +2106,8 @@ define i128 @abd_subnsw_i128(i128 %a, i128 %b) nounwind {
;
; RV32ZBB-LABEL: abd_subnsw_i128:
; RV32ZBB: # %bb.0:
-; RV32ZBB-NEXT: lw a3, 0(a2)
-; RV32ZBB-NEXT: lw a4, 4(a2)
+; RV32ZBB-NEXT: lw a4, 0(a2)
+; RV32ZBB-NEXT: lw a3, 4(a2)
; RV32ZBB-NEXT: lw a6, 8(a2)
; RV32ZBB-NEXT: lw t0, 12(a2)
; RV32ZBB-NEXT: lw a2, 8(a1)
@@ -2116,24 +2116,24 @@ define i128 @abd_subnsw_i128(i128 %a, i128 %b) nounwind {
; RV32ZBB-NEXT: lw a7, 4(a1)
; RV32ZBB-NEXT: sltu a1, a2, a6
; RV32ZBB-NEXT: sub t1, t1, t0
-; RV32ZBB-NEXT: sltu t0, a5, a3
+; RV32ZBB-NEXT: sltu t0, a5, a4
; RV32ZBB-NEXT: sub a1, t1, a1
; RV32ZBB-NEXT: mv t1, t0
-; RV32ZBB-NEXT: beq a7, a4, .LBB31_2
+; RV32ZBB-NEXT: beq a7, a3, .LBB31_2
; RV32ZBB-NEXT: # %bb.1:
-; RV32ZBB-NEXT: sltu t1, a7, a4
+; RV32ZBB-NEXT: sltu t1, a7, a3
; RV32ZBB-NEXT: .LBB31_2:
; RV32ZBB-NEXT: sub a2, a2, a6
; RV32ZBB-NEXT: sltu a6, a2, t1
; RV32ZBB-NEXT: sub a1, a1, a6
; RV32ZBB-NEXT: sub a2, a2, t1
-; RV32ZBB-NEXT: sub a4, a7, a4
-; RV32ZBB-NEXT: sub a4, a4, t0
-; RV32ZBB-NEXT: sub a3, a5, a3
+; RV32ZBB-NEXT: sub a3, a7, a3
+; RV32ZBB-NEXT: sub a3, a3, t0
+; RV32ZBB-NEXT: sub a4, a5, a4
; RV32ZBB-NEXT: bgez a1, .LBB31_4
; RV32ZBB-NEXT: # %bb.3:
-; RV32ZBB-NEXT: snez a5, a4
-; RV32ZBB-NEXT: snez a6, a3
+; RV32ZBB-NEXT: snez a5, a3
+; RV32ZBB-NEXT: snez a6, a4
; RV32ZBB-NEXT: or a5, a6, a5
; RV32ZBB-NEXT: neg a7, a2
; RV32ZBB-NEXT: sltu t0, a7, a5
@@ -2142,12 +2142,12 @@ define i128 @abd_subnsw_i128(i128 %a, i128 %b) nounwind {
; RV32ZBB-NEXT: neg a1, a1
; RV32ZBB-NEXT: sub a1, a1, t0
; RV32ZBB-NEXT: sub a2, a7, a5
-; RV32ZBB-NEXT: neg a3, a3
-; RV32ZBB-NEXT: add a4, a4, a6
; RV32ZBB-NEXT: neg a4, a4
+; RV32ZBB-NEXT: add a3, a3, a6
+; RV32ZBB-NEXT: neg a3, a3
; RV32ZBB-NEXT: .LBB31_4:
-; RV32ZBB-NEXT: sw a4, 4(a0)
-; RV32ZBB-NEXT: sw a3, 0(a0)
+; RV32ZBB-NEXT: sw a4, 0(a0)
+; RV32ZBB-NEXT: sw a3, 4(a0)
; RV32ZBB-NEXT: sw a2, 8(a0)
; RV32ZBB-NEXT: sw a1, 12(a0)
; RV32ZBB-NEXT: ret
@@ -2174,8 +2174,8 @@ define i128 @abd_subnsw_i128(i128 %a, i128 %b) nounwind {
define i128 @abd_subnsw_i128_undef(i128 %a, i128 %b) nounwind {
; RV32I-LABEL: abd_subnsw_i128_undef:
; RV32I: # %bb.0:
-; RV32I-NEXT: lw a3, 0(a2)
-; RV32I-NEXT: lw a4, 4(a2)
+; RV32I-NEXT: lw a4, 0(a2)
+; RV32I-NEXT: lw a3, 4(a2)
; RV32I-NEXT: lw a6, 8(a2)
; RV32I-NEXT: lw t0, 12(a2)
; RV32I-NEXT: lw a2, 8(a1)
@@ -2184,24 +2184,24 @@ define i128 @abd_subnsw_i128_undef(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: lw a7, 4(a1)
; RV32I-NEXT: sltu a1, a2, a6
; RV32I-NEXT: sub t1, t1, t0
-; RV32I-NEXT: sltu t0, a5, a3
+; RV32I-NEXT: sltu t0, a5, a4
; RV32I-NEXT: sub a1, t1, a1
; RV32I-NEXT: mv t1, t0
-; RV32I-NEXT: beq a7, a4, .LBB32_2
+; RV32I-NEXT: beq a7, a3, .LBB32_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: sltu t1, a7, a4
+; RV32I-NEXT: sltu t1, a7, a3
; RV32I-NEXT: .LBB32_2:
; RV32I-NEXT: sub a2, a2, a6
; RV32I-NEXT: sltu a6, a2, t1
; RV32I-NEXT: sub a1, a1, a6
; RV32I-NEXT: sub a2, a2, t1
-; RV32I-NEXT: sub a4, a7, a4
-; RV32I-NEXT: sub a4, a4, t0
-; RV32I-NEXT: sub a3, a5, a3
+; RV32I-NEXT: sub a3, a7, a3
+; RV32I-NEXT: sub a3, a3, t0
+; RV32I-NEXT: sub a4, a5, a4
; RV32I-NEXT: bgez a1, .LBB32_4
; RV32I-NEXT: # %bb.3:
-; RV32I-NEXT: snez a5, a4
-; RV32I-NEXT: snez a6, a3
+; RV32I-NEXT: snez a5, a3
+; RV32I-NEXT: snez a6, a4
; RV32I-NEXT: or a5, a6, a5
; RV32I-NEXT: neg a7, a2
; RV32I-NEXT: sltu t0, a7, a5
@@ -2210,12 +2210,12 @@ define i128 @abd_subnsw_i128_undef(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: neg a1, a1
; RV32I-NEXT: sub a1, a1, t0
; RV32I-NEXT: sub a2, a7, a5
-; RV32I-NEXT: neg a3, a3
-; RV32I-NEXT: add a4, a4, a6
; RV32I-NEXT: neg a4, a4
+; RV32I-NEXT: add a3, a3, a6
+; RV32I-NEXT: neg a3, a3
; RV32I-NEXT: .LBB32_4:
-; RV32I-NEXT: sw a4, 4(a0)
-; RV32I-NEXT: sw a3, 0(a0)
+; RV32I-NEXT: sw a4, 0(a0)
+; RV32I-NEXT: sw a3, 4(a0)
; RV32I-NEXT: sw a2, 8(a0)
; RV32I-NEXT: sw a1, 12(a0)
; RV32I-NEXT: ret
@@ -2237,8 +2237,8 @@ define i128 @abd_subnsw_i128_undef(i128 %a, i128 %b) nounwind {
;
; RV32ZBB-LABEL: abd_subnsw_i128_undef:
; RV32ZBB: # %bb.0:
-; RV32ZBB-NEXT: lw a3, 0(a2)
-; RV32ZBB-NEXT: lw a4, 4(a2)
+; RV32ZBB-NEXT: lw a4, 0(a2)
+; RV32ZBB-NEXT: lw a3, 4(a2)
; RV32ZBB-NEXT: lw a6, 8(a2)
; RV32ZBB-NEXT: lw t0, 12(a2)
; RV32ZBB-NEXT: lw a2, 8(a1)
@@ -2247,24 +2247,24 @@ define i128 @abd_subnsw_i128_undef(i128 %a, i128 %b) nounwind {
; RV32ZBB-NEXT: lw a7, 4(a1)
; RV32ZBB-NEXT: sltu a1, a2, a6
; RV32ZBB-NEXT: sub t1, t1, t0
-; RV32ZBB-NEXT: sltu t0, a5, a3
+; RV32ZBB-NEXT: sltu t0, a5, a4
; RV32ZBB-NEXT: sub a1, t1, a1
; RV32ZBB-NEXT: mv t1, t0
-; RV32ZBB-NEXT: beq a7, a4, .LBB32_2
+; RV32ZBB-NEXT: beq a7, a3, .LBB32_2
; RV32ZBB-NEXT: # %bb.1:
-; RV32ZBB-NEXT: sltu t1, a7, a4
+; RV32ZBB-NEXT: sltu t1, a7, a3
; RV32ZBB-NEXT: .LBB32_2:
; RV32ZBB-NEXT: sub a2, a2, a6
; RV32ZBB-NEXT: sltu a6, a2, t1
; RV32ZBB-NEXT: sub a1, a1, a6
; RV32ZBB-NEXT: sub a2, a2, t1
-; RV32ZBB-NEXT: sub a4, a7, a4
-; RV32ZBB-NEXT: sub a4, a4, t0
-; RV32ZBB-NEXT: sub a3, a5, a3
+; RV32ZBB-NEXT: sub a3, a7, a3
+; RV32ZBB-NEXT: sub a3, a3, t0
+; RV32ZBB-NEXT: sub a4, a5, a4
; RV32ZBB-NEXT: bgez a1, .LBB32_4
; RV32ZBB-NEXT: # %bb.3:
-; RV32ZBB-NEXT: snez a5, a4
-; RV32ZBB-NEXT: snez a6, a3
+; RV32ZBB-NEXT: snez a5, a3
+; RV32ZBB-NEXT: snez a6, a4
; RV32ZBB-NEXT: or a5, a6, a5
; RV32ZBB-NEXT: neg a7, a2
; RV32ZBB-NEXT: sltu t0, a7, a5
@@ -2273,12 +2273,12 @@ define i128 @abd_subnsw_i128_undef(i128 %a, i128 %b) nounwind {
; RV32ZBB-NEXT: neg a1, a1
; RV32ZBB-NEXT: sub a1, a1, t0
; RV32ZBB-NEXT: sub a2, a7, a5
-; RV32ZBB-NEXT: neg a3, a3
-; RV32ZBB-NEXT: add a4, a4, a6
; RV32ZBB-NEXT: neg a4, a4
+; RV32ZBB-NEXT: add a3, a3, a6
+; RV32ZBB-NEXT: neg a3, a3
; RV32ZBB-NEXT: .LBB32_4:
-; RV32ZBB-NEXT: sw a4, 4(a0)
-; RV32ZBB-NEXT: sw a3, 0(a0)
+; RV32ZBB-NEXT: sw a4, 0(a0)
+; RV32ZBB-NEXT: sw a3, 4(a0)
; RV32ZBB-NEXT: sw a2, 8(a0)
; RV32ZBB-NEXT: sw a1, 12(a0)
; RV32ZBB-NEXT: ret
@@ -2711,9 +2711,9 @@ define i128 @abd_select_i128(i128 %a, i128 %b) nounwind {
; RV32ZBB-NEXT: sub a1, a4, t5
; RV32ZBB-NEXT: sub a4, a6, t6
; RV32ZBB-NEXT: .LBB38_13:
-; RV32ZBB-NEXT: sw a4, 8(a0)
-; RV32ZBB-NEXT: sw a1, 4(a0)
; RV32ZBB-NEXT: sw a3, 0(a0)
+; RV32ZBB-NEXT: sw a1, 4(a0)
+; RV32ZBB-NEXT: sw a4, 8(a0)
; RV32ZBB-NEXT: sw a2, 12(a0)
; RV32ZBB-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZBB-NEXT: addi sp, sp, 16
diff --git a/llvm/test/CodeGen/RISCV/abdu-neg.ll b/llvm/test/CodeGen/RISCV/abdu-neg.ll
index 54075f41694392..b39285c3d343f5 100644
--- a/llvm/test/CodeGen/RISCV/abdu-neg.ll
+++ b/llvm/test/CodeGen/RISCV/abdu-neg.ll
@@ -1855,9 +1855,9 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: sub a1, a1, t2
; RV32I-NEXT: sub a2, a2, a3
; RV32I-NEXT: .LBB22_11:
-; RV32I-NEXT: sw a6, 8(a0)
-; RV32I-NEXT: sw a1, 4(a0)
; RV32I-NEXT: sw a2, 0(a0)
+; RV32I-NEXT: sw a1, 4(a0)
+; RV32I-NEXT: sw a6, 8(a0)
; RV32I-NEXT: sw a5, 12(a0)
; RV32I-NEXT: ret
;
@@ -1942,9 +1942,9 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
; RV32ZBB-NEXT: sub a1, a1, t2
; RV32ZBB-NEXT: sub a2, a2, a3
; RV32ZBB-NEXT: .LBB22_11:
-; RV32ZBB-NEXT: sw a6, 8(a0)
-; RV32ZBB-NEXT: sw a1, 4(a0)
; RV32ZBB-NEXT: sw a2, 0(a0)
+; RV32ZBB-NEXT: sw a1, 4(a0)
+; RV32ZBB-NEXT: sw a6, 8(a0)
; RV32ZBB-NEXT: sw a5, 12(a0)
; RV32ZBB-NEXT: ret
;
diff --git a/llvm/test/CodeGen/RISCV/add-before-shl.ll b/llvm/test/CodeGen/RISCV/add-before-shl.ll
index 823918f1c42e7a..db7498340d3951 100644
--- a/llvm/test/CodeGen/RISCV/add-before-shl.ll
+++ b/llvm/test/CodeGen/RISCV/add-before-shl.ll
@@ -182,8 +182,8 @@ define i128 @add_wide_operand(i128 %a) nounwind {
; RV32I-NEXT: lui a4, 128
; RV32I-NEXT: add a1, a1, a4
; RV32I-NEXT: sw a2, 0(a0)
-; RV32I-NEXT: sw a3, 8(a0)
; RV32I-NEXT: sw a5, 4(a0)
+; RV32I-NEXT: sw a3, 8(a0)
; RV32I-NEXT: sw a1, 12(a0)
; RV32I-NEXT: jalr zero, 0(ra)
;
@@ -217,8 +217,8 @@ define i128 @add_wide_operand(i128 %a) nounwind {
; RV32C-NEXT: c.or a1, a3
; RV32C-NEXT: c.slli a6, 3
; RV32C-NEXT: sw a6, 0(a0)
-; RV32C-NEXT: c.sw a1, 8(a0)
; RV32C-NEXT: c.sw a4, 4(a0)
+; RV32C-NEXT: c.sw a1, 8(a0)
; RV32C-NEXT: c.sw a2, 12(a0)
; RV32C-NEXT: c.jr ra
;
diff --git a/llvm/test/CodeGen/RISCV/alloca.ll b/llvm/test/CodeGen/RISCV/alloca.ll
index bcb0592c18f59f..975fc93c830af2 100644
--- a/llvm/test/CodeGen/RISCV/alloca.ll
+++ b/llvm/test/CodeGen/RISCV/alloca.ll
@@ -76,13 +76,10 @@ define void @alloca_callframe(i32 %n) nounwind {
; RV32I-NEXT: sub a0, sp, a0
; RV32I-NEXT: mv sp, a0
; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: li a1, 12
-; RV32I-NEXT: sw a1, 12(sp)
-; RV32I-NEXT: li a1, 11
-; RV32I-NEXT: sw a1, 8(sp)
-; RV32I-NEXT: li a1, 10
-; RV32I-NEXT: sw a1, 4(sp)
-; RV32I-NEXT: li t0, 9
+; RV32I-NEXT: li t0, 12
+; RV32I-NEXT: li t1, 11
+; RV32I-NEXT: li t2, 10
+; RV32I-NEXT: li t3, 9
; RV32I-NEXT: li a1, 2
; RV32I-NEXT: li a2, 3
; RV32I-NEXT: li a3, 4
@@ -90,7 +87,10 @@ define void @alloca_callframe(i32 %n) nounwind {
; RV32I-NEXT: li a5, 6
; RV32I-NEXT: li a6, 7
; RV32I-NEXT: li a7, 8
-; RV32I-NEXT: sw t0, 0(sp)
+; RV32I-NEXT: sw t3, 0(sp)
+; RV32I-NEXT: sw t2, 4(sp)
+; RV32I-NEXT: sw t1, 8(sp)
+; RV32I-NEXT: sw t0, 12(sp)
; RV32I-NEXT: call func
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: addi sp, s0, -16
diff --git a/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll b/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll
index 9908503adb9c30..a87b49e61a8dbc 100644
--- a/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll
@@ -4882,8 +4882,8 @@ define void @cmpxchg_i64_monotonic_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounw
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: sw a2, 4(sp)
; RV32I-NEXT: sw a1, 0(sp)
+; RV32I-NEXT: sw a2, 4(sp)
; RV32I-NEXT: mv a1, sp
; RV32I-NEXT: mv a2, a3
; RV32I-NEXT: mv a3, a4
@@ -4898,8 +4898,8 @@ define void @cmpxchg_i64_monotonic_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounw
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -16
; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IA-NEXT: sw a2, 4(sp)
; RV32IA-NEXT: sw a1, 0(sp)
+; RV32IA-NEXT: sw a2, 4(sp)
; RV32IA-NEXT: mv a1, sp
; RV32IA-NEXT: mv a2, a3
; RV32IA-NEXT: mv a3, a4
@@ -4964,8 +4964,8 @@ define void @cmpxchg_i64_acquire_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv a5, a4
-; RV32I-NEXT: sw a2, 4(sp)
; RV32I-NEXT: sw a1, 0(sp)
+; RV32I-NEXT: sw a2, 4(sp)
; RV32I-NEXT: mv a1, sp
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: mv a2, a3
@@ -4981,8 +4981,8 @@ define void @cmpxchg_i64_acquire_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin
; RV32IA-NEXT: addi sp, sp, -16
; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv a5, a4
-; RV32IA-NEXT: sw a2, 4(sp)
; RV32IA-NEXT: sw a1, 0(sp)
+; RV32IA-NEXT: sw a2, 4(sp)
; RV32IA-NEXT: mv a1, sp
; RV32IA-NEXT: li a4, 2
; RV32IA-NEXT: mv a2, a3
@@ -5057,8 +5057,8 @@ define void @cmpxchg_i64_acquire_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv a6, a4
-; RV32I-NEXT: sw a2, 4(sp)
; RV32I-NEXT: sw a1, 0(sp)
+; RV32I-NEXT: sw a2, 4(sp)
; RV32I-NEXT: mv a1, sp
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: li a5, 2
@@ -5074,8 +5074,8 @@ define void @cmpxchg_i64_acquire_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind
; RV32IA-NEXT: addi sp, sp, -16
; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv a6, a4
-; RV32IA-NEXT: sw a2, 4(sp)
; RV32IA-NEXT: sw a1, 0(sp)
+; RV32IA-NEXT: sw a2, 4(sp)
; RV32IA-NEXT: mv a1, sp
; RV32IA-NEXT: li a4, 2
; RV32IA-NEXT: li a5, 2
@@ -5150,8 +5150,8 @@ define void @cmpxchg_i64_release_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv a5, a4
-; RV32I-NEXT: sw a2, 4(sp)
; RV32I-NEXT: sw a1, 0(sp)
+; RV32I-NEXT: sw a2, 4(sp)
; RV32I-NEXT: mv a1, sp
; RV32I-NEXT: li a4, 3
; RV32I-NEXT: mv a2, a3
@@ -5167,8 +5167,8 @@ define void @cmpxchg_i64_release_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin
; RV32IA-NEXT: addi sp, sp, -16
; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv a5, a4
-; RV32IA-NEXT: sw a2, 4(sp)
; RV32IA-NEXT: sw a1, 0(sp)
+; RV32IA-NEXT: sw a2, 4(sp)
; RV32IA-NEXT: mv a1, sp
; RV32IA-NEXT: li a4, 3
; RV32IA-NEXT: mv a2, a3
@@ -5243,8 +5243,8 @@ define void @cmpxchg_i64_release_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv a6, a4
-; RV32I-NEXT: sw a2, 4(sp)
; RV32I-NEXT: sw a1, 0(sp)
+; RV32I-NEXT: sw a2, 4(sp)
; RV32I-NEXT: mv a1, sp
; RV32I-NEXT: li a4, 3
; RV32I-NEXT: li a5, 2
@@ -5260,8 +5260,8 @@ define void @cmpxchg_i64_release_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind
; RV32IA-NEXT: addi sp, sp, -16
; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv a6, a4
-; RV32IA-NEXT: sw a2, 4(sp)
; RV32IA-NEXT: sw a1, 0(sp)
+; RV32IA-NEXT: sw a2, 4(sp)
; RV32IA-NEXT: mv a1, sp
; RV32IA-NEXT: li a4, 3
; RV32IA-NEXT: li a5, 2
@@ -5336,8 +5336,8 @@ define void @cmpxchg_i64_acq_rel_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv a5, a4
-; RV32I-NEXT: sw a2, 4(sp)
; RV32I-NEXT: sw a1, 0(sp)
+; RV32I-NEXT: sw a2, 4(sp)
; RV32I-NEXT: mv a1, sp
; RV32I-NEXT: li a4, 4
; RV32I-NEXT: mv a2, a3
@@ -5353,8 +5353,8 @@ define void @cmpxchg_i64_acq_rel_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin
; RV32IA-NEXT: addi sp, sp, -16
; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv a5, a4
-; RV32IA-NEXT: sw a2, 4(sp)
; RV32IA-NEXT: sw a1, 0(sp)
+; RV32IA-NEXT: sw a2, 4(sp)
; RV32IA-NEXT: mv a1, sp
; RV32IA-NEXT: li a4, 4
; RV32IA-NEXT: mv a2, a3
@@ -5429,8 +5429,8 @@ define void @cmpxchg_i64_acq_rel_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv a6, a4
-; RV32I-NEXT: sw a2, 4(sp)
; RV32I-NEXT: sw a1, 0(sp)
+; RV32I-NEXT: sw a2, 4(sp)
; RV32I-NEXT: mv a1, sp
; RV32I-NEXT: li a4, 4
; RV32I-NEXT: li a5, 2
@@ -5446,8 +5446,8 @@ define void @cmpxchg_i64_acq_rel_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind
; RV32IA-NEXT: addi sp, sp, -16
; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv a6, a4
-; RV32IA-NEXT: sw a2, 4(sp)
; RV32IA-NEXT: sw a1, 0(sp)
+; RV32IA-NEXT: sw a2, 4(sp)
; RV32IA-NEXT: mv a1, sp
; RV32IA-NEXT: li a4, 4
; RV32IA-NEXT: li a5, 2
@@ -5522,8 +5522,8 @@ define void @cmpxchg_i64_seq_cst_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv a5, a4
-; RV32I-NEXT: sw a2, 4(sp)
; RV32I-NEXT: sw a1, 0(sp)
+; RV32I-NEXT: sw a2, 4(sp)
; RV32I-NEXT: mv a1, sp
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: mv a2, a3
@@ -5539,8 +5539,8 @@ define void @cmpxchg_i64_seq_cst_monotonic(ptr %ptr, i64 %cmp, i64 %val) nounwin
; RV32IA-NEXT: addi sp, sp, -16
; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv a5, a4
-; RV32IA-NEXT: sw a2, 4(sp)
; RV32IA-NEXT: sw a1, 0(sp)
+; RV32IA-NEXT: sw a2, 4(sp)
; RV32IA-NEXT: mv a1, sp
; RV32IA-NEXT: li a4, 5
; RV32IA-NEXT: mv a2, a3
@@ -5615,8 +5615,8 @@ define void @cmpxchg_i64_seq_cst_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv a6, a4
-; RV32I-NEXT: sw a2, 4(sp)
; RV32I-NEXT: sw a1, 0(sp)
+; RV32I-NEXT: sw a2, 4(sp)
; RV32I-NEXT: mv a1, sp
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: li a5, 2
@@ -5632,8 +5632,8 @@ define void @cmpxchg_i64_seq_cst_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind
; RV32IA-NEXT: addi sp, sp, -16
; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv a6, a4
-; RV32IA-NEXT: sw a2, 4(sp)
; RV32IA-NEXT: sw a1, 0(sp)
+; RV32IA-NEXT: sw a2, 4(sp)
; RV32IA-NEXT: mv a1, sp
; RV32IA-NEXT: li a4, 5
; RV32IA-NEXT: li a5, 2
@@ -5708,8 +5708,8 @@ define void @cmpxchg_i64_seq_cst_seq_cst(ptr %ptr, i64 %cmp, i64 %val) nounwind
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv a6, a4
-; RV32I-NEXT: sw a2, 4(sp)
; RV32I-NEXT: sw a1, 0(sp)
+; RV32I-NEXT: sw a2, 4(sp)
; RV32I-NEXT: mv a1, sp
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: li a5, 5
@@ -5725,8 +5725,8 @@ define void @cmpxchg_i64_seq_cst_seq_cst(ptr %ptr, i64 %cmp, i64 %val) nounwind
; RV32IA-NEXT: addi sp, sp, -16
; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv a6, a4
-; RV32IA-NEXT: sw a2, 4(sp)
; RV32IA-NEXT: sw a1, 0(sp)
+; RV32IA-NEXT: sw a2, 4(sp)
; RV32IA-NEXT: mv a1, sp
; RV32IA-NEXT: li a4, 5
; RV32IA-NEXT: li a5, 5
diff --git a/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll b/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll
index 6d2263f74062df..f26425c297ab07 100644
--- a/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll
+++ b/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll
@@ -2767,14 +2767,14 @@ define void @varargs(...) {
; RV32I-NEXT: .cfi_def_cfa_offset 48
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: .cfi_offset ra, -36
-; RV32I-NEXT: sw a7, 44(sp)
-; RV32I-NEXT: sw a6, 40(sp)
-; RV32I-NEXT: sw a5, 36(sp)
; RV32I-NEXT: sw a4, 32(sp)
-; RV32I-NEXT: sw a3, 28(sp)
-; RV32I-NEXT: sw a2, 24(sp)
-; RV32I-NEXT: sw a1, 20(sp)
+; RV32I-NEXT: sw a5, 36(sp)
+; RV32I-NEXT: sw a6, 40(sp)
+; RV32I-NEXT: sw a7, 44(sp)
; RV32I-NEXT: sw a0, 16(sp)
+; RV32I-NEXT: sw a1, 20(sp)
+; RV32I-NEXT: sw a2, 24(sp)
+; RV32I-NEXT: sw a3, 28(sp)
; RV32I-NEXT: call callee
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 48
@@ -2786,12 +2786,12 @@ define void @varargs(...) {
; RV32I-ILP32E-NEXT: .cfi_def_cfa_offset 28
; RV32I-ILP32E-NEXT: sw ra, 0(sp) # 4-byte Folded Spill
; RV32I-ILP32E-NEXT: .cfi_offset ra, -28
-; RV32I-ILP32E-NEXT: sw a5, 24(sp)
; RV32I-ILP32E-NEXT: sw a4, 20(sp)
-; RV32I-ILP32E-NEXT: sw a3, 16(sp)
-; RV32I-ILP32E-NEXT: sw a2, 12(sp)
-; RV32I-ILP32E-NEXT: sw a1, 8(sp)
+; RV32I-ILP32E-NEXT: sw a5, 24(sp)
; RV32I-ILP32E-NEXT: sw a0, 4(sp)
+; RV32I-ILP32E-NEXT: sw a1, 8(sp)
+; RV32I-ILP32E-NEXT: sw a2, 12(sp)
+; RV32I-ILP32E-NEXT: sw a3, 16(sp)
; RV32I-ILP32E-NEXT: call callee
; RV32I-ILP32E-NEXT: lw ra, 0(sp) # 4-byte Folded Reload
; RV32I-ILP32E-NEXT: addi sp, sp, 28
@@ -2807,14 +2807,14 @@ define void @varargs(...) {
; RV32I-WITH-FP-NEXT: .cfi_offset s0, -40
; RV32I-WITH-FP-NEXT: addi s0, sp, 16
; RV32I-WITH-FP-NEXT: .cfi_def_cfa s0, 32
-; RV32I-WITH-FP-NEXT: sw a7, 28(s0)
-; RV32I-WITH-FP-NEXT: sw a6, 24(s0)
-; RV32I-WITH-FP-NEXT: sw a5, 20(s0)
; RV32I-WITH-FP-NEXT: sw a4, 16(s0)
-; RV32I-WITH-FP-NEXT: sw a3, 12(s0)
-; RV32I-WITH-FP-NEXT: sw a2, 8(s0)
-; RV32I-WITH-FP-NEXT: sw a1, 4(s0)
+; RV32I-WITH-FP-NEXT: sw a5, 20(s0)
+; RV32I-WITH-FP-NEXT: sw a6, 24(s0)
+; RV32I-WITH-FP-NEXT: sw a7, 28(s0)
; RV32I-WITH-FP-NEXT: sw a0, 0(s0)
+; RV32I-WITH-FP-NEXT: sw a1, 4(s0)
+; RV32I-WITH-FP-NEXT: sw a2, 8(s0)
+; RV32I-WITH-FP-NEXT: sw a3, 12(s0)
; RV32I-WITH-FP-NEXT: call callee
; RV32I-WITH-FP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-WITH-FP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
@@ -2827,14 +2827,14 @@ define void @varargs(...) {
; RV32IZCMP-NEXT: .cfi_def_cfa_offset 48
; RV32IZCMP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32IZCMP-NEXT: .cfi_offset ra, -36
-; RV32IZCMP-NEXT: sw a7, 44(sp)
-; RV32IZCMP-NEXT: sw a6, 40(sp)
-; RV32IZCMP-NEXT: sw a5, 36(sp)
; RV32IZCMP-NEXT: sw a4, 32(sp)
-; RV32IZCMP-NEXT: sw a3, 28(sp)
-; RV32IZCMP-NEXT: sw a2, 24(sp)
-; RV32IZCMP-NEXT: sw a1, 20(sp)
+; RV32IZCMP-NEXT: sw a5, 36(sp)
+; RV32IZCMP-NEXT: sw a6, 40(sp)
+; RV32IZCMP-NEXT: sw a7, 44(sp)
; RV32IZCMP-NEXT: sw a0, 16(sp)
+; RV32IZCMP-NEXT: sw a1, 20(sp)
+; RV32IZCMP-NEXT: sw a2, 24(sp)
+; RV32IZCMP-NEXT: sw a3, 28(sp)
; RV32IZCMP-NEXT: call callee
; RV32IZCMP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZCMP-NEXT: addi sp, sp, 48
@@ -2850,14 +2850,14 @@ define void @varargs(...) {
; RV32IZCMP-WITH-FP-NEXT: .cfi_offset s0, -40
; RV32IZCMP-WITH-FP-NEXT: addi s0, sp, 16
; RV32IZCMP-WITH-FP-NEXT: .cfi_def_cfa s0, 32
-; RV32IZCMP-WITH-FP-NEXT: sw a7, 28(s0)
-; RV32IZCMP-WITH-FP-NEXT: sw a6, 24(s0)
-; RV32IZCMP-WITH-FP-NEXT: sw a5, 20(s0)
; RV32IZCMP-WITH-FP-NEXT: sw a4, 16(s0)
-; RV32IZCMP-WITH-FP-NEXT: sw a3, 12(s0)
-; RV32IZCMP-WITH-FP-NEXT: sw a2, 8(s0)
-; RV32IZCMP-WITH-FP-NEXT: sw a1, 4(s0)
+; RV32IZCMP-WITH-FP-NEXT: sw a5, 20(s0)
+; RV32IZCMP-WITH-FP-NEXT: sw a6, 24(s0)
+; RV32IZCMP-WITH-FP-NEXT: sw a7, 28(s0)
; RV32IZCMP-WITH-FP-NEXT: sw a0, 0(s0)
+; RV32IZCMP-WITH-FP-NEXT: sw a1, 4(s0)
+; RV32IZCMP-WITH-FP-NEXT: sw a2, 8(s0)
+; RV32IZCMP-WITH-FP-NEXT: sw a3, 12(s0)
; RV32IZCMP-WITH-FP-NEXT: call callee
; RV32IZCMP-WITH-FP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32IZCMP-WITH-FP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
@@ -2870,14 +2870,14 @@ define void @varargs(...) {
; RV64I-NEXT: .cfi_def_cfa_offset 80
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: .cfi_offset ra, -72
-; RV64I-NEXT: sd a7, 72(sp)
-; RV64I-NEXT: sd a6, 64(sp)
-; RV64I-NEXT: sd a5, 56(sp)
; RV64I-NEXT: sd a4, 48(sp)
-; RV64I-NEXT: sd a3, 40(sp)
-; RV64I-NEXT: sd a2, 32(sp)
-; RV64I-NEXT: sd a1, 24(sp)
+; RV64I-NEXT: sd a5, 56(sp)
+; RV64I-NEXT: sd a6, 64(sp)
+; RV64I-NEXT: sd a7, 72(sp)
; RV64I-NEXT: sd a0, 16(sp)
+; RV64I-NEXT: sd a1, 24(sp)
+; RV64I-NEXT: sd a2, 32(sp)
+; RV64I-NEXT: sd a3, 40(sp)
; RV64I-NEXT: call callee
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 80
@@ -2889,12 +2889,12 @@ define void @varargs(...) {
; RV64I-LP64E-NEXT: .cfi_def_cfa_offset 56
; RV64I-LP64E-NEXT: sd ra, 0(sp) # 8-byte Folded Spill
; RV64I-LP64E-NEXT: .cfi_offset ra, -56
-; RV64I-LP64E-NEXT: sd a5, 48(sp)
; RV64I-LP64E-NEXT: sd a4, 40(sp)
-; RV64I-LP64E-NEXT: sd a3, 32(sp)
-; RV64I-LP64E-NEXT: sd a2, 24(sp)
-; RV64I-LP64E-NEXT: sd a1, 16(sp)
+; RV64I-LP64E-NEXT: sd a5, 48(sp)
; RV64I-LP64E-NEXT: sd a0, 8(sp)
+; RV64I-LP64E-NEXT: sd a1, 16(sp)
+; RV64I-LP64E-NEXT: sd a2, 24(sp)
+; RV64I-LP64E-NEXT: sd a3, 32(sp)
; RV64I-LP64E-NEXT: call callee
; RV64I-LP64E-NEXT: ld ra, 0(sp) # 8-byte Folded Reload
; RV64I-LP64E-NEXT: addi sp, sp, 56
@@ -2910,14 +2910,14 @@ define void @varargs(...) {
; RV64I-WITH-FP-NEXT: .cfi_offset s0, -80
; RV64I-WITH-FP-NEXT: addi s0, sp, 16
; RV64I-WITH-FP-NEXT: .cfi_def_cfa s0, 64
-; RV64I-WITH-FP-NEXT: sd a7, 56(s0)
-; RV64I-WITH-FP-NEXT: sd a6, 48(s0)
-; RV64I-WITH-FP-NEXT: sd a5, 40(s0)
; RV64I-WITH-FP-NEXT: sd a4, 32(s0)
-; RV64I-WITH-FP-NEXT: sd a3, 24(s0)
-; RV64I-WITH-FP-NEXT: sd a2, 16(s0)
-; RV64I-WITH-FP-NEXT: sd a1, 8(s0)
+; RV64I-WITH-FP-NEXT: sd a5, 40(s0)
+; RV64I-WITH-FP-NEXT: sd a6, 48(s0)
+; RV64I-WITH-FP-NEXT: sd a7, 56(s0)
; RV64I-WITH-FP-NEXT: sd a0, 0(s0)
+; RV64I-WITH-FP-NEXT: sd a1, 8(s0)
+; RV64I-WITH-FP-NEXT: sd a2, 16(s0)
+; RV64I-WITH-FP-NEXT: sd a3, 24(s0)
; RV64I-WITH-FP-NEXT: call callee
; RV64I-WITH-FP-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-WITH-FP-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
@@ -2930,14 +2930,14 @@ define void @varargs(...) {
; RV64IZCMP-NEXT: .cfi_def_cfa_offset 80
; RV64IZCMP-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64IZCMP-NEXT: .cfi_offset ra, -72
-; RV64IZCMP-NEXT: sd a7, 72(sp)
-; RV64IZCMP-NEXT: sd a6, 64(sp)
-; RV64IZCMP-NEXT: sd a5, 56(sp)
; RV64IZCMP-NEXT: sd a4, 48(sp)
-; RV64IZCMP-NEXT: sd a3, 40(sp)
-; RV64IZCMP-NEXT: sd a2, 32(sp)
-; RV64IZCMP-NEXT: sd a1, 24(sp)
+; RV64IZCMP-NEXT: sd a5, 56(sp)
+; RV64IZCMP-NEXT: sd a6, 64(sp)
+; RV64IZCMP-NEXT: sd a7, 72(sp)
; RV64IZCMP-NEXT: sd a0, 16(sp)
+; RV64IZCMP-NEXT: sd a1, 24(sp)
+; RV64IZCMP-NEXT: sd a2, 32(sp)
+; RV64IZCMP-NEXT: sd a3, 40(sp)
; RV64IZCMP-NEXT: call callee
; RV64IZCMP-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64IZCMP-NEXT: addi sp, sp, 80
@@ -2953,14 +2953,14 @@ define void @varargs(...) {
; RV64IZCMP-WITH-FP-NEXT: .cfi_offset s0, -80
; RV64IZCMP-WITH-FP-NEXT: addi s0, sp, 16
; RV64IZCMP-WITH-FP-NEXT: .cfi_def_cfa s0, 64
-; RV64IZCMP-WITH-FP-NEXT: sd a7, 56(s0)
-; RV64IZCMP-WITH-FP-NEXT: sd a6, 48(s0)
-; RV64IZCMP-WITH-FP-NEXT: sd a5, 40(s0)
; RV64IZCMP-WITH-FP-NEXT: sd a4, 32(s0)
-; RV64IZCMP-WITH-FP-NEXT: sd a3, 24(s0)
-; RV64IZCMP-WITH-FP-NEXT: sd a2, 16(s0)
-; RV64IZCMP-WITH-FP-NEXT: sd a1, 8(s0)
+; RV64IZCMP-WITH-FP-NEXT: sd a5, 40(s0)
+; RV64IZCMP-WITH-FP-NEXT: sd a6, 48(s0)
+; RV64IZCMP-WITH-FP-NEXT: sd a7, 56(s0)
; RV64IZCMP-WITH-FP-NEXT: sd a0, 0(s0)
+; RV64IZCMP-WITH-FP-NEXT: sd a1, 8(s0)
+; RV64IZCMP-WITH-FP-NEXT: sd a2, 16(s0)
+; RV64IZCMP-WITH-FP-NEXT: sd a3, 24(s0)
; RV64IZCMP-WITH-FP-NEXT: call callee
; RV64IZCMP-WITH-FP-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64IZCMP-WITH-FP-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-common.ll b/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-common.ll
index 278187f62cd75e..e97a3bff32fac7 100644
--- a/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-common.ll
+++ b/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-common.ll
@@ -146,30 +146,27 @@ define void @caller_aligned_stack() nounwind {
; RV32I-FPELIM-NEXT: addi sp, sp, -64
; RV32I-FPELIM-NEXT: sw ra, 60(sp) # 4-byte Folded Spill
; RV32I-FPELIM-NEXT: li a0, 18
+; RV32I-FPELIM-NEXT: li a1, 17
+; RV32I-FPELIM-NEXT: sw a1, 20(sp)
; RV32I-FPELIM-NEXT: sw a0, 24(sp)
-; RV32I-FPELIM-NEXT: li a0, 17
-; RV32I-FPELIM-NEXT: sw a0, 20(sp)
; RV32I-FPELIM-NEXT: li a0, 16
+; RV32I-FPELIM-NEXT: lui a1, 262236
+; RV32I-FPELIM-NEXT: addi a1, a1, 655
+; RV32I-FPELIM-NEXT: lui a2, 377487
+; RV32I-FPELIM-NEXT: addi a2, a2, 1475
+; RV32I-FPELIM-NEXT: li a3, 15
+; RV32I-FPELIM-NEXT: sw a3, 0(sp)
+; RV32I-FPELIM-NEXT: sw a2, 8(sp)
+; RV32I-FPELIM-NEXT: sw a1, 12(sp)
; RV32I-FPELIM-NEXT: sw a0, 16(sp)
-; RV32I-FPELIM-NEXT: lui a0, 262236
-; RV32I-FPELIM-NEXT: addi a0, a0, 655
-; RV32I-FPELIM-NEXT: sw a0, 12(sp)
-; RV32I-FPELIM-NEXT: lui a0, 377487
-; RV32I-FPELIM-NEXT: addi a0, a0, 1475
-; RV32I-FPELIM-NEXT: sw a0, 8(sp)
-; RV32I-FPELIM-NEXT: li a0, 15
-; RV32I-FPELIM-NEXT: sw a0, 0(sp)
; RV32I-FPELIM-NEXT: lui a0, 262153
-; RV32I-FPELIM-NEXT: addi a0, a0, 491
-; RV32I-FPELIM-NEXT: sw a0, 44(sp)
+; RV32I-FPELIM-NEXT: addi t0, a0, 491
; RV32I-FPELIM-NEXT: lui a0, 545260
-; RV32I-FPELIM-NEXT: addi a0, a0, -1967
-; RV32I-FPELIM-NEXT: sw a0, 40(sp)
+; RV32I-FPELIM-NEXT: addi t1, a0, -1967
; RV32I-FPELIM-NEXT: lui a0, 964690
-; RV32I-FPELIM-NEXT: addi a0, a0, -328
-; RV32I-FPELIM-NEXT: sw a0, 36(sp)
+; RV32I-FPELIM-NEXT: addi t2, a0, -328
; RV32I-FPELIM-NEXT: lui a0, 335544
-; RV32I-FPELIM-NEXT: addi t0, a0, 1311
+; RV32I-FPELIM-NEXT: addi t3, a0, 1311
; RV32I-FPELIM-NEXT: lui a0, 688509
; RV32I-FPELIM-NEXT: addi a5, a0, -2048
; RV32I-FPELIM-NEXT: li a0, 1
@@ -179,7 +176,10 @@ define void @caller_aligned_stack() nounwind {
; RV32I-FPELIM-NEXT: li a4, 13
; RV32I-FPELIM-NEXT: li a6, 4
; RV32I-FPELIM-NEXT: li a7, 14
-; RV32I-FPELIM-NEXT: sw t0, 32(sp)
+; RV32I-FPELIM-NEXT: sw t3, 32(sp)
+; RV32I-FPELIM-NEXT: sw t2, 36(sp)
+; RV32I-FPELIM-NEXT: sw t1, 40(sp)
+; RV32I-FPELIM-NEXT: sw t0, 44(sp)
; RV32I-FPELIM-NEXT: call callee_aligned_stack
; RV32I-FPELIM-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
; RV32I-FPELIM-NEXT: addi sp, sp, 64
@@ -192,30 +192,27 @@ define void @caller_aligned_stack() nounwind {
; RV32I-WITHFP-NEXT: sw s0, 56(sp) # 4-byte Folded Spill
; RV32I-WITHFP-NEXT: addi s0, sp, 64
; RV32I-WITHFP-NEXT: li a0, 18
+; RV32I-WITHFP-NEXT: li a1, 17
+; RV32I-WITHFP-NEXT: sw a1, 20(sp)
; RV32I-WITHFP-NEXT: sw a0, 24(sp)
-; RV32I-WITHFP-NEXT: li a0, 17
-; RV32I-WITHFP-NEXT: sw a0, 20(sp)
; RV32I-WITHFP-NEXT: li a0, 16
+; RV32I-WITHFP-NEXT: lui a1, 262236
+; RV32I-WITHFP-NEXT: addi a1, a1, 655
+; RV32I-WITHFP-NEXT: lui a2, 377487
+; RV32I-WITHFP-NEXT: addi a2, a2, 1475
+; RV32I-WITHFP-NEXT: li a3, 15
+; RV32I-WITHFP-NEXT: sw a3, 0(sp)
+; RV32I-WITHFP-NEXT: sw a2, 8(sp)
+; RV32I-WITHFP-NEXT: sw a1, 12(sp)
; RV32I-WITHFP-NEXT: sw a0, 16(sp)
-; RV32I-WITHFP-NEXT: lui a0, 262236
-; RV32I-WITHFP-NEXT: addi a0, a0, 655
-; RV32I-WITHFP-NEXT: sw a0, 12(sp)
-; RV32I-WITHFP-NEXT: lui a0, 377487
-; RV32I-WITHFP-NEXT: addi a0, a0, 1475
-; RV32I-WITHFP-NEXT: sw a0, 8(sp)
-; RV32I-WITHFP-NEXT: li a0, 15
-; RV32I-WITHFP-NEXT: sw a0, 0(sp)
; RV32I-WITHFP-NEXT: lui a0, 262153
-; RV32I-WITHFP-NEXT: addi a0, a0, 491
-; RV32I-WITHFP-NEXT: sw a0, -20(s0)
+; RV32I-WITHFP-NEXT: addi t0, a0, 491
; RV32I-WITHFP-NEXT: lui a0, 545260
-; RV32I-WITHFP-NEXT: addi a0, a0, -1967
-; RV32I-WITHFP-NEXT: sw a0, -24(s0)
+; RV32I-WITHFP-NEXT: addi t1, a0, -1967
; RV32I-WITHFP-NEXT: lui a0, 964690
-; RV32I-WITHFP-NEXT: addi a0, a0, -328
-; RV32I-WITHFP-NEXT: sw a0, -28(s0)
+; RV32I-WITHFP-NEXT: addi t2, a0, -328
; RV32I-WITHFP-NEXT: lui a0, 335544
-; RV32I-WITHFP-NEXT: addi t0, a0, 1311
+; RV32I-WITHFP-NEXT: addi t3, a0, 1311
; RV32I-WITHFP-NEXT: lui a0, 688509
; RV32I-WITHFP-NEXT: addi a5, a0, -2048
; RV32I-WITHFP-NEXT: li a0, 1
@@ -225,7 +222,10 @@ define void @caller_aligned_stack() nounwind {
; RV32I-WITHFP-NEXT: li a4, 13
; RV32I-WITHFP-NEXT: li a6, 4
; RV32I-WITHFP-NEXT: li a7, 14
-; RV32I-WITHFP-NEXT: sw t0, -32(s0)
+; RV32I-WITHFP-NEXT: sw t3, -32(s0)
+; RV32I-WITHFP-NEXT: sw t2, -28(s0)
+; RV32I-WITHFP-NEXT: sw t1, -24(s0)
+; RV32I-WITHFP-NEXT: sw t0, -20(s0)
; RV32I-WITHFP-NEXT: call callee_aligned_stack
; RV32I-WITHFP-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
; RV32I-WITHFP-NEXT: lw s0, 56(sp) # 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-ilp32d-common.ll b/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-ilp32d-common.ll
index bb082b0314d599..9e4c8a6e3320c2 100644
--- a/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-ilp32d-common.ll
+++ b/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-ilp32d-common.ll
@@ -142,8 +142,7 @@ define i32 @caller_many_scalars() nounwind {
; RV32I-FPELIM: # %bb.0:
; RV32I-FPELIM-NEXT: addi sp, sp, -16
; RV32I-FPELIM-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-FPELIM-NEXT: li a0, 8
-; RV32I-FPELIM-NEXT: sw a0, 4(sp)
+; RV32I-FPELIM-NEXT: li a4, 8
; RV32I-FPELIM-NEXT: li a0, 1
; RV32I-FPELIM-NEXT: li a1, 2
; RV32I-FPELIM-NEXT: li a2, 3
@@ -152,6 +151,7 @@ define i32 @caller_many_scalars() nounwind {
; RV32I-FPELIM-NEXT: li a6, 6
; RV32I-FPELIM-NEXT: li a7, 7
; RV32I-FPELIM-NEXT: sw zero, 0(sp)
+; RV32I-FPELIM-NEXT: sw a4, 4(sp)
; RV32I-FPELIM-NEXT: li a4, 0
; RV32I-FPELIM-NEXT: call callee_many_scalars
; RV32I-FPELIM-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
@@ -164,8 +164,7 @@ define i32 @caller_many_scalars() nounwind {
; RV32I-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-WITHFP-NEXT: addi s0, sp, 16
-; RV32I-WITHFP-NEXT: li a0, 8
-; RV32I-WITHFP-NEXT: sw a0, 4(sp)
+; RV32I-WITHFP-NEXT: li a4, 8
; RV32I-WITHFP-NEXT: li a0, 1
; RV32I-WITHFP-NEXT: li a1, 2
; RV32I-WITHFP-NEXT: li a2, 3
@@ -174,6 +173,7 @@ define i32 @caller_many_scalars() nounwind {
; RV32I-WITHFP-NEXT: li a6, 6
; RV32I-WITHFP-NEXT: li a7, 7
; RV32I-WITHFP-NEXT: sw zero, 0(sp)
+; RV32I-WITHFP-NEXT: sw a4, 4(sp)
; RV32I-WITHFP-NEXT: li a4, 0
; RV32I-WITHFP-NEXT: call callee_many_scalars
; RV32I-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
@@ -246,17 +246,17 @@ define i32 @caller_large_scalars() nounwind {
; RV32I-FPELIM-NEXT: addi sp, sp, -48
; RV32I-FPELIM-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
; RV32I-FPELIM-NEXT: lui a0, 524272
-; RV32I-FPELIM-NEXT: sw a0, 12(sp)
-; RV32I-FPELIM-NEXT: sw zero, 8(sp)
-; RV32I-FPELIM-NEXT: sw zero, 4(sp)
; RV32I-FPELIM-NEXT: sw zero, 0(sp)
-; RV32I-FPELIM-NEXT: sw zero, 36(sp)
-; RV32I-FPELIM-NEXT: sw zero, 32(sp)
-; RV32I-FPELIM-NEXT: sw zero, 28(sp)
+; RV32I-FPELIM-NEXT: sw zero, 4(sp)
+; RV32I-FPELIM-NEXT: sw zero, 8(sp)
+; RV32I-FPELIM-NEXT: sw a0, 12(sp)
; RV32I-FPELIM-NEXT: li a2, 1
; RV32I-FPELIM-NEXT: addi a0, sp, 24
; RV32I-FPELIM-NEXT: mv a1, sp
; RV32I-FPELIM-NEXT: sw a2, 24(sp)
+; RV32I-FPELIM-NEXT: sw zero, 28(sp)
+; RV32I-FPELIM-NEXT: sw zero, 32(sp)
+; RV32I-FPELIM-NEXT: sw zero, 36(sp)
; RV32I-FPELIM-NEXT: call callee_large_scalars
; RV32I-FPELIM-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
; RV32I-FPELIM-NEXT: addi sp, sp, 48
@@ -269,17 +269,17 @@ define i32 @caller_large_scalars() nounwind {
; RV32I-WITHFP-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
; RV32I-WITHFP-NEXT: addi s0, sp, 48
; RV32I-WITHFP-NEXT: lui a0, 524272
-; RV32I-WITHFP-NEXT: sw a0, -36(s0)
-; RV32I-WITHFP-NEXT: sw zero, -40(s0)
-; RV32I-WITHFP-NEXT: sw zero, -44(s0)
; RV32I-WITHFP-NEXT: sw zero, -48(s0)
-; RV32I-WITHFP-NEXT: sw zero, -12(s0)
-; RV32I-WITHFP-NEXT: sw zero, -16(s0)
-; RV32I-WITHFP-NEXT: sw zero, -20(s0)
+; RV32I-WITHFP-NEXT: sw zero, -44(s0)
+; RV32I-WITHFP-NEXT: sw zero, -40(s0)
+; RV32I-WITHFP-NEXT: sw a0, -36(s0)
; RV32I-WITHFP-NEXT: li a2, 1
; RV32I-WITHFP-NEXT: addi a0, s0, -24
; RV32I-WITHFP-NEXT: addi a1, s0, -48
; RV32I-WITHFP-NEXT: sw a2, -24(s0)
+; RV32I-WITHFP-NEXT: sw zero, -20(s0)
+; RV32I-WITHFP-NEXT: sw zero, -16(s0)
+; RV32I-WITHFP-NEXT: sw zero, -12(s0)
; RV32I-WITHFP-NEXT: call callee_large_scalars
; RV32I-WITHFP-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
; RV32I-WITHFP-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
@@ -354,18 +354,15 @@ define i32 @caller_large_scalars_exhausted_regs() nounwind {
; RV32I-FPELIM-NEXT: addi sp, sp, -64
; RV32I-FPELIM-NEXT: sw ra, 60(sp) # 4-byte Folded Spill
; RV32I-FPELIM-NEXT: addi a0, sp, 16
+; RV32I-FPELIM-NEXT: li a1, 9
+; RV32I-FPELIM-NEXT: sw a1, 0(sp)
; RV32I-FPELIM-NEXT: sw a0, 4(sp)
-; RV32I-FPELIM-NEXT: li a0, 9
-; RV32I-FPELIM-NEXT: sw a0, 0(sp)
; RV32I-FPELIM-NEXT: lui a0, 524272
-; RV32I-FPELIM-NEXT: sw a0, 28(sp)
-; RV32I-FPELIM-NEXT: sw zero, 24(sp)
-; RV32I-FPELIM-NEXT: sw zero, 20(sp)
; RV32I-FPELIM-NEXT: sw zero, 16(sp)
-; RV32I-FPELIM-NEXT: sw zero, 52(sp)
-; RV32I-FPELIM-NEXT: sw zero, 48(sp)
-; RV32I-FPELIM-NEXT: li a0, 8
-; RV32I-FPELIM-NEXT: sw a0, 40(sp)
+; RV32I-FPELIM-NEXT: sw zero, 20(sp)
+; RV32I-FPELIM-NEXT: sw zero, 24(sp)
+; RV32I-FPELIM-NEXT: sw a0, 28(sp)
+; RV32I-FPELIM-NEXT: li t0, 8
; RV32I-FPELIM-NEXT: li a0, 1
; RV32I-FPELIM-NEXT: li a1, 2
; RV32I-FPELIM-NEXT: li a2, 3
@@ -374,7 +371,10 @@ define i32 @caller_large_scalars_exhausted_regs() nounwind {
; RV32I-FPELIM-NEXT: li a5, 6
; RV32I-FPELIM-NEXT: li a6, 7
; RV32I-FPELIM-NEXT: addi a7, sp, 40
+; RV32I-FPELIM-NEXT: sw t0, 40(sp)
; RV32I-FPELIM-NEXT: sw zero, 44(sp)
+; RV32I-FPELIM-NEXT: sw zero, 48(sp)
+; RV32I-FPELIM-NEXT: sw zero, 52(sp)
; RV32I-FPELIM-NEXT: call callee_large_scalars_exhausted_regs
; RV32I-FPELIM-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
; RV32I-FPELIM-NEXT: addi sp, sp, 64
@@ -387,18 +387,15 @@ define i32 @caller_large_scalars_exhausted_regs() nounwind {
; RV32I-WITHFP-NEXT: sw s0, 56(sp) # 4-byte Folded Spill
; RV32I-WITHFP-NEXT: addi s0, sp, 64
; RV32I-WITHFP-NEXT: addi a0, s0, -48
+; RV32I-WITHFP-NEXT: li a1, 9
+; RV32I-WITHFP-NEXT: sw a1, 0(sp)
; RV32I-WITHFP-NEXT: sw a0, 4(sp)
-; RV32I-WITHFP-NEXT: li a0, 9
-; RV32I-WITHFP-NEXT: sw a0, 0(sp)
; RV32I-WITHFP-NEXT: lui a0, 524272
-; RV32I-WITHFP-NEXT: sw a0, -36(s0)
-; RV32I-WITHFP-NEXT: sw zero, -40(s0)
-; RV32I-WITHFP-NEXT: sw zero, -44(s0)
; RV32I-WITHFP-NEXT: sw zero, -48(s0)
-; RV32I-WITHFP-NEXT: sw zero, -12(s0)
-; RV32I-WITHFP-NEXT: sw zero, -16(s0)
-; RV32I-WITHFP-NEXT: li a0, 8
-; RV32I-WITHFP-NEXT: sw a0, -24(s0)
+; RV32I-WITHFP-NEXT: sw zero, -44(s0)
+; RV32I-WITHFP-NEXT: sw zero, -40(s0)
+; RV32I-WITHFP-NEXT: sw a0, -36(s0)
+; RV32I-WITHFP-NEXT: li t0, 8
; RV32I-WITHFP-NEXT: li a0, 1
; RV32I-WITHFP-NEXT: li a1, 2
; RV32I-WITHFP-NEXT: li a2, 3
@@ -407,7 +404,10 @@ define i32 @caller_large_scalars_exhausted_regs() nounwind {
; RV32I-WITHFP-NEXT: li a5, 6
; RV32I-WITHFP-NEXT: li a6, 7
; RV32I-WITHFP-NEXT: addi a7, s0, -24
+; RV32I-WITHFP-NEXT: sw t0, -24(s0)
; RV32I-WITHFP-NEXT: sw zero, -20(s0)
+; RV32I-WITHFP-NEXT: sw zero, -16(s0)
+; RV32I-WITHFP-NEXT: sw zero, -12(s0)
; RV32I-WITHFP-NEXT: call callee_large_scalars_exhausted_regs
; RV32I-WITHFP-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
; RV32I-WITHFP-NEXT: lw s0, 56(sp) # 4-byte Folded Reload
@@ -553,12 +553,12 @@ define i32 @caller_large_struct() nounwind {
; RV32I-FPELIM-NEXT: addi sp, sp, -48
; RV32I-FPELIM-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
; RV32I-FPELIM-NEXT: li a0, 1
-; RV32I-FPELIM-NEXT: sw a0, 24(sp)
; RV32I-FPELIM-NEXT: li a1, 2
-; RV32I-FPELIM-NEXT: sw a1, 28(sp)
; RV32I-FPELIM-NEXT: li a2, 3
-; RV32I-FPELIM-NEXT: sw a2, 32(sp)
; RV32I-FPELIM-NEXT: li a3, 4
+; RV32I-FPELIM-NEXT: sw a0, 24(sp)
+; RV32I-FPELIM-NEXT: sw a1, 28(sp)
+; RV32I-FPELIM-NEXT: sw a2, 32(sp)
; RV32I-FPELIM-NEXT: sw a3, 36(sp)
; RV32I-FPELIM-NEXT: sw a0, 8(sp)
; RV32I-FPELIM-NEXT: sw a1, 12(sp)
@@ -577,12 +577,12 @@ define i32 @caller_large_struct() nounwind {
; RV32I-WITHFP-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
; RV32I-WITHFP-NEXT: addi s0, sp, 48
; RV32I-WITHFP-NEXT: li a0, 1
-; RV32I-WITHFP-NEXT: sw a0, -24(s0)
; RV32I-WITHFP-NEXT: li a1, 2
-; RV32I-WITHFP-NEXT: sw a1, -20(s0)
; RV32I-WITHFP-NEXT: li a2, 3
-; RV32I-WITHFP-NEXT: sw a2, -16(s0)
; RV32I-WITHFP-NEXT: li a3, 4
+; RV32I-WITHFP-NEXT: sw a0, -24(s0)
+; RV32I-WITHFP-NEXT: sw a1, -20(s0)
+; RV32I-WITHFP-NEXT: sw a2, -16(s0)
; RV32I-WITHFP-NEXT: sw a3, -12(s0)
; RV32I-WITHFP-NEXT: sw a0, -40(s0)
; RV32I-WITHFP-NEXT: sw a1, -36(s0)
@@ -665,27 +665,24 @@ define void @caller_aligned_stack() nounwind {
; RV32I-FPELIM-NEXT: addi sp, sp, -64
; RV32I-FPELIM-NEXT: sw ra, 60(sp) # 4-byte Folded Spill
; RV32I-FPELIM-NEXT: li a0, 19
+; RV32I-FPELIM-NEXT: li a1, 18
+; RV32I-FPELIM-NEXT: sw a1, 20(sp)
; RV32I-FPELIM-NEXT: sw a0, 24(sp)
-; RV32I-FPELIM-NEXT: li a0, 18
-; RV32I-FPELIM-NEXT: sw a0, 20(sp)
; RV32I-FPELIM-NEXT: li a0, 17
-; RV32I-FPELIM-NEXT: sw a0, 16(sp)
+; RV32I-FPELIM-NEXT: li a1, 16
+; RV32I-FPELIM-NEXT: li a2, 15
+; RV32I-FPELIM-NEXT: sw a2, 0(sp)
+; RV32I-FPELIM-NEXT: sw a1, 8(sp)
; RV32I-FPELIM-NEXT: sw zero, 12(sp)
-; RV32I-FPELIM-NEXT: li a0, 16
-; RV32I-FPELIM-NEXT: sw a0, 8(sp)
-; RV32I-FPELIM-NEXT: li a0, 15
-; RV32I-FPELIM-NEXT: sw a0, 0(sp)
+; RV32I-FPELIM-NEXT: sw a0, 16(sp)
; RV32I-FPELIM-NEXT: lui a0, 262153
-; RV32I-FPELIM-NEXT: addi a0, a0, 491
-; RV32I-FPELIM-NEXT: sw a0, 44(sp)
+; RV32I-FPELIM-NEXT: addi t0, a0, 491
; RV32I-FPELIM-NEXT: lui a0, 545260
-; RV32I-FPELIM-NEXT: addi a0, a0, -1967
-; RV32I-FPELIM-NEXT: sw a0, 40(sp)
+; RV32I-FPELIM-NEXT: addi t1, a0, -1967
; RV32I-FPELIM-NEXT: lui a0, 964690
-; RV32I-FPELIM-NEXT: addi a0, a0, -328
-; RV32I-FPELIM-NEXT: sw a0, 36(sp)
+; RV32I-FPELIM-NEXT: addi t2, a0, -328
; RV32I-FPELIM-NEXT: lui a0, 335544
-; RV32I-FPELIM-NEXT: addi t0, a0, 1311
+; RV32I-FPELIM-NEXT: addi t3, a0, 1311
; RV32I-FPELIM-NEXT: lui a0, 688509
; RV32I-FPELIM-NEXT: addi a5, a0, -2048
; RV32I-FPELIM-NEXT: li a0, 1
@@ -695,7 +692,10 @@ define void @caller_aligned_stack() nounwind {
; RV32I-FPELIM-NEXT: li a4, 13
; RV32I-FPELIM-NEXT: li a6, 4
; RV32I-FPELIM-NEXT: li a7, 14
-; RV32I-FPELIM-NEXT: sw t0, 32(sp)
+; RV32I-FPELIM-NEXT: sw t3, 32(sp)
+; RV32I-FPELIM-NEXT: sw t2, 36(sp)
+; RV32I-FPELIM-NEXT: sw t1, 40(sp)
+; RV32I-FPELIM-NEXT: sw t0, 44(sp)
; RV32I-FPELIM-NEXT: call callee_aligned_stack
; RV32I-FPELIM-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
; RV32I-FPELIM-NEXT: addi sp, sp, 64
@@ -708,27 +708,24 @@ define void @caller_aligned_stack() nounwind {
; RV32I-WITHFP-NEXT: sw s0, 56(sp) # 4-byte Folded Spill
; RV32I-WITHFP-NEXT: addi s0, sp, 64
; RV32I-WITHFP-NEXT: li a0, 19
+; RV32I-WITHFP-NEXT: li a1, 18
+; RV32I-WITHFP-NEXT: sw a1, 20(sp)
; RV32I-WITHFP-NEXT: sw a0, 24(sp)
-; RV32I-WITHFP-NEXT: li a0, 18
-; RV32I-WITHFP-NEXT: sw a0, 20(sp)
; RV32I-WITHFP-NEXT: li a0, 17
-; RV32I-WITHFP-NEXT: sw a0, 16(sp)
+; RV32I-WITHFP-NEXT: li a1, 16
+; RV32I-WITHFP-NEXT: li a2, 15
+; RV32I-WITHFP-NEXT: sw a2, 0(sp)
+; RV32I-WITHFP-NEXT: sw a1, 8(sp)
; RV32I-WITHFP-NEXT: sw zero, 12(sp)
-; RV32I-WITHFP-NEXT: li a0, 16
-; RV32I-WITHFP-NEXT: sw a0, 8(sp)
-; RV32I-WITHFP-NEXT: li a0, 15
-; RV32I-WITHFP-NEXT: sw a0, 0(sp)
+; RV32I-WITHFP-NEXT: sw a0, 16(sp)
; RV32I-WITHFP-NEXT: lui a0, 262153
-; RV32I-WITHFP-NEXT: addi a0, a0, 491
-; RV32I-WITHFP-NEXT: sw a0, -20(s0)
+; RV32I-WITHFP-NEXT: addi t0, a0, 491
; RV32I-WITHFP-NEXT: lui a0, 545260
-; RV32I-WITHFP-NEXT: addi a0, a0, -1967
-; RV32I-WITHFP-NEXT: sw a0, -24(s0)
+; RV32I-WITHFP-NEXT: addi t1, a0, -1967
; RV32I-WITHFP-NEXT: lui a0, 964690
-; RV32I-WITHFP-NEXT: addi a0, a0, -328
-; RV32I-WITHFP-NEXT: sw a0, -28(s0)
+; RV32I-WITHFP-NEXT: addi t2, a0, -328
; RV32I-WITHFP-NEXT: lui a0, 335544
-; RV32I-WITHFP-NEXT: addi t0, a0, 1311
+; RV32I-WITHFP-NEXT: addi t3, a0, 1311
; RV32I-WITHFP-NEXT: lui a0, 688509
; RV32I-WITHFP-NEXT: addi a5, a0, -2048
; RV32I-WITHFP-NEXT: li a0, 1
@@ -738,7 +735,10 @@ define void @caller_aligned_stack() nounwind {
; RV32I-WITHFP-NEXT: li a4, 13
; RV32I-WITHFP-NEXT: li a6, 4
; RV32I-WITHFP-NEXT: li a7, 14
-; RV32I-WITHFP-NEXT: sw t0, -32(s0)
+; RV32I-WITHFP-NEXT: sw t3, -32(s0)
+; RV32I-WITHFP-NEXT: sw t2, -28(s0)
+; RV32I-WITHFP-NEXT: sw t1, -24(s0)
+; RV32I-WITHFP-NEXT: sw t0, -20(s0)
; RV32I-WITHFP-NEXT: call callee_aligned_stack
; RV32I-WITHFP-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
; RV32I-WITHFP-NEXT: lw s0, 56(sp) # 4-byte Folded Reload
@@ -881,10 +881,10 @@ define fp128 @callee_large_scalar_ret() nounwind {
; RV32I-FPELIM-LABEL: callee_large_scalar_ret:
; RV32I-FPELIM: # %bb.0:
; RV32I-FPELIM-NEXT: lui a1, 524272
-; RV32I-FPELIM-NEXT: sw a1, 12(a0)
-; RV32I-FPELIM-NEXT: sw zero, 8(a0)
-; RV32I-FPELIM-NEXT: sw zero, 4(a0)
; RV32I-FPELIM-NEXT: sw zero, 0(a0)
+; RV32I-FPELIM-NEXT: sw zero, 4(a0)
+; RV32I-FPELIM-NEXT: sw zero, 8(a0)
+; RV32I-FPELIM-NEXT: sw a1, 12(a0)
; RV32I-FPELIM-NEXT: ret
;
; RV32I-WITHFP-LABEL: callee_large_scalar_ret:
@@ -894,10 +894,10 @@ define fp128 @callee_large_scalar_ret() nounwind {
; RV32I-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-WITHFP-NEXT: addi s0, sp, 16
; RV32I-WITHFP-NEXT: lui a1, 524272
-; RV32I-WITHFP-NEXT: sw a1, 12(a0)
-; RV32I-WITHFP-NEXT: sw zero, 8(a0)
-; RV32I-WITHFP-NEXT: sw zero, 4(a0)
; RV32I-WITHFP-NEXT: sw zero, 0(a0)
+; RV32I-WITHFP-NEXT: sw zero, 4(a0)
+; RV32I-WITHFP-NEXT: sw zero, 8(a0)
+; RV32I-WITHFP-NEXT: sw a1, 12(a0)
; RV32I-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-WITHFP-NEXT: addi sp, sp, 16
@@ -938,13 +938,13 @@ define void @callee_large_struct_ret(ptr noalias sret(%struct.large) %agg.result
; RV32I-FPELIM-LABEL: callee_large_struct_ret:
; RV32I-FPELIM: # %bb.0:
; RV32I-FPELIM-NEXT: li a1, 1
+; RV32I-FPELIM-NEXT: li a2, 2
+; RV32I-FPELIM-NEXT: li a3, 3
+; RV32I-FPELIM-NEXT: li a4, 4
; RV32I-FPELIM-NEXT: sw a1, 0(a0)
-; RV32I-FPELIM-NEXT: li a1, 2
-; RV32I-FPELIM-NEXT: sw a1, 4(a0)
-; RV32I-FPELIM-NEXT: li a1, 3
-; RV32I-FPELIM-NEXT: sw a1, 8(a0)
-; RV32I-FPELIM-NEXT: li a1, 4
-; RV32I-FPELIM-NEXT: sw a1, 12(a0)
+; RV32I-FPELIM-NEXT: sw a2, 4(a0)
+; RV32I-FPELIM-NEXT: sw a3, 8(a0)
+; RV32I-FPELIM-NEXT: sw a4, 12(a0)
; RV32I-FPELIM-NEXT: ret
;
; RV32I-WITHFP-LABEL: callee_large_struct_ret:
@@ -954,13 +954,13 @@ define void @callee_large_struct_ret(ptr noalias sret(%struct.large) %agg.result
; RV32I-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-WITHFP-NEXT: addi s0, sp, 16
; RV32I-WITHFP-NEXT: li a1, 1
+; RV32I-WITHFP-NEXT: li a2, 2
+; RV32I-WITHFP-NEXT: li a3, 3
+; RV32I-WITHFP-NEXT: li a4, 4
; RV32I-WITHFP-NEXT: sw a1, 0(a0)
-; RV32I-WITHFP-NEXT: li a1, 2
-; RV32I-WITHFP-NEXT: sw a1, 4(a0)
-; RV32I-WITHFP-NEXT: li a1, 3
-; RV32I-WITHFP-NEXT: sw a1, 8(a0)
-; RV32I-WITHFP-NEXT: li a1, 4
-; RV32I-WITHFP-NEXT: sw a1, 12(a0)
+; RV32I-WITHFP-NEXT: sw a2, 4(a0)
+; RV32I-WITHFP-NEXT: sw a3, 8(a0)
+; RV32I-WITHFP-NEXT: sw a4, 12(a0)
; RV32I-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-WITHFP-NEXT: addi sp, sp, 16
diff --git a/llvm/test/CodeGen/RISCV/calling-conv-ilp32d.ll b/llvm/test/CodeGen/RISCV/calling-conv-ilp32d.ll
index bcceea7ac35b3e..1321413fbc57e8 100644
--- a/llvm/test/CodeGen/RISCV/calling-conv-ilp32d.ll
+++ b/llvm/test/CodeGen/RISCV/calling-conv-ilp32d.ll
@@ -203,8 +203,7 @@ define i32 @caller_double_on_stack_exhausted_gprs_fprs() nounwind {
; RV32-ILP32D: # %bb.0:
; RV32-ILP32D-NEXT: addi sp, sp, -16
; RV32-ILP32D-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-ILP32D-NEXT: lui a0, 262816
-; RV32-ILP32D-NEXT: sw a0, 4(sp)
+; RV32-ILP32D-NEXT: lui a1, 262816
; RV32-ILP32D-NEXT: lui a0, %hi(.LCPI9_0)
; RV32-ILP32D-NEXT: fld fa0, %lo(.LCPI9_0)(a0)
; RV32-ILP32D-NEXT: lui a0, %hi(.LCPI9_1)
@@ -226,6 +225,7 @@ define i32 @caller_double_on_stack_exhausted_gprs_fprs() nounwind {
; RV32-ILP32D-NEXT: li a4, 5
; RV32-ILP32D-NEXT: li a6, 7
; RV32-ILP32D-NEXT: sw zero, 0(sp)
+; RV32-ILP32D-NEXT: sw a1, 4(sp)
; RV32-ILP32D-NEXT: li a1, 0
; RV32-ILP32D-NEXT: li a3, 0
; RV32-ILP32D-NEXT: li a5, 0
diff --git a/llvm/test/CodeGen/RISCV/calling-conv-ilp32e.ll b/llvm/test/CodeGen/RISCV/calling-conv-ilp32e.ll
index 708cb00d1c45c6..948645948f64d2 100644
--- a/llvm/test/CodeGen/RISCV/calling-conv-ilp32e.ll
+++ b/llvm/test/CodeGen/RISCV/calling-conv-ilp32e.ll
@@ -199,14 +199,14 @@ define i32 @caller_float_on_stack() {
; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 16
; ILP32E-FPELIM-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4
-; ILP32E-FPELIM-NEXT: lui a0, 264704
-; ILP32E-FPELIM-NEXT: sw a0, 8(sp)
-; ILP32E-FPELIM-NEXT: sw zero, 4(sp)
-; ILP32E-FPELIM-NEXT: li a1, 4
+; ILP32E-FPELIM-NEXT: lui a1, 264704
+; ILP32E-FPELIM-NEXT: li a3, 4
; ILP32E-FPELIM-NEXT: li a0, 1
; ILP32E-FPELIM-NEXT: li a2, 2
; ILP32E-FPELIM-NEXT: li a4, 3
-; ILP32E-FPELIM-NEXT: sw a1, 0(sp)
+; ILP32E-FPELIM-NEXT: sw a3, 0(sp)
+; ILP32E-FPELIM-NEXT: sw zero, 4(sp)
+; ILP32E-FPELIM-NEXT: sw a1, 8(sp)
; ILP32E-FPELIM-NEXT: li a1, 0
; ILP32E-FPELIM-NEXT: li a3, 0
; ILP32E-FPELIM-NEXT: li a5, 0
@@ -225,14 +225,14 @@ define i32 @caller_float_on_stack() {
; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8
; ILP32E-WITHFP-NEXT: addi s0, sp, 20
; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0
-; ILP32E-WITHFP-NEXT: lui a0, 264704
-; ILP32E-WITHFP-NEXT: sw a0, 8(sp)
-; ILP32E-WITHFP-NEXT: sw zero, 4(sp)
-; ILP32E-WITHFP-NEXT: li a1, 4
+; ILP32E-WITHFP-NEXT: lui a1, 264704
+; ILP32E-WITHFP-NEXT: li a3, 4
; ILP32E-WITHFP-NEXT: li a0, 1
; ILP32E-WITHFP-NEXT: li a2, 2
; ILP32E-WITHFP-NEXT: li a4, 3
-; ILP32E-WITHFP-NEXT: sw a1, 0(sp)
+; ILP32E-WITHFP-NEXT: sw a3, 0(sp)
+; ILP32E-WITHFP-NEXT: sw zero, 4(sp)
+; ILP32E-WITHFP-NEXT: sw a1, 8(sp)
; ILP32E-WITHFP-NEXT: li a1, 0
; ILP32E-WITHFP-NEXT: li a3, 0
; ILP32E-WITHFP-NEXT: li a5, 0
@@ -248,14 +248,14 @@ define i32 @caller_float_on_stack() {
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, -12
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 16
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset ra, -4
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a0, 264704
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 8(sp)
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 4(sp)
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 4
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a1, 264704
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a3, 4
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 1
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a2, 2
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a4, 3
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a1, 0(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a3, 0(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 4(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a1, 8(sp)
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 0
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a3, 0
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a5, 0
@@ -272,14 +272,14 @@ define i32 @caller_float_on_stack() {
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 20
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a0, 264704
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 8(sp)
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 4(sp)
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 4
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a1, 264704
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a3, 4
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 1
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a2, 2
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a4, 3
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a1, 0(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a3, 0(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 4(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a1, 8(sp)
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 0
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a3, 0
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a5, 0
@@ -630,34 +630,31 @@ define void @caller_aligned_stack() {
; ILP32E-FPELIM-NEXT: .cfi_def_cfa s0, 0
; ILP32E-FPELIM-NEXT: andi sp, sp, -16
; ILP32E-FPELIM-NEXT: li a0, 18
+; ILP32E-FPELIM-NEXT: li a1, 17
+; ILP32E-FPELIM-NEXT: li a2, 16
+; ILP32E-FPELIM-NEXT: lui a3, 262236
+; ILP32E-FPELIM-NEXT: addi a3, a3, 655
+; ILP32E-FPELIM-NEXT: sw a3, 16(sp)
+; ILP32E-FPELIM-NEXT: sw a2, 20(sp)
+; ILP32E-FPELIM-NEXT: sw a1, 24(sp)
; ILP32E-FPELIM-NEXT: sw a0, 28(sp)
-; ILP32E-FPELIM-NEXT: li a0, 17
-; ILP32E-FPELIM-NEXT: sw a0, 24(sp)
-; ILP32E-FPELIM-NEXT: li a0, 16
-; ILP32E-FPELIM-NEXT: sw a0, 20(sp)
-; ILP32E-FPELIM-NEXT: lui a0, 262236
-; ILP32E-FPELIM-NEXT: addi a0, a0, 655
-; ILP32E-FPELIM-NEXT: sw a0, 16(sp)
; ILP32E-FPELIM-NEXT: lui a0, 377487
; ILP32E-FPELIM-NEXT: addi a0, a0, 1475
+; ILP32E-FPELIM-NEXT: li a1, 15
+; ILP32E-FPELIM-NEXT: li a2, 14
+; ILP32E-FPELIM-NEXT: li a3, 4
+; ILP32E-FPELIM-NEXT: sw a3, 0(sp)
+; ILP32E-FPELIM-NEXT: sw a2, 4(sp)
+; ILP32E-FPELIM-NEXT: sw a1, 8(sp)
; ILP32E-FPELIM-NEXT: sw a0, 12(sp)
-; ILP32E-FPELIM-NEXT: li a0, 15
-; ILP32E-FPELIM-NEXT: sw a0, 8(sp)
-; ILP32E-FPELIM-NEXT: li a0, 14
-; ILP32E-FPELIM-NEXT: sw a0, 4(sp)
-; ILP32E-FPELIM-NEXT: li a0, 4
-; ILP32E-FPELIM-NEXT: sw a0, 0(sp)
; ILP32E-FPELIM-NEXT: lui a0, 262153
-; ILP32E-FPELIM-NEXT: addi a0, a0, 491
-; ILP32E-FPELIM-NEXT: sw a0, 44(sp)
+; ILP32E-FPELIM-NEXT: addi a6, a0, 491
; ILP32E-FPELIM-NEXT: lui a0, 545260
-; ILP32E-FPELIM-NEXT: addi a0, a0, -1967
-; ILP32E-FPELIM-NEXT: sw a0, 40(sp)
+; ILP32E-FPELIM-NEXT: addi a7, a0, -1967
; ILP32E-FPELIM-NEXT: lui a0, 964690
-; ILP32E-FPELIM-NEXT: addi a0, a0, -328
-; ILP32E-FPELIM-NEXT: sw a0, 36(sp)
+; ILP32E-FPELIM-NEXT: addi t0, a0, -328
; ILP32E-FPELIM-NEXT: lui a0, 335544
-; ILP32E-FPELIM-NEXT: addi a6, a0, 1311
+; ILP32E-FPELIM-NEXT: addi t1, a0, 1311
; ILP32E-FPELIM-NEXT: lui a0, 688509
; ILP32E-FPELIM-NEXT: addi a5, a0, -2048
; ILP32E-FPELIM-NEXT: li a0, 1
@@ -665,7 +662,10 @@ define void @caller_aligned_stack() {
; ILP32E-FPELIM-NEXT: addi a2, sp, 32
; ILP32E-FPELIM-NEXT: li a3, 12
; ILP32E-FPELIM-NEXT: li a4, 13
-; ILP32E-FPELIM-NEXT: sw a6, 32(sp)
+; ILP32E-FPELIM-NEXT: sw t1, 32(sp)
+; ILP32E-FPELIM-NEXT: sw t0, 36(sp)
+; ILP32E-FPELIM-NEXT: sw a7, 40(sp)
+; ILP32E-FPELIM-NEXT: sw a6, 44(sp)
; ILP32E-FPELIM-NEXT: call callee_aligned_stack
; ILP32E-FPELIM-NEXT: addi sp, s0, -64
; ILP32E-FPELIM-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
@@ -685,34 +685,31 @@ define void @caller_aligned_stack() {
; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0
; ILP32E-WITHFP-NEXT: andi sp, sp, -16
; ILP32E-WITHFP-NEXT: li a0, 18
+; ILP32E-WITHFP-NEXT: li a1, 17
+; ILP32E-WITHFP-NEXT: li a2, 16
+; ILP32E-WITHFP-NEXT: lui a3, 262236
+; ILP32E-WITHFP-NEXT: addi a3, a3, 655
+; ILP32E-WITHFP-NEXT: sw a3, 16(sp)
+; ILP32E-WITHFP-NEXT: sw a2, 20(sp)
+; ILP32E-WITHFP-NEXT: sw a1, 24(sp)
; ILP32E-WITHFP-NEXT: sw a0, 28(sp)
-; ILP32E-WITHFP-NEXT: li a0, 17
-; ILP32E-WITHFP-NEXT: sw a0, 24(sp)
-; ILP32E-WITHFP-NEXT: li a0, 16
-; ILP32E-WITHFP-NEXT: sw a0, 20(sp)
-; ILP32E-WITHFP-NEXT: lui a0, 262236
-; ILP32E-WITHFP-NEXT: addi a0, a0, 655
-; ILP32E-WITHFP-NEXT: sw a0, 16(sp)
; ILP32E-WITHFP-NEXT: lui a0, 377487
; ILP32E-WITHFP-NEXT: addi a0, a0, 1475
+; ILP32E-WITHFP-NEXT: li a1, 15
+; ILP32E-WITHFP-NEXT: li a2, 14
+; ILP32E-WITHFP-NEXT: li a3, 4
+; ILP32E-WITHFP-NEXT: sw a3, 0(sp)
+; ILP32E-WITHFP-NEXT: sw a2, 4(sp)
+; ILP32E-WITHFP-NEXT: sw a1, 8(sp)
; ILP32E-WITHFP-NEXT: sw a0, 12(sp)
-; ILP32E-WITHFP-NEXT: li a0, 15
-; ILP32E-WITHFP-NEXT: sw a0, 8(sp)
-; ILP32E-WITHFP-NEXT: li a0, 14
-; ILP32E-WITHFP-NEXT: sw a0, 4(sp)
-; ILP32E-WITHFP-NEXT: li a0, 4
-; ILP32E-WITHFP-NEXT: sw a0, 0(sp)
; ILP32E-WITHFP-NEXT: lui a0, 262153
-; ILP32E-WITHFP-NEXT: addi a0, a0, 491
-; ILP32E-WITHFP-NEXT: sw a0, 44(sp)
+; ILP32E-WITHFP-NEXT: addi a6, a0, 491
; ILP32E-WITHFP-NEXT: lui a0, 545260
-; ILP32E-WITHFP-NEXT: addi a0, a0, -1967
-; ILP32E-WITHFP-NEXT: sw a0, 40(sp)
+; ILP32E-WITHFP-NEXT: addi a7, a0, -1967
; ILP32E-WITHFP-NEXT: lui a0, 964690
-; ILP32E-WITHFP-NEXT: addi a0, a0, -328
-; ILP32E-WITHFP-NEXT: sw a0, 36(sp)
+; ILP32E-WITHFP-NEXT: addi t0, a0, -328
; ILP32E-WITHFP-NEXT: lui a0, 335544
-; ILP32E-WITHFP-NEXT: addi a6, a0, 1311
+; ILP32E-WITHFP-NEXT: addi t1, a0, 1311
; ILP32E-WITHFP-NEXT: lui a0, 688509
; ILP32E-WITHFP-NEXT: addi a5, a0, -2048
; ILP32E-WITHFP-NEXT: li a0, 1
@@ -720,7 +717,10 @@ define void @caller_aligned_stack() {
; ILP32E-WITHFP-NEXT: addi a2, sp, 32
; ILP32E-WITHFP-NEXT: li a3, 12
; ILP32E-WITHFP-NEXT: li a4, 13
-; ILP32E-WITHFP-NEXT: sw a6, 32(sp)
+; ILP32E-WITHFP-NEXT: sw t1, 32(sp)
+; ILP32E-WITHFP-NEXT: sw t0, 36(sp)
+; ILP32E-WITHFP-NEXT: sw a7, 40(sp)
+; ILP32E-WITHFP-NEXT: sw a6, 44(sp)
; ILP32E-WITHFP-NEXT: call callee_aligned_stack
; ILP32E-WITHFP-NEXT: addi sp, s0, -64
; ILP32E-WITHFP-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
@@ -739,34 +739,31 @@ define void @caller_aligned_stack() {
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: andi sp, sp, -16
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 18
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 17
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a2, 16
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a3, 262236
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a3, a3, 655
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a3, 16(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a2, 20(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a1, 24(sp)
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 28(sp)
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 17
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 24(sp)
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 16
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 20(sp)
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a0, 262236
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a0, a0, 655
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 16(sp)
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a0, 377487
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a0, a0, 1475
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 15
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a2, 14
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a3, 4
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a3, 0(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a2, 4(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a1, 8(sp)
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 12(sp)
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 15
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 8(sp)
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 14
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 4(sp)
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 4
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 0(sp)
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a0, 262153
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a0, a0, 491
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 44(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a6, a0, 491
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a0, 545260
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a0, a0, -1967
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 40(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a7, a0, -1967
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a0, 964690
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a0, a0, -328
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 36(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi t0, a0, -328
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a0, 335544
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a6, a0, 1311
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi t1, a0, 1311
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a0, 688509
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a5, a0, -2048
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 1
@@ -774,7 +771,10 @@ define void @caller_aligned_stack() {
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a2, sp, 32
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a3, 12
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a4, 13
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a6, 32(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw t1, 32(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw t0, 36(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a7, 40(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a6, 44(sp)
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call callee_aligned_stack
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, s0, -64
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, 56
@@ -791,34 +791,31 @@ define void @caller_aligned_stack() {
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: andi sp, sp, -16
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 18
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 17
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a2, 16
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a3, 262236
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a3, a3, 655
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a3, 16(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a2, 20(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a1, 24(sp)
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 28(sp)
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 17
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 24(sp)
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 16
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 20(sp)
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a0, 262236
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a0, a0, 655
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 16(sp)
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a0, 377487
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a0, a0, 1475
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 15
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a2, 14
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a3, 4
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a3, 0(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a2, 4(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a1, 8(sp)
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 12(sp)
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 15
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 8(sp)
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 14
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 4(sp)
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 4
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 0(sp)
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a0, 262153
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a0, a0, 491
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 44(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a6, a0, 491
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a0, 545260
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a0, a0, -1967
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 40(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a7, a0, -1967
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a0, 964690
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a0, a0, -328
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 36(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi t0, a0, -328
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a0, 335544
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a6, a0, 1311
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi t1, a0, 1311
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a0, 688509
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a5, a0, -2048
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 1
@@ -826,7 +823,10 @@ define void @caller_aligned_stack() {
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a2, sp, 32
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a3, 12
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a4, 13
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a6, 32(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw t1, 32(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw t0, 36(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a7, 40(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a6, 44(sp)
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call callee_aligned_stack
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, s0, -64
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, 56
@@ -1163,18 +1163,18 @@ define i32 @caller_many_scalars() {
; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 20
; ILP32E-FPELIM-NEXT: sw ra, 16(sp) # 4-byte Folded Spill
; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4
-; ILP32E-FPELIM-NEXT: li a0, 8
-; ILP32E-FPELIM-NEXT: sw a0, 12(sp)
-; ILP32E-FPELIM-NEXT: sw zero, 8(sp)
-; ILP32E-FPELIM-NEXT: li a0, 7
-; ILP32E-FPELIM-NEXT: sw a0, 4(sp)
-; ILP32E-FPELIM-NEXT: li a4, 6
+; ILP32E-FPELIM-NEXT: li a4, 8
+; ILP32E-FPELIM-NEXT: li a6, 7
+; ILP32E-FPELIM-NEXT: li a7, 6
; ILP32E-FPELIM-NEXT: li a0, 1
; ILP32E-FPELIM-NEXT: li a1, 2
; ILP32E-FPELIM-NEXT: li a2, 3
; ILP32E-FPELIM-NEXT: li a3, 4
; ILP32E-FPELIM-NEXT: li a5, 5
-; ILP32E-FPELIM-NEXT: sw a4, 0(sp)
+; ILP32E-FPELIM-NEXT: sw a7, 0(sp)
+; ILP32E-FPELIM-NEXT: sw a6, 4(sp)
+; ILP32E-FPELIM-NEXT: sw zero, 8(sp)
+; ILP32E-FPELIM-NEXT: sw a4, 12(sp)
; ILP32E-FPELIM-NEXT: li a4, 0
; ILP32E-FPELIM-NEXT: call callee_many_scalars
; ILP32E-FPELIM-NEXT: lw ra, 16(sp) # 4-byte Folded Reload
@@ -1191,18 +1191,18 @@ define i32 @caller_many_scalars() {
; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8
; ILP32E-WITHFP-NEXT: addi s0, sp, 24
; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0
-; ILP32E-WITHFP-NEXT: li a0, 8
-; ILP32E-WITHFP-NEXT: sw a0, 12(sp)
-; ILP32E-WITHFP-NEXT: sw zero, 8(sp)
-; ILP32E-WITHFP-NEXT: li a0, 7
-; ILP32E-WITHFP-NEXT: sw a0, 4(sp)
-; ILP32E-WITHFP-NEXT: li a4, 6
+; ILP32E-WITHFP-NEXT: li a4, 8
+; ILP32E-WITHFP-NEXT: li a6, 7
+; ILP32E-WITHFP-NEXT: li a7, 6
; ILP32E-WITHFP-NEXT: li a0, 1
; ILP32E-WITHFP-NEXT: li a1, 2
; ILP32E-WITHFP-NEXT: li a2, 3
; ILP32E-WITHFP-NEXT: li a3, 4
; ILP32E-WITHFP-NEXT: li a5, 5
-; ILP32E-WITHFP-NEXT: sw a4, 0(sp)
+; ILP32E-WITHFP-NEXT: sw a7, 0(sp)
+; ILP32E-WITHFP-NEXT: sw a6, 4(sp)
+; ILP32E-WITHFP-NEXT: sw zero, 8(sp)
+; ILP32E-WITHFP-NEXT: sw a4, 12(sp)
; ILP32E-WITHFP-NEXT: li a4, 0
; ILP32E-WITHFP-NEXT: call callee_many_scalars
; ILP32E-WITHFP-NEXT: lw ra, 20(sp) # 4-byte Folded Reload
@@ -1216,18 +1216,18 @@ define i32 @caller_many_scalars() {
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, -16
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 20
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset ra, -4
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 8
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 12(sp)
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 8(sp)
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 7
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 4(sp)
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a4, 6
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a4, 8
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a6, 7
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a7, 6
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 1
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 2
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a2, 3
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a3, 4
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a5, 5
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a4, 0(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a7, 0(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a6, 4(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 8(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a4, 12(sp)
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a4, 0
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call callee_many_scalars
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, 16
@@ -1242,18 +1242,18 @@ define i32 @caller_many_scalars() {
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 24
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 8
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 12(sp)
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 8(sp)
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 7
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 4(sp)
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a4, 6
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a4, 8
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a6, 7
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a7, 6
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 1
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 2
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a2, 3
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a3, 4
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a5, 5
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a4, 0(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a7, 0(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a6, 4(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 8(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a4, 12(sp)
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a4, 0
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call callee_many_scalars
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, 16
@@ -1380,17 +1380,17 @@ define i32 @caller_large_scalars() {
; ILP32E-FPELIM-NEXT: .cfi_def_cfa s0, 0
; ILP32E-FPELIM-NEXT: andi sp, sp, -16
; ILP32E-FPELIM-NEXT: lui a0, 524272
-; ILP32E-FPELIM-NEXT: sw a0, 12(sp)
-; ILP32E-FPELIM-NEXT: sw zero, 8(sp)
-; ILP32E-FPELIM-NEXT: sw zero, 4(sp)
; ILP32E-FPELIM-NEXT: sw zero, 0(sp)
-; ILP32E-FPELIM-NEXT: sw zero, 36(sp)
-; ILP32E-FPELIM-NEXT: sw zero, 32(sp)
-; ILP32E-FPELIM-NEXT: sw zero, 28(sp)
+; ILP32E-FPELIM-NEXT: sw zero, 4(sp)
+; ILP32E-FPELIM-NEXT: sw zero, 8(sp)
+; ILP32E-FPELIM-NEXT: sw a0, 12(sp)
; ILP32E-FPELIM-NEXT: li a2, 1
; ILP32E-FPELIM-NEXT: addi a0, sp, 24
; ILP32E-FPELIM-NEXT: mv a1, sp
; ILP32E-FPELIM-NEXT: sw a2, 24(sp)
+; ILP32E-FPELIM-NEXT: sw zero, 28(sp)
+; ILP32E-FPELIM-NEXT: sw zero, 32(sp)
+; ILP32E-FPELIM-NEXT: sw zero, 36(sp)
; ILP32E-FPELIM-NEXT: call callee_large_scalars
; ILP32E-FPELIM-NEXT: addi sp, s0, -48
; ILP32E-FPELIM-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
@@ -1410,17 +1410,17 @@ define i32 @caller_large_scalars() {
; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0
; ILP32E-WITHFP-NEXT: andi sp, sp, -16
; ILP32E-WITHFP-NEXT: lui a0, 524272
-; ILP32E-WITHFP-NEXT: sw a0, 12(sp)
-; ILP32E-WITHFP-NEXT: sw zero, 8(sp)
-; ILP32E-WITHFP-NEXT: sw zero, 4(sp)
; ILP32E-WITHFP-NEXT: sw zero, 0(sp)
-; ILP32E-WITHFP-NEXT: sw zero, 36(sp)
-; ILP32E-WITHFP-NEXT: sw zero, 32(sp)
-; ILP32E-WITHFP-NEXT: sw zero, 28(sp)
+; ILP32E-WITHFP-NEXT: sw zero, 4(sp)
+; ILP32E-WITHFP-NEXT: sw zero, 8(sp)
+; ILP32E-WITHFP-NEXT: sw a0, 12(sp)
; ILP32E-WITHFP-NEXT: li a2, 1
; ILP32E-WITHFP-NEXT: addi a0, sp, 24
; ILP32E-WITHFP-NEXT: mv a1, sp
; ILP32E-WITHFP-NEXT: sw a2, 24(sp)
+; ILP32E-WITHFP-NEXT: sw zero, 28(sp)
+; ILP32E-WITHFP-NEXT: sw zero, 32(sp)
+; ILP32E-WITHFP-NEXT: sw zero, 36(sp)
; ILP32E-WITHFP-NEXT: call callee_large_scalars
; ILP32E-WITHFP-NEXT: addi sp, s0, -48
; ILP32E-WITHFP-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
@@ -1439,17 +1439,17 @@ define i32 @caller_large_scalars() {
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: andi sp, sp, -16
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a0, 524272
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 12(sp)
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 8(sp)
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 4(sp)
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 0(sp)
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 36(sp)
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 32(sp)
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 28(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 4(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 8(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 12(sp)
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a2, 1
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a0, sp, 24
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: mv a1, sp
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a2, 24(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 28(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 32(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 36(sp)
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call callee_large_scalars
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, s0, -48
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, 40
@@ -1466,17 +1466,17 @@ define i32 @caller_large_scalars() {
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: andi sp, sp, -16
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a0, 524272
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 12(sp)
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 8(sp)
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 4(sp)
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 0(sp)
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 36(sp)
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 32(sp)
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 28(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 4(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 8(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 12(sp)
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a2, 1
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a0, sp, 24
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: mv a1, sp
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a2, 24(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 28(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 32(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 36(sp)
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call callee_large_scalars
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, s0, -48
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, 40
@@ -1613,29 +1613,29 @@ define i32 @caller_large_scalars_exhausted_regs() {
; ILP32E-FPELIM-NEXT: .cfi_def_cfa s0, 0
; ILP32E-FPELIM-NEXT: andi sp, sp, -16
; ILP32E-FPELIM-NEXT: addi a0, sp, 16
+; ILP32E-FPELIM-NEXT: li a1, 9
+; ILP32E-FPELIM-NEXT: addi a2, sp, 40
+; ILP32E-FPELIM-NEXT: li a3, 7
+; ILP32E-FPELIM-NEXT: sw a3, 0(sp)
+; ILP32E-FPELIM-NEXT: sw a2, 4(sp)
+; ILP32E-FPELIM-NEXT: sw a1, 8(sp)
; ILP32E-FPELIM-NEXT: sw a0, 12(sp)
-; ILP32E-FPELIM-NEXT: li a0, 9
-; ILP32E-FPELIM-NEXT: sw a0, 8(sp)
-; ILP32E-FPELIM-NEXT: addi a0, sp, 40
-; ILP32E-FPELIM-NEXT: sw a0, 4(sp)
-; ILP32E-FPELIM-NEXT: li a0, 7
-; ILP32E-FPELIM-NEXT: sw a0, 0(sp)
; ILP32E-FPELIM-NEXT: lui a0, 524272
-; ILP32E-FPELIM-NEXT: sw a0, 28(sp)
-; ILP32E-FPELIM-NEXT: sw zero, 24(sp)
-; ILP32E-FPELIM-NEXT: sw zero, 20(sp)
; ILP32E-FPELIM-NEXT: sw zero, 16(sp)
-; ILP32E-FPELIM-NEXT: sw zero, 52(sp)
-; ILP32E-FPELIM-NEXT: sw zero, 48(sp)
-; ILP32E-FPELIM-NEXT: li a0, 8
-; ILP32E-FPELIM-NEXT: sw a0, 40(sp)
+; ILP32E-FPELIM-NEXT: sw zero, 20(sp)
+; ILP32E-FPELIM-NEXT: sw zero, 24(sp)
+; ILP32E-FPELIM-NEXT: sw a0, 28(sp)
+; ILP32E-FPELIM-NEXT: li a6, 8
; ILP32E-FPELIM-NEXT: li a0, 1
; ILP32E-FPELIM-NEXT: li a1, 2
; ILP32E-FPELIM-NEXT: li a2, 3
; ILP32E-FPELIM-NEXT: li a3, 4
; ILP32E-FPELIM-NEXT: li a4, 5
; ILP32E-FPELIM-NEXT: li a5, 6
+; ILP32E-FPELIM-NEXT: sw a6, 40(sp)
; ILP32E-FPELIM-NEXT: sw zero, 44(sp)
+; ILP32E-FPELIM-NEXT: sw zero, 48(sp)
+; ILP32E-FPELIM-NEXT: sw zero, 52(sp)
; ILP32E-FPELIM-NEXT: call callee_large_scalars_exhausted_regs
; ILP32E-FPELIM-NEXT: addi sp, s0, -64
; ILP32E-FPELIM-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
@@ -1655,29 +1655,29 @@ define i32 @caller_large_scalars_exhausted_regs() {
; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0
; ILP32E-WITHFP-NEXT: andi sp, sp, -16
; ILP32E-WITHFP-NEXT: addi a0, sp, 16
+; ILP32E-WITHFP-NEXT: li a1, 9
+; ILP32E-WITHFP-NEXT: addi a2, sp, 40
+; ILP32E-WITHFP-NEXT: li a3, 7
+; ILP32E-WITHFP-NEXT: sw a3, 0(sp)
+; ILP32E-WITHFP-NEXT: sw a2, 4(sp)
+; ILP32E-WITHFP-NEXT: sw a1, 8(sp)
; ILP32E-WITHFP-NEXT: sw a0, 12(sp)
-; ILP32E-WITHFP-NEXT: li a0, 9
-; ILP32E-WITHFP-NEXT: sw a0, 8(sp)
-; ILP32E-WITHFP-NEXT: addi a0, sp, 40
-; ILP32E-WITHFP-NEXT: sw a0, 4(sp)
-; ILP32E-WITHFP-NEXT: li a0, 7
-; ILP32E-WITHFP-NEXT: sw a0, 0(sp)
; ILP32E-WITHFP-NEXT: lui a0, 524272
-; ILP32E-WITHFP-NEXT: sw a0, 28(sp)
-; ILP32E-WITHFP-NEXT: sw zero, 24(sp)
-; ILP32E-WITHFP-NEXT: sw zero, 20(sp)
; ILP32E-WITHFP-NEXT: sw zero, 16(sp)
-; ILP32E-WITHFP-NEXT: sw zero, 52(sp)
-; ILP32E-WITHFP-NEXT: sw zero, 48(sp)
-; ILP32E-WITHFP-NEXT: li a0, 8
-; ILP32E-WITHFP-NEXT: sw a0, 40(sp)
+; ILP32E-WITHFP-NEXT: sw zero, 20(sp)
+; ILP32E-WITHFP-NEXT: sw zero, 24(sp)
+; ILP32E-WITHFP-NEXT: sw a0, 28(sp)
+; ILP32E-WITHFP-NEXT: li a6, 8
; ILP32E-WITHFP-NEXT: li a0, 1
; ILP32E-WITHFP-NEXT: li a1, 2
; ILP32E-WITHFP-NEXT: li a2, 3
; ILP32E-WITHFP-NEXT: li a3, 4
; ILP32E-WITHFP-NEXT: li a4, 5
; ILP32E-WITHFP-NEXT: li a5, 6
+; ILP32E-WITHFP-NEXT: sw a6, 40(sp)
; ILP32E-WITHFP-NEXT: sw zero, 44(sp)
+; ILP32E-WITHFP-NEXT: sw zero, 48(sp)
+; ILP32E-WITHFP-NEXT: sw zero, 52(sp)
; ILP32E-WITHFP-NEXT: call callee_large_scalars_exhausted_regs
; ILP32E-WITHFP-NEXT: addi sp, s0, -64
; ILP32E-WITHFP-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
@@ -1696,29 +1696,29 @@ define i32 @caller_large_scalars_exhausted_regs() {
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: andi sp, sp, -16
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a0, sp, 16
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 9
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a2, sp, 40
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a3, 7
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a3, 0(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a2, 4(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a1, 8(sp)
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 12(sp)
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 9
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 8(sp)
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a0, sp, 40
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 4(sp)
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 7
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 0(sp)
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a0, 524272
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 28(sp)
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 24(sp)
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 20(sp)
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 16(sp)
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 52(sp)
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 48(sp)
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 8
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 40(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 20(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 24(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 28(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a6, 8
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 1
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 2
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a2, 3
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a3, 4
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a4, 5
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a5, 6
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a6, 40(sp)
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 44(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 48(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 52(sp)
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call callee_large_scalars_exhausted_regs
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, s0, -64
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, 56
@@ -1735,29 +1735,29 @@ define i32 @caller_large_scalars_exhausted_regs() {
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: andi sp, sp, -16
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a0, sp, 16
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 9
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a2, sp, 40
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a3, 7
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a3, 0(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a2, 4(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a1, 8(sp)
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 12(sp)
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 9
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 8(sp)
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a0, sp, 40
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 4(sp)
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 7
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 0(sp)
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a0, 524272
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 28(sp)
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 24(sp)
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 20(sp)
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 16(sp)
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 52(sp)
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 48(sp)
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 8
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 40(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 20(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 24(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 28(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a6, 8
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 1
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 2
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a2, 3
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a3, 4
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a4, 5
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a5, 6
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a6, 40(sp)
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 44(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 48(sp)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 52(sp)
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call callee_large_scalars_exhausted_regs
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, s0, -64
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, sp, 56
@@ -2034,12 +2034,12 @@ define i32 @caller_large_struct() {
; ILP32E-FPELIM-NEXT: sw ra, 32(sp) # 4-byte Folded Spill
; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4
; ILP32E-FPELIM-NEXT: li a0, 1
-; ILP32E-FPELIM-NEXT: sw a0, 16(sp)
; ILP32E-FPELIM-NEXT: li a1, 2
-; ILP32E-FPELIM-NEXT: sw a1, 20(sp)
; ILP32E-FPELIM-NEXT: li a2, 3
-; ILP32E-FPELIM-NEXT: sw a2, 24(sp)
; ILP32E-FPELIM-NEXT: li a3, 4
+; ILP32E-FPELIM-NEXT: sw a0, 16(sp)
+; ILP32E-FPELIM-NEXT: sw a1, 20(sp)
+; ILP32E-FPELIM-NEXT: sw a2, 24(sp)
; ILP32E-FPELIM-NEXT: sw a3, 28(sp)
; ILP32E-FPELIM-NEXT: sw a0, 0(sp)
; ILP32E-FPELIM-NEXT: sw a1, 4(sp)
@@ -2062,12 +2062,12 @@ define i32 @caller_large_struct() {
; ILP32E-WITHFP-NEXT: addi s0, sp, 40
; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0
; ILP32E-WITHFP-NEXT: li a0, 1
-; ILP32E-WITHFP-NEXT: sw a0, -24(s0)
; ILP32E-WITHFP-NEXT: li a1, 2
-; ILP32E-WITHFP-NEXT: sw a1, -20(s0)
; ILP32E-WITHFP-NEXT: li a2, 3
-; ILP32E-WITHFP-NEXT: sw a2, -16(s0)
; ILP32E-WITHFP-NEXT: li a3, 4
+; ILP32E-WITHFP-NEXT: sw a0, -24(s0)
+; ILP32E-WITHFP-NEXT: sw a1, -20(s0)
+; ILP32E-WITHFP-NEXT: sw a2, -16(s0)
; ILP32E-WITHFP-NEXT: sw a3, -12(s0)
; ILP32E-WITHFP-NEXT: sw a0, -40(s0)
; ILP32E-WITHFP-NEXT: sw a1, -36(s0)
@@ -2087,12 +2087,12 @@ define i32 @caller_large_struct() {
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 36
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_offset ra, -4
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 1
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 16(sp)
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 2
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a1, 20(sp)
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a2, 3
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a2, 24(sp)
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a3, 4
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 16(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a1, 20(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a2, 24(sp)
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a3, 28(sp)
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 0(sp)
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a1, 4(sp)
@@ -2113,12 +2113,12 @@ define i32 @caller_large_struct() {
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 40
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 1
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, -24(s0)
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 2
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a1, -20(s0)
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a2, 3
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a2, -16(s0)
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a3, 4
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, -24(s0)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a1, -20(s0)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a2, -16(s0)
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a3, -12(s0)
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, -40(s0)
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a1, -36(s0)
@@ -2252,10 +2252,10 @@ define fp128 @callee_large_scalar_ret() {
; ILP32E-FPELIM-LABEL: callee_large_scalar_ret:
; ILP32E-FPELIM: # %bb.0:
; ILP32E-FPELIM-NEXT: lui a1, 524272
-; ILP32E-FPELIM-NEXT: sw a1, 12(a0)
-; ILP32E-FPELIM-NEXT: sw zero, 8(a0)
-; ILP32E-FPELIM-NEXT: sw zero, 4(a0)
; ILP32E-FPELIM-NEXT: sw zero, 0(a0)
+; ILP32E-FPELIM-NEXT: sw zero, 4(a0)
+; ILP32E-FPELIM-NEXT: sw zero, 8(a0)
+; ILP32E-FPELIM-NEXT: sw a1, 12(a0)
; ILP32E-FPELIM-NEXT: ret
;
; ILP32E-WITHFP-LABEL: callee_large_scalar_ret:
@@ -2269,10 +2269,10 @@ define fp128 @callee_large_scalar_ret() {
; ILP32E-WITHFP-NEXT: addi s0, sp, 8
; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0
; ILP32E-WITHFP-NEXT: lui a1, 524272
-; ILP32E-WITHFP-NEXT: sw a1, 12(a0)
-; ILP32E-WITHFP-NEXT: sw zero, 8(a0)
-; ILP32E-WITHFP-NEXT: sw zero, 4(a0)
; ILP32E-WITHFP-NEXT: sw zero, 0(a0)
+; ILP32E-WITHFP-NEXT: sw zero, 4(a0)
+; ILP32E-WITHFP-NEXT: sw zero, 8(a0)
+; ILP32E-WITHFP-NEXT: sw a1, 12(a0)
; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload
; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload
; ILP32E-WITHFP-NEXT: addi sp, sp, 8
@@ -2281,10 +2281,10 @@ define fp128 @callee_large_scalar_ret() {
; ILP32E-FPELIM-SAVE-RESTORE-LABEL: callee_large_scalar_ret:
; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0:
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a1, 524272
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a1, 12(a0)
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 8(a0)
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 4(a0)
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 0(a0)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 4(a0)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 8(a0)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a1, 12(a0)
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: ret
;
; ILP32E-WITHFP-SAVE-RESTORE-LABEL: callee_large_scalar_ret:
@@ -2296,10 +2296,10 @@ define fp128 @callee_large_scalar_ret() {
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a1, 524272
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a1, 12(a0)
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 8(a0)
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 4(a0)
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 0(a0)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 4(a0)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 8(a0)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a1, 12(a0)
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1
ret fp128 0xL00000000000000007FFF000000000000
}
@@ -2384,13 +2384,13 @@ define void @callee_large_struct_ret(ptr noalias sret(%struct.large) %agg.result
; ILP32E-FPELIM-LABEL: callee_large_struct_ret:
; ILP32E-FPELIM: # %bb.0:
; ILP32E-FPELIM-NEXT: li a1, 1
+; ILP32E-FPELIM-NEXT: li a2, 2
+; ILP32E-FPELIM-NEXT: li a3, 3
+; ILP32E-FPELIM-NEXT: li a4, 4
; ILP32E-FPELIM-NEXT: sw a1, 0(a0)
-; ILP32E-FPELIM-NEXT: li a1, 2
-; ILP32E-FPELIM-NEXT: sw a1, 4(a0)
-; ILP32E-FPELIM-NEXT: li a1, 3
-; ILP32E-FPELIM-NEXT: sw a1, 8(a0)
-; ILP32E-FPELIM-NEXT: li a1, 4
-; ILP32E-FPELIM-NEXT: sw a1, 12(a0)
+; ILP32E-FPELIM-NEXT: sw a2, 4(a0)
+; ILP32E-FPELIM-NEXT: sw a3, 8(a0)
+; ILP32E-FPELIM-NEXT: sw a4, 12(a0)
; ILP32E-FPELIM-NEXT: ret
;
; ILP32E-WITHFP-LABEL: callee_large_struct_ret:
@@ -2404,13 +2404,13 @@ define void @callee_large_struct_ret(ptr noalias sret(%struct.large) %agg.result
; ILP32E-WITHFP-NEXT: addi s0, sp, 8
; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0
; ILP32E-WITHFP-NEXT: li a1, 1
+; ILP32E-WITHFP-NEXT: li a2, 2
+; ILP32E-WITHFP-NEXT: li a3, 3
+; ILP32E-WITHFP-NEXT: li a4, 4
; ILP32E-WITHFP-NEXT: sw a1, 0(a0)
-; ILP32E-WITHFP-NEXT: li a1, 2
-; ILP32E-WITHFP-NEXT: sw a1, 4(a0)
-; ILP32E-WITHFP-NEXT: li a1, 3
-; ILP32E-WITHFP-NEXT: sw a1, 8(a0)
-; ILP32E-WITHFP-NEXT: li a1, 4
-; ILP32E-WITHFP-NEXT: sw a1, 12(a0)
+; ILP32E-WITHFP-NEXT: sw a2, 4(a0)
+; ILP32E-WITHFP-NEXT: sw a3, 8(a0)
+; ILP32E-WITHFP-NEXT: sw a4, 12(a0)
; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload
; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload
; ILP32E-WITHFP-NEXT: addi sp, sp, 8
@@ -2419,13 +2419,13 @@ define void @callee_large_struct_ret(ptr noalias sret(%struct.large) %agg.result
; ILP32E-FPELIM-SAVE-RESTORE-LABEL: callee_large_struct_ret:
; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0:
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 1
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a2, 2
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a3, 3
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a4, 4
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a1, 0(a0)
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 2
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a1, 4(a0)
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 3
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a1, 8(a0)
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 4
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a1, 12(a0)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a2, 4(a0)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a3, 8(a0)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a4, 12(a0)
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: ret
;
; ILP32E-WITHFP-SAVE-RESTORE-LABEL: callee_large_struct_ret:
@@ -2437,13 +2437,13 @@ define void @callee_large_struct_ret(ptr noalias sret(%struct.large) %agg.result
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 1
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a2, 2
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a3, 3
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a4, 4
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a1, 0(a0)
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 2
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a1, 4(a0)
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 3
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a1, 8(a0)
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 4
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a1, 12(a0)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a2, 4(a0)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a3, 8(a0)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a4, 12(a0)
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1
%a = getelementptr inbounds %struct.large, ptr %agg.result, i32 0, i32 0
store i32 1, ptr %a, align 4
diff --git a/llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-lp64d-common.ll b/llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-lp64d-common.ll
index a0e1b002b7260d..cbd2cef981d71f 100644
--- a/llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-lp64d-common.ll
+++ b/llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-lp64d-common.ll
@@ -82,8 +82,7 @@ define i32 @caller_many_scalars() nounwind {
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -32
; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: li a0, 8
-; RV64I-NEXT: sd a0, 8(sp)
+; RV64I-NEXT: li a4, 8
; RV64I-NEXT: li a0, 1
; RV64I-NEXT: li a1, 2
; RV64I-NEXT: li a2, 3
@@ -92,6 +91,7 @@ define i32 @caller_many_scalars() nounwind {
; RV64I-NEXT: li a6, 6
; RV64I-NEXT: li a7, 7
; RV64I-NEXT: sd zero, 0(sp)
+; RV64I-NEXT: sd a4, 8(sp)
; RV64I-NEXT: li a4, 0
; RV64I-NEXT: call callee_many_scalars
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
@@ -133,18 +133,18 @@ define i64 @caller_large_scalars() nounwind {
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -80
; RV64I-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd zero, 24(sp)
-; RV64I-NEXT: sd zero, 16(sp)
-; RV64I-NEXT: sd zero, 8(sp)
; RV64I-NEXT: li a0, 2
; RV64I-NEXT: sd a0, 0(sp)
-; RV64I-NEXT: sd zero, 56(sp)
-; RV64I-NEXT: sd zero, 48(sp)
-; RV64I-NEXT: sd zero, 40(sp)
+; RV64I-NEXT: sd zero, 8(sp)
+; RV64I-NEXT: sd zero, 16(sp)
+; RV64I-NEXT: sd zero, 24(sp)
; RV64I-NEXT: li a2, 1
; RV64I-NEXT: addi a0, sp, 32
; RV64I-NEXT: mv a1, sp
; RV64I-NEXT: sd a2, 32(sp)
+; RV64I-NEXT: sd zero, 40(sp)
+; RV64I-NEXT: sd zero, 48(sp)
+; RV64I-NEXT: sd zero, 56(sp)
; RV64I-NEXT: call callee_large_scalars
; RV64I-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 80
@@ -189,18 +189,15 @@ define i64 @caller_large_scalars_exhausted_regs() nounwind {
; RV64I-NEXT: addi sp, sp, -96
; RV64I-NEXT: sd ra, 88(sp) # 8-byte Folded Spill
; RV64I-NEXT: addi a0, sp, 16
+; RV64I-NEXT: li a1, 9
+; RV64I-NEXT: sd a1, 0(sp)
; RV64I-NEXT: sd a0, 8(sp)
-; RV64I-NEXT: li a0, 9
-; RV64I-NEXT: sd a0, 0(sp)
-; RV64I-NEXT: sd zero, 40(sp)
-; RV64I-NEXT: sd zero, 32(sp)
-; RV64I-NEXT: sd zero, 24(sp)
; RV64I-NEXT: li a0, 10
; RV64I-NEXT: sd a0, 16(sp)
-; RV64I-NEXT: sd zero, 72(sp)
-; RV64I-NEXT: sd zero, 64(sp)
-; RV64I-NEXT: li a0, 8
-; RV64I-NEXT: sd a0, 48(sp)
+; RV64I-NEXT: sd zero, 24(sp)
+; RV64I-NEXT: sd zero, 32(sp)
+; RV64I-NEXT: sd zero, 40(sp)
+; RV64I-NEXT: li t0, 8
; RV64I-NEXT: li a0, 1
; RV64I-NEXT: li a1, 2
; RV64I-NEXT: li a2, 3
@@ -209,7 +206,10 @@ define i64 @caller_large_scalars_exhausted_regs() nounwind {
; RV64I-NEXT: li a5, 6
; RV64I-NEXT: li a6, 7
; RV64I-NEXT: addi a7, sp, 48
+; RV64I-NEXT: sd t0, 48(sp)
; RV64I-NEXT: sd zero, 56(sp)
+; RV64I-NEXT: sd zero, 64(sp)
+; RV64I-NEXT: sd zero, 72(sp)
; RV64I-NEXT: call callee_large_scalars_exhausted_regs
; RV64I-NEXT: ld ra, 88(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 96
@@ -293,12 +293,12 @@ define i64 @caller_large_struct() nounwind {
; RV64I-NEXT: addi sp, sp, -80
; RV64I-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
; RV64I-NEXT: li a0, 1
-; RV64I-NEXT: sd a0, 40(sp)
; RV64I-NEXT: li a1, 2
-; RV64I-NEXT: sd a1, 48(sp)
; RV64I-NEXT: li a2, 3
-; RV64I-NEXT: sd a2, 56(sp)
; RV64I-NEXT: li a3, 4
+; RV64I-NEXT: sd a0, 40(sp)
+; RV64I-NEXT: sd a1, 48(sp)
+; RV64I-NEXT: sd a2, 56(sp)
; RV64I-NEXT: sd a3, 64(sp)
; RV64I-NEXT: sd a0, 8(sp)
; RV64I-NEXT: sd a1, 16(sp)
@@ -357,15 +357,12 @@ define void @caller_aligned_stack() nounwind {
; RV64I-NEXT: addi sp, sp, -64
; RV64I-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
; RV64I-NEXT: li a0, 12
+; RV64I-NEXT: li a1, 11
+; RV64I-NEXT: sd a1, 40(sp)
; RV64I-NEXT: sd a0, 48(sp)
-; RV64I-NEXT: li a0, 11
-; RV64I-NEXT: sd a0, 40(sp)
-; RV64I-NEXT: li a0, 10
-; RV64I-NEXT: sd a0, 32(sp)
-; RV64I-NEXT: sd zero, 24(sp)
-; RV64I-NEXT: li a0, 9
-; RV64I-NEXT: sd a0, 16(sp)
-; RV64I-NEXT: li a6, 8
+; RV64I-NEXT: li a6, 10
+; RV64I-NEXT: li t0, 9
+; RV64I-NEXT: li t1, 8
; RV64I-NEXT: li a0, 1
; RV64I-NEXT: li a1, 2
; RV64I-NEXT: li a2, 3
@@ -373,7 +370,10 @@ define void @caller_aligned_stack() nounwind {
; RV64I-NEXT: li a4, 5
; RV64I-NEXT: li a5, 6
; RV64I-NEXT: li a7, 7
-; RV64I-NEXT: sd a6, 0(sp)
+; RV64I-NEXT: sd t1, 0(sp)
+; RV64I-NEXT: sd t0, 16(sp)
+; RV64I-NEXT: sd zero, 24(sp)
+; RV64I-NEXT: sd a6, 32(sp)
; RV64I-NEXT: li a6, 0
; RV64I-NEXT: call callee_aligned_stack
; RV64I-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
@@ -449,12 +449,12 @@ define i256 @callee_large_scalar_ret() nounwind {
; RV64I-LABEL: callee_large_scalar_ret:
; RV64I: # %bb.0:
; RV64I-NEXT: li a1, -1
-; RV64I-NEXT: sd a1, 24(a0)
-; RV64I-NEXT: sd a1, 16(a0)
+; RV64I-NEXT: lui a2, 1018435
+; RV64I-NEXT: addiw a2, a2, 747
+; RV64I-NEXT: sd a2, 0(a0)
; RV64I-NEXT: sd a1, 8(a0)
-; RV64I-NEXT: lui a1, 1018435
-; RV64I-NEXT: addiw a1, a1, 747
-; RV64I-NEXT: sd a1, 0(a0)
+; RV64I-NEXT: sd a1, 16(a0)
+; RV64I-NEXT: sd a1, 24(a0)
; RV64I-NEXT: ret
ret i256 -123456789
}
@@ -478,18 +478,18 @@ define void @caller_large_scalar_ret() nounwind {
define void @callee_large_struct_ret(ptr noalias sret(%struct.large) %agg.result) nounwind {
; RV64I-LABEL: callee_large_struct_ret:
; RV64I: # %bb.0:
-; RV64I-NEXT: sw zero, 4(a0)
; RV64I-NEXT: li a1, 1
+; RV64I-NEXT: li a2, 2
; RV64I-NEXT: sw a1, 0(a0)
+; RV64I-NEXT: sw zero, 4(a0)
+; RV64I-NEXT: sw a2, 8(a0)
; RV64I-NEXT: sw zero, 12(a0)
-; RV64I-NEXT: li a1, 2
-; RV64I-NEXT: sw a1, 8(a0)
-; RV64I-NEXT: sw zero, 20(a0)
; RV64I-NEXT: li a1, 3
+; RV64I-NEXT: li a2, 4
; RV64I-NEXT: sw a1, 16(a0)
+; RV64I-NEXT: sw zero, 20(a0)
+; RV64I-NEXT: sw a2, 24(a0)
; RV64I-NEXT: sw zero, 28(a0)
-; RV64I-NEXT: li a1, 4
-; RV64I-NEXT: sw a1, 24(a0)
; RV64I-NEXT: ret
store i64 1, ptr %agg.result, align 4
%b = getelementptr inbounds %struct.large, ptr %agg.result, i64 0, i32 1
diff --git a/llvm/test/CodeGen/RISCV/calling-conv-lp64e.ll b/llvm/test/CodeGen/RISCV/calling-conv-lp64e.ll
index bb2fd593402512..985135a086e247 100644
--- a/llvm/test/CodeGen/RISCV/calling-conv-lp64e.ll
+++ b/llvm/test/CodeGen/RISCV/calling-conv-lp64e.ll
@@ -114,14 +114,14 @@ define i64 @caller_float_on_stack() nounwind {
; RV64I-LP64E-FPELIM-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
; RV64I-LP64E-FPELIM-NEXT: addi s0, sp, 48
; RV64I-LP64E-FPELIM-NEXT: andi sp, sp, -16
-; RV64I-LP64E-FPELIM-NEXT: lui a0, 264704
-; RV64I-LP64E-FPELIM-NEXT: sd a0, 16(sp)
-; RV64I-LP64E-FPELIM-NEXT: sd zero, 8(sp)
-; RV64I-LP64E-FPELIM-NEXT: li a1, 4
+; RV64I-LP64E-FPELIM-NEXT: lui a1, 264704
+; RV64I-LP64E-FPELIM-NEXT: li a3, 4
; RV64I-LP64E-FPELIM-NEXT: li a0, 1
; RV64I-LP64E-FPELIM-NEXT: li a2, 2
; RV64I-LP64E-FPELIM-NEXT: li a4, 3
-; RV64I-LP64E-FPELIM-NEXT: sd a1, 0(sp)
+; RV64I-LP64E-FPELIM-NEXT: sd a3, 0(sp)
+; RV64I-LP64E-FPELIM-NEXT: sd zero, 8(sp)
+; RV64I-LP64E-FPELIM-NEXT: sd a1, 16(sp)
; RV64I-LP64E-FPELIM-NEXT: li a1, 0
; RV64I-LP64E-FPELIM-NEXT: li a3, 0
; RV64I-LP64E-FPELIM-NEXT: li a5, 0
@@ -139,14 +139,14 @@ define i64 @caller_float_on_stack() nounwind {
; RV64I-LP64E-WITHFP-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
; RV64I-LP64E-WITHFP-NEXT: addi s0, sp, 48
; RV64I-LP64E-WITHFP-NEXT: andi sp, sp, -16
-; RV64I-LP64E-WITHFP-NEXT: lui a0, 264704
-; RV64I-LP64E-WITHFP-NEXT: sd a0, 16(sp)
-; RV64I-LP64E-WITHFP-NEXT: sd zero, 8(sp)
-; RV64I-LP64E-WITHFP-NEXT: li a1, 4
+; RV64I-LP64E-WITHFP-NEXT: lui a1, 264704
+; RV64I-LP64E-WITHFP-NEXT: li a3, 4
; RV64I-LP64E-WITHFP-NEXT: li a0, 1
; RV64I-LP64E-WITHFP-NEXT: li a2, 2
; RV64I-LP64E-WITHFP-NEXT: li a4, 3
-; RV64I-LP64E-WITHFP-NEXT: sd a1, 0(sp)
+; RV64I-LP64E-WITHFP-NEXT: sd a3, 0(sp)
+; RV64I-LP64E-WITHFP-NEXT: sd zero, 8(sp)
+; RV64I-LP64E-WITHFP-NEXT: sd a1, 16(sp)
; RV64I-LP64E-WITHFP-NEXT: li a1, 0
; RV64I-LP64E-WITHFP-NEXT: li a3, 0
; RV64I-LP64E-WITHFP-NEXT: li a5, 0
diff --git a/llvm/test/CodeGen/RISCV/calling-conv-rv32f-ilp32.ll b/llvm/test/CodeGen/RISCV/calling-conv-rv32f-ilp32.ll
index 37d9eb6990b0e4..eaba1acffa0547 100644
--- a/llvm/test/CodeGen/RISCV/calling-conv-rv32f-ilp32.ll
+++ b/llvm/test/CodeGen/RISCV/calling-conv-rv32f-ilp32.ll
@@ -32,13 +32,14 @@ define float @caller_onstack_f32_noop(float %a) nounwind {
; RV32IF: # %bb.0:
; RV32IF-NEXT: addi sp, sp, -16
; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IF-NEXT: sw a0, 4(sp)
-; RV32IF-NEXT: lui a1, 264704
+; RV32IF-NEXT: mv a1, a0
+; RV32IF-NEXT: lui a3, 264704
; RV32IF-NEXT: li a0, 1
; RV32IF-NEXT: li a2, 2
; RV32IF-NEXT: li a4, 3
; RV32IF-NEXT: li a6, 4
-; RV32IF-NEXT: sw a1, 0(sp)
+; RV32IF-NEXT: sw a3, 0(sp)
+; RV32IF-NEXT: sw a1, 4(sp)
; RV32IF-NEXT: li a1, 0
; RV32IF-NEXT: li a3, 0
; RV32IF-NEXT: li a5, 0
@@ -60,12 +61,12 @@ define float @caller_onstack_f32_fadd(float %a, float %b) nounwind {
; RV32IF-NEXT: fmv.w.x fa4, a0
; RV32IF-NEXT: fadd.s fa3, fa4, fa5
; RV32IF-NEXT: fsub.s fa5, fa5, fa4
-; RV32IF-NEXT: fsw fa5, 4(sp)
; RV32IF-NEXT: li a0, 1
; RV32IF-NEXT: li a2, 2
; RV32IF-NEXT: li a4, 3
; RV32IF-NEXT: li a6, 4
; RV32IF-NEXT: fsw fa3, 0(sp)
+; RV32IF-NEXT: fsw fa5, 4(sp)
; RV32IF-NEXT: li a1, 0
; RV32IF-NEXT: li a3, 0
; RV32IF-NEXT: li a5, 0
diff --git a/llvm/test/CodeGen/RISCV/calling-conv-rv32f-ilp32e.ll b/llvm/test/CodeGen/RISCV/calling-conv-rv32f-ilp32e.ll
index d3530a4341330d..63d4ea5fee3313 100644
--- a/llvm/test/CodeGen/RISCV/calling-conv-rv32f-ilp32e.ll
+++ b/llvm/test/CodeGen/RISCV/calling-conv-rv32f-ilp32e.ll
@@ -32,15 +32,16 @@ define float @caller_onstack_f32_noop(float %a) nounwind {
; RV32IF-ILP32E: # %bb.0:
; RV32IF-ILP32E-NEXT: addi sp, sp, -20
; RV32IF-ILP32E-NEXT: sw ra, 16(sp) # 4-byte Folded Spill
-; RV32IF-ILP32E-NEXT: sw a0, 12(sp)
-; RV32IF-ILP32E-NEXT: lui a0, 264704
-; RV32IF-ILP32E-NEXT: sw a0, 8(sp)
-; RV32IF-ILP32E-NEXT: sw zero, 4(sp)
-; RV32IF-ILP32E-NEXT: li a1, 4
+; RV32IF-ILP32E-NEXT: mv a1, a0
+; RV32IF-ILP32E-NEXT: lui a3, 264704
+; RV32IF-ILP32E-NEXT: li a5, 4
; RV32IF-ILP32E-NEXT: li a0, 1
; RV32IF-ILP32E-NEXT: li a2, 2
; RV32IF-ILP32E-NEXT: li a4, 3
-; RV32IF-ILP32E-NEXT: sw a1, 0(sp)
+; RV32IF-ILP32E-NEXT: sw a5, 0(sp)
+; RV32IF-ILP32E-NEXT: sw zero, 4(sp)
+; RV32IF-ILP32E-NEXT: sw a3, 8(sp)
+; RV32IF-ILP32E-NEXT: sw a1, 12(sp)
; RV32IF-ILP32E-NEXT: li a1, 0
; RV32IF-ILP32E-NEXT: li a3, 0
; RV32IF-ILP32E-NEXT: li a5, 0
@@ -61,14 +62,14 @@ define float @caller_onstack_f32_fadd(float %a, float %b) nounwind {
; RV32IF-ILP32E-NEXT: fmv.w.x fa4, a0
; RV32IF-ILP32E-NEXT: fadd.s fa3, fa4, fa5
; RV32IF-ILP32E-NEXT: fsub.s fa5, fa5, fa4
-; RV32IF-ILP32E-NEXT: sw zero, 4(sp)
-; RV32IF-ILP32E-NEXT: li a0, 4
-; RV32IF-ILP32E-NEXT: sw a0, 0(sp)
-; RV32IF-ILP32E-NEXT: fsw fa5, 12(sp)
+; RV32IF-ILP32E-NEXT: li a1, 4
; RV32IF-ILP32E-NEXT: li a0, 1
; RV32IF-ILP32E-NEXT: li a2, 2
; RV32IF-ILP32E-NEXT: li a4, 3
+; RV32IF-ILP32E-NEXT: sw a1, 0(sp)
+; RV32IF-ILP32E-NEXT: sw zero, 4(sp)
; RV32IF-ILP32E-NEXT: fsw fa3, 8(sp)
+; RV32IF-ILP32E-NEXT: fsw fa5, 12(sp)
; RV32IF-ILP32E-NEXT: li a1, 0
; RV32IF-ILP32E-NEXT: li a3, 0
; RV32IF-ILP32E-NEXT: li a5, 0
diff --git a/llvm/test/CodeGen/RISCV/calling-conv-vector-float.ll b/llvm/test/CodeGen/RISCV/calling-conv-vector-float.ll
index 8ebe9b6301c44d..4153cad1ae8814 100644
--- a/llvm/test/CodeGen/RISCV/calling-conv-vector-float.ll
+++ b/llvm/test/CodeGen/RISCV/calling-conv-vector-float.ll
@@ -41,10 +41,10 @@ define <4 x float> @callee_v4f32(<4 x float> %x, <4 x float> %y) {
; RV64-NEXT: fadd.s fa2, fa1, fa2
; RV64-NEXT: fadd.s fa4, fa3, fa4
; RV64-NEXT: fadd.s fa5, fa5, ft1
-; RV64-NEXT: fsw fa5, 12(a0)
-; RV64-NEXT: fsw fa4, 8(a0)
-; RV64-NEXT: fsw fa2, 4(a0)
; RV64-NEXT: fsw fa0, 0(a0)
+; RV64-NEXT: fsw fa2, 4(a0)
+; RV64-NEXT: fsw fa4, 8(a0)
+; RV64-NEXT: fsw fa5, 12(a0)
; RV64-NEXT: ret
;
; RV64LP64F-LABEL: callee_v4f32:
@@ -53,10 +53,10 @@ define <4 x float> @callee_v4f32(<4 x float> %x, <4 x float> %y) {
; RV64LP64F-NEXT: fadd.s fa5, fa1, fa5
; RV64LP64F-NEXT: fadd.s fa2, fa2, fa6
; RV64LP64F-NEXT: fadd.s fa3, fa3, fa7
-; RV64LP64F-NEXT: fsw fa3, 12(a0)
-; RV64LP64F-NEXT: fsw fa2, 8(a0)
-; RV64LP64F-NEXT: fsw fa5, 4(a0)
; RV64LP64F-NEXT: fsw fa4, 0(a0)
+; RV64LP64F-NEXT: fsw fa5, 4(a0)
+; RV64LP64F-NEXT: fsw fa2, 8(a0)
+; RV64LP64F-NEXT: fsw fa3, 12(a0)
; RV64LP64F-NEXT: ret
%z = fadd <4 x float> %x, %y
ret <4 x float> %z
diff --git a/llvm/test/CodeGen/RISCV/calls.ll b/llvm/test/CodeGen/RISCV/calls.ll
index f18bbb4ed84ee3..cf0e625f3c6c78 100644
--- a/llvm/test/CodeGen/RISCV/calls.ll
+++ b/llvm/test/CodeGen/RISCV/calls.ll
@@ -566,8 +566,8 @@ define i32 @test_call_external_many_args(i32 %a) nounwind {
; CHECK-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; CHECK-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; CHECK-NEXT: mv s0, a0
-; CHECK-NEXT: sw a0, 4(sp)
; CHECK-NEXT: sw a0, 0(sp)
+; CHECK-NEXT: sw a0, 4(sp)
; CHECK-NEXT: mv a1, a0
; CHECK-NEXT: mv a2, a0
; CHECK-NEXT: mv a3, a0
@@ -588,8 +588,8 @@ define i32 @test_call_external_many_args(i32 %a) nounwind {
; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: sd a0, 8(sp)
; RV64I-NEXT: sd a0, 0(sp)
+; RV64I-NEXT: sd a0, 8(sp)
; RV64I-NEXT: mv a1, a0
; RV64I-NEXT: mv a2, a0
; RV64I-NEXT: mv a3, a0
@@ -610,8 +610,8 @@ define i32 @test_call_external_many_args(i32 %a) nounwind {
; RV64I-SMALL-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; RV64I-SMALL-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; RV64I-SMALL-NEXT: mv s0, a0
-; RV64I-SMALL-NEXT: sd a0, 8(sp)
; RV64I-SMALL-NEXT: sd a0, 0(sp)
+; RV64I-SMALL-NEXT: sd a0, 8(sp)
; RV64I-SMALL-NEXT: mv a1, a0
; RV64I-SMALL-NEXT: mv a2, a0
; RV64I-SMALL-NEXT: mv a3, a0
@@ -632,8 +632,8 @@ define i32 @test_call_external_many_args(i32 %a) nounwind {
; RV64I-MEDIUM-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; RV64I-MEDIUM-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; RV64I-MEDIUM-NEXT: mv s0, a0
-; RV64I-MEDIUM-NEXT: sd a0, 8(sp)
; RV64I-MEDIUM-NEXT: sd a0, 0(sp)
+; RV64I-MEDIUM-NEXT: sd a0, 8(sp)
; RV64I-MEDIUM-NEXT: mv a1, a0
; RV64I-MEDIUM-NEXT: mv a2, a0
; RV64I-MEDIUM-NEXT: mv a3, a0
@@ -657,8 +657,8 @@ define i32 @test_call_external_many_args(i32 %a) nounwind {
; RV64I-LARGE-NEXT: .Lpcrel_hi4:
; RV64I-LARGE-NEXT: auipc a0, %pcrel_hi(.LCPI8_0)
; RV64I-LARGE-NEXT: ld t1, %pcrel_lo(.Lpcrel_hi4)(a0)
-; RV64I-LARGE-NEXT: sd s0, 8(sp)
; RV64I-LARGE-NEXT: sd s0, 0(sp)
+; RV64I-LARGE-NEXT: sd s0, 8(sp)
; RV64I-LARGE-NEXT: mv a0, s0
; RV64I-LARGE-NEXT: mv a1, s0
; RV64I-LARGE-NEXT: mv a2, s0
@@ -684,8 +684,8 @@ define i32 @test_call_external_many_args(i32 %a) nounwind {
; RV64I-LARGE-ZICFILP-NEXT: .Lpcrel_hi4:
; RV64I-LARGE-ZICFILP-NEXT: auipc a0, %pcrel_hi(.LCPI8_0)
; RV64I-LARGE-ZICFILP-NEXT: ld t2, %pcrel_lo(.Lpcrel_hi4)(a0)
-; RV64I-LARGE-ZICFILP-NEXT: sd s0, 8(sp)
; RV64I-LARGE-ZICFILP-NEXT: sd s0, 0(sp)
+; RV64I-LARGE-ZICFILP-NEXT: sd s0, 8(sp)
; RV64I-LARGE-ZICFILP-NEXT: mv a0, s0
; RV64I-LARGE-ZICFILP-NEXT: mv a1, s0
; RV64I-LARGE-ZICFILP-NEXT: mv a2, s0
@@ -751,8 +751,8 @@ define i32 @test_call_defined_many_args(i32 %a) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; CHECK-NEXT: sw a0, 4(sp)
; CHECK-NEXT: sw a0, 0(sp)
+; CHECK-NEXT: sw a0, 4(sp)
; CHECK-NEXT: mv a1, a0
; CHECK-NEXT: mv a2, a0
; CHECK-NEXT: mv a3, a0
@@ -769,8 +769,8 @@ define i32 @test_call_defined_many_args(i32 %a) nounwind {
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -32
; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd a0, 8(sp)
; RV64I-NEXT: sd a0, 0(sp)
+; RV64I-NEXT: sd a0, 8(sp)
; RV64I-NEXT: mv a1, a0
; RV64I-NEXT: mv a2, a0
; RV64I-NEXT: mv a3, a0
@@ -787,8 +787,8 @@ define i32 @test_call_defined_many_args(i32 %a) nounwind {
; RV64I-SMALL: # %bb.0:
; RV64I-SMALL-NEXT: addi sp, sp, -32
; RV64I-SMALL-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-SMALL-NEXT: sd a0, 8(sp)
; RV64I-SMALL-NEXT: sd a0, 0(sp)
+; RV64I-SMALL-NEXT: sd a0, 8(sp)
; RV64I-SMALL-NEXT: mv a1, a0
; RV64I-SMALL-NEXT: mv a2, a0
; RV64I-SMALL-NEXT: mv a3, a0
@@ -805,8 +805,8 @@ define i32 @test_call_defined_many_args(i32 %a) nounwind {
; RV64I-MEDIUM: # %bb.0:
; RV64I-MEDIUM-NEXT: addi sp, sp, -32
; RV64I-MEDIUM-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-MEDIUM-NEXT: sd a0, 8(sp)
; RV64I-MEDIUM-NEXT: sd a0, 0(sp)
+; RV64I-MEDIUM-NEXT: sd a0, 8(sp)
; RV64I-MEDIUM-NEXT: mv a1, a0
; RV64I-MEDIUM-NEXT: mv a2, a0
; RV64I-MEDIUM-NEXT: mv a3, a0
@@ -826,8 +826,8 @@ define i32 @test_call_defined_many_args(i32 %a) nounwind {
; RV64I-LARGE-NEXT: .Lpcrel_hi5:
; RV64I-LARGE-NEXT: auipc a1, %pcrel_hi(.LCPI10_0)
; RV64I-LARGE-NEXT: ld t1, %pcrel_lo(.Lpcrel_hi5)(a1)
-; RV64I-LARGE-NEXT: sd a0, 8(sp)
; RV64I-LARGE-NEXT: sd a0, 0(sp)
+; RV64I-LARGE-NEXT: sd a0, 8(sp)
; RV64I-LARGE-NEXT: mv a1, a0
; RV64I-LARGE-NEXT: mv a2, a0
; RV64I-LARGE-NEXT: mv a3, a0
@@ -848,8 +848,8 @@ define i32 @test_call_defined_many_args(i32 %a) nounwind {
; RV64I-LARGE-ZICFILP-NEXT: .Lpcrel_hi5:
; RV64I-LARGE-ZICFILP-NEXT: auipc a1, %pcrel_hi(.LCPI10_0)
; RV64I-LARGE-ZICFILP-NEXT: ld t2, %pcrel_lo(.Lpcrel_hi5)(a1)
-; RV64I-LARGE-ZICFILP-NEXT: sd a0, 8(sp)
; RV64I-LARGE-ZICFILP-NEXT: sd a0, 0(sp)
+; RV64I-LARGE-ZICFILP-NEXT: sd a0, 8(sp)
; RV64I-LARGE-ZICFILP-NEXT: mv a1, a0
; RV64I-LARGE-ZICFILP-NEXT: mv a2, a0
; RV64I-LARGE-ZICFILP-NEXT: mv a3, a0
diff --git a/llvm/test/CodeGen/RISCV/double-calling-conv.ll b/llvm/test/CodeGen/RISCV/double-calling-conv.ll
index 57aaa4c9f74e46..b9e80dccd97b9a 100644
--- a/llvm/test/CodeGen/RISCV/double-calling-conv.ll
+++ b/llvm/test/CodeGen/RISCV/double-calling-conv.ll
@@ -181,19 +181,19 @@ define double @caller_double_stack() nounwind {
; RV32IFD-NEXT: addi sp, sp, -32
; RV32IFD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32IFD-NEXT: lui a0, 262510
-; RV32IFD-NEXT: addi a0, a0, 327
-; RV32IFD-NEXT: sw a0, 4(sp)
+; RV32IFD-NEXT: addi a1, a0, 327
; RV32IFD-NEXT: lui a0, 713032
-; RV32IFD-NEXT: addi a1, a0, -1311
-; RV32IFD-NEXT: sw a1, 0(sp)
+; RV32IFD-NEXT: addi a3, a0, -1311
; RV32IFD-NEXT: lui a0, 262574
-; RV32IFD-NEXT: addi a0, a0, 327
-; RV32IFD-NEXT: sw a0, 12(sp)
+; RV32IFD-NEXT: addi a5, a0, 327
; RV32IFD-NEXT: li a0, 1
; RV32IFD-NEXT: li a2, 2
; RV32IFD-NEXT: li a4, 3
; RV32IFD-NEXT: li a6, 4
-; RV32IFD-NEXT: sw a1, 8(sp)
+; RV32IFD-NEXT: sw a3, 0(sp)
+; RV32IFD-NEXT: sw a1, 4(sp)
+; RV32IFD-NEXT: sw a3, 8(sp)
+; RV32IFD-NEXT: sw a5, 12(sp)
; RV32IFD-NEXT: li a1, 0
; RV32IFD-NEXT: li a3, 0
; RV32IFD-NEXT: li a5, 0
@@ -208,19 +208,19 @@ define double @caller_double_stack() nounwind {
; RV32IZFINXZDINX-NEXT: addi sp, sp, -32
; RV32IZFINXZDINX-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: lui a0, 262510
-; RV32IZFINXZDINX-NEXT: addi a0, a0, 327
-; RV32IZFINXZDINX-NEXT: sw a0, 4(sp)
+; RV32IZFINXZDINX-NEXT: addi a1, a0, 327
; RV32IZFINXZDINX-NEXT: lui a0, 713032
-; RV32IZFINXZDINX-NEXT: addi a1, a0, -1311
-; RV32IZFINXZDINX-NEXT: sw a1, 0(sp)
+; RV32IZFINXZDINX-NEXT: addi a3, a0, -1311
; RV32IZFINXZDINX-NEXT: lui a0, 262574
-; RV32IZFINXZDINX-NEXT: addi a0, a0, 327
-; RV32IZFINXZDINX-NEXT: sw a0, 12(sp)
+; RV32IZFINXZDINX-NEXT: addi a5, a0, 327
; RV32IZFINXZDINX-NEXT: li a0, 1
; RV32IZFINXZDINX-NEXT: li a2, 2
; RV32IZFINXZDINX-NEXT: li a4, 3
; RV32IZFINXZDINX-NEXT: li a6, 4
-; RV32IZFINXZDINX-NEXT: sw a1, 8(sp)
+; RV32IZFINXZDINX-NEXT: sw a3, 0(sp)
+; RV32IZFINXZDINX-NEXT: sw a1, 4(sp)
+; RV32IZFINXZDINX-NEXT: sw a3, 8(sp)
+; RV32IZFINXZDINX-NEXT: sw a5, 12(sp)
; RV32IZFINXZDINX-NEXT: li a1, 0
; RV32IZFINXZDINX-NEXT: li a3, 0
; RV32IZFINXZDINX-NEXT: li a5, 0
diff --git a/llvm/test/CodeGen/RISCV/double-convert-strict.ll b/llvm/test/CodeGen/RISCV/double-convert-strict.ll
index 3732978b8bd83e..2b1ec10fcaf17e 100644
--- a/llvm/test/CodeGen/RISCV/double-convert-strict.ll
+++ b/llvm/test/CodeGen/RISCV/double-convert-strict.ll
@@ -792,8 +792,8 @@ define signext i32 @fcvt_d_w_demanded_bits(i32 signext %0, ptr %1) nounwind stri
; RV32I-NEXT: addi s1, a0, 1
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: call __floatsidf
-; RV32I-NEXT: sw a1, 4(s0)
; RV32I-NEXT: sw a0, 0(s0)
+; RV32I-NEXT: sw a1, 4(s0)
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
@@ -865,8 +865,8 @@ define signext i32 @fcvt_d_wu_demanded_bits(i32 signext %0, ptr %1) nounwind str
; RV32I-NEXT: addi s1, a0, 1
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: call __floatunsidf
-; RV32I-NEXT: sw a1, 4(s0)
; RV32I-NEXT: sw a0, 0(s0)
+; RV32I-NEXT: sw a1, 4(s0)
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/double-convert.ll b/llvm/test/CodeGen/RISCV/double-convert.ll
index ef2d8e7627be54..a8b141618bbb3a 100644
--- a/llvm/test/CodeGen/RISCV/double-convert.ll
+++ b/llvm/test/CodeGen/RISCV/double-convert.ll
@@ -1474,8 +1474,8 @@ define signext i32 @fcvt_d_w_demanded_bits(i32 signext %0, ptr %1) nounwind {
; RV32I-NEXT: addi s1, a0, 1
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: call __floatsidf
-; RV32I-NEXT: sw a1, 4(s0)
; RV32I-NEXT: sw a0, 0(s0)
+; RV32I-NEXT: sw a1, 4(s0)
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
@@ -1547,8 +1547,8 @@ define signext i32 @fcvt_d_wu_demanded_bits(i32 signext %0, ptr %1) nounwind {
; RV32I-NEXT: addi s1, a0, 1
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: call __floatunsidf
-; RV32I-NEXT: sw a1, 4(s0)
; RV32I-NEXT: sw a0, 0(s0)
+; RV32I-NEXT: sw a1, 4(s0)
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/fastcc-bf16.ll b/llvm/test/CodeGen/RISCV/fastcc-bf16.ll
index 493bc63992547e..17356116081fff 100644
--- a/llvm/test/CodeGen/RISCV/fastcc-bf16.ll
+++ b/llvm/test/CodeGen/RISCV/fastcc-bf16.ll
@@ -50,18 +50,18 @@ define bfloat @caller(<32 x bfloat> %A) nounwind {
; CHECK-NEXT: fmv.h.x fa5, a5
; CHECK-NEXT: fmv.h.x fa6, a6
; CHECK-NEXT: fmv.h.x fa7, a7
-; CHECK-NEXT: fsh fs11, 22(sp)
-; CHECK-NEXT: fsh fs10, 20(sp)
-; CHECK-NEXT: fsh fs9, 18(sp)
; CHECK-NEXT: fsh fs8, 16(sp)
-; CHECK-NEXT: fsh fs7, 14(sp)
-; CHECK-NEXT: fsh fs6, 12(sp)
-; CHECK-NEXT: fsh fs5, 10(sp)
+; CHECK-NEXT: fsh fs9, 18(sp)
+; CHECK-NEXT: fsh fs10, 20(sp)
+; CHECK-NEXT: fsh fs11, 22(sp)
; CHECK-NEXT: fsh fs4, 8(sp)
-; CHECK-NEXT: fsh fs3, 6(sp)
-; CHECK-NEXT: fsh fs2, 4(sp)
-; CHECK-NEXT: fsh fs1, 2(sp)
+; CHECK-NEXT: fsh fs5, 10(sp)
+; CHECK-NEXT: fsh fs6, 12(sp)
+; CHECK-NEXT: fsh fs7, 14(sp)
; CHECK-NEXT: fsh fs0, 0(sp)
+; CHECK-NEXT: fsh fs1, 2(sp)
+; CHECK-NEXT: fsh fs2, 4(sp)
+; CHECK-NEXT: fsh fs3, 6(sp)
; CHECK-NEXT: call callee
; CHECK-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 32
diff --git a/llvm/test/CodeGen/RISCV/fastcc-float.ll b/llvm/test/CodeGen/RISCV/fastcc-float.ll
index 488c97d5a4506c..237a72d983de4a 100644
--- a/llvm/test/CodeGen/RISCV/fastcc-float.ll
+++ b/llvm/test/CodeGen/RISCV/fastcc-float.ll
@@ -50,18 +50,18 @@ define float @caller(<32 x float> %A) nounwind {
; CHECK-NEXT: fmv.w.x fa5, a5
; CHECK-NEXT: fmv.w.x fa6, a6
; CHECK-NEXT: fmv.w.x fa7, a7
-; CHECK-NEXT: fsw fs11, 44(sp)
-; CHECK-NEXT: fsw fs10, 40(sp)
-; CHECK-NEXT: fsw fs9, 36(sp)
; CHECK-NEXT: fsw fs8, 32(sp)
-; CHECK-NEXT: fsw fs7, 28(sp)
-; CHECK-NEXT: fsw fs6, 24(sp)
-; CHECK-NEXT: fsw fs5, 20(sp)
+; CHECK-NEXT: fsw fs9, 36(sp)
+; CHECK-NEXT: fsw fs10, 40(sp)
+; CHECK-NEXT: fsw fs11, 44(sp)
; CHECK-NEXT: fsw fs4, 16(sp)
-; CHECK-NEXT: fsw fs3, 12(sp)
-; CHECK-NEXT: fsw fs2, 8(sp)
-; CHECK-NEXT: fsw fs1, 4(sp)
+; CHECK-NEXT: fsw fs5, 20(sp)
+; CHECK-NEXT: fsw fs6, 24(sp)
+; CHECK-NEXT: fsw fs7, 28(sp)
; CHECK-NEXT: fsw fs0, 0(sp)
+; CHECK-NEXT: fsw fs1, 4(sp)
+; CHECK-NEXT: fsw fs2, 8(sp)
+; CHECK-NEXT: fsw fs3, 12(sp)
; CHECK-NEXT: call callee
; CHECK-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 64
diff --git a/llvm/test/CodeGen/RISCV/fastcc-half.ll b/llvm/test/CodeGen/RISCV/fastcc-half.ll
index 499b91b9d81c35..bf8d4e8dcb98c4 100644
--- a/llvm/test/CodeGen/RISCV/fastcc-half.ll
+++ b/llvm/test/CodeGen/RISCV/fastcc-half.ll
@@ -50,18 +50,18 @@ define half @caller(<32 x half> %A) nounwind {
; CHECK-NEXT: fmv.h.x fa5, a5
; CHECK-NEXT: fmv.h.x fa6, a6
; CHECK-NEXT: fmv.h.x fa7, a7
-; CHECK-NEXT: fsh fs11, 22(sp)
-; CHECK-NEXT: fsh fs10, 20(sp)
-; CHECK-NEXT: fsh fs9, 18(sp)
; CHECK-NEXT: fsh fs8, 16(sp)
-; CHECK-NEXT: fsh fs7, 14(sp)
-; CHECK-NEXT: fsh fs6, 12(sp)
-; CHECK-NEXT: fsh fs5, 10(sp)
+; CHECK-NEXT: fsh fs9, 18(sp)
+; CHECK-NEXT: fsh fs10, 20(sp)
+; CHECK-NEXT: fsh fs11, 22(sp)
; CHECK-NEXT: fsh fs4, 8(sp)
-; CHECK-NEXT: fsh fs3, 6(sp)
-; CHECK-NEXT: fsh fs2, 4(sp)
-; CHECK-NEXT: fsh fs1, 2(sp)
+; CHECK-NEXT: fsh fs5, 10(sp)
+; CHECK-NEXT: fsh fs6, 12(sp)
+; CHECK-NEXT: fsh fs7, 14(sp)
; CHECK-NEXT: fsh fs0, 0(sp)
+; CHECK-NEXT: fsh fs1, 2(sp)
+; CHECK-NEXT: fsh fs2, 4(sp)
+; CHECK-NEXT: fsh fs3, 6(sp)
; CHECK-NEXT: call callee
; CHECK-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 32
diff --git a/llvm/test/CodeGen/RISCV/fastcc-int.ll b/llvm/test/CodeGen/RISCV/fastcc-int.ll
index 75046b701b2352..3570ece269609f 100644
--- a/llvm/test/CodeGen/RISCV/fastcc-int.ll
+++ b/llvm/test/CodeGen/RISCV/fastcc-int.ll
@@ -40,10 +40,10 @@ define i32 @caller(<16 x i32> %A) nounwind {
; RV32-NEXT: lw t2, 52(a0)
; RV32-NEXT: lw s0, 56(a0)
; RV32-NEXT: lw a0, 60(a0)
-; RV32-NEXT: sw a0, 12(sp)
-; RV32-NEXT: sw s0, 8(sp)
-; RV32-NEXT: sw t2, 4(sp)
; RV32-NEXT: sw t1, 0(sp)
+; RV32-NEXT: sw t2, 4(sp)
+; RV32-NEXT: sw s0, 8(sp)
+; RV32-NEXT: sw a0, 12(sp)
; RV32-NEXT: mv a0, t0
; RV32-NEXT: call callee
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
@@ -72,10 +72,10 @@ define i32 @caller(<16 x i32> %A) nounwind {
; RV64-NEXT: ld t2, 104(a0)
; RV64-NEXT: ld s0, 112(a0)
; RV64-NEXT: ld a0, 120(a0)
-; RV64-NEXT: sd a0, 24(sp)
-; RV64-NEXT: sd s0, 16(sp)
-; RV64-NEXT: sd t2, 8(sp)
; RV64-NEXT: sd t1, 0(sp)
+; RV64-NEXT: sd t2, 8(sp)
+; RV64-NEXT: sd s0, 16(sp)
+; RV64-NEXT: sd a0, 24(sp)
; RV64-NEXT: mv a0, t0
; RV64-NEXT: call callee
; RV64-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll b/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll
index 0eefc34ad552a9..7523119c4ff778 100644
--- a/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll
+++ b/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll
@@ -289,8 +289,8 @@ define half @caller_half_32(<32 x half> %A) nounwind {
; ZHINX32-NEXT: lh t0, 124(sp)
; ZHINX32-NEXT: sh t0, 52(sp) # 2-byte Folded Spill
; ZHINX32-NEXT: lh t6, 128(sp)
-; ZHINX32-NEXT: lh t5, 132(sp)
-; ZHINX32-NEXT: lh t4, 136(sp)
+; ZHINX32-NEXT: lh t4, 132(sp)
+; ZHINX32-NEXT: lh t5, 136(sp)
; ZHINX32-NEXT: lh s0, 140(sp)
; ZHINX32-NEXT: lh s1, 144(sp)
; ZHINX32-NEXT: lh s2, 148(sp)
@@ -304,30 +304,30 @@ define half @caller_half_32(<32 x half> %A) nounwind {
; ZHINX32-NEXT: lh s10, 180(sp)
; ZHINX32-NEXT: lh s11, 184(sp)
; ZHINX32-NEXT: lh ra, 188(sp)
-; ZHINX32-NEXT: lh t3, 192(sp)
-; ZHINX32-NEXT: lh t2, 196(sp)
-; ZHINX32-NEXT: lh t1, 200(sp)
-; ZHINX32-NEXT: lh t0, 204(sp)
-; ZHINX32-NEXT: sh t0, 38(sp)
-; ZHINX32-NEXT: sh t1, 36(sp)
-; ZHINX32-NEXT: sh t2, 34(sp)
-; ZHINX32-NEXT: sh t3, 32(sp)
-; ZHINX32-NEXT: sh ra, 30(sp)
-; ZHINX32-NEXT: sh s11, 28(sp)
-; ZHINX32-NEXT: sh s10, 26(sp)
+; ZHINX32-NEXT: lh t0, 192(sp)
+; ZHINX32-NEXT: lh t1, 196(sp)
+; ZHINX32-NEXT: lh t2, 200(sp)
+; ZHINX32-NEXT: lh t3, 204(sp)
+; ZHINX32-NEXT: sh t0, 32(sp)
+; ZHINX32-NEXT: sh t1, 34(sp)
+; ZHINX32-NEXT: sh t2, 36(sp)
+; ZHINX32-NEXT: sh t3, 38(sp)
; ZHINX32-NEXT: sh s9, 24(sp)
-; ZHINX32-NEXT: sh s8, 22(sp)
-; ZHINX32-NEXT: sh s7, 20(sp)
-; ZHINX32-NEXT: sh s6, 18(sp)
+; ZHINX32-NEXT: sh s10, 26(sp)
+; ZHINX32-NEXT: sh s11, 28(sp)
+; ZHINX32-NEXT: sh ra, 30(sp)
; ZHINX32-NEXT: sh s5, 16(sp)
-; ZHINX32-NEXT: sh s4, 14(sp)
-; ZHINX32-NEXT: sh s3, 12(sp)
-; ZHINX32-NEXT: sh s2, 10(sp)
+; ZHINX32-NEXT: sh s6, 18(sp)
+; ZHINX32-NEXT: sh s7, 20(sp)
+; ZHINX32-NEXT: sh s8, 22(sp)
; ZHINX32-NEXT: sh s1, 8(sp)
-; ZHINX32-NEXT: sh s0, 6(sp)
-; ZHINX32-NEXT: sh t4, 4(sp)
-; ZHINX32-NEXT: sh t5, 2(sp)
+; ZHINX32-NEXT: sh s2, 10(sp)
+; ZHINX32-NEXT: sh s3, 12(sp)
+; ZHINX32-NEXT: sh s4, 14(sp)
; ZHINX32-NEXT: sh t6, 0(sp)
+; ZHINX32-NEXT: sh t4, 2(sp)
+; ZHINX32-NEXT: sh t5, 4(sp)
+; ZHINX32-NEXT: sh s0, 6(sp)
; ZHINX32-NEXT: lh t3, 58(sp) # 2-byte Folded Reload
; ZHINX32-NEXT: lh t4, 56(sp) # 2-byte Folded Reload
; ZHINX32-NEXT: lh t5, 54(sp) # 2-byte Folded Reload
@@ -374,8 +374,8 @@ define half @caller_half_32(<32 x half> %A) nounwind {
; ZHINX64-NEXT: lh t0, 184(sp)
; ZHINX64-NEXT: sh t0, 48(sp) # 2-byte Folded Spill
; ZHINX64-NEXT: lh t6, 192(sp)
-; ZHINX64-NEXT: lh t5, 200(sp)
-; ZHINX64-NEXT: lh t4, 208(sp)
+; ZHINX64-NEXT: lh t4, 200(sp)
+; ZHINX64-NEXT: lh t5, 208(sp)
; ZHINX64-NEXT: lh s0, 216(sp)
; ZHINX64-NEXT: lh s1, 224(sp)
; ZHINX64-NEXT: lh s2, 232(sp)
@@ -389,30 +389,30 @@ define half @caller_half_32(<32 x half> %A) nounwind {
; ZHINX64-NEXT: lh s10, 296(sp)
; ZHINX64-NEXT: lh s11, 304(sp)
; ZHINX64-NEXT: lh ra, 312(sp)
-; ZHINX64-NEXT: lh t3, 320(sp)
-; ZHINX64-NEXT: lh t2, 328(sp)
-; ZHINX64-NEXT: lh t1, 336(sp)
-; ZHINX64-NEXT: lh t0, 344(sp)
-; ZHINX64-NEXT: sh t0, 38(sp)
-; ZHINX64-NEXT: sh t1, 36(sp)
-; ZHINX64-NEXT: sh t2, 34(sp)
-; ZHINX64-NEXT: sh t3, 32(sp)
-; ZHINX64-NEXT: sh ra, 30(sp)
-; ZHINX64-NEXT: sh s11, 28(sp)
-; ZHINX64-NEXT: sh s10, 26(sp)
+; ZHINX64-NEXT: lh t0, 320(sp)
+; ZHINX64-NEXT: lh t1, 328(sp)
+; ZHINX64-NEXT: lh t2, 336(sp)
+; ZHINX64-NEXT: lh t3, 344(sp)
+; ZHINX64-NEXT: sh t0, 32(sp)
+; ZHINX64-NEXT: sh t1, 34(sp)
+; ZHINX64-NEXT: sh t2, 36(sp)
+; ZHINX64-NEXT: sh t3, 38(sp)
; ZHINX64-NEXT: sh s9, 24(sp)
-; ZHINX64-NEXT: sh s8, 22(sp)
-; ZHINX64-NEXT: sh s7, 20(sp)
-; ZHINX64-NEXT: sh s6, 18(sp)
+; ZHINX64-NEXT: sh s10, 26(sp)
+; ZHINX64-NEXT: sh s11, 28(sp)
+; ZHINX64-NEXT: sh ra, 30(sp)
; ZHINX64-NEXT: sh s5, 16(sp)
-; ZHINX64-NEXT: sh s4, 14(sp)
-; ZHINX64-NEXT: sh s3, 12(sp)
-; ZHINX64-NEXT: sh s2, 10(sp)
+; ZHINX64-NEXT: sh s6, 18(sp)
+; ZHINX64-NEXT: sh s7, 20(sp)
+; ZHINX64-NEXT: sh s8, 22(sp)
; ZHINX64-NEXT: sh s1, 8(sp)
-; ZHINX64-NEXT: sh s0, 6(sp)
-; ZHINX64-NEXT: sh t4, 4(sp)
-; ZHINX64-NEXT: sh t5, 2(sp)
+; ZHINX64-NEXT: sh s2, 10(sp)
+; ZHINX64-NEXT: sh s3, 12(sp)
+; ZHINX64-NEXT: sh s4, 14(sp)
; ZHINX64-NEXT: sh t6, 0(sp)
+; ZHINX64-NEXT: sh t4, 2(sp)
+; ZHINX64-NEXT: sh t5, 4(sp)
+; ZHINX64-NEXT: sh s0, 6(sp)
; ZHINX64-NEXT: lh t3, 54(sp) # 2-byte Folded Reload
; ZHINX64-NEXT: lh t4, 52(sp) # 2-byte Folded Reload
; ZHINX64-NEXT: lh t5, 50(sp) # 2-byte Folded Reload
@@ -450,67 +450,67 @@ define half @caller_half_32(<32 x half> %A) nounwind {
; ZFINX32-NEXT: sw s9, 116(sp) # 4-byte Folded Spill
; ZFINX32-NEXT: sw s10, 112(sp) # 4-byte Folded Spill
; ZFINX32-NEXT: sw s11, 108(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT: lw t0, 0(a0)
-; ZFINX32-NEXT: lw a1, 4(a0)
-; ZFINX32-NEXT: sw a1, 104(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT: lw a1, 8(a0)
-; ZFINX32-NEXT: sw a1, 100(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT: lw a1, 12(a0)
-; ZFINX32-NEXT: sw a1, 96(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT: lw a1, 16(a0)
-; ZFINX32-NEXT: sw a1, 92(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT: lw a5, 20(a0)
-; ZFINX32-NEXT: lw a6, 24(a0)
-; ZFINX32-NEXT: lw a7, 28(a0)
-; ZFINX32-NEXT: lw t3, 32(a0)
-; ZFINX32-NEXT: lw t4, 36(a0)
-; ZFINX32-NEXT: lw t5, 40(a0)
-; ZFINX32-NEXT: lw t6, 44(a0)
-; ZFINX32-NEXT: lw t1, 48(a0)
-; ZFINX32-NEXT: lw t2, 52(a0)
-; ZFINX32-NEXT: lw s0, 56(a0)
-; ZFINX32-NEXT: lw s1, 60(a0)
-; ZFINX32-NEXT: lw s2, 64(a0)
-; ZFINX32-NEXT: lw s3, 68(a0)
-; ZFINX32-NEXT: lw s4, 72(a0)
-; ZFINX32-NEXT: lw s5, 76(a0)
-; ZFINX32-NEXT: lw s6, 80(a0)
-; ZFINX32-NEXT: lw s7, 84(a0)
-; ZFINX32-NEXT: lw s8, 88(a0)
-; ZFINX32-NEXT: lw s9, 92(a0)
-; ZFINX32-NEXT: lw s10, 96(a0)
-; ZFINX32-NEXT: lw s11, 100(a0)
-; ZFINX32-NEXT: lw ra, 104(a0)
-; ZFINX32-NEXT: lw a4, 108(a0)
-; ZFINX32-NEXT: lw a3, 112(a0)
-; ZFINX32-NEXT: lw a2, 116(a0)
-; ZFINX32-NEXT: lw a1, 120(a0)
-; ZFINX32-NEXT: lw a0, 124(a0)
-; ZFINX32-NEXT: sw a0, 76(sp)
-; ZFINX32-NEXT: sw a1, 72(sp)
-; ZFINX32-NEXT: sw a2, 68(sp)
-; ZFINX32-NEXT: sw a3, 64(sp)
-; ZFINX32-NEXT: sw a4, 60(sp)
-; ZFINX32-NEXT: sw ra, 56(sp)
-; ZFINX32-NEXT: sw s11, 52(sp)
+; ZFINX32-NEXT: mv t0, a0
+; ZFINX32-NEXT: lw a0, 0(a0)
+; ZFINX32-NEXT: sw a0, 104(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT: lw a0, 4(t0)
+; ZFINX32-NEXT: sw a0, 100(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT: lw a0, 8(t0)
+; ZFINX32-NEXT: sw a0, 96(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT: lw a0, 12(t0)
+; ZFINX32-NEXT: sw a0, 92(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT: lw a4, 16(t0)
+; ZFINX32-NEXT: lw a5, 20(t0)
+; ZFINX32-NEXT: lw a6, 24(t0)
+; ZFINX32-NEXT: lw a7, 28(t0)
+; ZFINX32-NEXT: lw t3, 32(t0)
+; ZFINX32-NEXT: lw t4, 36(t0)
+; ZFINX32-NEXT: lw t5, 40(t0)
+; ZFINX32-NEXT: lw t6, 44(t0)
+; ZFINX32-NEXT: lw t1, 48(t0)
+; ZFINX32-NEXT: lw t2, 52(t0)
+; ZFINX32-NEXT: lw s0, 56(t0)
+; ZFINX32-NEXT: lw s1, 60(t0)
+; ZFINX32-NEXT: lw s2, 64(t0)
+; ZFINX32-NEXT: lw s3, 68(t0)
+; ZFINX32-NEXT: lw s4, 72(t0)
+; ZFINX32-NEXT: lw s5, 76(t0)
+; ZFINX32-NEXT: lw s6, 80(t0)
+; ZFINX32-NEXT: lw s7, 84(t0)
+; ZFINX32-NEXT: lw s8, 88(t0)
+; ZFINX32-NEXT: lw s9, 92(t0)
+; ZFINX32-NEXT: lw s10, 96(t0)
+; ZFINX32-NEXT: lw s11, 100(t0)
+; ZFINX32-NEXT: lw ra, 104(t0)
+; ZFINX32-NEXT: lw a3, 108(t0)
+; ZFINX32-NEXT: lw a0, 112(t0)
+; ZFINX32-NEXT: lw a1, 116(t0)
+; ZFINX32-NEXT: lw a2, 120(t0)
+; ZFINX32-NEXT: lw t0, 124(t0)
+; ZFINX32-NEXT: sw a0, 64(sp)
+; ZFINX32-NEXT: sw a1, 68(sp)
+; ZFINX32-NEXT: sw a2, 72(sp)
+; ZFINX32-NEXT: sw t0, 76(sp)
; ZFINX32-NEXT: sw s10, 48(sp)
-; ZFINX32-NEXT: sw s9, 44(sp)
-; ZFINX32-NEXT: sw s8, 40(sp)
-; ZFINX32-NEXT: sw s7, 36(sp)
+; ZFINX32-NEXT: sw s11, 52(sp)
+; ZFINX32-NEXT: sw ra, 56(sp)
+; ZFINX32-NEXT: sw a3, 60(sp)
; ZFINX32-NEXT: sw s6, 32(sp)
-; ZFINX32-NEXT: sw s5, 28(sp)
-; ZFINX32-NEXT: sw s4, 24(sp)
-; ZFINX32-NEXT: sw s3, 20(sp)
+; ZFINX32-NEXT: sw s7, 36(sp)
+; ZFINX32-NEXT: sw s8, 40(sp)
+; ZFINX32-NEXT: sw s9, 44(sp)
; ZFINX32-NEXT: sw s2, 16(sp)
-; ZFINX32-NEXT: sw s1, 12(sp)
-; ZFINX32-NEXT: sw s0, 8(sp)
-; ZFINX32-NEXT: sw t2, 4(sp)
+; ZFINX32-NEXT: sw s3, 20(sp)
+; ZFINX32-NEXT: sw s4, 24(sp)
+; ZFINX32-NEXT: sw s5, 28(sp)
; ZFINX32-NEXT: sw t1, 0(sp)
-; ZFINX32-NEXT: mv a0, t0
-; ZFINX32-NEXT: lw a1, 104(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT: lw a2, 100(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT: lw a3, 96(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT: lw a4, 92(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT: sw t2, 4(sp)
+; ZFINX32-NEXT: sw s0, 8(sp)
+; ZFINX32-NEXT: sw s1, 12(sp)
+; ZFINX32-NEXT: lw a0, 104(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT: lw a1, 100(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT: lw a2, 96(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT: lw a3, 92(sp) # 4-byte Folded Reload
; ZFINX32-NEXT: call callee_half_32
; ZFINX32-NEXT: # kill: def $x10_w killed $x10_w def $x10
; ZFINX32-NEXT: lui a1, 1048560
@@ -548,67 +548,67 @@ define half @caller_half_32(<32 x half> %A) nounwind {
; ZFINX64-NEXT: sd s9, 216(sp) # 8-byte Folded Spill
; ZFINX64-NEXT: sd s10, 208(sp) # 8-byte Folded Spill
; ZFINX64-NEXT: sd s11, 200(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT: ld t0, 0(a0)
-; ZFINX64-NEXT: ld a1, 8(a0)
-; ZFINX64-NEXT: sd a1, 192(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT: ld a1, 16(a0)
-; ZFINX64-NEXT: sd a1, 184(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT: ld a1, 24(a0)
-; ZFINX64-NEXT: sd a1, 176(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT: ld a1, 32(a0)
-; ZFINX64-NEXT: sd a1, 168(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT: ld a5, 40(a0)
-; ZFINX64-NEXT: ld a6, 48(a0)
-; ZFINX64-NEXT: ld a7, 56(a0)
-; ZFINX64-NEXT: ld t3, 64(a0)
-; ZFINX64-NEXT: ld t4, 72(a0)
-; ZFINX64-NEXT: ld t5, 80(a0)
-; ZFINX64-NEXT: ld t6, 88(a0)
-; ZFINX64-NEXT: ld t1, 96(a0)
-; ZFINX64-NEXT: ld t2, 104(a0)
-; ZFINX64-NEXT: ld s0, 112(a0)
-; ZFINX64-NEXT: ld s1, 120(a0)
-; ZFINX64-NEXT: ld s2, 128(a0)
-; ZFINX64-NEXT: ld s3, 136(a0)
-; ZFINX64-NEXT: ld s4, 144(a0)
-; ZFINX64-NEXT: ld s5, 152(a0)
-; ZFINX64-NEXT: ld s6, 160(a0)
-; ZFINX64-NEXT: ld s7, 168(a0)
-; ZFINX64-NEXT: ld s8, 176(a0)
-; ZFINX64-NEXT: ld s9, 184(a0)
-; ZFINX64-NEXT: ld s10, 192(a0)
-; ZFINX64-NEXT: ld s11, 200(a0)
-; ZFINX64-NEXT: ld ra, 208(a0)
-; ZFINX64-NEXT: ld a4, 216(a0)
-; ZFINX64-NEXT: ld a3, 224(a0)
-; ZFINX64-NEXT: ld a2, 232(a0)
-; ZFINX64-NEXT: ld a1, 240(a0)
-; ZFINX64-NEXT: ld a0, 248(a0)
-; ZFINX64-NEXT: sd a0, 152(sp)
-; ZFINX64-NEXT: sd a1, 144(sp)
-; ZFINX64-NEXT: sd a2, 136(sp)
-; ZFINX64-NEXT: sd a3, 128(sp)
-; ZFINX64-NEXT: sd a4, 120(sp)
-; ZFINX64-NEXT: sd ra, 112(sp)
-; ZFINX64-NEXT: sd s11, 104(sp)
+; ZFINX64-NEXT: mv t0, a0
+; ZFINX64-NEXT: ld a0, 0(a0)
+; ZFINX64-NEXT: sd a0, 192(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: ld a0, 8(t0)
+; ZFINX64-NEXT: sd a0, 184(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: ld a0, 16(t0)
+; ZFINX64-NEXT: sd a0, 176(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: ld a0, 24(t0)
+; ZFINX64-NEXT: sd a0, 168(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: ld a4, 32(t0)
+; ZFINX64-NEXT: ld a5, 40(t0)
+; ZFINX64-NEXT: ld a6, 48(t0)
+; ZFINX64-NEXT: ld a7, 56(t0)
+; ZFINX64-NEXT: ld t3, 64(t0)
+; ZFINX64-NEXT: ld t4, 72(t0)
+; ZFINX64-NEXT: ld t5, 80(t0)
+; ZFINX64-NEXT: ld t6, 88(t0)
+; ZFINX64-NEXT: ld t1, 96(t0)
+; ZFINX64-NEXT: ld t2, 104(t0)
+; ZFINX64-NEXT: ld s0, 112(t0)
+; ZFINX64-NEXT: ld s1, 120(t0)
+; ZFINX64-NEXT: ld s2, 128(t0)
+; ZFINX64-NEXT: ld s3, 136(t0)
+; ZFINX64-NEXT: ld s4, 144(t0)
+; ZFINX64-NEXT: ld s5, 152(t0)
+; ZFINX64-NEXT: ld s6, 160(t0)
+; ZFINX64-NEXT: ld s7, 168(t0)
+; ZFINX64-NEXT: ld s8, 176(t0)
+; ZFINX64-NEXT: ld s9, 184(t0)
+; ZFINX64-NEXT: ld s10, 192(t0)
+; ZFINX64-NEXT: ld s11, 200(t0)
+; ZFINX64-NEXT: ld ra, 208(t0)
+; ZFINX64-NEXT: ld a3, 216(t0)
+; ZFINX64-NEXT: ld a0, 224(t0)
+; ZFINX64-NEXT: ld a1, 232(t0)
+; ZFINX64-NEXT: ld a2, 240(t0)
+; ZFINX64-NEXT: ld t0, 248(t0)
+; ZFINX64-NEXT: sd a0, 128(sp)
+; ZFINX64-NEXT: sd a1, 136(sp)
+; ZFINX64-NEXT: sd a2, 144(sp)
+; ZFINX64-NEXT: sd t0, 152(sp)
; ZFINX64-NEXT: sd s10, 96(sp)
-; ZFINX64-NEXT: sd s9, 88(sp)
-; ZFINX64-NEXT: sd s8, 80(sp)
-; ZFINX64-NEXT: sd s7, 72(sp)
+; ZFINX64-NEXT: sd s11, 104(sp)
+; ZFINX64-NEXT: sd ra, 112(sp)
+; ZFINX64-NEXT: sd a3, 120(sp)
; ZFINX64-NEXT: sd s6, 64(sp)
-; ZFINX64-NEXT: sd s5, 56(sp)
-; ZFINX64-NEXT: sd s4, 48(sp)
-; ZFINX64-NEXT: sd s3, 40(sp)
+; ZFINX64-NEXT: sd s7, 72(sp)
+; ZFINX64-NEXT: sd s8, 80(sp)
+; ZFINX64-NEXT: sd s9, 88(sp)
; ZFINX64-NEXT: sd s2, 32(sp)
-; ZFINX64-NEXT: sd s1, 24(sp)
-; ZFINX64-NEXT: sd s0, 16(sp)
-; ZFINX64-NEXT: sd t2, 8(sp)
+; ZFINX64-NEXT: sd s3, 40(sp)
+; ZFINX64-NEXT: sd s4, 48(sp)
+; ZFINX64-NEXT: sd s5, 56(sp)
; ZFINX64-NEXT: sd t1, 0(sp)
-; ZFINX64-NEXT: mv a0, t0
-; ZFINX64-NEXT: ld a1, 192(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT: ld a2, 184(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT: ld a3, 176(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT: ld a4, 168(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT: sd t2, 8(sp)
+; ZFINX64-NEXT: sd s0, 16(sp)
+; ZFINX64-NEXT: sd s1, 24(sp)
+; ZFINX64-NEXT: ld a0, 192(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT: ld a1, 184(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT: ld a2, 176(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT: ld a3, 168(sp) # 8-byte Folded Reload
; ZFINX64-NEXT: call callee_half_32
; ZFINX64-NEXT: # kill: def $x10_w killed $x10_w def $x10
; ZFINX64-NEXT: lui a1, 1048560
@@ -646,67 +646,67 @@ define half @caller_half_32(<32 x half> %A) nounwind {
; ZDINX32-NEXT: sw s9, 116(sp) # 4-byte Folded Spill
; ZDINX32-NEXT: sw s10, 112(sp) # 4-byte Folded Spill
; ZDINX32-NEXT: sw s11, 108(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT: lw t0, 0(a0)
-; ZDINX32-NEXT: lw a1, 4(a0)
-; ZDINX32-NEXT: sw a1, 104(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT: lw a1, 8(a0)
-; ZDINX32-NEXT: sw a1, 100(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT: lw a1, 12(a0)
-; ZDINX32-NEXT: sw a1, 96(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT: lw a1, 16(a0)
-; ZDINX32-NEXT: sw a1, 92(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT: lw a5, 20(a0)
-; ZDINX32-NEXT: lw a6, 24(a0)
-; ZDINX32-NEXT: lw a7, 28(a0)
-; ZDINX32-NEXT: lw t3, 32(a0)
-; ZDINX32-NEXT: lw t4, 36(a0)
-; ZDINX32-NEXT: lw t5, 40(a0)
-; ZDINX32-NEXT: lw t6, 44(a0)
-; ZDINX32-NEXT: lw t1, 48(a0)
-; ZDINX32-NEXT: lw t2, 52(a0)
-; ZDINX32-NEXT: lw s0, 56(a0)
-; ZDINX32-NEXT: lw s1, 60(a0)
-; ZDINX32-NEXT: lw s2, 64(a0)
-; ZDINX32-NEXT: lw s3, 68(a0)
-; ZDINX32-NEXT: lw s4, 72(a0)
-; ZDINX32-NEXT: lw s5, 76(a0)
-; ZDINX32-NEXT: lw s6, 80(a0)
-; ZDINX32-NEXT: lw s7, 84(a0)
-; ZDINX32-NEXT: lw s8, 88(a0)
-; ZDINX32-NEXT: lw s9, 92(a0)
-; ZDINX32-NEXT: lw s10, 96(a0)
-; ZDINX32-NEXT: lw s11, 100(a0)
-; ZDINX32-NEXT: lw ra, 104(a0)
-; ZDINX32-NEXT: lw a4, 108(a0)
-; ZDINX32-NEXT: lw a3, 112(a0)
-; ZDINX32-NEXT: lw a2, 116(a0)
-; ZDINX32-NEXT: lw a1, 120(a0)
-; ZDINX32-NEXT: lw a0, 124(a0)
-; ZDINX32-NEXT: sw a0, 76(sp)
-; ZDINX32-NEXT: sw a1, 72(sp)
-; ZDINX32-NEXT: sw a2, 68(sp)
-; ZDINX32-NEXT: sw a3, 64(sp)
-; ZDINX32-NEXT: sw a4, 60(sp)
-; ZDINX32-NEXT: sw ra, 56(sp)
-; ZDINX32-NEXT: sw s11, 52(sp)
+; ZDINX32-NEXT: mv t0, a0
+; ZDINX32-NEXT: lw a0, 0(a0)
+; ZDINX32-NEXT: sw a0, 104(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT: lw a0, 4(t0)
+; ZDINX32-NEXT: sw a0, 100(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT: lw a0, 8(t0)
+; ZDINX32-NEXT: sw a0, 96(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT: lw a0, 12(t0)
+; ZDINX32-NEXT: sw a0, 92(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT: lw a4, 16(t0)
+; ZDINX32-NEXT: lw a5, 20(t0)
+; ZDINX32-NEXT: lw a6, 24(t0)
+; ZDINX32-NEXT: lw a7, 28(t0)
+; ZDINX32-NEXT: lw t3, 32(t0)
+; ZDINX32-NEXT: lw t4, 36(t0)
+; ZDINX32-NEXT: lw t5, 40(t0)
+; ZDINX32-NEXT: lw t6, 44(t0)
+; ZDINX32-NEXT: lw t1, 48(t0)
+; ZDINX32-NEXT: lw t2, 52(t0)
+; ZDINX32-NEXT: lw s0, 56(t0)
+; ZDINX32-NEXT: lw s1, 60(t0)
+; ZDINX32-NEXT: lw s2, 64(t0)
+; ZDINX32-NEXT: lw s3, 68(t0)
+; ZDINX32-NEXT: lw s4, 72(t0)
+; ZDINX32-NEXT: lw s5, 76(t0)
+; ZDINX32-NEXT: lw s6, 80(t0)
+; ZDINX32-NEXT: lw s7, 84(t0)
+; ZDINX32-NEXT: lw s8, 88(t0)
+; ZDINX32-NEXT: lw s9, 92(t0)
+; ZDINX32-NEXT: lw s10, 96(t0)
+; ZDINX32-NEXT: lw s11, 100(t0)
+; ZDINX32-NEXT: lw ra, 104(t0)
+; ZDINX32-NEXT: lw a3, 108(t0)
+; ZDINX32-NEXT: lw a0, 112(t0)
+; ZDINX32-NEXT: lw a1, 116(t0)
+; ZDINX32-NEXT: lw a2, 120(t0)
+; ZDINX32-NEXT: lw t0, 124(t0)
+; ZDINX32-NEXT: sw a0, 64(sp)
+; ZDINX32-NEXT: sw a1, 68(sp)
+; ZDINX32-NEXT: sw a2, 72(sp)
+; ZDINX32-NEXT: sw t0, 76(sp)
; ZDINX32-NEXT: sw s10, 48(sp)
-; ZDINX32-NEXT: sw s9, 44(sp)
-; ZDINX32-NEXT: sw s8, 40(sp)
-; ZDINX32-NEXT: sw s7, 36(sp)
+; ZDINX32-NEXT: sw s11, 52(sp)
+; ZDINX32-NEXT: sw ra, 56(sp)
+; ZDINX32-NEXT: sw a3, 60(sp)
; ZDINX32-NEXT: sw s6, 32(sp)
-; ZDINX32-NEXT: sw s5, 28(sp)
-; ZDINX32-NEXT: sw s4, 24(sp)
-; ZDINX32-NEXT: sw s3, 20(sp)
+; ZDINX32-NEXT: sw s7, 36(sp)
+; ZDINX32-NEXT: sw s8, 40(sp)
+; ZDINX32-NEXT: sw s9, 44(sp)
; ZDINX32-NEXT: sw s2, 16(sp)
-; ZDINX32-NEXT: sw s1, 12(sp)
-; ZDINX32-NEXT: sw s0, 8(sp)
-; ZDINX32-NEXT: sw t2, 4(sp)
+; ZDINX32-NEXT: sw s3, 20(sp)
+; ZDINX32-NEXT: sw s4, 24(sp)
+; ZDINX32-NEXT: sw s5, 28(sp)
; ZDINX32-NEXT: sw t1, 0(sp)
-; ZDINX32-NEXT: mv a0, t0
-; ZDINX32-NEXT: lw a1, 104(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT: lw a2, 100(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT: lw a3, 96(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT: lw a4, 92(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT: sw t2, 4(sp)
+; ZDINX32-NEXT: sw s0, 8(sp)
+; ZDINX32-NEXT: sw s1, 12(sp)
+; ZDINX32-NEXT: lw a0, 104(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT: lw a1, 100(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT: lw a2, 96(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT: lw a3, 92(sp) # 4-byte Folded Reload
; ZDINX32-NEXT: call callee_half_32
; ZDINX32-NEXT: # kill: def $x10_w killed $x10_w def $x10
; ZDINX32-NEXT: lui a1, 1048560
@@ -744,67 +744,67 @@ define half @caller_half_32(<32 x half> %A) nounwind {
; ZDINX64-NEXT: sd s9, 216(sp) # 8-byte Folded Spill
; ZDINX64-NEXT: sd s10, 208(sp) # 8-byte Folded Spill
; ZDINX64-NEXT: sd s11, 200(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT: ld t0, 0(a0)
-; ZDINX64-NEXT: ld a1, 8(a0)
-; ZDINX64-NEXT: sd a1, 192(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT: ld a1, 16(a0)
-; ZDINX64-NEXT: sd a1, 184(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT: ld a1, 24(a0)
-; ZDINX64-NEXT: sd a1, 176(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT: ld a1, 32(a0)
-; ZDINX64-NEXT: sd a1, 168(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT: ld a5, 40(a0)
-; ZDINX64-NEXT: ld a6, 48(a0)
-; ZDINX64-NEXT: ld a7, 56(a0)
-; ZDINX64-NEXT: ld t3, 64(a0)
-; ZDINX64-NEXT: ld t4, 72(a0)
-; ZDINX64-NEXT: ld t5, 80(a0)
-; ZDINX64-NEXT: ld t6, 88(a0)
-; ZDINX64-NEXT: ld t1, 96(a0)
-; ZDINX64-NEXT: ld t2, 104(a0)
-; ZDINX64-NEXT: ld s0, 112(a0)
-; ZDINX64-NEXT: ld s1, 120(a0)
-; ZDINX64-NEXT: ld s2, 128(a0)
-; ZDINX64-NEXT: ld s3, 136(a0)
-; ZDINX64-NEXT: ld s4, 144(a0)
-; ZDINX64-NEXT: ld s5, 152(a0)
-; ZDINX64-NEXT: ld s6, 160(a0)
-; ZDINX64-NEXT: ld s7, 168(a0)
-; ZDINX64-NEXT: ld s8, 176(a0)
-; ZDINX64-NEXT: ld s9, 184(a0)
-; ZDINX64-NEXT: ld s10, 192(a0)
-; ZDINX64-NEXT: ld s11, 200(a0)
-; ZDINX64-NEXT: ld ra, 208(a0)
-; ZDINX64-NEXT: ld a4, 216(a0)
-; ZDINX64-NEXT: ld a3, 224(a0)
-; ZDINX64-NEXT: ld a2, 232(a0)
-; ZDINX64-NEXT: ld a1, 240(a0)
-; ZDINX64-NEXT: ld a0, 248(a0)
-; ZDINX64-NEXT: sd a0, 152(sp)
-; ZDINX64-NEXT: sd a1, 144(sp)
-; ZDINX64-NEXT: sd a2, 136(sp)
-; ZDINX64-NEXT: sd a3, 128(sp)
-; ZDINX64-NEXT: sd a4, 120(sp)
-; ZDINX64-NEXT: sd ra, 112(sp)
-; ZDINX64-NEXT: sd s11, 104(sp)
+; ZDINX64-NEXT: mv t0, a0
+; ZDINX64-NEXT: ld a0, 0(a0)
+; ZDINX64-NEXT: sd a0, 192(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT: ld a0, 8(t0)
+; ZDINX64-NEXT: sd a0, 184(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT: ld a0, 16(t0)
+; ZDINX64-NEXT: sd a0, 176(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT: ld a0, 24(t0)
+; ZDINX64-NEXT: sd a0, 168(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT: ld a4, 32(t0)
+; ZDINX64-NEXT: ld a5, 40(t0)
+; ZDINX64-NEXT: ld a6, 48(t0)
+; ZDINX64-NEXT: ld a7, 56(t0)
+; ZDINX64-NEXT: ld t3, 64(t0)
+; ZDINX64-NEXT: ld t4, 72(t0)
+; ZDINX64-NEXT: ld t5, 80(t0)
+; ZDINX64-NEXT: ld t6, 88(t0)
+; ZDINX64-NEXT: ld t1, 96(t0)
+; ZDINX64-NEXT: ld t2, 104(t0)
+; ZDINX64-NEXT: ld s0, 112(t0)
+; ZDINX64-NEXT: ld s1, 120(t0)
+; ZDINX64-NEXT: ld s2, 128(t0)
+; ZDINX64-NEXT: ld s3, 136(t0)
+; ZDINX64-NEXT: ld s4, 144(t0)
+; ZDINX64-NEXT: ld s5, 152(t0)
+; ZDINX64-NEXT: ld s6, 160(t0)
+; ZDINX64-NEXT: ld s7, 168(t0)
+; ZDINX64-NEXT: ld s8, 176(t0)
+; ZDINX64-NEXT: ld s9, 184(t0)
+; ZDINX64-NEXT: ld s10, 192(t0)
+; ZDINX64-NEXT: ld s11, 200(t0)
+; ZDINX64-NEXT: ld ra, 208(t0)
+; ZDINX64-NEXT: ld a3, 216(t0)
+; ZDINX64-NEXT: ld a0, 224(t0)
+; ZDINX64-NEXT: ld a1, 232(t0)
+; ZDINX64-NEXT: ld a2, 240(t0)
+; ZDINX64-NEXT: ld t0, 248(t0)
+; ZDINX64-NEXT: sd a0, 128(sp)
+; ZDINX64-NEXT: sd a1, 136(sp)
+; ZDINX64-NEXT: sd a2, 144(sp)
+; ZDINX64-NEXT: sd t0, 152(sp)
; ZDINX64-NEXT: sd s10, 96(sp)
-; ZDINX64-NEXT: sd s9, 88(sp)
-; ZDINX64-NEXT: sd s8, 80(sp)
-; ZDINX64-NEXT: sd s7, 72(sp)
+; ZDINX64-NEXT: sd s11, 104(sp)
+; ZDINX64-NEXT: sd ra, 112(sp)
+; ZDINX64-NEXT: sd a3, 120(sp)
; ZDINX64-NEXT: sd s6, 64(sp)
-; ZDINX64-NEXT: sd s5, 56(sp)
-; ZDINX64-NEXT: sd s4, 48(sp)
-; ZDINX64-NEXT: sd s3, 40(sp)
+; ZDINX64-NEXT: sd s7, 72(sp)
+; ZDINX64-NEXT: sd s8, 80(sp)
+; ZDINX64-NEXT: sd s9, 88(sp)
; ZDINX64-NEXT: sd s2, 32(sp)
-; ZDINX64-NEXT: sd s1, 24(sp)
-; ZDINX64-NEXT: sd s0, 16(sp)
-; ZDINX64-NEXT: sd t2, 8(sp)
+; ZDINX64-NEXT: sd s3, 40(sp)
+; ZDINX64-NEXT: sd s4, 48(sp)
+; ZDINX64-NEXT: sd s5, 56(sp)
; ZDINX64-NEXT: sd t1, 0(sp)
-; ZDINX64-NEXT: mv a0, t0
-; ZDINX64-NEXT: ld a1, 192(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT: ld a2, 184(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT: ld a3, 176(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT: ld a4, 168(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT: sd t2, 8(sp)
+; ZDINX64-NEXT: sd s0, 16(sp)
+; ZDINX64-NEXT: sd s1, 24(sp)
+; ZDINX64-NEXT: ld a0, 192(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT: ld a1, 184(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT: ld a2, 176(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT: ld a3, 168(sp) # 8-byte Folded Reload
; ZDINX64-NEXT: call callee_half_32
; ZDINX64-NEXT: # kill: def $x10_w killed $x10_w def $x10
; ZDINX64-NEXT: lui a1, 1048560
@@ -883,8 +883,8 @@ define float @caller_float_32(<32 x float> %A) nounwind {
; ZHINX32-NEXT: lw t0, 172(sp)
; ZHINX32-NEXT: sw t0, 92(sp) # 4-byte Folded Spill
; ZHINX32-NEXT: lw t6, 176(sp)
-; ZHINX32-NEXT: lw t5, 180(sp)
-; ZHINX32-NEXT: lw t4, 184(sp)
+; ZHINX32-NEXT: lw t4, 180(sp)
+; ZHINX32-NEXT: lw t5, 184(sp)
; ZHINX32-NEXT: lw s0, 188(sp)
; ZHINX32-NEXT: lw s1, 192(sp)
; ZHINX32-NEXT: lw s2, 196(sp)
@@ -898,30 +898,30 @@ define float @caller_float_32(<32 x float> %A) nounwind {
; ZHINX32-NEXT: lw s10, 228(sp)
; ZHINX32-NEXT: lw s11, 232(sp)
; ZHINX32-NEXT: lw ra, 236(sp)
-; ZHINX32-NEXT: lw t3, 240(sp)
-; ZHINX32-NEXT: lw t2, 244(sp)
-; ZHINX32-NEXT: lw t1, 248(sp)
-; ZHINX32-NEXT: lw t0, 252(sp)
-; ZHINX32-NEXT: sw t0, 76(sp)
-; ZHINX32-NEXT: sw t1, 72(sp)
-; ZHINX32-NEXT: sw t2, 68(sp)
-; ZHINX32-NEXT: sw t3, 64(sp)
-; ZHINX32-NEXT: sw ra, 60(sp)
-; ZHINX32-NEXT: sw s11, 56(sp)
-; ZHINX32-NEXT: sw s10, 52(sp)
+; ZHINX32-NEXT: lw t0, 240(sp)
+; ZHINX32-NEXT: lw t1, 244(sp)
+; ZHINX32-NEXT: lw t2, 248(sp)
+; ZHINX32-NEXT: lw t3, 252(sp)
+; ZHINX32-NEXT: sw t0, 64(sp)
+; ZHINX32-NEXT: sw t1, 68(sp)
+; ZHINX32-NEXT: sw t2, 72(sp)
+; ZHINX32-NEXT: sw t3, 76(sp)
; ZHINX32-NEXT: sw s9, 48(sp)
-; ZHINX32-NEXT: sw s8, 44(sp)
-; ZHINX32-NEXT: sw s7, 40(sp)
-; ZHINX32-NEXT: sw s6, 36(sp)
+; ZHINX32-NEXT: sw s10, 52(sp)
+; ZHINX32-NEXT: sw s11, 56(sp)
+; ZHINX32-NEXT: sw ra, 60(sp)
; ZHINX32-NEXT: sw s5, 32(sp)
-; ZHINX32-NEXT: sw s4, 28(sp)
-; ZHINX32-NEXT: sw s3, 24(sp)
-; ZHINX32-NEXT: sw s2, 20(sp)
+; ZHINX32-NEXT: sw s6, 36(sp)
+; ZHINX32-NEXT: sw s7, 40(sp)
+; ZHINX32-NEXT: sw s8, 44(sp)
; ZHINX32-NEXT: sw s1, 16(sp)
-; ZHINX32-NEXT: sw s0, 12(sp)
-; ZHINX32-NEXT: sw t4, 8(sp)
-; ZHINX32-NEXT: sw t5, 4(sp)
+; ZHINX32-NEXT: sw s2, 20(sp)
+; ZHINX32-NEXT: sw s3, 24(sp)
+; ZHINX32-NEXT: sw s4, 28(sp)
; ZHINX32-NEXT: sw t6, 0(sp)
+; ZHINX32-NEXT: sw t4, 4(sp)
+; ZHINX32-NEXT: sw t5, 8(sp)
+; ZHINX32-NEXT: sw s0, 12(sp)
; ZHINX32-NEXT: lw t3, 104(sp) # 4-byte Folded Reload
; ZHINX32-NEXT: lw t4, 100(sp) # 4-byte Folded Reload
; ZHINX32-NEXT: lw t5, 96(sp) # 4-byte Folded Reload
@@ -968,8 +968,8 @@ define float @caller_float_32(<32 x float> %A) nounwind {
; ZHINX64-NEXT: lw t0, 232(sp)
; ZHINX64-NEXT: sw t0, 88(sp) # 4-byte Folded Spill
; ZHINX64-NEXT: lw t6, 240(sp)
-; ZHINX64-NEXT: lw t5, 248(sp)
-; ZHINX64-NEXT: lw t4, 256(sp)
+; ZHINX64-NEXT: lw t4, 248(sp)
+; ZHINX64-NEXT: lw t5, 256(sp)
; ZHINX64-NEXT: lw s0, 264(sp)
; ZHINX64-NEXT: lw s1, 272(sp)
; ZHINX64-NEXT: lw s2, 280(sp)
@@ -983,30 +983,30 @@ define float @caller_float_32(<32 x float> %A) nounwind {
; ZHINX64-NEXT: lw s10, 344(sp)
; ZHINX64-NEXT: lw s11, 352(sp)
; ZHINX64-NEXT: lw ra, 360(sp)
-; ZHINX64-NEXT: lw t3, 368(sp)
-; ZHINX64-NEXT: lw t2, 376(sp)
-; ZHINX64-NEXT: lw t1, 384(sp)
-; ZHINX64-NEXT: lw t0, 392(sp)
-; ZHINX64-NEXT: sw t0, 76(sp)
-; ZHINX64-NEXT: sw t1, 72(sp)
-; ZHINX64-NEXT: sw t2, 68(sp)
-; ZHINX64-NEXT: sw t3, 64(sp)
-; ZHINX64-NEXT: sw ra, 60(sp)
-; ZHINX64-NEXT: sw s11, 56(sp)
-; ZHINX64-NEXT: sw s10, 52(sp)
+; ZHINX64-NEXT: lw t0, 368(sp)
+; ZHINX64-NEXT: lw t1, 376(sp)
+; ZHINX64-NEXT: lw t2, 384(sp)
+; ZHINX64-NEXT: lw t3, 392(sp)
+; ZHINX64-NEXT: sw t0, 64(sp)
+; ZHINX64-NEXT: sw t1, 68(sp)
+; ZHINX64-NEXT: sw t2, 72(sp)
+; ZHINX64-NEXT: sw t3, 76(sp)
; ZHINX64-NEXT: sw s9, 48(sp)
-; ZHINX64-NEXT: sw s8, 44(sp)
-; ZHINX64-NEXT: sw s7, 40(sp)
-; ZHINX64-NEXT: sw s6, 36(sp)
+; ZHINX64-NEXT: sw s10, 52(sp)
+; ZHINX64-NEXT: sw s11, 56(sp)
+; ZHINX64-NEXT: sw ra, 60(sp)
; ZHINX64-NEXT: sw s5, 32(sp)
-; ZHINX64-NEXT: sw s4, 28(sp)
-; ZHINX64-NEXT: sw s3, 24(sp)
-; ZHINX64-NEXT: sw s2, 20(sp)
+; ZHINX64-NEXT: sw s6, 36(sp)
+; ZHINX64-NEXT: sw s7, 40(sp)
+; ZHINX64-NEXT: sw s8, 44(sp)
; ZHINX64-NEXT: sw s1, 16(sp)
-; ZHINX64-NEXT: sw s0, 12(sp)
-; ZHINX64-NEXT: sw t4, 8(sp)
-; ZHINX64-NEXT: sw t5, 4(sp)
+; ZHINX64-NEXT: sw s2, 20(sp)
+; ZHINX64-NEXT: sw s3, 24(sp)
+; ZHINX64-NEXT: sw s4, 28(sp)
; ZHINX64-NEXT: sw t6, 0(sp)
+; ZHINX64-NEXT: sw t4, 4(sp)
+; ZHINX64-NEXT: sw t5, 8(sp)
+; ZHINX64-NEXT: sw s0, 12(sp)
; ZHINX64-NEXT: lw t3, 100(sp) # 4-byte Folded Reload
; ZHINX64-NEXT: lw t4, 96(sp) # 4-byte Folded Reload
; ZHINX64-NEXT: lw t5, 92(sp) # 4-byte Folded Reload
@@ -1053,8 +1053,8 @@ define float @caller_float_32(<32 x float> %A) nounwind {
; ZFINX32-NEXT: lw t0, 172(sp)
; ZFINX32-NEXT: sw t0, 92(sp) # 4-byte Folded Spill
; ZFINX32-NEXT: lw t6, 176(sp)
-; ZFINX32-NEXT: lw t5, 180(sp)
-; ZFINX32-NEXT: lw t4, 184(sp)
+; ZFINX32-NEXT: lw t4, 180(sp)
+; ZFINX32-NEXT: lw t5, 184(sp)
; ZFINX32-NEXT: lw s0, 188(sp)
; ZFINX32-NEXT: lw s1, 192(sp)
; ZFINX32-NEXT: lw s2, 196(sp)
@@ -1068,30 +1068,30 @@ define float @caller_float_32(<32 x float> %A) nounwind {
; ZFINX32-NEXT: lw s10, 228(sp)
; ZFINX32-NEXT: lw s11, 232(sp)
; ZFINX32-NEXT: lw ra, 236(sp)
-; ZFINX32-NEXT: lw t3, 240(sp)
-; ZFINX32-NEXT: lw t2, 244(sp)
-; ZFINX32-NEXT: lw t1, 248(sp)
-; ZFINX32-NEXT: lw t0, 252(sp)
-; ZFINX32-NEXT: sw t0, 76(sp)
-; ZFINX32-NEXT: sw t1, 72(sp)
-; ZFINX32-NEXT: sw t2, 68(sp)
-; ZFINX32-NEXT: sw t3, 64(sp)
-; ZFINX32-NEXT: sw ra, 60(sp)
-; ZFINX32-NEXT: sw s11, 56(sp)
-; ZFINX32-NEXT: sw s10, 52(sp)
+; ZFINX32-NEXT: lw t0, 240(sp)
+; ZFINX32-NEXT: lw t1, 244(sp)
+; ZFINX32-NEXT: lw t2, 248(sp)
+; ZFINX32-NEXT: lw t3, 252(sp)
+; ZFINX32-NEXT: sw t0, 64(sp)
+; ZFINX32-NEXT: sw t1, 68(sp)
+; ZFINX32-NEXT: sw t2, 72(sp)
+; ZFINX32-NEXT: sw t3, 76(sp)
; ZFINX32-NEXT: sw s9, 48(sp)
-; ZFINX32-NEXT: sw s8, 44(sp)
-; ZFINX32-NEXT: sw s7, 40(sp)
-; ZFINX32-NEXT: sw s6, 36(sp)
+; ZFINX32-NEXT: sw s10, 52(sp)
+; ZFINX32-NEXT: sw s11, 56(sp)
+; ZFINX32-NEXT: sw ra, 60(sp)
; ZFINX32-NEXT: sw s5, 32(sp)
-; ZFINX32-NEXT: sw s4, 28(sp)
-; ZFINX32-NEXT: sw s3, 24(sp)
-; ZFINX32-NEXT: sw s2, 20(sp)
+; ZFINX32-NEXT: sw s6, 36(sp)
+; ZFINX32-NEXT: sw s7, 40(sp)
+; ZFINX32-NEXT: sw s8, 44(sp)
; ZFINX32-NEXT: sw s1, 16(sp)
-; ZFINX32-NEXT: sw s0, 12(sp)
-; ZFINX32-NEXT: sw t4, 8(sp)
-; ZFINX32-NEXT: sw t5, 4(sp)
+; ZFINX32-NEXT: sw s2, 20(sp)
+; ZFINX32-NEXT: sw s3, 24(sp)
+; ZFINX32-NEXT: sw s4, 28(sp)
; ZFINX32-NEXT: sw t6, 0(sp)
+; ZFINX32-NEXT: sw t4, 4(sp)
+; ZFINX32-NEXT: sw t5, 8(sp)
+; ZFINX32-NEXT: sw s0, 12(sp)
; ZFINX32-NEXT: lw t3, 104(sp) # 4-byte Folded Reload
; ZFINX32-NEXT: lw t4, 100(sp) # 4-byte Folded Reload
; ZFINX32-NEXT: lw t5, 96(sp) # 4-byte Folded Reload
@@ -1138,8 +1138,8 @@ define float @caller_float_32(<32 x float> %A) nounwind {
; ZFINX64-NEXT: lw t0, 232(sp)
; ZFINX64-NEXT: sw t0, 88(sp) # 4-byte Folded Spill
; ZFINX64-NEXT: lw t6, 240(sp)
-; ZFINX64-NEXT: lw t5, 248(sp)
-; ZFINX64-NEXT: lw t4, 256(sp)
+; ZFINX64-NEXT: lw t4, 248(sp)
+; ZFINX64-NEXT: lw t5, 256(sp)
; ZFINX64-NEXT: lw s0, 264(sp)
; ZFINX64-NEXT: lw s1, 272(sp)
; ZFINX64-NEXT: lw s2, 280(sp)
@@ -1153,30 +1153,30 @@ define float @caller_float_32(<32 x float> %A) nounwind {
; ZFINX64-NEXT: lw s10, 344(sp)
; ZFINX64-NEXT: lw s11, 352(sp)
; ZFINX64-NEXT: lw ra, 360(sp)
-; ZFINX64-NEXT: lw t3, 368(sp)
-; ZFINX64-NEXT: lw t2, 376(sp)
-; ZFINX64-NEXT: lw t1, 384(sp)
-; ZFINX64-NEXT: lw t0, 392(sp)
-; ZFINX64-NEXT: sw t0, 76(sp)
-; ZFINX64-NEXT: sw t1, 72(sp)
-; ZFINX64-NEXT: sw t2, 68(sp)
-; ZFINX64-NEXT: sw t3, 64(sp)
-; ZFINX64-NEXT: sw ra, 60(sp)
-; ZFINX64-NEXT: sw s11, 56(sp)
-; ZFINX64-NEXT: sw s10, 52(sp)
+; ZFINX64-NEXT: lw t0, 368(sp)
+; ZFINX64-NEXT: lw t1, 376(sp)
+; ZFINX64-NEXT: lw t2, 384(sp)
+; ZFINX64-NEXT: lw t3, 392(sp)
+; ZFINX64-NEXT: sw t0, 64(sp)
+; ZFINX64-NEXT: sw t1, 68(sp)
+; ZFINX64-NEXT: sw t2, 72(sp)
+; ZFINX64-NEXT: sw t3, 76(sp)
; ZFINX64-NEXT: sw s9, 48(sp)
-; ZFINX64-NEXT: sw s8, 44(sp)
-; ZFINX64-NEXT: sw s7, 40(sp)
-; ZFINX64-NEXT: sw s6, 36(sp)
+; ZFINX64-NEXT: sw s10, 52(sp)
+; ZFINX64-NEXT: sw s11, 56(sp)
+; ZFINX64-NEXT: sw ra, 60(sp)
; ZFINX64-NEXT: sw s5, 32(sp)
-; ZFINX64-NEXT: sw s4, 28(sp)
-; ZFINX64-NEXT: sw s3, 24(sp)
-; ZFINX64-NEXT: sw s2, 20(sp)
+; ZFINX64-NEXT: sw s6, 36(sp)
+; ZFINX64-NEXT: sw s7, 40(sp)
+; ZFINX64-NEXT: sw s8, 44(sp)
; ZFINX64-NEXT: sw s1, 16(sp)
-; ZFINX64-NEXT: sw s0, 12(sp)
-; ZFINX64-NEXT: sw t4, 8(sp)
-; ZFINX64-NEXT: sw t5, 4(sp)
+; ZFINX64-NEXT: sw s2, 20(sp)
+; ZFINX64-NEXT: sw s3, 24(sp)
+; ZFINX64-NEXT: sw s4, 28(sp)
; ZFINX64-NEXT: sw t6, 0(sp)
+; ZFINX64-NEXT: sw t4, 4(sp)
+; ZFINX64-NEXT: sw t5, 8(sp)
+; ZFINX64-NEXT: sw s0, 12(sp)
; ZFINX64-NEXT: lw t3, 100(sp) # 4-byte Folded Reload
; ZFINX64-NEXT: lw t4, 96(sp) # 4-byte Folded Reload
; ZFINX64-NEXT: lw t5, 92(sp) # 4-byte Folded Reload
@@ -1223,8 +1223,8 @@ define float @caller_float_32(<32 x float> %A) nounwind {
; ZDINX32-NEXT: lw t0, 172(sp)
; ZDINX32-NEXT: sw t0, 92(sp) # 4-byte Folded Spill
; ZDINX32-NEXT: lw t6, 176(sp)
-; ZDINX32-NEXT: lw t5, 180(sp)
-; ZDINX32-NEXT: lw t4, 184(sp)
+; ZDINX32-NEXT: lw t4, 180(sp)
+; ZDINX32-NEXT: lw t5, 184(sp)
; ZDINX32-NEXT: lw s0, 188(sp)
; ZDINX32-NEXT: lw s1, 192(sp)
; ZDINX32-NEXT: lw s2, 196(sp)
@@ -1238,30 +1238,30 @@ define float @caller_float_32(<32 x float> %A) nounwind {
; ZDINX32-NEXT: lw s10, 228(sp)
; ZDINX32-NEXT: lw s11, 232(sp)
; ZDINX32-NEXT: lw ra, 236(sp)
-; ZDINX32-NEXT: lw t3, 240(sp)
-; ZDINX32-NEXT: lw t2, 244(sp)
-; ZDINX32-NEXT: lw t1, 248(sp)
-; ZDINX32-NEXT: lw t0, 252(sp)
-; ZDINX32-NEXT: sw t0, 76(sp)
-; ZDINX32-NEXT: sw t1, 72(sp)
-; ZDINX32-NEXT: sw t2, 68(sp)
-; ZDINX32-NEXT: sw t3, 64(sp)
-; ZDINX32-NEXT: sw ra, 60(sp)
-; ZDINX32-NEXT: sw s11, 56(sp)
-; ZDINX32-NEXT: sw s10, 52(sp)
+; ZDINX32-NEXT: lw t0, 240(sp)
+; ZDINX32-NEXT: lw t1, 244(sp)
+; ZDINX32-NEXT: lw t2, 248(sp)
+; ZDINX32-NEXT: lw t3, 252(sp)
+; ZDINX32-NEXT: sw t0, 64(sp)
+; ZDINX32-NEXT: sw t1, 68(sp)
+; ZDINX32-NEXT: sw t2, 72(sp)
+; ZDINX32-NEXT: sw t3, 76(sp)
; ZDINX32-NEXT: sw s9, 48(sp)
-; ZDINX32-NEXT: sw s8, 44(sp)
-; ZDINX32-NEXT: sw s7, 40(sp)
-; ZDINX32-NEXT: sw s6, 36(sp)
+; ZDINX32-NEXT: sw s10, 52(sp)
+; ZDINX32-NEXT: sw s11, 56(sp)
+; ZDINX32-NEXT: sw ra, 60(sp)
; ZDINX32-NEXT: sw s5, 32(sp)
-; ZDINX32-NEXT: sw s4, 28(sp)
-; ZDINX32-NEXT: sw s3, 24(sp)
-; ZDINX32-NEXT: sw s2, 20(sp)
+; ZDINX32-NEXT: sw s6, 36(sp)
+; ZDINX32-NEXT: sw s7, 40(sp)
+; ZDINX32-NEXT: sw s8, 44(sp)
; ZDINX32-NEXT: sw s1, 16(sp)
-; ZDINX32-NEXT: sw s0, 12(sp)
-; ZDINX32-NEXT: sw t4, 8(sp)
-; ZDINX32-NEXT: sw t5, 4(sp)
+; ZDINX32-NEXT: sw s2, 20(sp)
+; ZDINX32-NEXT: sw s3, 24(sp)
+; ZDINX32-NEXT: sw s4, 28(sp)
; ZDINX32-NEXT: sw t6, 0(sp)
+; ZDINX32-NEXT: sw t4, 4(sp)
+; ZDINX32-NEXT: sw t5, 8(sp)
+; ZDINX32-NEXT: sw s0, 12(sp)
; ZDINX32-NEXT: lw t3, 104(sp) # 4-byte Folded Reload
; ZDINX32-NEXT: lw t4, 100(sp) # 4-byte Folded Reload
; ZDINX32-NEXT: lw t5, 96(sp) # 4-byte Folded Reload
@@ -1308,8 +1308,8 @@ define float @caller_float_32(<32 x float> %A) nounwind {
; ZDINX64-NEXT: lw t0, 232(sp)
; ZDINX64-NEXT: sw t0, 88(sp) # 4-byte Folded Spill
; ZDINX64-NEXT: lw t6, 240(sp)
-; ZDINX64-NEXT: lw t5, 248(sp)
-; ZDINX64-NEXT: lw t4, 256(sp)
+; ZDINX64-NEXT: lw t4, 248(sp)
+; ZDINX64-NEXT: lw t5, 256(sp)
; ZDINX64-NEXT: lw s0, 264(sp)
; ZDINX64-NEXT: lw s1, 272(sp)
; ZDINX64-NEXT: lw s2, 280(sp)
@@ -1323,30 +1323,30 @@ define float @caller_float_32(<32 x float> %A) nounwind {
; ZDINX64-NEXT: lw s10, 344(sp)
; ZDINX64-NEXT: lw s11, 352(sp)
; ZDINX64-NEXT: lw ra, 360(sp)
-; ZDINX64-NEXT: lw t3, 368(sp)
-; ZDINX64-NEXT: lw t2, 376(sp)
-; ZDINX64-NEXT: lw t1, 384(sp)
-; ZDINX64-NEXT: lw t0, 392(sp)
-; ZDINX64-NEXT: sw t0, 76(sp)
-; ZDINX64-NEXT: sw t1, 72(sp)
-; ZDINX64-NEXT: sw t2, 68(sp)
-; ZDINX64-NEXT: sw t3, 64(sp)
-; ZDINX64-NEXT: sw ra, 60(sp)
-; ZDINX64-NEXT: sw s11, 56(sp)
-; ZDINX64-NEXT: sw s10, 52(sp)
+; ZDINX64-NEXT: lw t0, 368(sp)
+; ZDINX64-NEXT: lw t1, 376(sp)
+; ZDINX64-NEXT: lw t2, 384(sp)
+; ZDINX64-NEXT: lw t3, 392(sp)
+; ZDINX64-NEXT: sw t0, 64(sp)
+; ZDINX64-NEXT: sw t1, 68(sp)
+; ZDINX64-NEXT: sw t2, 72(sp)
+; ZDINX64-NEXT: sw t3, 76(sp)
; ZDINX64-NEXT: sw s9, 48(sp)
-; ZDINX64-NEXT: sw s8, 44(sp)
-; ZDINX64-NEXT: sw s7, 40(sp)
-; ZDINX64-NEXT: sw s6, 36(sp)
+; ZDINX64-NEXT: sw s10, 52(sp)
+; ZDINX64-NEXT: sw s11, 56(sp)
+; ZDINX64-NEXT: sw ra, 60(sp)
; ZDINX64-NEXT: sw s5, 32(sp)
-; ZDINX64-NEXT: sw s4, 28(sp)
-; ZDINX64-NEXT: sw s3, 24(sp)
-; ZDINX64-NEXT: sw s2, 20(sp)
+; ZDINX64-NEXT: sw s6, 36(sp)
+; ZDINX64-NEXT: sw s7, 40(sp)
+; ZDINX64-NEXT: sw s8, 44(sp)
; ZDINX64-NEXT: sw s1, 16(sp)
-; ZDINX64-NEXT: sw s0, 12(sp)
-; ZDINX64-NEXT: sw t4, 8(sp)
-; ZDINX64-NEXT: sw t5, 4(sp)
+; ZDINX64-NEXT: sw s2, 20(sp)
+; ZDINX64-NEXT: sw s3, 24(sp)
+; ZDINX64-NEXT: sw s4, 28(sp)
; ZDINX64-NEXT: sw t6, 0(sp)
+; ZDINX64-NEXT: sw t4, 4(sp)
+; ZDINX64-NEXT: sw t5, 8(sp)
+; ZDINX64-NEXT: sw s0, 12(sp)
; ZDINX64-NEXT: lw t3, 100(sp) # 4-byte Folded Reload
; ZDINX64-NEXT: lw t4, 96(sp) # 4-byte Folded Reload
; ZDINX64-NEXT: lw t5, 92(sp) # 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll b/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll
index 4d4580188096f0..a204b928304123 100644
--- a/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll
+++ b/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll
@@ -293,8 +293,8 @@ define dso_local void @store_g_4() nounwind {
; RV32I-MEDIUM-NEXT: .Lpcrel_hi6:
; RV32I-MEDIUM-NEXT: auipc a0, %pcrel_hi(g_4)
; RV32I-MEDIUM-NEXT: addi a0, a0, %pcrel_lo(.Lpcrel_hi6)
-; RV32I-MEDIUM-NEXT: sw zero, 4(a0)
; RV32I-MEDIUM-NEXT: sw zero, 0(a0)
+; RV32I-MEDIUM-NEXT: sw zero, 4(a0)
; RV32I-MEDIUM-NEXT: ret
;
; RV64I-LABEL: store_g_4:
@@ -335,8 +335,8 @@ define dso_local void @store_g_8() nounwind {
; RV32I-MEDIUM-NEXT: .Lpcrel_hi7:
; RV32I-MEDIUM-NEXT: auipc a0, %pcrel_hi(g_8)
; RV32I-MEDIUM-NEXT: addi a0, a0, %pcrel_lo(.Lpcrel_hi7)
-; RV32I-MEDIUM-NEXT: sw zero, 4(a0)
; RV32I-MEDIUM-NEXT: sw zero, 0(a0)
+; RV32I-MEDIUM-NEXT: sw zero, 4(a0)
; RV32I-MEDIUM-NEXT: ret
;
; RV64I-LABEL: store_g_8:
diff --git a/llvm/test/CodeGen/RISCV/forced-atomics.ll b/llvm/test/CodeGen/RISCV/forced-atomics.ll
index 603491bf3d3003..133d735a46ed7f 100644
--- a/llvm/test/CodeGen/RISCV/forced-atomics.ll
+++ b/llvm/test/CodeGen/RISCV/forced-atomics.ll
@@ -4338,8 +4338,8 @@ define i64 @cmpxchg64_monotonic(ptr %p) nounwind {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw zero, 4(sp)
; RV32-NEXT: sw zero, 0(sp)
+; RV32-NEXT: sw zero, 4(sp)
; RV32-NEXT: mv a1, sp
; RV32-NEXT: li a2, 1
; RV32-NEXT: li a3, 0
@@ -4398,8 +4398,8 @@ define i64 @cmpxchg64_seq_cst(ptr %p) nounwind {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT: sw zero, 4(sp)
; RV32-NEXT: sw zero, 0(sp)
+; RV32-NEXT: sw zero, 4(sp)
; RV32-NEXT: mv a1, sp
; RV32-NEXT: li a2, 1
; RV32-NEXT: li a4, 5
@@ -4464,14 +4464,14 @@ define i128 @load128(ptr %p) nounwind {
; RV32-NEXT: addi a2, sp, 8
; RV32-NEXT: li a3, 5
; RV32-NEXT: call __atomic_load
-; RV32-NEXT: lw a0, 20(sp)
-; RV32-NEXT: lw a1, 16(sp)
-; RV32-NEXT: lw a2, 12(sp)
-; RV32-NEXT: lw a3, 8(sp)
-; RV32-NEXT: sw a0, 12(s0)
-; RV32-NEXT: sw a1, 8(s0)
-; RV32-NEXT: sw a2, 4(s0)
-; RV32-NEXT: sw a3, 0(s0)
+; RV32-NEXT: lw a0, 8(sp)
+; RV32-NEXT: lw a1, 12(sp)
+; RV32-NEXT: lw a2, 16(sp)
+; RV32-NEXT: lw a3, 20(sp)
+; RV32-NEXT: sw a0, 0(s0)
+; RV32-NEXT: sw a1, 4(s0)
+; RV32-NEXT: sw a2, 8(s0)
+; RV32-NEXT: sw a3, 12(s0)
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 32
@@ -4496,10 +4496,10 @@ define void @store128(ptr %p) nounwind {
; RV32-NEXT: addi sp, sp, -32
; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32-NEXT: mv a1, a0
-; RV32-NEXT: sw zero, 20(sp)
-; RV32-NEXT: sw zero, 16(sp)
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: sw zero, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
+; RV32-NEXT: sw zero, 16(sp)
+; RV32-NEXT: sw zero, 20(sp)
; RV32-NEXT: li a0, 16
; RV32-NEXT: addi a2, sp, 8
; RV32-NEXT: li a3, 5
@@ -4550,8 +4550,8 @@ define i128 @rmw128(ptr %p) nounwind {
; RV32-NEXT: sw a2, 20(sp)
; RV32-NEXT: sw a3, 24(sp)
; RV32-NEXT: sw a4, 28(sp)
-; RV32-NEXT: sw a5, 4(sp)
; RV32-NEXT: sw a0, 0(sp)
+; RV32-NEXT: sw a5, 4(sp)
; RV32-NEXT: sw a6, 8(sp)
; RV32-NEXT: sw a7, 12(sp)
; RV32-NEXT: li a0, 16
@@ -4599,29 +4599,29 @@ define i128 @cmpxchg128(ptr %p) nounwind {
; RV32-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
; RV32-NEXT: mv s0, a0
-; RV32-NEXT: sw zero, 36(sp)
-; RV32-NEXT: sw zero, 32(sp)
-; RV32-NEXT: sw zero, 28(sp)
; RV32-NEXT: sw zero, 24(sp)
-; RV32-NEXT: sw zero, 20(sp)
-; RV32-NEXT: sw zero, 16(sp)
-; RV32-NEXT: sw zero, 12(sp)
+; RV32-NEXT: sw zero, 28(sp)
+; RV32-NEXT: sw zero, 32(sp)
+; RV32-NEXT: sw zero, 36(sp)
; RV32-NEXT: li a0, 1
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
+; RV32-NEXT: sw zero, 16(sp)
+; RV32-NEXT: sw zero, 20(sp)
; RV32-NEXT: li a0, 16
; RV32-NEXT: addi a2, sp, 24
; RV32-NEXT: addi a3, sp, 8
; RV32-NEXT: li a4, 5
; RV32-NEXT: li a5, 5
; RV32-NEXT: call __atomic_compare_exchange
-; RV32-NEXT: lw a0, 36(sp)
-; RV32-NEXT: lw a1, 32(sp)
-; RV32-NEXT: lw a2, 28(sp)
-; RV32-NEXT: lw a3, 24(sp)
-; RV32-NEXT: sw a0, 12(s0)
-; RV32-NEXT: sw a1, 8(s0)
-; RV32-NEXT: sw a2, 4(s0)
-; RV32-NEXT: sw a3, 0(s0)
+; RV32-NEXT: lw a0, 24(sp)
+; RV32-NEXT: lw a1, 28(sp)
+; RV32-NEXT: lw a2, 32(sp)
+; RV32-NEXT: lw a3, 36(sp)
+; RV32-NEXT: sw a0, 0(s0)
+; RV32-NEXT: sw a1, 4(s0)
+; RV32-NEXT: sw a2, 8(s0)
+; RV32-NEXT: sw a3, 12(s0)
; RV32-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 48
@@ -4631,8 +4631,8 @@ define i128 @cmpxchg128(ptr %p) nounwind {
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -32
; RV64-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64-NEXT: sd zero, 8(sp)
; RV64-NEXT: sd zero, 0(sp)
+; RV64-NEXT: sd zero, 8(sp)
; RV64-NEXT: mv a1, sp
; RV64-NEXT: li a2, 1
; RV64-NEXT: li a4, 5
diff --git a/llvm/test/CodeGen/RISCV/fp128.ll b/llvm/test/CodeGen/RISCV/fp128.ll
index 611a70c0ad8b93..0bde85b54e5d15 100644
--- a/llvm/test/CodeGen/RISCV/fp128.ll
+++ b/llvm/test/CodeGen/RISCV/fp128.ll
@@ -15,24 +15,24 @@ define i32 @test_load_and_cmp() nounwind {
; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
; RV32I-NEXT: lui a0, %hi(x)
; RV32I-NEXT: lw a2, %lo(x)(a0)
-; RV32I-NEXT: lw a1, %lo(x+4)(a0)
-; RV32I-NEXT: lw a3, %lo(x+8)(a0)
-; RV32I-NEXT: lw a0, %lo(x+12)(a0)
-; RV32I-NEXT: lui a4, %hi(y)
-; RV32I-NEXT: lw a5, %lo(y)(a4)
-; RV32I-NEXT: lw a6, %lo(y+4)(a4)
-; RV32I-NEXT: lw a7, %lo(y+8)(a4)
-; RV32I-NEXT: lw a4, %lo(y+12)(a4)
-; RV32I-NEXT: sw a4, 20(sp)
-; RV32I-NEXT: sw a7, 16(sp)
+; RV32I-NEXT: lw a3, %lo(x+4)(a0)
+; RV32I-NEXT: lw a4, %lo(x+8)(a0)
+; RV32I-NEXT: lw a5, %lo(x+12)(a0)
+; RV32I-NEXT: lui a0, %hi(y)
+; RV32I-NEXT: lw a1, %lo(y)(a0)
+; RV32I-NEXT: lw a6, %lo(y+4)(a0)
+; RV32I-NEXT: lw a7, %lo(y+8)(a0)
+; RV32I-NEXT: lw a0, %lo(y+12)(a0)
+; RV32I-NEXT: sw a1, 8(sp)
; RV32I-NEXT: sw a6, 12(sp)
-; RV32I-NEXT: sw a5, 8(sp)
-; RV32I-NEXT: sw a0, 36(sp)
-; RV32I-NEXT: sw a3, 32(sp)
-; RV32I-NEXT: sw a1, 28(sp)
+; RV32I-NEXT: sw a7, 16(sp)
+; RV32I-NEXT: sw a0, 20(sp)
; RV32I-NEXT: addi a0, sp, 24
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: sw a2, 24(sp)
+; RV32I-NEXT: sw a3, 28(sp)
+; RV32I-NEXT: sw a4, 32(sp)
+; RV32I-NEXT: sw a5, 36(sp)
; RV32I-NEXT: call __netf2
; RV32I-NEXT: snez a0, a0
; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
@@ -52,35 +52,35 @@ define i32 @test_add_and_fptosi() nounwind {
; RV32I-NEXT: sw ra, 76(sp) # 4-byte Folded Spill
; RV32I-NEXT: lui a0, %hi(x)
; RV32I-NEXT: lw a3, %lo(x)(a0)
-; RV32I-NEXT: lw a1, %lo(x+4)(a0)
-; RV32I-NEXT: lw a2, %lo(x+8)(a0)
-; RV32I-NEXT: lw a0, %lo(x+12)(a0)
-; RV32I-NEXT: lui a4, %hi(y)
-; RV32I-NEXT: lw a5, %lo(y)(a4)
-; RV32I-NEXT: lw a6, %lo(y+4)(a4)
-; RV32I-NEXT: lw a7, %lo(y+8)(a4)
-; RV32I-NEXT: lw a4, %lo(y+12)(a4)
-; RV32I-NEXT: sw a4, 36(sp)
+; RV32I-NEXT: lw a4, %lo(x+4)(a0)
+; RV32I-NEXT: lw a5, %lo(x+8)(a0)
+; RV32I-NEXT: lw a6, %lo(x+12)(a0)
+; RV32I-NEXT: lui a0, %hi(y)
+; RV32I-NEXT: lw a1, %lo(y)(a0)
+; RV32I-NEXT: lw a2, %lo(y+4)(a0)
+; RV32I-NEXT: lw a7, %lo(y+8)(a0)
+; RV32I-NEXT: lw a0, %lo(y+12)(a0)
+; RV32I-NEXT: sw a1, 24(sp)
+; RV32I-NEXT: sw a2, 28(sp)
; RV32I-NEXT: sw a7, 32(sp)
-; RV32I-NEXT: sw a6, 28(sp)
-; RV32I-NEXT: sw a5, 24(sp)
-; RV32I-NEXT: sw a0, 52(sp)
-; RV32I-NEXT: sw a2, 48(sp)
-; RV32I-NEXT: sw a1, 44(sp)
+; RV32I-NEXT: sw a0, 36(sp)
; RV32I-NEXT: addi a0, sp, 56
; RV32I-NEXT: addi a1, sp, 40
; RV32I-NEXT: addi a2, sp, 24
; RV32I-NEXT: sw a3, 40(sp)
+; RV32I-NEXT: sw a4, 44(sp)
+; RV32I-NEXT: sw a5, 48(sp)
+; RV32I-NEXT: sw a6, 52(sp)
; RV32I-NEXT: call __addtf3
; RV32I-NEXT: lw a1, 56(sp)
-; RV32I-NEXT: lw a0, 60(sp)
-; RV32I-NEXT: lw a2, 64(sp)
-; RV32I-NEXT: lw a3, 68(sp)
-; RV32I-NEXT: sw a3, 20(sp)
-; RV32I-NEXT: sw a2, 16(sp)
-; RV32I-NEXT: sw a0, 12(sp)
+; RV32I-NEXT: lw a2, 60(sp)
+; RV32I-NEXT: lw a3, 64(sp)
+; RV32I-NEXT: lw a4, 68(sp)
; RV32I-NEXT: addi a0, sp, 8
; RV32I-NEXT: sw a1, 8(sp)
+; RV32I-NEXT: sw a2, 12(sp)
+; RV32I-NEXT: sw a3, 16(sp)
+; RV32I-NEXT: sw a4, 20(sp)
; RV32I-NEXT: call __fixtfsi
; RV32I-NEXT: lw ra, 76(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 80
diff --git a/llvm/test/CodeGen/RISCV/frame.ll b/llvm/test/CodeGen/RISCV/frame.ll
index d50f1e55417a72..10d542496e0f7f 100644
--- a/llvm/test/CodeGen/RISCV/frame.ll
+++ b/llvm/test/CodeGen/RISCV/frame.ll
@@ -12,10 +12,10 @@ define i32 @test() nounwind {
; RV32I-FPELIM-NEXT: addi sp, sp, -32
; RV32I-FPELIM-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-FPELIM-NEXT: sw zero, 24(sp)
-; RV32I-FPELIM-NEXT: sw zero, 20(sp)
-; RV32I-FPELIM-NEXT: sw zero, 16(sp)
-; RV32I-FPELIM-NEXT: sw zero, 12(sp)
; RV32I-FPELIM-NEXT: sw zero, 8(sp)
+; RV32I-FPELIM-NEXT: sw zero, 12(sp)
+; RV32I-FPELIM-NEXT: sw zero, 16(sp)
+; RV32I-FPELIM-NEXT: sw zero, 20(sp)
; RV32I-FPELIM-NEXT: addi a0, sp, 12
; RV32I-FPELIM-NEXT: call test1
; RV32I-FPELIM-NEXT: li a0, 0
@@ -30,10 +30,10 @@ define i32 @test() nounwind {
; RV32I-WITHFP-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-WITHFP-NEXT: addi s0, sp, 32
; RV32I-WITHFP-NEXT: sw zero, -16(s0)
-; RV32I-WITHFP-NEXT: sw zero, -20(s0)
-; RV32I-WITHFP-NEXT: sw zero, -24(s0)
-; RV32I-WITHFP-NEXT: sw zero, -28(s0)
; RV32I-WITHFP-NEXT: sw zero, -32(s0)
+; RV32I-WITHFP-NEXT: sw zero, -28(s0)
+; RV32I-WITHFP-NEXT: sw zero, -24(s0)
+; RV32I-WITHFP-NEXT: sw zero, -20(s0)
; RV32I-WITHFP-NEXT: addi a0, s0, -28
; RV32I-WITHFP-NEXT: call test1
; RV32I-WITHFP-NEXT: li a0, 0
diff --git a/llvm/test/CodeGen/RISCV/get-setcc-result-type.ll b/llvm/test/CodeGen/RISCV/get-setcc-result-type.ll
index f532f3c9f33df0..9322abcfbbdcef 100644
--- a/llvm/test/CodeGen/RISCV/get-setcc-result-type.ll
+++ b/llvm/test/CodeGen/RISCV/get-setcc-result-type.ll
@@ -17,10 +17,10 @@ define void @getSetCCResultType(ptr %p, ptr %q) nounwind {
; RV32I-NEXT: addi a3, a3, -1
; RV32I-NEXT: addi a2, a2, -1
; RV32I-NEXT: addi a1, a1, -1
-; RV32I-NEXT: sw a1, 12(a0)
-; RV32I-NEXT: sw a2, 8(a0)
-; RV32I-NEXT: sw a3, 4(a0)
; RV32I-NEXT: sw a4, 0(a0)
+; RV32I-NEXT: sw a3, 4(a0)
+; RV32I-NEXT: sw a2, 8(a0)
+; RV32I-NEXT: sw a1, 12(a0)
; RV32I-NEXT: ret
entry:
%0 = load <4 x i32>, ptr %p, align 16
diff --git a/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll b/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll
index 5f9866f08c821d..77efffb6358af7 100644
--- a/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll
+++ b/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll
@@ -17,9 +17,9 @@ define dso_local void @multiple_stores() local_unnamed_addr nounwind {
; CHECK-NEXT: lui a0, %hi(s)
; CHECK-NEXT: addi a0, a0, %lo(s)
; CHECK-NEXT: li a1, 10
+; CHECK-NEXT: li a2, 20
; CHECK-NEXT: sw a1, 160(a0)
-; CHECK-NEXT: li a1, 20
-; CHECK-NEXT: sw a1, 164(a0)
+; CHECK-NEXT: sw a2, 164(a0)
; CHECK-NEXT: ret
entry:
store i32 10, ptr getelementptr inbounds (%struct.S, ptr @s, i32 0, i32 1), align 4
diff --git a/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll b/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll
index 6009a6c7e138ae..8116d138d288e2 100644
--- a/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll
+++ b/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll
@@ -71,8 +71,8 @@ define i64 @ctz_nxv8i1_no_range(<vscale x 8 x i16> %a) {
; RV32-NEXT: li a1, 0
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __muldi3
-; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: sw a0, 16(sp)
+; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: addi a2, sp, 16
; RV32-NEXT: vsetvli a3, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v8, (a2), zero
diff --git a/llvm/test/CodeGen/RISCV/legalize-fneg.ll b/llvm/test/CodeGen/RISCV/legalize-fneg.ll
index dfd62e8d5f9f56..f60b77b92c09ea 100644
--- a/llvm/test/CodeGen/RISCV/legalize-fneg.ll
+++ b/llvm/test/CodeGen/RISCV/legalize-fneg.ll
@@ -57,14 +57,14 @@ define void @test3(ptr %a, ptr %b) nounwind {
; RV32-LABEL: test3:
; RV32: # %bb.0: # %entry
; RV32-NEXT: lw a2, 12(a1)
-; RV32-NEXT: lw a3, 4(a1)
-; RV32-NEXT: lw a4, 8(a1)
-; RV32-NEXT: lw a1, 0(a1)
+; RV32-NEXT: lw a3, 0(a1)
+; RV32-NEXT: lw a4, 4(a1)
+; RV32-NEXT: lw a1, 8(a1)
; RV32-NEXT: lui a5, 524288
; RV32-NEXT: xor a2, a2, a5
-; RV32-NEXT: sw a4, 8(a0)
-; RV32-NEXT: sw a1, 0(a0)
-; RV32-NEXT: sw a3, 4(a0)
+; RV32-NEXT: sw a3, 0(a0)
+; RV32-NEXT: sw a4, 4(a0)
+; RV32-NEXT: sw a1, 8(a0)
; RV32-NEXT: sw a2, 12(a0)
; RV32-NEXT: ret
;
diff --git a/llvm/test/CodeGen/RISCV/llvm.exp10.ll b/llvm/test/CodeGen/RISCV/llvm.exp10.ll
index 0941f6a73da280..cc07449c4e6200 100644
--- a/llvm/test/CodeGen/RISCV/llvm.exp10.ll
+++ b/llvm/test/CodeGen/RISCV/llvm.exp10.ll
@@ -197,8 +197,8 @@ define <3 x half> @exp10_v3f16(<3 x half> %x) {
; RV32IFD-NEXT: call exp10f
; RV32IFD-NEXT: call __truncsfhf2
; RV32IFD-NEXT: fmv.x.w a0, fa0
-; RV32IFD-NEXT: sh a0, 4(s0)
; RV32IFD-NEXT: sw s1, 0(s0)
+; RV32IFD-NEXT: sh a0, 4(s0)
; RV32IFD-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
@@ -246,8 +246,8 @@ define <3 x half> @exp10_v3f16(<3 x half> %x) {
; RV64IFD-NEXT: call exp10f
; RV64IFD-NEXT: call __truncsfhf2
; RV64IFD-NEXT: fmv.x.w a0, fa0
-; RV64IFD-NEXT: sh a0, 4(s0)
; RV64IFD-NEXT: sw s1, 0(s0)
+; RV64IFD-NEXT: sh a0, 4(s0)
; RV64IFD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64IFD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64IFD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -313,10 +313,10 @@ define <4 x half> @exp10_v4f16(<4 x half> %x) {
; RV32IFD-NEXT: fmv.x.w s3, fs3
; RV32IFD-NEXT: call __truncsfhf2
; RV32IFD-NEXT: fmv.x.w a0, fa0
-; RV32IFD-NEXT: sh a0, 6(s0)
-; RV32IFD-NEXT: sh s3, 4(s0)
-; RV32IFD-NEXT: sh s2, 2(s0)
; RV32IFD-NEXT: sh s1, 0(s0)
+; RV32IFD-NEXT: sh s2, 2(s0)
+; RV32IFD-NEXT: sh s3, 4(s0)
+; RV32IFD-NEXT: sh a0, 6(s0)
; RV32IFD-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: lw s0, 56(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: lw s1, 52(sp) # 4-byte Folded Reload
@@ -377,10 +377,10 @@ define <4 x half> @exp10_v4f16(<4 x half> %x) {
; RV64IFD-NEXT: fmv.x.w s3, fs0
; RV64IFD-NEXT: call __truncsfhf2
; RV64IFD-NEXT: fmv.x.w a0, fa0
-; RV64IFD-NEXT: sh a0, 6(s0)
-; RV64IFD-NEXT: sh s3, 4(s0)
-; RV64IFD-NEXT: sh s2, 2(s0)
; RV64IFD-NEXT: sh s1, 0(s0)
+; RV64IFD-NEXT: sh s2, 2(s0)
+; RV64IFD-NEXT: sh s3, 4(s0)
+; RV64IFD-NEXT: sh a0, 6(s0)
; RV64IFD-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
; RV64IFD-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
; RV64IFD-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
@@ -504,9 +504,9 @@ define <3 x float> @exp10_v3f32(<3 x float> %x) {
; RV32IFD-NEXT: fmv.s fs1, fa0
; RV32IFD-NEXT: fmv.s fa0, fs0
; RV32IFD-NEXT: call exp10f
-; RV32IFD-NEXT: fsw fa0, 8(s0)
-; RV32IFD-NEXT: fsw fs1, 4(s0)
; RV32IFD-NEXT: fsw fs2, 0(s0)
+; RV32IFD-NEXT: fsw fs1, 4(s0)
+; RV32IFD-NEXT: fsw fa0, 8(s0)
; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload
@@ -544,8 +544,8 @@ define <3 x float> @exp10_v3f32(<3 x float> %x) {
; RV64IFD-NEXT: or s1, a0, s1
; RV64IFD-NEXT: fmv.s fa0, fs0
; RV64IFD-NEXT: call exp10f
-; RV64IFD-NEXT: fsw fa0, 8(s0)
; RV64IFD-NEXT: sd s1, 0(s0)
+; RV64IFD-NEXT: fsw fa0, 8(s0)
; RV64IFD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64IFD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64IFD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -588,10 +588,10 @@ define <4 x float> @exp10_v4f32(<4 x float> %x) {
; RV32IFD-NEXT: fmv.s fs1, fa0
; RV32IFD-NEXT: fmv.s fa0, fs0
; RV32IFD-NEXT: call exp10f
-; RV32IFD-NEXT: fsw fa0, 12(s0)
-; RV32IFD-NEXT: fsw fs1, 8(s0)
-; RV32IFD-NEXT: fsw fs2, 4(s0)
; RV32IFD-NEXT: fsw fs3, 0(s0)
+; RV32IFD-NEXT: fsw fs2, 4(s0)
+; RV32IFD-NEXT: fsw fs1, 8(s0)
+; RV32IFD-NEXT: fsw fa0, 12(s0)
; RV32IFD-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: fld fs0, 32(sp) # 8-byte Folded Reload
@@ -631,10 +631,10 @@ define <4 x float> @exp10_v4f32(<4 x float> %x) {
; RV64IFD-NEXT: fmv.s fs1, fa0
; RV64IFD-NEXT: fmv.s fa0, fs0
; RV64IFD-NEXT: call exp10f
-; RV64IFD-NEXT: fsw fa0, 12(s0)
-; RV64IFD-NEXT: fsw fs1, 8(s0)
-; RV64IFD-NEXT: fsw fs2, 4(s0)
; RV64IFD-NEXT: fsw fs3, 0(s0)
+; RV64IFD-NEXT: fsw fs2, 4(s0)
+; RV64IFD-NEXT: fsw fs1, 8(s0)
+; RV64IFD-NEXT: fsw fa0, 12(s0)
; RV64IFD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64IFD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64IFD-NEXT: fld fs0, 24(sp) # 8-byte Folded Reload
@@ -736,9 +736,9 @@ define <3 x double> @exp10_v3f64(<3 x double> %x) {
; RV32IFD-NEXT: fmv.d fs1, fa0
; RV32IFD-NEXT: fmv.d fa0, fs0
; RV32IFD-NEXT: call exp10
-; RV32IFD-NEXT: fsd fa0, 16(s0)
-; RV32IFD-NEXT: fsd fs1, 8(s0)
; RV32IFD-NEXT: fsd fs2, 0(s0)
+; RV32IFD-NEXT: fsd fs1, 8(s0)
+; RV32IFD-NEXT: fsd fa0, 16(s0)
; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload
@@ -771,9 +771,9 @@ define <3 x double> @exp10_v3f64(<3 x double> %x) {
; RV64IFD-NEXT: fmv.d fs1, fa0
; RV64IFD-NEXT: fmv.d fa0, fs0
; RV64IFD-NEXT: call exp10
-; RV64IFD-NEXT: fsd fa0, 16(s0)
-; RV64IFD-NEXT: fsd fs1, 8(s0)
; RV64IFD-NEXT: fsd fs2, 0(s0)
+; RV64IFD-NEXT: fsd fs1, 8(s0)
+; RV64IFD-NEXT: fsd fa0, 16(s0)
; RV64IFD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64IFD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64IFD-NEXT: fld fs0, 24(sp) # 8-byte Folded Reload
@@ -816,10 +816,10 @@ define <4 x double> @exp10_v4f64(<4 x double> %x) {
; RV32IFD-NEXT: fmv.d fs1, fa0
; RV32IFD-NEXT: fmv.d fa0, fs0
; RV32IFD-NEXT: call exp10
-; RV32IFD-NEXT: fsd fa0, 24(s0)
-; RV32IFD-NEXT: fsd fs1, 16(s0)
-; RV32IFD-NEXT: fsd fs2, 8(s0)
; RV32IFD-NEXT: fsd fs3, 0(s0)
+; RV32IFD-NEXT: fsd fs2, 8(s0)
+; RV32IFD-NEXT: fsd fs1, 16(s0)
+; RV32IFD-NEXT: fsd fa0, 24(s0)
; RV32IFD-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: fld fs0, 32(sp) # 8-byte Folded Reload
@@ -859,10 +859,10 @@ define <4 x double> @exp10_v4f64(<4 x double> %x) {
; RV64IFD-NEXT: fmv.d fs1, fa0
; RV64IFD-NEXT: fmv.d fa0, fs0
; RV64IFD-NEXT: call exp10
-; RV64IFD-NEXT: fsd fa0, 24(s0)
-; RV64IFD-NEXT: fsd fs1, 16(s0)
-; RV64IFD-NEXT: fsd fs2, 8(s0)
; RV64IFD-NEXT: fsd fs3, 0(s0)
+; RV64IFD-NEXT: fsd fs2, 8(s0)
+; RV64IFD-NEXT: fsd fs1, 16(s0)
+; RV64IFD-NEXT: fsd fa0, 24(s0)
; RV64IFD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64IFD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64IFD-NEXT: fld fs0, 24(sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/llvm.frexp.ll b/llvm/test/CodeGen/RISCV/llvm.frexp.ll
index 2c9d640e03a634..e85a7118f5ff83 100644
--- a/llvm/test/CodeGen/RISCV/llvm.frexp.ll
+++ b/llvm/test/CodeGen/RISCV/llvm.frexp.ll
@@ -568,18 +568,18 @@ define { <4 x float>, <4 x i32> } @test_frexp_v4f32_v4i32(<4 x float> %a) nounwi
; RV32IFD-NEXT: addi a0, sp, 20
; RV32IFD-NEXT: fmv.s fa0, fs0
; RV32IFD-NEXT: call frexpf
-; RV32IFD-NEXT: lw a0, 20(sp)
-; RV32IFD-NEXT: lw a1, 16(sp)
-; RV32IFD-NEXT: lw a2, 12(sp)
-; RV32IFD-NEXT: lw a3, 8(sp)
-; RV32IFD-NEXT: sw a0, 28(s0)
-; RV32IFD-NEXT: sw a1, 24(s0)
-; RV32IFD-NEXT: sw a2, 20(s0)
-; RV32IFD-NEXT: sw a3, 16(s0)
-; RV32IFD-NEXT: fsw fa0, 12(s0)
-; RV32IFD-NEXT: fsw fs1, 8(s0)
-; RV32IFD-NEXT: fsw fs2, 4(s0)
+; RV32IFD-NEXT: lw a0, 8(sp)
+; RV32IFD-NEXT: lw a1, 12(sp)
+; RV32IFD-NEXT: lw a2, 16(sp)
+; RV32IFD-NEXT: lw a3, 20(sp)
+; RV32IFD-NEXT: sw a0, 16(s0)
+; RV32IFD-NEXT: sw a1, 20(s0)
+; RV32IFD-NEXT: sw a2, 24(s0)
+; RV32IFD-NEXT: sw a3, 28(s0)
; RV32IFD-NEXT: fsw fs3, 0(s0)
+; RV32IFD-NEXT: fsw fs2, 4(s0)
+; RV32IFD-NEXT: fsw fs1, 8(s0)
+; RV32IFD-NEXT: fsw fa0, 12(s0)
; RV32IFD-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: lw s0, 56(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: fld fs0, 48(sp) # 8-byte Folded Reload
@@ -616,18 +616,18 @@ define { <4 x float>, <4 x i32> } @test_frexp_v4f32_v4i32(<4 x float> %a) nounwi
; RV64IFD-NEXT: addi a0, sp, 24
; RV64IFD-NEXT: fmv.s fa0, fs0
; RV64IFD-NEXT: call frexpf
-; RV64IFD-NEXT: ld a0, 24(sp)
-; RV64IFD-NEXT: ld a1, 16(sp)
-; RV64IFD-NEXT: ld a2, 8(sp)
-; RV64IFD-NEXT: ld a3, 0(sp)
-; RV64IFD-NEXT: sw a0, 28(s0)
-; RV64IFD-NEXT: sw a1, 24(s0)
-; RV64IFD-NEXT: sw a2, 20(s0)
-; RV64IFD-NEXT: sw a3, 16(s0)
-; RV64IFD-NEXT: fsw fa0, 12(s0)
-; RV64IFD-NEXT: fsw fs1, 8(s0)
-; RV64IFD-NEXT: fsw fs2, 4(s0)
+; RV64IFD-NEXT: ld a0, 0(sp)
+; RV64IFD-NEXT: ld a1, 8(sp)
+; RV64IFD-NEXT: ld a2, 16(sp)
+; RV64IFD-NEXT: ld a3, 24(sp)
+; RV64IFD-NEXT: sw a0, 16(s0)
+; RV64IFD-NEXT: sw a1, 20(s0)
+; RV64IFD-NEXT: sw a2, 24(s0)
+; RV64IFD-NEXT: sw a3, 28(s0)
; RV64IFD-NEXT: fsw fs3, 0(s0)
+; RV64IFD-NEXT: fsw fs2, 4(s0)
+; RV64IFD-NEXT: fsw fs1, 8(s0)
+; RV64IFD-NEXT: fsw fa0, 12(s0)
; RV64IFD-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
; RV64IFD-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
; RV64IFD-NEXT: fld fs0, 56(sp) # 8-byte Folded Reload
@@ -666,18 +666,18 @@ define { <4 x float>, <4 x i32> } @test_frexp_v4f32_v4i32(<4 x float> %a) nounwi
; RV32IZFINXZDINX-NEXT: addi a1, sp, 20
; RV32IZFINXZDINX-NEXT: mv a0, s0
; RV32IZFINXZDINX-NEXT: call frexpf
-; RV32IZFINXZDINX-NEXT: lw a1, 20(sp)
-; RV32IZFINXZDINX-NEXT: lw a2, 16(sp)
-; RV32IZFINXZDINX-NEXT: lw a3, 12(sp)
-; RV32IZFINXZDINX-NEXT: lw a4, 8(sp)
-; RV32IZFINXZDINX-NEXT: sw a1, 28(s3)
-; RV32IZFINXZDINX-NEXT: sw a2, 24(s3)
-; RV32IZFINXZDINX-NEXT: sw a3, 20(s3)
-; RV32IZFINXZDINX-NEXT: sw a4, 16(s3)
-; RV32IZFINXZDINX-NEXT: sw a0, 12(s3)
-; RV32IZFINXZDINX-NEXT: sw s1, 8(s3)
-; RV32IZFINXZDINX-NEXT: sw s2, 4(s3)
+; RV32IZFINXZDINX-NEXT: lw a1, 8(sp)
+; RV32IZFINXZDINX-NEXT: lw a2, 12(sp)
+; RV32IZFINXZDINX-NEXT: lw a3, 16(sp)
+; RV32IZFINXZDINX-NEXT: lw a4, 20(sp)
+; RV32IZFINXZDINX-NEXT: sw a1, 16(s3)
+; RV32IZFINXZDINX-NEXT: sw a2, 20(s3)
+; RV32IZFINXZDINX-NEXT: sw a3, 24(s3)
+; RV32IZFINXZDINX-NEXT: sw a4, 28(s3)
; RV32IZFINXZDINX-NEXT: sw s4, 0(s3)
+; RV32IZFINXZDINX-NEXT: sw s2, 4(s3)
+; RV32IZFINXZDINX-NEXT: sw s1, 8(s3)
+; RV32IZFINXZDINX-NEXT: sw a0, 12(s3)
; RV32IZFINXZDINX-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
@@ -716,18 +716,18 @@ define { <4 x float>, <4 x i32> } @test_frexp_v4f32_v4i32(<4 x float> %a) nounwi
; RV64IZFINXZDINX-NEXT: addi a1, sp, 24
; RV64IZFINXZDINX-NEXT: mv a0, s0
; RV64IZFINXZDINX-NEXT: call frexpf
-; RV64IZFINXZDINX-NEXT: ld a1, 24(sp)
-; RV64IZFINXZDINX-NEXT: ld a2, 16(sp)
-; RV64IZFINXZDINX-NEXT: ld a3, 8(sp)
-; RV64IZFINXZDINX-NEXT: ld a4, 0(sp)
-; RV64IZFINXZDINX-NEXT: sw a1, 28(s3)
-; RV64IZFINXZDINX-NEXT: sw a2, 24(s3)
-; RV64IZFINXZDINX-NEXT: sw a3, 20(s3)
-; RV64IZFINXZDINX-NEXT: sw a4, 16(s3)
-; RV64IZFINXZDINX-NEXT: sw a0, 12(s3)
-; RV64IZFINXZDINX-NEXT: sw s1, 8(s3)
-; RV64IZFINXZDINX-NEXT: sw s2, 4(s3)
+; RV64IZFINXZDINX-NEXT: ld a1, 0(sp)
+; RV64IZFINXZDINX-NEXT: ld a2, 8(sp)
+; RV64IZFINXZDINX-NEXT: ld a3, 16(sp)
+; RV64IZFINXZDINX-NEXT: ld a4, 24(sp)
+; RV64IZFINXZDINX-NEXT: sw a1, 16(s3)
+; RV64IZFINXZDINX-NEXT: sw a2, 20(s3)
+; RV64IZFINXZDINX-NEXT: sw a3, 24(s3)
+; RV64IZFINXZDINX-NEXT: sw a4, 28(s3)
; RV64IZFINXZDINX-NEXT: sw s4, 0(s3)
+; RV64IZFINXZDINX-NEXT: sw s2, 4(s3)
+; RV64IZFINXZDINX-NEXT: sw s1, 8(s3)
+; RV64IZFINXZDINX-NEXT: sw a0, 12(s3)
; RV64IZFINXZDINX-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
; RV64IZFINXZDINX-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
; RV64IZFINXZDINX-NEXT: ld s1, 56(sp) # 8-byte Folded Reload
@@ -770,14 +770,14 @@ define { <4 x float>, <4 x i32> } @test_frexp_v4f32_v4i32(<4 x float> %a) nounwi
; RV32I-NEXT: lw a2, 12(sp)
; RV32I-NEXT: lw a3, 16(sp)
; RV32I-NEXT: lw a4, 20(sp)
-; RV32I-NEXT: sw a0, 12(s3)
-; RV32I-NEXT: sw s1, 8(s3)
-; RV32I-NEXT: sw s0, 4(s3)
; RV32I-NEXT: sw s4, 0(s3)
-; RV32I-NEXT: sw a4, 28(s3)
-; RV32I-NEXT: sw a3, 24(s3)
-; RV32I-NEXT: sw a2, 20(s3)
+; RV32I-NEXT: sw s0, 4(s3)
+; RV32I-NEXT: sw s1, 8(s3)
+; RV32I-NEXT: sw a0, 12(s3)
; RV32I-NEXT: sw a1, 16(s3)
+; RV32I-NEXT: sw a2, 20(s3)
+; RV32I-NEXT: sw a3, 24(s3)
+; RV32I-NEXT: sw a4, 28(s3)
; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
@@ -820,14 +820,14 @@ define { <4 x float>, <4 x i32> } @test_frexp_v4f32_v4i32(<4 x float> %a) nounwi
; RV64I-NEXT: lw a2, 4(sp)
; RV64I-NEXT: lw a3, 8(sp)
; RV64I-NEXT: lw a4, 12(sp)
-; RV64I-NEXT: sw a0, 12(s3)
-; RV64I-NEXT: sw s1, 8(s3)
-; RV64I-NEXT: sw s0, 4(s3)
; RV64I-NEXT: sw s4, 0(s3)
-; RV64I-NEXT: sw a4, 28(s3)
-; RV64I-NEXT: sw a3, 24(s3)
-; RV64I-NEXT: sw a2, 20(s3)
+; RV64I-NEXT: sw s0, 4(s3)
+; RV64I-NEXT: sw s1, 8(s3)
+; RV64I-NEXT: sw a0, 12(s3)
; RV64I-NEXT: sw a1, 16(s3)
+; RV64I-NEXT: sw a2, 20(s3)
+; RV64I-NEXT: sw a3, 24(s3)
+; RV64I-NEXT: sw a4, 28(s3)
; RV64I-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
@@ -868,10 +868,10 @@ define <4 x float> @test_frexp_v4f32_v4i32_only_use_fract(<4 x float> %a) nounwi
; RV32IFD-NEXT: addi a0, sp, 20
; RV32IFD-NEXT: fmv.s fa0, fs0
; RV32IFD-NEXT: call frexpf
-; RV32IFD-NEXT: fsw fa0, 12(s0)
-; RV32IFD-NEXT: fsw fs1, 8(s0)
-; RV32IFD-NEXT: fsw fs2, 4(s0)
; RV32IFD-NEXT: fsw fs3, 0(s0)
+; RV32IFD-NEXT: fsw fs2, 4(s0)
+; RV32IFD-NEXT: fsw fs1, 8(s0)
+; RV32IFD-NEXT: fsw fa0, 12(s0)
; RV32IFD-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: lw s0, 56(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: fld fs0, 48(sp) # 8-byte Folded Reload
@@ -908,10 +908,10 @@ define <4 x float> @test_frexp_v4f32_v4i32_only_use_fract(<4 x float> %a) nounwi
; RV64IFD-NEXT: addi a0, sp, 24
; RV64IFD-NEXT: fmv.s fa0, fs0
; RV64IFD-NEXT: call frexpf
-; RV64IFD-NEXT: fsw fa0, 12(s0)
-; RV64IFD-NEXT: fsw fs1, 8(s0)
-; RV64IFD-NEXT: fsw fs2, 4(s0)
; RV64IFD-NEXT: fsw fs3, 0(s0)
+; RV64IFD-NEXT: fsw fs2, 4(s0)
+; RV64IFD-NEXT: fsw fs1, 8(s0)
+; RV64IFD-NEXT: fsw fa0, 12(s0)
; RV64IFD-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
; RV64IFD-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
; RV64IFD-NEXT: fld fs0, 56(sp) # 8-byte Folded Reload
@@ -950,10 +950,10 @@ define <4 x float> @test_frexp_v4f32_v4i32_only_use_fract(<4 x float> %a) nounwi
; RV32IZFINXZDINX-NEXT: addi a1, sp, 20
; RV32IZFINXZDINX-NEXT: mv a0, s0
; RV32IZFINXZDINX-NEXT: call frexpf
-; RV32IZFINXZDINX-NEXT: sw a0, 12(s3)
-; RV32IZFINXZDINX-NEXT: sw s1, 8(s3)
-; RV32IZFINXZDINX-NEXT: sw s2, 4(s3)
; RV32IZFINXZDINX-NEXT: sw s4, 0(s3)
+; RV32IZFINXZDINX-NEXT: sw s2, 4(s3)
+; RV32IZFINXZDINX-NEXT: sw s1, 8(s3)
+; RV32IZFINXZDINX-NEXT: sw a0, 12(s3)
; RV32IZFINXZDINX-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
@@ -992,10 +992,10 @@ define <4 x float> @test_frexp_v4f32_v4i32_only_use_fract(<4 x float> %a) nounwi
; RV64IZFINXZDINX-NEXT: addi a1, sp, 24
; RV64IZFINXZDINX-NEXT: mv a0, s0
; RV64IZFINXZDINX-NEXT: call frexpf
-; RV64IZFINXZDINX-NEXT: sw a0, 12(s3)
-; RV64IZFINXZDINX-NEXT: sw s1, 8(s3)
-; RV64IZFINXZDINX-NEXT: sw s2, 4(s3)
; RV64IZFINXZDINX-NEXT: sw s4, 0(s3)
+; RV64IZFINXZDINX-NEXT: sw s2, 4(s3)
+; RV64IZFINXZDINX-NEXT: sw s1, 8(s3)
+; RV64IZFINXZDINX-NEXT: sw a0, 12(s3)
; RV64IZFINXZDINX-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
; RV64IZFINXZDINX-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
; RV64IZFINXZDINX-NEXT: ld s1, 56(sp) # 8-byte Folded Reload
@@ -1034,10 +1034,10 @@ define <4 x float> @test_frexp_v4f32_v4i32_only_use_fract(<4 x float> %a) nounwi
; RV32I-NEXT: addi a1, sp, 20
; RV32I-NEXT: mv a0, s2
; RV32I-NEXT: call frexpf
-; RV32I-NEXT: sw a0, 12(s3)
-; RV32I-NEXT: sw s1, 8(s3)
-; RV32I-NEXT: sw s0, 4(s3)
; RV32I-NEXT: sw s4, 0(s3)
+; RV32I-NEXT: sw s0, 4(s3)
+; RV32I-NEXT: sw s1, 8(s3)
+; RV32I-NEXT: sw a0, 12(s3)
; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
@@ -1076,10 +1076,10 @@ define <4 x float> @test_frexp_v4f32_v4i32_only_use_fract(<4 x float> %a) nounwi
; RV64I-NEXT: addi a1, sp, 12
; RV64I-NEXT: mv a0, s2
; RV64I-NEXT: call frexpf
-; RV64I-NEXT: sw a0, 12(s3)
-; RV64I-NEXT: sw s1, 8(s3)
-; RV64I-NEXT: sw s0, 4(s3)
; RV64I-NEXT: sw s4, 0(s3)
+; RV64I-NEXT: sw s0, 4(s3)
+; RV64I-NEXT: sw s1, 8(s3)
+; RV64I-NEXT: sw a0, 12(s3)
; RV64I-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
@@ -1117,14 +1117,14 @@ define <4 x i32> @test_frexp_v4f32_v4i32_only_use_exp(<4 x float> %a) nounwind {
; RV32IFD-NEXT: addi a0, sp, 12
; RV32IFD-NEXT: fmv.s fa0, fs0
; RV32IFD-NEXT: call frexpf
-; RV32IFD-NEXT: lw a0, 12(sp)
-; RV32IFD-NEXT: lw a1, 8(sp)
-; RV32IFD-NEXT: lw a2, 4(sp)
-; RV32IFD-NEXT: lw a3, 0(sp)
-; RV32IFD-NEXT: sw a0, 12(s0)
-; RV32IFD-NEXT: sw a1, 8(s0)
-; RV32IFD-NEXT: sw a2, 4(s0)
-; RV32IFD-NEXT: sw a3, 0(s0)
+; RV32IFD-NEXT: lw a0, 0(sp)
+; RV32IFD-NEXT: lw a1, 4(sp)
+; RV32IFD-NEXT: lw a2, 8(sp)
+; RV32IFD-NEXT: lw a3, 12(sp)
+; RV32IFD-NEXT: sw a0, 0(s0)
+; RV32IFD-NEXT: sw a1, 4(s0)
+; RV32IFD-NEXT: sw a2, 8(s0)
+; RV32IFD-NEXT: sw a3, 12(s0)
; RV32IFD-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: fld fs0, 32(sp) # 8-byte Folded Reload
@@ -1156,14 +1156,14 @@ define <4 x i32> @test_frexp_v4f32_v4i32_only_use_exp(<4 x float> %a) nounwind {
; RV64IFD-NEXT: addi a0, sp, 32
; RV64IFD-NEXT: fmv.s fa0, fs0
; RV64IFD-NEXT: call frexpf
-; RV64IFD-NEXT: ld a0, 32(sp)
-; RV64IFD-NEXT: ld a1, 24(sp)
-; RV64IFD-NEXT: ld a2, 16(sp)
-; RV64IFD-NEXT: ld a3, 8(sp)
-; RV64IFD-NEXT: sw a0, 12(s0)
-; RV64IFD-NEXT: sw a1, 8(s0)
-; RV64IFD-NEXT: sw a2, 4(s0)
-; RV64IFD-NEXT: sw a3, 0(s0)
+; RV64IFD-NEXT: ld a0, 8(sp)
+; RV64IFD-NEXT: ld a1, 16(sp)
+; RV64IFD-NEXT: ld a2, 24(sp)
+; RV64IFD-NEXT: ld a3, 32(sp)
+; RV64IFD-NEXT: sw a0, 0(s0)
+; RV64IFD-NEXT: sw a1, 4(s0)
+; RV64IFD-NEXT: sw a2, 8(s0)
+; RV64IFD-NEXT: sw a3, 12(s0)
; RV64IFD-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
; RV64IFD-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
; RV64IFD-NEXT: fld fs0, 56(sp) # 8-byte Folded Reload
@@ -1197,14 +1197,14 @@ define <4 x i32> @test_frexp_v4f32_v4i32_only_use_exp(<4 x float> %a) nounwind {
; RV32IZFINXZDINX-NEXT: addi a1, sp, 24
; RV32IZFINXZDINX-NEXT: mv a0, s0
; RV32IZFINXZDINX-NEXT: call frexpf
-; RV32IZFINXZDINX-NEXT: lw a0, 24(sp)
-; RV32IZFINXZDINX-NEXT: lw a1, 20(sp)
-; RV32IZFINXZDINX-NEXT: lw a2, 16(sp)
-; RV32IZFINXZDINX-NEXT: lw a3, 12(sp)
-; RV32IZFINXZDINX-NEXT: sw a0, 12(s3)
-; RV32IZFINXZDINX-NEXT: sw a1, 8(s3)
-; RV32IZFINXZDINX-NEXT: sw a2, 4(s3)
-; RV32IZFINXZDINX-NEXT: sw a3, 0(s3)
+; RV32IZFINXZDINX-NEXT: lw a0, 12(sp)
+; RV32IZFINXZDINX-NEXT: lw a1, 16(sp)
+; RV32IZFINXZDINX-NEXT: lw a2, 20(sp)
+; RV32IZFINXZDINX-NEXT: lw a3, 24(sp)
+; RV32IZFINXZDINX-NEXT: sw a0, 0(s3)
+; RV32IZFINXZDINX-NEXT: sw a1, 4(s3)
+; RV32IZFINXZDINX-NEXT: sw a2, 8(s3)
+; RV32IZFINXZDINX-NEXT: sw a3, 12(s3)
; RV32IZFINXZDINX-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
@@ -1238,14 +1238,14 @@ define <4 x i32> @test_frexp_v4f32_v4i32_only_use_exp(<4 x float> %a) nounwind {
; RV64IZFINXZDINX-NEXT: addi a1, sp, 32
; RV64IZFINXZDINX-NEXT: mv a0, s0
; RV64IZFINXZDINX-NEXT: call frexpf
-; RV64IZFINXZDINX-NEXT: ld a0, 32(sp)
-; RV64IZFINXZDINX-NEXT: ld a1, 24(sp)
-; RV64IZFINXZDINX-NEXT: ld a2, 16(sp)
-; RV64IZFINXZDINX-NEXT: ld a3, 8(sp)
-; RV64IZFINXZDINX-NEXT: sw a0, 12(s3)
-; RV64IZFINXZDINX-NEXT: sw a1, 8(s3)
-; RV64IZFINXZDINX-NEXT: sw a2, 4(s3)
-; RV64IZFINXZDINX-NEXT: sw a3, 0(s3)
+; RV64IZFINXZDINX-NEXT: ld a0, 8(sp)
+; RV64IZFINXZDINX-NEXT: ld a1, 16(sp)
+; RV64IZFINXZDINX-NEXT: ld a2, 24(sp)
+; RV64IZFINXZDINX-NEXT: ld a3, 32(sp)
+; RV64IZFINXZDINX-NEXT: sw a0, 0(s3)
+; RV64IZFINXZDINX-NEXT: sw a1, 4(s3)
+; RV64IZFINXZDINX-NEXT: sw a2, 8(s3)
+; RV64IZFINXZDINX-NEXT: sw a3, 12(s3)
; RV64IZFINXZDINX-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
; RV64IZFINXZDINX-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
; RV64IZFINXZDINX-NEXT: ld s1, 56(sp) # 8-byte Folded Reload
@@ -1279,14 +1279,14 @@ define <4 x i32> @test_frexp_v4f32_v4i32_only_use_exp(<4 x float> %a) nounwind {
; RV32I-NEXT: addi a1, sp, 24
; RV32I-NEXT: mv a0, s2
; RV32I-NEXT: call frexpf
-; RV32I-NEXT: lw a0, 24(sp)
-; RV32I-NEXT: lw a1, 20(sp)
-; RV32I-NEXT: lw a2, 16(sp)
-; RV32I-NEXT: lw a3, 12(sp)
-; RV32I-NEXT: sw a0, 12(s3)
-; RV32I-NEXT: sw a1, 8(s3)
-; RV32I-NEXT: sw a2, 4(s3)
-; RV32I-NEXT: sw a3, 0(s3)
+; RV32I-NEXT: lw a0, 12(sp)
+; RV32I-NEXT: lw a1, 16(sp)
+; RV32I-NEXT: lw a2, 20(sp)
+; RV32I-NEXT: lw a3, 24(sp)
+; RV32I-NEXT: sw a0, 0(s3)
+; RV32I-NEXT: sw a1, 4(s3)
+; RV32I-NEXT: sw a2, 8(s3)
+; RV32I-NEXT: sw a3, 12(s3)
; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
@@ -1320,14 +1320,14 @@ define <4 x i32> @test_frexp_v4f32_v4i32_only_use_exp(<4 x float> %a) nounwind {
; RV64I-NEXT: addi a1, sp, 20
; RV64I-NEXT: mv a0, s2
; RV64I-NEXT: call frexpf
-; RV64I-NEXT: lw a0, 20(sp)
-; RV64I-NEXT: lw a1, 16(sp)
-; RV64I-NEXT: lw a2, 12(sp)
-; RV64I-NEXT: lw a3, 8(sp)
-; RV64I-NEXT: sw a0, 12(s3)
-; RV64I-NEXT: sw a1, 8(s3)
-; RV64I-NEXT: sw a2, 4(s3)
-; RV64I-NEXT: sw a3, 0(s3)
+; RV64I-NEXT: lw a0, 8(sp)
+; RV64I-NEXT: lw a1, 12(sp)
+; RV64I-NEXT: lw a2, 16(sp)
+; RV64I-NEXT: lw a3, 20(sp)
+; RV64I-NEXT: sw a0, 0(s3)
+; RV64I-NEXT: sw a1, 4(s3)
+; RV64I-NEXT: sw a2, 8(s3)
+; RV64I-NEXT: sw a3, 12(s3)
; RV64I-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
@@ -1397,8 +1397,8 @@ define { double, i32 } @test_frexp_f64_i32(double %a) nounwind {
; RV32I-NEXT: mv a1, a3
; RV32I-NEXT: call frexp
; RV32I-NEXT: lw a2, 4(sp)
-; RV32I-NEXT: sw a1, 4(s0)
; RV32I-NEXT: sw a0, 0(s0)
+; RV32I-NEXT: sw a1, 4(s0)
; RV32I-NEXT: sw a2, 8(s0)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
@@ -1580,28 +1580,28 @@ define { fp128, i32 } @test_frexp_f128_i32(fp128 %a) nounwind {
; RV32IFD-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
; RV32IFD-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
; RV32IFD-NEXT: lw a3, 0(a1)
-; RV32IFD-NEXT: lw a2, 4(a1)
-; RV32IFD-NEXT: lw a4, 8(a1)
-; RV32IFD-NEXT: lw a1, 12(a1)
+; RV32IFD-NEXT: lw a4, 4(a1)
+; RV32IFD-NEXT: lw a5, 8(a1)
+; RV32IFD-NEXT: lw a6, 12(a1)
; RV32IFD-NEXT: mv s0, a0
-; RV32IFD-NEXT: sw a1, 12(sp)
-; RV32IFD-NEXT: sw a4, 8(sp)
-; RV32IFD-NEXT: sw a2, 4(sp)
; RV32IFD-NEXT: addi a0, sp, 16
; RV32IFD-NEXT: mv a1, sp
; RV32IFD-NEXT: addi a2, sp, 36
; RV32IFD-NEXT: sw a3, 0(sp)
+; RV32IFD-NEXT: sw a4, 4(sp)
+; RV32IFD-NEXT: sw a5, 8(sp)
+; RV32IFD-NEXT: sw a6, 12(sp)
; RV32IFD-NEXT: call frexpl
-; RV32IFD-NEXT: lw a0, 24(sp)
-; RV32IFD-NEXT: lw a1, 28(sp)
-; RV32IFD-NEXT: lw a2, 16(sp)
-; RV32IFD-NEXT: lw a3, 20(sp)
-; RV32IFD-NEXT: lw a4, 36(sp)
-; RV32IFD-NEXT: sw a1, 12(s0)
-; RV32IFD-NEXT: sw a0, 8(s0)
-; RV32IFD-NEXT: sw a3, 4(s0)
-; RV32IFD-NEXT: sw a2, 0(s0)
-; RV32IFD-NEXT: sw a4, 16(s0)
+; RV32IFD-NEXT: lw a0, 36(sp)
+; RV32IFD-NEXT: lw a1, 16(sp)
+; RV32IFD-NEXT: lw a2, 20(sp)
+; RV32IFD-NEXT: lw a3, 24(sp)
+; RV32IFD-NEXT: lw a4, 28(sp)
+; RV32IFD-NEXT: sw a1, 0(s0)
+; RV32IFD-NEXT: sw a2, 4(s0)
+; RV32IFD-NEXT: sw a3, 8(s0)
+; RV32IFD-NEXT: sw a4, 12(s0)
+; RV32IFD-NEXT: sw a0, 16(s0)
; RV32IFD-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: addi sp, sp, 48
@@ -1619,8 +1619,8 @@ define { fp128, i32 } @test_frexp_f128_i32(fp128 %a) nounwind {
; RV64IFD-NEXT: mv a1, a3
; RV64IFD-NEXT: call frexpl
; RV64IFD-NEXT: lw a2, 12(sp)
-; RV64IFD-NEXT: sd a1, 8(s0)
; RV64IFD-NEXT: sd a0, 0(s0)
+; RV64IFD-NEXT: sd a1, 8(s0)
; RV64IFD-NEXT: sw a2, 16(s0)
; RV64IFD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64IFD-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
@@ -1633,28 +1633,28 @@ define { fp128, i32 } @test_frexp_f128_i32(fp128 %a) nounwind {
; RV32IZFINXZDINX-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: lw a3, 0(a1)
-; RV32IZFINXZDINX-NEXT: lw a2, 4(a1)
-; RV32IZFINXZDINX-NEXT: lw a4, 8(a1)
-; RV32IZFINXZDINX-NEXT: lw a1, 12(a1)
+; RV32IZFINXZDINX-NEXT: lw a4, 4(a1)
+; RV32IZFINXZDINX-NEXT: lw a5, 8(a1)
+; RV32IZFINXZDINX-NEXT: lw a6, 12(a1)
; RV32IZFINXZDINX-NEXT: mv s0, a0
-; RV32IZFINXZDINX-NEXT: sw a1, 12(sp)
-; RV32IZFINXZDINX-NEXT: sw a4, 8(sp)
-; RV32IZFINXZDINX-NEXT: sw a2, 4(sp)
; RV32IZFINXZDINX-NEXT: addi a0, sp, 16
; RV32IZFINXZDINX-NEXT: mv a1, sp
; RV32IZFINXZDINX-NEXT: addi a2, sp, 36
; RV32IZFINXZDINX-NEXT: sw a3, 0(sp)
+; RV32IZFINXZDINX-NEXT: sw a4, 4(sp)
+; RV32IZFINXZDINX-NEXT: sw a5, 8(sp)
+; RV32IZFINXZDINX-NEXT: sw a6, 12(sp)
; RV32IZFINXZDINX-NEXT: call frexpl
-; RV32IZFINXZDINX-NEXT: lw a0, 24(sp)
-; RV32IZFINXZDINX-NEXT: lw a1, 28(sp)
-; RV32IZFINXZDINX-NEXT: lw a2, 16(sp)
-; RV32IZFINXZDINX-NEXT: lw a3, 20(sp)
-; RV32IZFINXZDINX-NEXT: lw a4, 36(sp)
-; RV32IZFINXZDINX-NEXT: sw a1, 12(s0)
-; RV32IZFINXZDINX-NEXT: sw a0, 8(s0)
-; RV32IZFINXZDINX-NEXT: sw a3, 4(s0)
-; RV32IZFINXZDINX-NEXT: sw a2, 0(s0)
-; RV32IZFINXZDINX-NEXT: sw a4, 16(s0)
+; RV32IZFINXZDINX-NEXT: lw a0, 36(sp)
+; RV32IZFINXZDINX-NEXT: lw a1, 16(sp)
+; RV32IZFINXZDINX-NEXT: lw a2, 20(sp)
+; RV32IZFINXZDINX-NEXT: lw a3, 24(sp)
+; RV32IZFINXZDINX-NEXT: lw a4, 28(sp)
+; RV32IZFINXZDINX-NEXT: sw a1, 0(s0)
+; RV32IZFINXZDINX-NEXT: sw a2, 4(s0)
+; RV32IZFINXZDINX-NEXT: sw a3, 8(s0)
+; RV32IZFINXZDINX-NEXT: sw a4, 12(s0)
+; RV32IZFINXZDINX-NEXT: sw a0, 16(s0)
; RV32IZFINXZDINX-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: addi sp, sp, 48
@@ -1672,8 +1672,8 @@ define { fp128, i32 } @test_frexp_f128_i32(fp128 %a) nounwind {
; RV64IZFINXZDINX-NEXT: mv a1, a3
; RV64IZFINXZDINX-NEXT: call frexpl
; RV64IZFINXZDINX-NEXT: lw a2, 12(sp)
-; RV64IZFINXZDINX-NEXT: sd a1, 8(s0)
; RV64IZFINXZDINX-NEXT: sd a0, 0(s0)
+; RV64IZFINXZDINX-NEXT: sd a1, 8(s0)
; RV64IZFINXZDINX-NEXT: sw a2, 16(s0)
; RV64IZFINXZDINX-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64IZFINXZDINX-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
@@ -1686,28 +1686,28 @@ define { fp128, i32 } @test_frexp_f128_i32(fp128 %a) nounwind {
; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
; RV32I-NEXT: lw a3, 0(a1)
-; RV32I-NEXT: lw a2, 4(a1)
-; RV32I-NEXT: lw a4, 8(a1)
-; RV32I-NEXT: lw a1, 12(a1)
+; RV32I-NEXT: lw a4, 4(a1)
+; RV32I-NEXT: lw a5, 8(a1)
+; RV32I-NEXT: lw a6, 12(a1)
; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: sw a1, 12(sp)
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: sw a2, 4(sp)
; RV32I-NEXT: addi a0, sp, 16
; RV32I-NEXT: mv a1, sp
; RV32I-NEXT: addi a2, sp, 36
; RV32I-NEXT: sw a3, 0(sp)
+; RV32I-NEXT: sw a4, 4(sp)
+; RV32I-NEXT: sw a5, 8(sp)
+; RV32I-NEXT: sw a6, 12(sp)
; RV32I-NEXT: call frexpl
-; RV32I-NEXT: lw a0, 24(sp)
-; RV32I-NEXT: lw a1, 28(sp)
-; RV32I-NEXT: lw a2, 16(sp)
-; RV32I-NEXT: lw a3, 20(sp)
-; RV32I-NEXT: lw a4, 36(sp)
-; RV32I-NEXT: sw a1, 12(s0)
-; RV32I-NEXT: sw a0, 8(s0)
-; RV32I-NEXT: sw a3, 4(s0)
-; RV32I-NEXT: sw a2, 0(s0)
-; RV32I-NEXT: sw a4, 16(s0)
+; RV32I-NEXT: lw a0, 36(sp)
+; RV32I-NEXT: lw a1, 16(sp)
+; RV32I-NEXT: lw a2, 20(sp)
+; RV32I-NEXT: lw a3, 24(sp)
+; RV32I-NEXT: lw a4, 28(sp)
+; RV32I-NEXT: sw a1, 0(s0)
+; RV32I-NEXT: sw a2, 4(s0)
+; RV32I-NEXT: sw a3, 8(s0)
+; RV32I-NEXT: sw a4, 12(s0)
+; RV32I-NEXT: sw a0, 16(s0)
; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 48
@@ -1725,8 +1725,8 @@ define { fp128, i32 } @test_frexp_f128_i32(fp128 %a) nounwind {
; RV64I-NEXT: mv a1, a3
; RV64I-NEXT: call frexpl
; RV64I-NEXT: lw a2, 12(sp)
-; RV64I-NEXT: sd a1, 8(s0)
; RV64I-NEXT: sd a0, 0(s0)
+; RV64I-NEXT: sd a1, 8(s0)
; RV64I-NEXT: sw a2, 16(s0)
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
@@ -1743,26 +1743,26 @@ define fp128 @test_frexp_f128_i32_only_use_fract(fp128 %a) nounwind {
; RV32IFD-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
; RV32IFD-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
; RV32IFD-NEXT: lw a3, 0(a1)
-; RV32IFD-NEXT: lw a2, 4(a1)
-; RV32IFD-NEXT: lw a4, 8(a1)
-; RV32IFD-NEXT: lw a1, 12(a1)
+; RV32IFD-NEXT: lw a4, 4(a1)
+; RV32IFD-NEXT: lw a5, 8(a1)
+; RV32IFD-NEXT: lw a6, 12(a1)
; RV32IFD-NEXT: mv s0, a0
-; RV32IFD-NEXT: sw a1, 12(sp)
-; RV32IFD-NEXT: sw a4, 8(sp)
-; RV32IFD-NEXT: sw a2, 4(sp)
; RV32IFD-NEXT: addi a0, sp, 16
; RV32IFD-NEXT: mv a1, sp
; RV32IFD-NEXT: addi a2, sp, 36
; RV32IFD-NEXT: sw a3, 0(sp)
+; RV32IFD-NEXT: sw a4, 4(sp)
+; RV32IFD-NEXT: sw a5, 8(sp)
+; RV32IFD-NEXT: sw a6, 12(sp)
; RV32IFD-NEXT: call frexpl
-; RV32IFD-NEXT: lw a0, 28(sp)
-; RV32IFD-NEXT: lw a1, 24(sp)
-; RV32IFD-NEXT: lw a2, 20(sp)
-; RV32IFD-NEXT: lw a3, 16(sp)
-; RV32IFD-NEXT: sw a0, 12(s0)
-; RV32IFD-NEXT: sw a1, 8(s0)
-; RV32IFD-NEXT: sw a2, 4(s0)
-; RV32IFD-NEXT: sw a3, 0(s0)
+; RV32IFD-NEXT: lw a0, 16(sp)
+; RV32IFD-NEXT: lw a1, 20(sp)
+; RV32IFD-NEXT: lw a2, 24(sp)
+; RV32IFD-NEXT: lw a3, 28(sp)
+; RV32IFD-NEXT: sw a0, 0(s0)
+; RV32IFD-NEXT: sw a1, 4(s0)
+; RV32IFD-NEXT: sw a2, 8(s0)
+; RV32IFD-NEXT: sw a3, 12(s0)
; RV32IFD-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: addi sp, sp, 48
@@ -1784,26 +1784,26 @@ define fp128 @test_frexp_f128_i32_only_use_fract(fp128 %a) nounwind {
; RV32IZFINXZDINX-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: lw a3, 0(a1)
-; RV32IZFINXZDINX-NEXT: lw a2, 4(a1)
-; RV32IZFINXZDINX-NEXT: lw a4, 8(a1)
-; RV32IZFINXZDINX-NEXT: lw a1, 12(a1)
+; RV32IZFINXZDINX-NEXT: lw a4, 4(a1)
+; RV32IZFINXZDINX-NEXT: lw a5, 8(a1)
+; RV32IZFINXZDINX-NEXT: lw a6, 12(a1)
; RV32IZFINXZDINX-NEXT: mv s0, a0
-; RV32IZFINXZDINX-NEXT: sw a1, 12(sp)
-; RV32IZFINXZDINX-NEXT: sw a4, 8(sp)
-; RV32IZFINXZDINX-NEXT: sw a2, 4(sp)
; RV32IZFINXZDINX-NEXT: addi a0, sp, 16
; RV32IZFINXZDINX-NEXT: mv a1, sp
; RV32IZFINXZDINX-NEXT: addi a2, sp, 36
; RV32IZFINXZDINX-NEXT: sw a3, 0(sp)
+; RV32IZFINXZDINX-NEXT: sw a4, 4(sp)
+; RV32IZFINXZDINX-NEXT: sw a5, 8(sp)
+; RV32IZFINXZDINX-NEXT: sw a6, 12(sp)
; RV32IZFINXZDINX-NEXT: call frexpl
-; RV32IZFINXZDINX-NEXT: lw a0, 28(sp)
-; RV32IZFINXZDINX-NEXT: lw a1, 24(sp)
-; RV32IZFINXZDINX-NEXT: lw a2, 20(sp)
-; RV32IZFINXZDINX-NEXT: lw a3, 16(sp)
-; RV32IZFINXZDINX-NEXT: sw a0, 12(s0)
-; RV32IZFINXZDINX-NEXT: sw a1, 8(s0)
-; RV32IZFINXZDINX-NEXT: sw a2, 4(s0)
-; RV32IZFINXZDINX-NEXT: sw a3, 0(s0)
+; RV32IZFINXZDINX-NEXT: lw a0, 16(sp)
+; RV32IZFINXZDINX-NEXT: lw a1, 20(sp)
+; RV32IZFINXZDINX-NEXT: lw a2, 24(sp)
+; RV32IZFINXZDINX-NEXT: lw a3, 28(sp)
+; RV32IZFINXZDINX-NEXT: sw a0, 0(s0)
+; RV32IZFINXZDINX-NEXT: sw a1, 4(s0)
+; RV32IZFINXZDINX-NEXT: sw a2, 8(s0)
+; RV32IZFINXZDINX-NEXT: sw a3, 12(s0)
; RV32IZFINXZDINX-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: addi sp, sp, 48
@@ -1825,26 +1825,26 @@ define fp128 @test_frexp_f128_i32_only_use_fract(fp128 %a) nounwind {
; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
; RV32I-NEXT: lw a3, 0(a1)
-; RV32I-NEXT: lw a2, 4(a1)
-; RV32I-NEXT: lw a4, 8(a1)
-; RV32I-NEXT: lw a1, 12(a1)
+; RV32I-NEXT: lw a4, 4(a1)
+; RV32I-NEXT: lw a5, 8(a1)
+; RV32I-NEXT: lw a6, 12(a1)
; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: sw a1, 12(sp)
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: sw a2, 4(sp)
; RV32I-NEXT: addi a0, sp, 16
; RV32I-NEXT: mv a1, sp
; RV32I-NEXT: addi a2, sp, 36
; RV32I-NEXT: sw a3, 0(sp)
+; RV32I-NEXT: sw a4, 4(sp)
+; RV32I-NEXT: sw a5, 8(sp)
+; RV32I-NEXT: sw a6, 12(sp)
; RV32I-NEXT: call frexpl
-; RV32I-NEXT: lw a0, 28(sp)
-; RV32I-NEXT: lw a1, 24(sp)
-; RV32I-NEXT: lw a2, 20(sp)
-; RV32I-NEXT: lw a3, 16(sp)
-; RV32I-NEXT: sw a0, 12(s0)
-; RV32I-NEXT: sw a1, 8(s0)
-; RV32I-NEXT: sw a2, 4(s0)
-; RV32I-NEXT: sw a3, 0(s0)
+; RV32I-NEXT: lw a0, 16(sp)
+; RV32I-NEXT: lw a1, 20(sp)
+; RV32I-NEXT: lw a2, 24(sp)
+; RV32I-NEXT: lw a3, 28(sp)
+; RV32I-NEXT: sw a0, 0(s0)
+; RV32I-NEXT: sw a1, 4(s0)
+; RV32I-NEXT: sw a2, 8(s0)
+; RV32I-NEXT: sw a3, 12(s0)
; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 48
@@ -1870,16 +1870,16 @@ define i32 @test_frexp_f128_i32_only_use_exp(fp128 %a) nounwind {
; RV32IFD-NEXT: addi sp, sp, -48
; RV32IFD-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
; RV32IFD-NEXT: lw a3, 0(a0)
-; RV32IFD-NEXT: lw a1, 4(a0)
-; RV32IFD-NEXT: lw a2, 8(a0)
-; RV32IFD-NEXT: lw a0, 12(a0)
-; RV32IFD-NEXT: sw a0, 20(sp)
-; RV32IFD-NEXT: sw a2, 16(sp)
-; RV32IFD-NEXT: sw a1, 12(sp)
+; RV32IFD-NEXT: lw a4, 4(a0)
+; RV32IFD-NEXT: lw a5, 8(a0)
+; RV32IFD-NEXT: lw a6, 12(a0)
; RV32IFD-NEXT: addi a0, sp, 24
; RV32IFD-NEXT: addi a1, sp, 8
; RV32IFD-NEXT: addi a2, sp, 40
; RV32IFD-NEXT: sw a3, 8(sp)
+; RV32IFD-NEXT: sw a4, 12(sp)
+; RV32IFD-NEXT: sw a5, 16(sp)
+; RV32IFD-NEXT: sw a6, 20(sp)
; RV32IFD-NEXT: call frexpl
; RV32IFD-NEXT: lw a0, 40(sp)
; RV32IFD-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
@@ -1902,16 +1902,16 @@ define i32 @test_frexp_f128_i32_only_use_exp(fp128 %a) nounwind {
; RV32IZFINXZDINX-NEXT: addi sp, sp, -48
; RV32IZFINXZDINX-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: lw a3, 0(a0)
-; RV32IZFINXZDINX-NEXT: lw a1, 4(a0)
-; RV32IZFINXZDINX-NEXT: lw a2, 8(a0)
-; RV32IZFINXZDINX-NEXT: lw a0, 12(a0)
-; RV32IZFINXZDINX-NEXT: sw a0, 20(sp)
-; RV32IZFINXZDINX-NEXT: sw a2, 16(sp)
-; RV32IZFINXZDINX-NEXT: sw a1, 12(sp)
+; RV32IZFINXZDINX-NEXT: lw a4, 4(a0)
+; RV32IZFINXZDINX-NEXT: lw a5, 8(a0)
+; RV32IZFINXZDINX-NEXT: lw a6, 12(a0)
; RV32IZFINXZDINX-NEXT: addi a0, sp, 24
; RV32IZFINXZDINX-NEXT: addi a1, sp, 8
; RV32IZFINXZDINX-NEXT: addi a2, sp, 40
; RV32IZFINXZDINX-NEXT: sw a3, 8(sp)
+; RV32IZFINXZDINX-NEXT: sw a4, 12(sp)
+; RV32IZFINXZDINX-NEXT: sw a5, 16(sp)
+; RV32IZFINXZDINX-NEXT: sw a6, 20(sp)
; RV32IZFINXZDINX-NEXT: call frexpl
; RV32IZFINXZDINX-NEXT: lw a0, 40(sp)
; RV32IZFINXZDINX-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
@@ -1934,16 +1934,16 @@ define i32 @test_frexp_f128_i32_only_use_exp(fp128 %a) nounwind {
; RV32I-NEXT: addi sp, sp, -48
; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
; RV32I-NEXT: lw a3, 0(a0)
-; RV32I-NEXT: lw a1, 4(a0)
-; RV32I-NEXT: lw a2, 8(a0)
-; RV32I-NEXT: lw a0, 12(a0)
-; RV32I-NEXT: sw a0, 20(sp)
-; RV32I-NEXT: sw a2, 16(sp)
-; RV32I-NEXT: sw a1, 12(sp)
+; RV32I-NEXT: lw a4, 4(a0)
+; RV32I-NEXT: lw a5, 8(a0)
+; RV32I-NEXT: lw a6, 12(a0)
; RV32I-NEXT: addi a0, sp, 24
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: addi a2, sp, 40
; RV32I-NEXT: sw a3, 8(sp)
+; RV32I-NEXT: sw a4, 12(sp)
+; RV32I-NEXT: sw a5, 16(sp)
+; RV32I-NEXT: sw a6, 20(sp)
; RV32I-NEXT: call frexpl
; RV32I-NEXT: lw a0, 40(sp)
; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/memcpy.ll b/llvm/test/CodeGen/RISCV/memcpy.ll
index 41c27d83defe61..5b8955ee0e0a0c 100644
--- a/llvm/test/CodeGen/RISCV/memcpy.ll
+++ b/llvm/test/CodeGen/RISCV/memcpy.ll
@@ -25,16 +25,16 @@ define i32 @t0() {
; RV32: # %bb.0: # %entry
; RV32-NEXT: lui a0, %hi(src)
; RV32-NEXT: lw a1, %lo(src)(a0)
+; RV32-NEXT: lui a2, %hi(dst)
+; RV32-NEXT: sw a1, %lo(dst)(a2)
; RV32-NEXT: addi a0, a0, %lo(src)
-; RV32-NEXT: lw a2, 4(a0)
+; RV32-NEXT: lw a1, 4(a0)
; RV32-NEXT: lh a3, 8(a0)
; RV32-NEXT: lbu a0, 10(a0)
-; RV32-NEXT: lui a4, %hi(dst)
-; RV32-NEXT: sw a1, %lo(dst)(a4)
-; RV32-NEXT: addi a1, a4, %lo(dst)
-; RV32-NEXT: sb a0, 10(a1)
-; RV32-NEXT: sh a3, 8(a1)
-; RV32-NEXT: sw a2, 4(a1)
+; RV32-NEXT: addi a2, a2, %lo(dst)
+; RV32-NEXT: sw a1, 4(a2)
+; RV32-NEXT: sh a3, 8(a2)
+; RV32-NEXT: sb a0, 10(a2)
; RV32-NEXT: li a0, 0
; RV32-NEXT: ret
;
@@ -42,14 +42,14 @@ define i32 @t0() {
; RV64: # %bb.0: # %entry
; RV64-NEXT: lui a0, %hi(src)
; RV64-NEXT: ld a1, %lo(src)(a0)
+; RV64-NEXT: lui a2, %hi(dst)
; RV64-NEXT: addi a0, a0, %lo(src)
-; RV64-NEXT: lh a2, 8(a0)
+; RV64-NEXT: lh a3, 8(a0)
; RV64-NEXT: lbu a0, 10(a0)
-; RV64-NEXT: lui a3, %hi(dst)
-; RV64-NEXT: sd a1, %lo(dst)(a3)
-; RV64-NEXT: addi a1, a3, %lo(dst)
+; RV64-NEXT: sd a1, %lo(dst)(a2)
+; RV64-NEXT: addi a1, a2, %lo(dst)
+; RV64-NEXT: sh a3, 8(a1)
; RV64-NEXT: sb a0, 10(a1)
-; RV64-NEXT: sh a2, 8(a1)
; RV64-NEXT: li a0, 0
; RV64-NEXT: ret
;
@@ -107,39 +107,39 @@ define void @t1(ptr nocapture %C) nounwind {
; RV32-FAST-NEXT: sw a1, 27(a0)
; RV32-FAST-NEXT: lui a1, 300325
; RV32-FAST-NEXT: addi a1, a1, 1107
+; RV32-FAST-NEXT: lui a2, 132181
+; RV32-FAST-NEXT: addi a2, a2, -689
+; RV32-FAST-NEXT: lui a3, 340483
+; RV32-FAST-NEXT: addi a3, a3, -947
+; RV32-FAST-NEXT: sw a3, 16(a0)
+; RV32-FAST-NEXT: sw a2, 20(a0)
; RV32-FAST-NEXT: sw a1, 24(a0)
-; RV32-FAST-NEXT: lui a1, 132181
-; RV32-FAST-NEXT: addi a1, a1, -689
-; RV32-FAST-NEXT: sw a1, 20(a0)
-; RV32-FAST-NEXT: lui a1, 340483
-; RV32-FAST-NEXT: addi a1, a1, -947
-; RV32-FAST-NEXT: sw a1, 16(a0)
; RV32-FAST-NEXT: lui a1, 267556
; RV32-FAST-NEXT: addi a1, a1, 1871
+; RV32-FAST-NEXT: lui a2, 337154
+; RV32-FAST-NEXT: addi a2, a2, 69
+; RV32-FAST-NEXT: lui a3, 320757
+; RV32-FAST-NEXT: addi a3, a3, 1107
+; RV32-FAST-NEXT: lui a4, 365861
+; RV32-FAST-NEXT: addi a4, a4, -1980
+; RV32-FAST-NEXT: sw a4, 0(a0)
+; RV32-FAST-NEXT: sw a3, 4(a0)
+; RV32-FAST-NEXT: sw a2, 8(a0)
; RV32-FAST-NEXT: sw a1, 12(a0)
-; RV32-FAST-NEXT: lui a1, 337154
-; RV32-FAST-NEXT: addi a1, a1, 69
-; RV32-FAST-NEXT: sw a1, 8(a0)
-; RV32-FAST-NEXT: lui a1, 320757
-; RV32-FAST-NEXT: addi a1, a1, 1107
-; RV32-FAST-NEXT: sw a1, 4(a0)
-; RV32-FAST-NEXT: lui a1, 365861
-; RV32-FAST-NEXT: addi a1, a1, -1980
-; RV32-FAST-NEXT: sw a1, 0(a0)
; RV32-FAST-NEXT: ret
;
; RV64-FAST-LABEL: t1:
; RV64-FAST: # %bb.0: # %entry
; RV64-FAST-NEXT: lui a1, %hi(.L.str1)
-; RV64-FAST-NEXT: ld a2, %lo(.L.str1)(a1)
-; RV64-FAST-NEXT: addi a1, a1, %lo(.L.str1)
-; RV64-FAST-NEXT: ld a3, 23(a1)
-; RV64-FAST-NEXT: ld a4, 16(a1)
-; RV64-FAST-NEXT: ld a1, 8(a1)
-; RV64-FAST-NEXT: sd a2, 0(a0)
+; RV64-FAST-NEXT: addi a2, a1, %lo(.L.str1)
+; RV64-FAST-NEXT: ld a3, 23(a2)
+; RV64-FAST-NEXT: ld a1, %lo(.L.str1)(a1)
+; RV64-FAST-NEXT: ld a4, 8(a2)
+; RV64-FAST-NEXT: ld a2, 16(a2)
; RV64-FAST-NEXT: sd a3, 23(a0)
-; RV64-FAST-NEXT: sd a4, 16(a0)
-; RV64-FAST-NEXT: sd a1, 8(a0)
+; RV64-FAST-NEXT: sd a1, 0(a0)
+; RV64-FAST-NEXT: sd a4, 8(a0)
+; RV64-FAST-NEXT: sd a2, 16(a0)
; RV64-FAST-NEXT: ret
entry:
tail call void @llvm.memcpy.p0.p0.i64(ptr %C, ptr @.str1, i64 31, i1 false)
@@ -165,17 +165,17 @@ define void @t2(ptr nocapture %C) nounwind {
; RV64-FAST: # %bb.0: # %entry
; RV64-FAST-NEXT: lui a1, %hi(.L.str2)
; RV64-FAST-NEXT: ld a2, %lo(.L.str2)(a1)
-; RV64-FAST-NEXT: sd a2, 0(a0)
+; RV64-FAST-NEXT: lui a3, 1156
+; RV64-FAST-NEXT: addi a3, a3, 332
+; RV64-FAST-NEXT: sw a3, 32(a0)
; RV64-FAST-NEXT: addi a1, a1, %lo(.L.str2)
-; RV64-FAST-NEXT: ld a2, 8(a1)
-; RV64-FAST-NEXT: ld a3, 16(a1)
+; RV64-FAST-NEXT: ld a3, 8(a1)
+; RV64-FAST-NEXT: ld a4, 16(a1)
; RV64-FAST-NEXT: ld a1, 24(a1)
-; RV64-FAST-NEXT: lui a4, 1156
-; RV64-FAST-NEXT: addi a4, a4, 332
-; RV64-FAST-NEXT: sw a4, 32(a0)
+; RV64-FAST-NEXT: sd a2, 0(a0)
+; RV64-FAST-NEXT: sd a3, 8(a0)
+; RV64-FAST-NEXT: sd a4, 16(a0)
; RV64-FAST-NEXT: sd a1, 24(a0)
-; RV64-FAST-NEXT: sd a3, 16(a0)
-; RV64-FAST-NEXT: sd a2, 8(a0)
; RV64-FAST-NEXT: ret
entry:
tail call void @llvm.memcpy.p0.p0.i64(ptr %C, ptr @.str2, i64 36, i1 false)
@@ -201,22 +201,22 @@ define void @t3(ptr nocapture %C) nounwind {
; RV32-FAST: # %bb.0: # %entry
; RV32-FAST-NEXT: lui a1, 1109
; RV32-FAST-NEXT: addi a1, a1, -689
+; RV32-FAST-NEXT: lui a2, 340483
+; RV32-FAST-NEXT: addi a2, a2, -947
+; RV32-FAST-NEXT: sw a2, 16(a0)
; RV32-FAST-NEXT: sw a1, 20(a0)
-; RV32-FAST-NEXT: lui a1, 340483
-; RV32-FAST-NEXT: addi a1, a1, -947
-; RV32-FAST-NEXT: sw a1, 16(a0)
; RV32-FAST-NEXT: lui a1, 267556
; RV32-FAST-NEXT: addi a1, a1, 1871
+; RV32-FAST-NEXT: lui a2, 337154
+; RV32-FAST-NEXT: addi a2, a2, 69
+; RV32-FAST-NEXT: lui a3, 320757
+; RV32-FAST-NEXT: addi a3, a3, 1107
+; RV32-FAST-NEXT: lui a4, 365861
+; RV32-FAST-NEXT: addi a4, a4, -1980
+; RV32-FAST-NEXT: sw a4, 0(a0)
+; RV32-FAST-NEXT: sw a3, 4(a0)
+; RV32-FAST-NEXT: sw a2, 8(a0)
; RV32-FAST-NEXT: sw a1, 12(a0)
-; RV32-FAST-NEXT: lui a1, 337154
-; RV32-FAST-NEXT: addi a1, a1, 69
-; RV32-FAST-NEXT: sw a1, 8(a0)
-; RV32-FAST-NEXT: lui a1, 320757
-; RV32-FAST-NEXT: addi a1, a1, 1107
-; RV32-FAST-NEXT: sw a1, 4(a0)
-; RV32-FAST-NEXT: lui a1, 365861
-; RV32-FAST-NEXT: addi a1, a1, -1980
-; RV32-FAST-NEXT: sw a1, 0(a0)
; RV32-FAST-NEXT: ret
;
; RV64-FAST-LABEL: t3:
@@ -224,11 +224,11 @@ define void @t3(ptr nocapture %C) nounwind {
; RV64-FAST-NEXT: lui a1, %hi(.L.str3)
; RV64-FAST-NEXT: ld a2, %lo(.L.str3)(a1)
; RV64-FAST-NEXT: addi a1, a1, %lo(.L.str3)
-; RV64-FAST-NEXT: ld a3, 16(a1)
-; RV64-FAST-NEXT: ld a1, 8(a1)
+; RV64-FAST-NEXT: ld a3, 8(a1)
+; RV64-FAST-NEXT: ld a1, 16(a1)
; RV64-FAST-NEXT: sd a2, 0(a0)
-; RV64-FAST-NEXT: sd a3, 16(a0)
-; RV64-FAST-NEXT: sd a1, 8(a0)
+; RV64-FAST-NEXT: sd a3, 8(a0)
+; RV64-FAST-NEXT: sd a1, 16(a0)
; RV64-FAST-NEXT: ret
entry:
tail call void @llvm.memcpy.p0.p0.i64(ptr %C, ptr @.str3, i64 24, i1 false)
@@ -256,16 +256,16 @@ define void @t4(ptr nocapture %C) nounwind {
; RV32-FAST-NEXT: sh a1, 16(a0)
; RV32-FAST-NEXT: lui a1, 132388
; RV32-FAST-NEXT: addi a1, a1, 1871
+; RV32-FAST-NEXT: lui a2, 337154
+; RV32-FAST-NEXT: addi a2, a2, 69
+; RV32-FAST-NEXT: lui a3, 320757
+; RV32-FAST-NEXT: addi a3, a3, 1107
+; RV32-FAST-NEXT: lui a4, 365861
+; RV32-FAST-NEXT: addi a4, a4, -1980
+; RV32-FAST-NEXT: sw a4, 0(a0)
+; RV32-FAST-NEXT: sw a3, 4(a0)
+; RV32-FAST-NEXT: sw a2, 8(a0)
; RV32-FAST-NEXT: sw a1, 12(a0)
-; RV32-FAST-NEXT: lui a1, 337154
-; RV32-FAST-NEXT: addi a1, a1, 69
-; RV32-FAST-NEXT: sw a1, 8(a0)
-; RV32-FAST-NEXT: lui a1, 320757
-; RV32-FAST-NEXT: addi a1, a1, 1107
-; RV32-FAST-NEXT: sw a1, 4(a0)
-; RV32-FAST-NEXT: lui a1, 365861
-; RV32-FAST-NEXT: addi a1, a1, -1980
-; RV32-FAST-NEXT: sw a1, 0(a0)
; RV32-FAST-NEXT: ret
;
; RV64-FAST-LABEL: t4:
@@ -275,9 +275,9 @@ define void @t4(ptr nocapture %C) nounwind {
; RV64-FAST-NEXT: addi a1, a1, %lo(.L.str4)
; RV64-FAST-NEXT: ld a1, 8(a1)
; RV64-FAST-NEXT: li a3, 32
-; RV64-FAST-NEXT: sh a3, 16(a0)
; RV64-FAST-NEXT: sd a2, 0(a0)
; RV64-FAST-NEXT: sd a1, 8(a0)
+; RV64-FAST-NEXT: sh a3, 16(a0)
; RV64-FAST-NEXT: ret
entry:
tail call void @llvm.memcpy.p0.p0.i64(ptr %C, ptr @.str4, i64 18, i1 false)
@@ -287,36 +287,36 @@ entry:
define void @t5(ptr nocapture %C) nounwind {
; RV32-LABEL: t5:
; RV32: # %bb.0: # %entry
-; RV32-NEXT: sb zero, 6(a0)
; RV32-NEXT: li a1, 84
+; RV32-NEXT: li a2, 83
+; RV32-NEXT: sb a2, 4(a0)
; RV32-NEXT: sb a1, 5(a0)
-; RV32-NEXT: li a1, 83
-; RV32-NEXT: sb a1, 4(a0)
+; RV32-NEXT: sb zero, 6(a0)
; RV32-NEXT: li a1, 89
+; RV32-NEXT: li a2, 82
+; RV32-NEXT: li a3, 72
+; RV32-NEXT: li a4, 68
+; RV32-NEXT: sb a4, 0(a0)
+; RV32-NEXT: sb a3, 1(a0)
+; RV32-NEXT: sb a2, 2(a0)
; RV32-NEXT: sb a1, 3(a0)
-; RV32-NEXT: li a1, 82
-; RV32-NEXT: sb a1, 2(a0)
-; RV32-NEXT: li a1, 72
-; RV32-NEXT: sb a1, 1(a0)
-; RV32-NEXT: li a1, 68
-; RV32-NEXT: sb a1, 0(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: t5:
; RV64: # %bb.0: # %entry
-; RV64-NEXT: sb zero, 6(a0)
; RV64-NEXT: li a1, 84
+; RV64-NEXT: li a2, 83
+; RV64-NEXT: sb a2, 4(a0)
; RV64-NEXT: sb a1, 5(a0)
-; RV64-NEXT: li a1, 83
-; RV64-NEXT: sb a1, 4(a0)
+; RV64-NEXT: sb zero, 6(a0)
; RV64-NEXT: li a1, 89
+; RV64-NEXT: li a2, 82
+; RV64-NEXT: li a3, 72
+; RV64-NEXT: li a4, 68
+; RV64-NEXT: sb a4, 0(a0)
+; RV64-NEXT: sb a3, 1(a0)
+; RV64-NEXT: sb a2, 2(a0)
; RV64-NEXT: sb a1, 3(a0)
-; RV64-NEXT: li a1, 82
-; RV64-NEXT: sb a1, 2(a0)
-; RV64-NEXT: li a1, 72
-; RV64-NEXT: sb a1, 1(a0)
-; RV64-NEXT: li a1, 68
-; RV64-NEXT: sb a1, 0(a0)
; RV64-NEXT: ret
;
; RV32-FAST-LABEL: t5:
diff --git a/llvm/test/CodeGen/RISCV/memset-inline.ll b/llvm/test/CodeGen/RISCV/memset-inline.ll
index 55fe81a58805ed..6ee6e1261e7e91 100644
--- a/llvm/test/CodeGen/RISCV/memset-inline.ll
+++ b/llvm/test/CodeGen/RISCV/memset-inline.ll
@@ -31,14 +31,14 @@ define void @memset_1(ptr %a, i8 %value) nounwind {
define void @memset_2(ptr %a, i8 %value) nounwind {
; RV32-LABEL: memset_2:
; RV32: # %bb.0:
-; RV32-NEXT: sb a1, 1(a0)
; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: sb a1, 1(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: memset_2:
; RV64: # %bb.0:
-; RV64-NEXT: sb a1, 1(a0)
; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: sb a1, 1(a0)
; RV64-NEXT: ret
;
; RV32-FAST-LABEL: memset_2:
@@ -63,18 +63,18 @@ define void @memset_2(ptr %a, i8 %value) nounwind {
define void @memset_4(ptr %a, i8 %value) nounwind {
; RV32-LABEL: memset_4:
; RV32: # %bb.0:
-; RV32-NEXT: sb a1, 3(a0)
-; RV32-NEXT: sb a1, 2(a0)
-; RV32-NEXT: sb a1, 1(a0)
; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: sb a1, 2(a0)
+; RV32-NEXT: sb a1, 3(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: memset_4:
; RV64: # %bb.0:
-; RV64-NEXT: sb a1, 3(a0)
-; RV64-NEXT: sb a1, 2(a0)
-; RV64-NEXT: sb a1, 1(a0)
; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: sb a1, 2(a0)
+; RV64-NEXT: sb a1, 3(a0)
; RV64-NEXT: ret
;
; RV32-FAST-LABEL: memset_4:
@@ -102,26 +102,26 @@ define void @memset_4(ptr %a, i8 %value) nounwind {
define void @memset_8(ptr %a, i8 %value) nounwind {
; RV32-LABEL: memset_8:
; RV32: # %bb.0:
-; RV32-NEXT: sb a1, 7(a0)
-; RV32-NEXT: sb a1, 6(a0)
-; RV32-NEXT: sb a1, 5(a0)
; RV32-NEXT: sb a1, 4(a0)
-; RV32-NEXT: sb a1, 3(a0)
-; RV32-NEXT: sb a1, 2(a0)
-; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: sb a1, 5(a0)
+; RV32-NEXT: sb a1, 6(a0)
+; RV32-NEXT: sb a1, 7(a0)
; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: sb a1, 2(a0)
+; RV32-NEXT: sb a1, 3(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: memset_8:
; RV64: # %bb.0:
-; RV64-NEXT: sb a1, 7(a0)
-; RV64-NEXT: sb a1, 6(a0)
-; RV64-NEXT: sb a1, 5(a0)
; RV64-NEXT: sb a1, 4(a0)
-; RV64-NEXT: sb a1, 3(a0)
-; RV64-NEXT: sb a1, 2(a0)
-; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: sb a1, 5(a0)
+; RV64-NEXT: sb a1, 6(a0)
+; RV64-NEXT: sb a1, 7(a0)
; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: sb a1, 2(a0)
+; RV64-NEXT: sb a1, 3(a0)
; RV64-NEXT: ret
;
; RV32-FAST-LABEL: memset_8:
@@ -130,8 +130,8 @@ define void @memset_8(ptr %a, i8 %value) nounwind {
; RV32-FAST-NEXT: lui a2, 4112
; RV32-FAST-NEXT: addi a2, a2, 257
; RV32-FAST-NEXT: mul a1, a1, a2
-; RV32-FAST-NEXT: sw a1, 4(a0)
; RV32-FAST-NEXT: sw a1, 0(a0)
+; RV32-FAST-NEXT: sw a1, 4(a0)
; RV32-FAST-NEXT: ret
;
; RV64-FAST-LABEL: memset_8:
@@ -151,42 +151,42 @@ define void @memset_8(ptr %a, i8 %value) nounwind {
define void @memset_16(ptr %a, i8 %value) nounwind {
; RV32-LABEL: memset_16:
; RV32: # %bb.0:
-; RV32-NEXT: sb a1, 15(a0)
-; RV32-NEXT: sb a1, 14(a0)
-; RV32-NEXT: sb a1, 13(a0)
; RV32-NEXT: sb a1, 12(a0)
-; RV32-NEXT: sb a1, 11(a0)
-; RV32-NEXT: sb a1, 10(a0)
-; RV32-NEXT: sb a1, 9(a0)
+; RV32-NEXT: sb a1, 13(a0)
+; RV32-NEXT: sb a1, 14(a0)
+; RV32-NEXT: sb a1, 15(a0)
; RV32-NEXT: sb a1, 8(a0)
-; RV32-NEXT: sb a1, 7(a0)
-; RV32-NEXT: sb a1, 6(a0)
-; RV32-NEXT: sb a1, 5(a0)
+; RV32-NEXT: sb a1, 9(a0)
+; RV32-NEXT: sb a1, 10(a0)
+; RV32-NEXT: sb a1, 11(a0)
; RV32-NEXT: sb a1, 4(a0)
-; RV32-NEXT: sb a1, 3(a0)
-; RV32-NEXT: sb a1, 2(a0)
-; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: sb a1, 5(a0)
+; RV32-NEXT: sb a1, 6(a0)
+; RV32-NEXT: sb a1, 7(a0)
; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: sb a1, 2(a0)
+; RV32-NEXT: sb a1, 3(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: memset_16:
; RV64: # %bb.0:
-; RV64-NEXT: sb a1, 15(a0)
-; RV64-NEXT: sb a1, 14(a0)
-; RV64-NEXT: sb a1, 13(a0)
; RV64-NEXT: sb a1, 12(a0)
-; RV64-NEXT: sb a1, 11(a0)
-; RV64-NEXT: sb a1, 10(a0)
-; RV64-NEXT: sb a1, 9(a0)
+; RV64-NEXT: sb a1, 13(a0)
+; RV64-NEXT: sb a1, 14(a0)
+; RV64-NEXT: sb a1, 15(a0)
; RV64-NEXT: sb a1, 8(a0)
-; RV64-NEXT: sb a1, 7(a0)
-; RV64-NEXT: sb a1, 6(a0)
-; RV64-NEXT: sb a1, 5(a0)
+; RV64-NEXT: sb a1, 9(a0)
+; RV64-NEXT: sb a1, 10(a0)
+; RV64-NEXT: sb a1, 11(a0)
; RV64-NEXT: sb a1, 4(a0)
-; RV64-NEXT: sb a1, 3(a0)
-; RV64-NEXT: sb a1, 2(a0)
-; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: sb a1, 5(a0)
+; RV64-NEXT: sb a1, 6(a0)
+; RV64-NEXT: sb a1, 7(a0)
; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: sb a1, 2(a0)
+; RV64-NEXT: sb a1, 3(a0)
; RV64-NEXT: ret
;
; RV32-FAST-LABEL: memset_16:
@@ -195,10 +195,10 @@ define void @memset_16(ptr %a, i8 %value) nounwind {
; RV32-FAST-NEXT: lui a2, 4112
; RV32-FAST-NEXT: addi a2, a2, 257
; RV32-FAST-NEXT: mul a1, a1, a2
-; RV32-FAST-NEXT: sw a1, 12(a0)
-; RV32-FAST-NEXT: sw a1, 8(a0)
-; RV32-FAST-NEXT: sw a1, 4(a0)
; RV32-FAST-NEXT: sw a1, 0(a0)
+; RV32-FAST-NEXT: sw a1, 4(a0)
+; RV32-FAST-NEXT: sw a1, 8(a0)
+; RV32-FAST-NEXT: sw a1, 12(a0)
; RV32-FAST-NEXT: ret
;
; RV64-FAST-LABEL: memset_16:
@@ -209,8 +209,8 @@ define void @memset_16(ptr %a, i8 %value) nounwind {
; RV64-FAST-NEXT: slli a3, a2, 32
; RV64-FAST-NEXT: add a2, a2, a3
; RV64-FAST-NEXT: mul a1, a1, a2
-; RV64-FAST-NEXT: sd a1, 8(a0)
; RV64-FAST-NEXT: sd a1, 0(a0)
+; RV64-FAST-NEXT: sd a1, 8(a0)
; RV64-FAST-NEXT: ret
tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 %value, i64 16, i1 0)
ret void
@@ -219,74 +219,74 @@ define void @memset_16(ptr %a, i8 %value) nounwind {
define void @memset_32(ptr %a, i8 %value) nounwind {
; RV32-LABEL: memset_32:
; RV32: # %bb.0:
-; RV32-NEXT: sb a1, 31(a0)
-; RV32-NEXT: sb a1, 30(a0)
-; RV32-NEXT: sb a1, 29(a0)
; RV32-NEXT: sb a1, 28(a0)
-; RV32-NEXT: sb a1, 27(a0)
-; RV32-NEXT: sb a1, 26(a0)
-; RV32-NEXT: sb a1, 25(a0)
+; RV32-NEXT: sb a1, 29(a0)
+; RV32-NEXT: sb a1, 30(a0)
+; RV32-NEXT: sb a1, 31(a0)
; RV32-NEXT: sb a1, 24(a0)
-; RV32-NEXT: sb a1, 23(a0)
-; RV32-NEXT: sb a1, 22(a0)
-; RV32-NEXT: sb a1, 21(a0)
+; RV32-NEXT: sb a1, 25(a0)
+; RV32-NEXT: sb a1, 26(a0)
+; RV32-NEXT: sb a1, 27(a0)
; RV32-NEXT: sb a1, 20(a0)
-; RV32-NEXT: sb a1, 19(a0)
-; RV32-NEXT: sb a1, 18(a0)
-; RV32-NEXT: sb a1, 17(a0)
+; RV32-NEXT: sb a1, 21(a0)
+; RV32-NEXT: sb a1, 22(a0)
+; RV32-NEXT: sb a1, 23(a0)
; RV32-NEXT: sb a1, 16(a0)
-; RV32-NEXT: sb a1, 15(a0)
-; RV32-NEXT: sb a1, 14(a0)
-; RV32-NEXT: sb a1, 13(a0)
+; RV32-NEXT: sb a1, 17(a0)
+; RV32-NEXT: sb a1, 18(a0)
+; RV32-NEXT: sb a1, 19(a0)
; RV32-NEXT: sb a1, 12(a0)
-; RV32-NEXT: sb a1, 11(a0)
-; RV32-NEXT: sb a1, 10(a0)
-; RV32-NEXT: sb a1, 9(a0)
+; RV32-NEXT: sb a1, 13(a0)
+; RV32-NEXT: sb a1, 14(a0)
+; RV32-NEXT: sb a1, 15(a0)
; RV32-NEXT: sb a1, 8(a0)
-; RV32-NEXT: sb a1, 7(a0)
-; RV32-NEXT: sb a1, 6(a0)
-; RV32-NEXT: sb a1, 5(a0)
+; RV32-NEXT: sb a1, 9(a0)
+; RV32-NEXT: sb a1, 10(a0)
+; RV32-NEXT: sb a1, 11(a0)
; RV32-NEXT: sb a1, 4(a0)
-; RV32-NEXT: sb a1, 3(a0)
-; RV32-NEXT: sb a1, 2(a0)
-; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: sb a1, 5(a0)
+; RV32-NEXT: sb a1, 6(a0)
+; RV32-NEXT: sb a1, 7(a0)
; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: sb a1, 2(a0)
+; RV32-NEXT: sb a1, 3(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: memset_32:
; RV64: # %bb.0:
-; RV64-NEXT: sb a1, 31(a0)
-; RV64-NEXT: sb a1, 30(a0)
-; RV64-NEXT: sb a1, 29(a0)
; RV64-NEXT: sb a1, 28(a0)
-; RV64-NEXT: sb a1, 27(a0)
-; RV64-NEXT: sb a1, 26(a0)
-; RV64-NEXT: sb a1, 25(a0)
+; RV64-NEXT: sb a1, 29(a0)
+; RV64-NEXT: sb a1, 30(a0)
+; RV64-NEXT: sb a1, 31(a0)
; RV64-NEXT: sb a1, 24(a0)
-; RV64-NEXT: sb a1, 23(a0)
-; RV64-NEXT: sb a1, 22(a0)
-; RV64-NEXT: sb a1, 21(a0)
+; RV64-NEXT: sb a1, 25(a0)
+; RV64-NEXT: sb a1, 26(a0)
+; RV64-NEXT: sb a1, 27(a0)
; RV64-NEXT: sb a1, 20(a0)
-; RV64-NEXT: sb a1, 19(a0)
-; RV64-NEXT: sb a1, 18(a0)
-; RV64-NEXT: sb a1, 17(a0)
+; RV64-NEXT: sb a1, 21(a0)
+; RV64-NEXT: sb a1, 22(a0)
+; RV64-NEXT: sb a1, 23(a0)
; RV64-NEXT: sb a1, 16(a0)
-; RV64-NEXT: sb a1, 15(a0)
-; RV64-NEXT: sb a1, 14(a0)
-; RV64-NEXT: sb a1, 13(a0)
+; RV64-NEXT: sb a1, 17(a0)
+; RV64-NEXT: sb a1, 18(a0)
+; RV64-NEXT: sb a1, 19(a0)
; RV64-NEXT: sb a1, 12(a0)
-; RV64-NEXT: sb a1, 11(a0)
-; RV64-NEXT: sb a1, 10(a0)
-; RV64-NEXT: sb a1, 9(a0)
+; RV64-NEXT: sb a1, 13(a0)
+; RV64-NEXT: sb a1, 14(a0)
+; RV64-NEXT: sb a1, 15(a0)
; RV64-NEXT: sb a1, 8(a0)
-; RV64-NEXT: sb a1, 7(a0)
-; RV64-NEXT: sb a1, 6(a0)
-; RV64-NEXT: sb a1, 5(a0)
+; RV64-NEXT: sb a1, 9(a0)
+; RV64-NEXT: sb a1, 10(a0)
+; RV64-NEXT: sb a1, 11(a0)
; RV64-NEXT: sb a1, 4(a0)
-; RV64-NEXT: sb a1, 3(a0)
-; RV64-NEXT: sb a1, 2(a0)
-; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: sb a1, 5(a0)
+; RV64-NEXT: sb a1, 6(a0)
+; RV64-NEXT: sb a1, 7(a0)
; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: sb a1, 2(a0)
+; RV64-NEXT: sb a1, 3(a0)
; RV64-NEXT: ret
;
; RV32-FAST-LABEL: memset_32:
@@ -295,14 +295,14 @@ define void @memset_32(ptr %a, i8 %value) nounwind {
; RV32-FAST-NEXT: lui a2, 4112
; RV32-FAST-NEXT: addi a2, a2, 257
; RV32-FAST-NEXT: mul a1, a1, a2
-; RV32-FAST-NEXT: sw a1, 28(a0)
-; RV32-FAST-NEXT: sw a1, 24(a0)
-; RV32-FAST-NEXT: sw a1, 20(a0)
; RV32-FAST-NEXT: sw a1, 16(a0)
-; RV32-FAST-NEXT: sw a1, 12(a0)
-; RV32-FAST-NEXT: sw a1, 8(a0)
-; RV32-FAST-NEXT: sw a1, 4(a0)
+; RV32-FAST-NEXT: sw a1, 20(a0)
+; RV32-FAST-NEXT: sw a1, 24(a0)
+; RV32-FAST-NEXT: sw a1, 28(a0)
; RV32-FAST-NEXT: sw a1, 0(a0)
+; RV32-FAST-NEXT: sw a1, 4(a0)
+; RV32-FAST-NEXT: sw a1, 8(a0)
+; RV32-FAST-NEXT: sw a1, 12(a0)
; RV32-FAST-NEXT: ret
;
; RV64-FAST-LABEL: memset_32:
@@ -313,10 +313,10 @@ define void @memset_32(ptr %a, i8 %value) nounwind {
; RV64-FAST-NEXT: slli a3, a2, 32
; RV64-FAST-NEXT: add a2, a2, a3
; RV64-FAST-NEXT: mul a1, a1, a2
-; RV64-FAST-NEXT: sd a1, 24(a0)
-; RV64-FAST-NEXT: sd a1, 16(a0)
-; RV64-FAST-NEXT: sd a1, 8(a0)
; RV64-FAST-NEXT: sd a1, 0(a0)
+; RV64-FAST-NEXT: sd a1, 8(a0)
+; RV64-FAST-NEXT: sd a1, 16(a0)
+; RV64-FAST-NEXT: sd a1, 24(a0)
; RV64-FAST-NEXT: ret
tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 %value, i64 32, i1 0)
ret void
@@ -325,138 +325,138 @@ define void @memset_32(ptr %a, i8 %value) nounwind {
define void @memset_64(ptr %a, i8 %value) nounwind {
; RV32-LABEL: memset_64:
; RV32: # %bb.0:
-; RV32-NEXT: sb a1, 63(a0)
-; RV32-NEXT: sb a1, 62(a0)
-; RV32-NEXT: sb a1, 61(a0)
; RV32-NEXT: sb a1, 60(a0)
-; RV32-NEXT: sb a1, 59(a0)
-; RV32-NEXT: sb a1, 58(a0)
-; RV32-NEXT: sb a1, 57(a0)
+; RV32-NEXT: sb a1, 61(a0)
+; RV32-NEXT: sb a1, 62(a0)
+; RV32-NEXT: sb a1, 63(a0)
; RV32-NEXT: sb a1, 56(a0)
-; RV32-NEXT: sb a1, 55(a0)
-; RV32-NEXT: sb a1, 54(a0)
-; RV32-NEXT: sb a1, 53(a0)
+; RV32-NEXT: sb a1, 57(a0)
+; RV32-NEXT: sb a1, 58(a0)
+; RV32-NEXT: sb a1, 59(a0)
; RV32-NEXT: sb a1, 52(a0)
-; RV32-NEXT: sb a1, 51(a0)
-; RV32-NEXT: sb a1, 50(a0)
-; RV32-NEXT: sb a1, 49(a0)
+; RV32-NEXT: sb a1, 53(a0)
+; RV32-NEXT: sb a1, 54(a0)
+; RV32-NEXT: sb a1, 55(a0)
; RV32-NEXT: sb a1, 48(a0)
-; RV32-NEXT: sb a1, 47(a0)
-; RV32-NEXT: sb a1, 46(a0)
-; RV32-NEXT: sb a1, 45(a0)
+; RV32-NEXT: sb a1, 49(a0)
+; RV32-NEXT: sb a1, 50(a0)
+; RV32-NEXT: sb a1, 51(a0)
; RV32-NEXT: sb a1, 44(a0)
-; RV32-NEXT: sb a1, 43(a0)
-; RV32-NEXT: sb a1, 42(a0)
-; RV32-NEXT: sb a1, 41(a0)
+; RV32-NEXT: sb a1, 45(a0)
+; RV32-NEXT: sb a1, 46(a0)
+; RV32-NEXT: sb a1, 47(a0)
; RV32-NEXT: sb a1, 40(a0)
-; RV32-NEXT: sb a1, 39(a0)
-; RV32-NEXT: sb a1, 38(a0)
-; RV32-NEXT: sb a1, 37(a0)
+; RV32-NEXT: sb a1, 41(a0)
+; RV32-NEXT: sb a1, 42(a0)
+; RV32-NEXT: sb a1, 43(a0)
; RV32-NEXT: sb a1, 36(a0)
-; RV32-NEXT: sb a1, 35(a0)
-; RV32-NEXT: sb a1, 34(a0)
-; RV32-NEXT: sb a1, 33(a0)
-; RV32-NEXT: sb a1, 32(a0)
-; RV32-NEXT: sb a1, 31(a0)
-; RV32-NEXT: sb a1, 30(a0)
-; RV32-NEXT: sb a1, 29(a0)
+; RV32-NEXT: sb a1, 37(a0)
+; RV32-NEXT: sb a1, 38(a0)
+; RV32-NEXT: sb a1, 39(a0)
+; RV32-NEXT: sb a1, 32(a0)
+; RV32-NEXT: sb a1, 33(a0)
+; RV32-NEXT: sb a1, 34(a0)
+; RV32-NEXT: sb a1, 35(a0)
; RV32-NEXT: sb a1, 28(a0)
-; RV32-NEXT: sb a1, 27(a0)
-; RV32-NEXT: sb a1, 26(a0)
-; RV32-NEXT: sb a1, 25(a0)
+; RV32-NEXT: sb a1, 29(a0)
+; RV32-NEXT: sb a1, 30(a0)
+; RV32-NEXT: sb a1, 31(a0)
; RV32-NEXT: sb a1, 24(a0)
-; RV32-NEXT: sb a1, 23(a0)
-; RV32-NEXT: sb a1, 22(a0)
-; RV32-NEXT: sb a1, 21(a0)
+; RV32-NEXT: sb a1, 25(a0)
+; RV32-NEXT: sb a1, 26(a0)
+; RV32-NEXT: sb a1, 27(a0)
; RV32-NEXT: sb a1, 20(a0)
-; RV32-NEXT: sb a1, 19(a0)
-; RV32-NEXT: sb a1, 18(a0)
-; RV32-NEXT: sb a1, 17(a0)
+; RV32-NEXT: sb a1, 21(a0)
+; RV32-NEXT: sb a1, 22(a0)
+; RV32-NEXT: sb a1, 23(a0)
; RV32-NEXT: sb a1, 16(a0)
-; RV32-NEXT: sb a1, 15(a0)
-; RV32-NEXT: sb a1, 14(a0)
-; RV32-NEXT: sb a1, 13(a0)
+; RV32-NEXT: sb a1, 17(a0)
+; RV32-NEXT: sb a1, 18(a0)
+; RV32-NEXT: sb a1, 19(a0)
; RV32-NEXT: sb a1, 12(a0)
-; RV32-NEXT: sb a1, 11(a0)
-; RV32-NEXT: sb a1, 10(a0)
-; RV32-NEXT: sb a1, 9(a0)
+; RV32-NEXT: sb a1, 13(a0)
+; RV32-NEXT: sb a1, 14(a0)
+; RV32-NEXT: sb a1, 15(a0)
; RV32-NEXT: sb a1, 8(a0)
-; RV32-NEXT: sb a1, 7(a0)
-; RV32-NEXT: sb a1, 6(a0)
-; RV32-NEXT: sb a1, 5(a0)
+; RV32-NEXT: sb a1, 9(a0)
+; RV32-NEXT: sb a1, 10(a0)
+; RV32-NEXT: sb a1, 11(a0)
; RV32-NEXT: sb a1, 4(a0)
-; RV32-NEXT: sb a1, 3(a0)
-; RV32-NEXT: sb a1, 2(a0)
-; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: sb a1, 5(a0)
+; RV32-NEXT: sb a1, 6(a0)
+; RV32-NEXT: sb a1, 7(a0)
; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: sb a1, 2(a0)
+; RV32-NEXT: sb a1, 3(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: memset_64:
; RV64: # %bb.0:
-; RV64-NEXT: sb a1, 63(a0)
-; RV64-NEXT: sb a1, 62(a0)
-; RV64-NEXT: sb a1, 61(a0)
; RV64-NEXT: sb a1, 60(a0)
-; RV64-NEXT: sb a1, 59(a0)
-; RV64-NEXT: sb a1, 58(a0)
-; RV64-NEXT: sb a1, 57(a0)
+; RV64-NEXT: sb a1, 61(a0)
+; RV64-NEXT: sb a1, 62(a0)
+; RV64-NEXT: sb a1, 63(a0)
; RV64-NEXT: sb a1, 56(a0)
-; RV64-NEXT: sb a1, 55(a0)
-; RV64-NEXT: sb a1, 54(a0)
-; RV64-NEXT: sb a1, 53(a0)
+; RV64-NEXT: sb a1, 57(a0)
+; RV64-NEXT: sb a1, 58(a0)
+; RV64-NEXT: sb a1, 59(a0)
; RV64-NEXT: sb a1, 52(a0)
-; RV64-NEXT: sb a1, 51(a0)
-; RV64-NEXT: sb a1, 50(a0)
-; RV64-NEXT: sb a1, 49(a0)
+; RV64-NEXT: sb a1, 53(a0)
+; RV64-NEXT: sb a1, 54(a0)
+; RV64-NEXT: sb a1, 55(a0)
; RV64-NEXT: sb a1, 48(a0)
-; RV64-NEXT: sb a1, 47(a0)
-; RV64-NEXT: sb a1, 46(a0)
-; RV64-NEXT: sb a1, 45(a0)
+; RV64-NEXT: sb a1, 49(a0)
+; RV64-NEXT: sb a1, 50(a0)
+; RV64-NEXT: sb a1, 51(a0)
; RV64-NEXT: sb a1, 44(a0)
-; RV64-NEXT: sb a1, 43(a0)
-; RV64-NEXT: sb a1, 42(a0)
-; RV64-NEXT: sb a1, 41(a0)
+; RV64-NEXT: sb a1, 45(a0)
+; RV64-NEXT: sb a1, 46(a0)
+; RV64-NEXT: sb a1, 47(a0)
; RV64-NEXT: sb a1, 40(a0)
-; RV64-NEXT: sb a1, 39(a0)
-; RV64-NEXT: sb a1, 38(a0)
-; RV64-NEXT: sb a1, 37(a0)
+; RV64-NEXT: sb a1, 41(a0)
+; RV64-NEXT: sb a1, 42(a0)
+; RV64-NEXT: sb a1, 43(a0)
; RV64-NEXT: sb a1, 36(a0)
-; RV64-NEXT: sb a1, 35(a0)
-; RV64-NEXT: sb a1, 34(a0)
-; RV64-NEXT: sb a1, 33(a0)
+; RV64-NEXT: sb a1, 37(a0)
+; RV64-NEXT: sb a1, 38(a0)
+; RV64-NEXT: sb a1, 39(a0)
; RV64-NEXT: sb a1, 32(a0)
-; RV64-NEXT: sb a1, 31(a0)
-; RV64-NEXT: sb a1, 30(a0)
-; RV64-NEXT: sb a1, 29(a0)
+; RV64-NEXT: sb a1, 33(a0)
+; RV64-NEXT: sb a1, 34(a0)
+; RV64-NEXT: sb a1, 35(a0)
; RV64-NEXT: sb a1, 28(a0)
-; RV64-NEXT: sb a1, 27(a0)
-; RV64-NEXT: sb a1, 26(a0)
-; RV64-NEXT: sb a1, 25(a0)
+; RV64-NEXT: sb a1, 29(a0)
+; RV64-NEXT: sb a1, 30(a0)
+; RV64-NEXT: sb a1, 31(a0)
; RV64-NEXT: sb a1, 24(a0)
-; RV64-NEXT: sb a1, 23(a0)
-; RV64-NEXT: sb a1, 22(a0)
-; RV64-NEXT: sb a1, 21(a0)
+; RV64-NEXT: sb a1, 25(a0)
+; RV64-NEXT: sb a1, 26(a0)
+; RV64-NEXT: sb a1, 27(a0)
; RV64-NEXT: sb a1, 20(a0)
-; RV64-NEXT: sb a1, 19(a0)
-; RV64-NEXT: sb a1, 18(a0)
-; RV64-NEXT: sb a1, 17(a0)
+; RV64-NEXT: sb a1, 21(a0)
+; RV64-NEXT: sb a1, 22(a0)
+; RV64-NEXT: sb a1, 23(a0)
; RV64-NEXT: sb a1, 16(a0)
-; RV64-NEXT: sb a1, 15(a0)
-; RV64-NEXT: sb a1, 14(a0)
-; RV64-NEXT: sb a1, 13(a0)
+; RV64-NEXT: sb a1, 17(a0)
+; RV64-NEXT: sb a1, 18(a0)
+; RV64-NEXT: sb a1, 19(a0)
; RV64-NEXT: sb a1, 12(a0)
-; RV64-NEXT: sb a1, 11(a0)
-; RV64-NEXT: sb a1, 10(a0)
-; RV64-NEXT: sb a1, 9(a0)
+; RV64-NEXT: sb a1, 13(a0)
+; RV64-NEXT: sb a1, 14(a0)
+; RV64-NEXT: sb a1, 15(a0)
; RV64-NEXT: sb a1, 8(a0)
-; RV64-NEXT: sb a1, 7(a0)
-; RV64-NEXT: sb a1, 6(a0)
-; RV64-NEXT: sb a1, 5(a0)
+; RV64-NEXT: sb a1, 9(a0)
+; RV64-NEXT: sb a1, 10(a0)
+; RV64-NEXT: sb a1, 11(a0)
; RV64-NEXT: sb a1, 4(a0)
-; RV64-NEXT: sb a1, 3(a0)
-; RV64-NEXT: sb a1, 2(a0)
-; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: sb a1, 5(a0)
+; RV64-NEXT: sb a1, 6(a0)
+; RV64-NEXT: sb a1, 7(a0)
; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: sb a1, 2(a0)
+; RV64-NEXT: sb a1, 3(a0)
; RV64-NEXT: ret
;
; RV32-FAST-LABEL: memset_64:
@@ -465,22 +465,22 @@ define void @memset_64(ptr %a, i8 %value) nounwind {
; RV32-FAST-NEXT: lui a2, 4112
; RV32-FAST-NEXT: addi a2, a2, 257
; RV32-FAST-NEXT: mul a1, a1, a2
-; RV32-FAST-NEXT: sw a1, 60(a0)
-; RV32-FAST-NEXT: sw a1, 56(a0)
-; RV32-FAST-NEXT: sw a1, 52(a0)
; RV32-FAST-NEXT: sw a1, 48(a0)
-; RV32-FAST-NEXT: sw a1, 44(a0)
-; RV32-FAST-NEXT: sw a1, 40(a0)
-; RV32-FAST-NEXT: sw a1, 36(a0)
+; RV32-FAST-NEXT: sw a1, 52(a0)
+; RV32-FAST-NEXT: sw a1, 56(a0)
+; RV32-FAST-NEXT: sw a1, 60(a0)
; RV32-FAST-NEXT: sw a1, 32(a0)
-; RV32-FAST-NEXT: sw a1, 28(a0)
-; RV32-FAST-NEXT: sw a1, 24(a0)
-; RV32-FAST-NEXT: sw a1, 20(a0)
+; RV32-FAST-NEXT: sw a1, 36(a0)
+; RV32-FAST-NEXT: sw a1, 40(a0)
+; RV32-FAST-NEXT: sw a1, 44(a0)
; RV32-FAST-NEXT: sw a1, 16(a0)
-; RV32-FAST-NEXT: sw a1, 12(a0)
-; RV32-FAST-NEXT: sw a1, 8(a0)
-; RV32-FAST-NEXT: sw a1, 4(a0)
+; RV32-FAST-NEXT: sw a1, 20(a0)
+; RV32-FAST-NEXT: sw a1, 24(a0)
+; RV32-FAST-NEXT: sw a1, 28(a0)
; RV32-FAST-NEXT: sw a1, 0(a0)
+; RV32-FAST-NEXT: sw a1, 4(a0)
+; RV32-FAST-NEXT: sw a1, 8(a0)
+; RV32-FAST-NEXT: sw a1, 12(a0)
; RV32-FAST-NEXT: ret
;
; RV64-FAST-LABEL: memset_64:
@@ -491,14 +491,14 @@ define void @memset_64(ptr %a, i8 %value) nounwind {
; RV64-FAST-NEXT: slli a3, a2, 32
; RV64-FAST-NEXT: add a2, a2, a3
; RV64-FAST-NEXT: mul a1, a1, a2
-; RV64-FAST-NEXT: sd a1, 56(a0)
-; RV64-FAST-NEXT: sd a1, 48(a0)
-; RV64-FAST-NEXT: sd a1, 40(a0)
; RV64-FAST-NEXT: sd a1, 32(a0)
-; RV64-FAST-NEXT: sd a1, 24(a0)
-; RV64-FAST-NEXT: sd a1, 16(a0)
-; RV64-FAST-NEXT: sd a1, 8(a0)
+; RV64-FAST-NEXT: sd a1, 40(a0)
+; RV64-FAST-NEXT: sd a1, 48(a0)
+; RV64-FAST-NEXT: sd a1, 56(a0)
; RV64-FAST-NEXT: sd a1, 0(a0)
+; RV64-FAST-NEXT: sd a1, 8(a0)
+; RV64-FAST-NEXT: sd a1, 16(a0)
+; RV64-FAST-NEXT: sd a1, 24(a0)
; RV64-FAST-NEXT: ret
tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 %value, i64 64, i1 0)
ret void
@@ -556,8 +556,8 @@ define void @aligned_memset_8(ptr align 8 %a, i8 %value) nounwind {
; RV32-BOTH-NEXT: lui a2, 4112
; RV32-BOTH-NEXT: addi a2, a2, 257
; RV32-BOTH-NEXT: mul a1, a1, a2
-; RV32-BOTH-NEXT: sw a1, 4(a0)
; RV32-BOTH-NEXT: sw a1, 0(a0)
+; RV32-BOTH-NEXT: sw a1, 4(a0)
; RV32-BOTH-NEXT: ret
;
; RV64-BOTH-LABEL: aligned_memset_8:
@@ -581,10 +581,10 @@ define void @aligned_memset_16(ptr align 16 %a, i8 %value) nounwind {
; RV32-BOTH-NEXT: lui a2, 4112
; RV32-BOTH-NEXT: addi a2, a2, 257
; RV32-BOTH-NEXT: mul a1, a1, a2
-; RV32-BOTH-NEXT: sw a1, 12(a0)
-; RV32-BOTH-NEXT: sw a1, 8(a0)
-; RV32-BOTH-NEXT: sw a1, 4(a0)
; RV32-BOTH-NEXT: sw a1, 0(a0)
+; RV32-BOTH-NEXT: sw a1, 4(a0)
+; RV32-BOTH-NEXT: sw a1, 8(a0)
+; RV32-BOTH-NEXT: sw a1, 12(a0)
; RV32-BOTH-NEXT: ret
;
; RV64-BOTH-LABEL: aligned_memset_16:
@@ -595,8 +595,8 @@ define void @aligned_memset_16(ptr align 16 %a, i8 %value) nounwind {
; RV64-BOTH-NEXT: slli a3, a2, 32
; RV64-BOTH-NEXT: add a2, a2, a3
; RV64-BOTH-NEXT: mul a1, a1, a2
-; RV64-BOTH-NEXT: sd a1, 8(a0)
; RV64-BOTH-NEXT: sd a1, 0(a0)
+; RV64-BOTH-NEXT: sd a1, 8(a0)
; RV64-BOTH-NEXT: ret
tail call void @llvm.memset.inline.p0.i64(ptr align 16 %a, i8 %value, i64 16, i1 0)
ret void
@@ -609,14 +609,14 @@ define void @aligned_memset_32(ptr align 32 %a, i8 %value) nounwind {
; RV32-BOTH-NEXT: lui a2, 4112
; RV32-BOTH-NEXT: addi a2, a2, 257
; RV32-BOTH-NEXT: mul a1, a1, a2
-; RV32-BOTH-NEXT: sw a1, 28(a0)
-; RV32-BOTH-NEXT: sw a1, 24(a0)
-; RV32-BOTH-NEXT: sw a1, 20(a0)
; RV32-BOTH-NEXT: sw a1, 16(a0)
-; RV32-BOTH-NEXT: sw a1, 12(a0)
-; RV32-BOTH-NEXT: sw a1, 8(a0)
-; RV32-BOTH-NEXT: sw a1, 4(a0)
+; RV32-BOTH-NEXT: sw a1, 20(a0)
+; RV32-BOTH-NEXT: sw a1, 24(a0)
+; RV32-BOTH-NEXT: sw a1, 28(a0)
; RV32-BOTH-NEXT: sw a1, 0(a0)
+; RV32-BOTH-NEXT: sw a1, 4(a0)
+; RV32-BOTH-NEXT: sw a1, 8(a0)
+; RV32-BOTH-NEXT: sw a1, 12(a0)
; RV32-BOTH-NEXT: ret
;
; RV64-BOTH-LABEL: aligned_memset_32:
@@ -627,10 +627,10 @@ define void @aligned_memset_32(ptr align 32 %a, i8 %value) nounwind {
; RV64-BOTH-NEXT: slli a3, a2, 32
; RV64-BOTH-NEXT: add a2, a2, a3
; RV64-BOTH-NEXT: mul a1, a1, a2
-; RV64-BOTH-NEXT: sd a1, 24(a0)
-; RV64-BOTH-NEXT: sd a1, 16(a0)
-; RV64-BOTH-NEXT: sd a1, 8(a0)
; RV64-BOTH-NEXT: sd a1, 0(a0)
+; RV64-BOTH-NEXT: sd a1, 8(a0)
+; RV64-BOTH-NEXT: sd a1, 16(a0)
+; RV64-BOTH-NEXT: sd a1, 24(a0)
; RV64-BOTH-NEXT: ret
tail call void @llvm.memset.inline.p0.i64(ptr align 32 %a, i8 %value, i64 32, i1 0)
ret void
@@ -643,22 +643,22 @@ define void @aligned_memset_64(ptr align 64 %a, i8 %value) nounwind {
; RV32-BOTH-NEXT: lui a2, 4112
; RV32-BOTH-NEXT: addi a2, a2, 257
; RV32-BOTH-NEXT: mul a1, a1, a2
-; RV32-BOTH-NEXT: sw a1, 60(a0)
-; RV32-BOTH-NEXT: sw a1, 56(a0)
-; RV32-BOTH-NEXT: sw a1, 52(a0)
; RV32-BOTH-NEXT: sw a1, 48(a0)
-; RV32-BOTH-NEXT: sw a1, 44(a0)
-; RV32-BOTH-NEXT: sw a1, 40(a0)
-; RV32-BOTH-NEXT: sw a1, 36(a0)
+; RV32-BOTH-NEXT: sw a1, 52(a0)
+; RV32-BOTH-NEXT: sw a1, 56(a0)
+; RV32-BOTH-NEXT: sw a1, 60(a0)
; RV32-BOTH-NEXT: sw a1, 32(a0)
-; RV32-BOTH-NEXT: sw a1, 28(a0)
-; RV32-BOTH-NEXT: sw a1, 24(a0)
-; RV32-BOTH-NEXT: sw a1, 20(a0)
+; RV32-BOTH-NEXT: sw a1, 36(a0)
+; RV32-BOTH-NEXT: sw a1, 40(a0)
+; RV32-BOTH-NEXT: sw a1, 44(a0)
; RV32-BOTH-NEXT: sw a1, 16(a0)
-; RV32-BOTH-NEXT: sw a1, 12(a0)
-; RV32-BOTH-NEXT: sw a1, 8(a0)
-; RV32-BOTH-NEXT: sw a1, 4(a0)
+; RV32-BOTH-NEXT: sw a1, 20(a0)
+; RV32-BOTH-NEXT: sw a1, 24(a0)
+; RV32-BOTH-NEXT: sw a1, 28(a0)
; RV32-BOTH-NEXT: sw a1, 0(a0)
+; RV32-BOTH-NEXT: sw a1, 4(a0)
+; RV32-BOTH-NEXT: sw a1, 8(a0)
+; RV32-BOTH-NEXT: sw a1, 12(a0)
; RV32-BOTH-NEXT: ret
;
; RV64-BOTH-LABEL: aligned_memset_64:
@@ -669,14 +669,14 @@ define void @aligned_memset_64(ptr align 64 %a, i8 %value) nounwind {
; RV64-BOTH-NEXT: slli a3, a2, 32
; RV64-BOTH-NEXT: add a2, a2, a3
; RV64-BOTH-NEXT: mul a1, a1, a2
-; RV64-BOTH-NEXT: sd a1, 56(a0)
-; RV64-BOTH-NEXT: sd a1, 48(a0)
-; RV64-BOTH-NEXT: sd a1, 40(a0)
; RV64-BOTH-NEXT: sd a1, 32(a0)
-; RV64-BOTH-NEXT: sd a1, 24(a0)
-; RV64-BOTH-NEXT: sd a1, 16(a0)
-; RV64-BOTH-NEXT: sd a1, 8(a0)
+; RV64-BOTH-NEXT: sd a1, 40(a0)
+; RV64-BOTH-NEXT: sd a1, 48(a0)
+; RV64-BOTH-NEXT: sd a1, 56(a0)
; RV64-BOTH-NEXT: sd a1, 0(a0)
+; RV64-BOTH-NEXT: sd a1, 8(a0)
+; RV64-BOTH-NEXT: sd a1, 16(a0)
+; RV64-BOTH-NEXT: sd a1, 24(a0)
; RV64-BOTH-NEXT: ret
tail call void @llvm.memset.inline.p0.i64(ptr align 64 %a, i8 %value, i64 64, i1 0)
ret void
@@ -701,14 +701,14 @@ define void @bzero_1(ptr %a) nounwind {
define void @bzero_2(ptr %a) nounwind {
; RV32-LABEL: bzero_2:
; RV32: # %bb.0:
-; RV32-NEXT: sb zero, 1(a0)
; RV32-NEXT: sb zero, 0(a0)
+; RV32-NEXT: sb zero, 1(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: bzero_2:
; RV64: # %bb.0:
-; RV64-NEXT: sb zero, 1(a0)
; RV64-NEXT: sb zero, 0(a0)
+; RV64-NEXT: sb zero, 1(a0)
; RV64-NEXT: ret
;
; RV32-FAST-LABEL: bzero_2:
@@ -727,18 +727,18 @@ define void @bzero_2(ptr %a) nounwind {
define void @bzero_4(ptr %a) nounwind {
; RV32-LABEL: bzero_4:
; RV32: # %bb.0:
-; RV32-NEXT: sb zero, 3(a0)
-; RV32-NEXT: sb zero, 2(a0)
-; RV32-NEXT: sb zero, 1(a0)
; RV32-NEXT: sb zero, 0(a0)
+; RV32-NEXT: sb zero, 1(a0)
+; RV32-NEXT: sb zero, 2(a0)
+; RV32-NEXT: sb zero, 3(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: bzero_4:
; RV64: # %bb.0:
-; RV64-NEXT: sb zero, 3(a0)
-; RV64-NEXT: sb zero, 2(a0)
-; RV64-NEXT: sb zero, 1(a0)
; RV64-NEXT: sb zero, 0(a0)
+; RV64-NEXT: sb zero, 1(a0)
+; RV64-NEXT: sb zero, 2(a0)
+; RV64-NEXT: sb zero, 3(a0)
; RV64-NEXT: ret
;
; RV32-FAST-LABEL: bzero_4:
@@ -757,32 +757,32 @@ define void @bzero_4(ptr %a) nounwind {
define void @bzero_8(ptr %a) nounwind {
; RV32-LABEL: bzero_8:
; RV32: # %bb.0:
-; RV32-NEXT: sb zero, 7(a0)
-; RV32-NEXT: sb zero, 6(a0)
-; RV32-NEXT: sb zero, 5(a0)
; RV32-NEXT: sb zero, 4(a0)
-; RV32-NEXT: sb zero, 3(a0)
-; RV32-NEXT: sb zero, 2(a0)
-; RV32-NEXT: sb zero, 1(a0)
+; RV32-NEXT: sb zero, 5(a0)
+; RV32-NEXT: sb zero, 6(a0)
+; RV32-NEXT: sb zero, 7(a0)
; RV32-NEXT: sb zero, 0(a0)
+; RV32-NEXT: sb zero, 1(a0)
+; RV32-NEXT: sb zero, 2(a0)
+; RV32-NEXT: sb zero, 3(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: bzero_8:
; RV64: # %bb.0:
-; RV64-NEXT: sb zero, 7(a0)
-; RV64-NEXT: sb zero, 6(a0)
-; RV64-NEXT: sb zero, 5(a0)
; RV64-NEXT: sb zero, 4(a0)
-; RV64-NEXT: sb zero, 3(a0)
-; RV64-NEXT: sb zero, 2(a0)
-; RV64-NEXT: sb zero, 1(a0)
+; RV64-NEXT: sb zero, 5(a0)
+; RV64-NEXT: sb zero, 6(a0)
+; RV64-NEXT: sb zero, 7(a0)
; RV64-NEXT: sb zero, 0(a0)
+; RV64-NEXT: sb zero, 1(a0)
+; RV64-NEXT: sb zero, 2(a0)
+; RV64-NEXT: sb zero, 3(a0)
; RV64-NEXT: ret
;
; RV32-FAST-LABEL: bzero_8:
; RV32-FAST: # %bb.0:
-; RV32-FAST-NEXT: sw zero, 4(a0)
; RV32-FAST-NEXT: sw zero, 0(a0)
+; RV32-FAST-NEXT: sw zero, 4(a0)
; RV32-FAST-NEXT: ret
;
; RV64-FAST-LABEL: bzero_8:
@@ -796,56 +796,56 @@ define void @bzero_8(ptr %a) nounwind {
define void @bzero_16(ptr %a) nounwind {
; RV32-LABEL: bzero_16:
; RV32: # %bb.0:
-; RV32-NEXT: sb zero, 15(a0)
-; RV32-NEXT: sb zero, 14(a0)
-; RV32-NEXT: sb zero, 13(a0)
; RV32-NEXT: sb zero, 12(a0)
-; RV32-NEXT: sb zero, 11(a0)
-; RV32-NEXT: sb zero, 10(a0)
-; RV32-NEXT: sb zero, 9(a0)
+; RV32-NEXT: sb zero, 13(a0)
+; RV32-NEXT: sb zero, 14(a0)
+; RV32-NEXT: sb zero, 15(a0)
; RV32-NEXT: sb zero, 8(a0)
-; RV32-NEXT: sb zero, 7(a0)
-; RV32-NEXT: sb zero, 6(a0)
-; RV32-NEXT: sb zero, 5(a0)
+; RV32-NEXT: sb zero, 9(a0)
+; RV32-NEXT: sb zero, 10(a0)
+; RV32-NEXT: sb zero, 11(a0)
; RV32-NEXT: sb zero, 4(a0)
-; RV32-NEXT: sb zero, 3(a0)
-; RV32-NEXT: sb zero, 2(a0)
-; RV32-NEXT: sb zero, 1(a0)
+; RV32-NEXT: sb zero, 5(a0)
+; RV32-NEXT: sb zero, 6(a0)
+; RV32-NEXT: sb zero, 7(a0)
; RV32-NEXT: sb zero, 0(a0)
+; RV32-NEXT: sb zero, 1(a0)
+; RV32-NEXT: sb zero, 2(a0)
+; RV32-NEXT: sb zero, 3(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: bzero_16:
; RV64: # %bb.0:
-; RV64-NEXT: sb zero, 15(a0)
-; RV64-NEXT: sb zero, 14(a0)
-; RV64-NEXT: sb zero, 13(a0)
; RV64-NEXT: sb zero, 12(a0)
-; RV64-NEXT: sb zero, 11(a0)
-; RV64-NEXT: sb zero, 10(a0)
-; RV64-NEXT: sb zero, 9(a0)
+; RV64-NEXT: sb zero, 13(a0)
+; RV64-NEXT: sb zero, 14(a0)
+; RV64-NEXT: sb zero, 15(a0)
; RV64-NEXT: sb zero, 8(a0)
-; RV64-NEXT: sb zero, 7(a0)
-; RV64-NEXT: sb zero, 6(a0)
-; RV64-NEXT: sb zero, 5(a0)
+; RV64-NEXT: sb zero, 9(a0)
+; RV64-NEXT: sb zero, 10(a0)
+; RV64-NEXT: sb zero, 11(a0)
; RV64-NEXT: sb zero, 4(a0)
-; RV64-NEXT: sb zero, 3(a0)
-; RV64-NEXT: sb zero, 2(a0)
-; RV64-NEXT: sb zero, 1(a0)
+; RV64-NEXT: sb zero, 5(a0)
+; RV64-NEXT: sb zero, 6(a0)
+; RV64-NEXT: sb zero, 7(a0)
; RV64-NEXT: sb zero, 0(a0)
+; RV64-NEXT: sb zero, 1(a0)
+; RV64-NEXT: sb zero, 2(a0)
+; RV64-NEXT: sb zero, 3(a0)
; RV64-NEXT: ret
;
; RV32-FAST-LABEL: bzero_16:
; RV32-FAST: # %bb.0:
-; RV32-FAST-NEXT: sw zero, 12(a0)
-; RV32-FAST-NEXT: sw zero, 8(a0)
-; RV32-FAST-NEXT: sw zero, 4(a0)
; RV32-FAST-NEXT: sw zero, 0(a0)
+; RV32-FAST-NEXT: sw zero, 4(a0)
+; RV32-FAST-NEXT: sw zero, 8(a0)
+; RV32-FAST-NEXT: sw zero, 12(a0)
; RV32-FAST-NEXT: ret
;
; RV64-FAST-LABEL: bzero_16:
; RV64-FAST: # %bb.0:
-; RV64-FAST-NEXT: sd zero, 8(a0)
; RV64-FAST-NEXT: sd zero, 0(a0)
+; RV64-FAST-NEXT: sd zero, 8(a0)
; RV64-FAST-NEXT: ret
tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 0, i64 16, i1 0)
ret void
@@ -854,94 +854,94 @@ define void @bzero_16(ptr %a) nounwind {
define void @bzero_32(ptr %a) nounwind {
; RV32-LABEL: bzero_32:
; RV32: # %bb.0:
-; RV32-NEXT: sb zero, 31(a0)
-; RV32-NEXT: sb zero, 30(a0)
-; RV32-NEXT: sb zero, 29(a0)
; RV32-NEXT: sb zero, 28(a0)
-; RV32-NEXT: sb zero, 27(a0)
-; RV32-NEXT: sb zero, 26(a0)
-; RV32-NEXT: sb zero, 25(a0)
+; RV32-NEXT: sb zero, 29(a0)
+; RV32-NEXT: sb zero, 30(a0)
+; RV32-NEXT: sb zero, 31(a0)
; RV32-NEXT: sb zero, 24(a0)
-; RV32-NEXT: sb zero, 23(a0)
-; RV32-NEXT: sb zero, 22(a0)
-; RV32-NEXT: sb zero, 21(a0)
+; RV32-NEXT: sb zero, 25(a0)
+; RV32-NEXT: sb zero, 26(a0)
+; RV32-NEXT: sb zero, 27(a0)
; RV32-NEXT: sb zero, 20(a0)
-; RV32-NEXT: sb zero, 19(a0)
-; RV32-NEXT: sb zero, 18(a0)
-; RV32-NEXT: sb zero, 17(a0)
+; RV32-NEXT: sb zero, 21(a0)
+; RV32-NEXT: sb zero, 22(a0)
+; RV32-NEXT: sb zero, 23(a0)
; RV32-NEXT: sb zero, 16(a0)
-; RV32-NEXT: sb zero, 15(a0)
-; RV32-NEXT: sb zero, 14(a0)
-; RV32-NEXT: sb zero, 13(a0)
+; RV32-NEXT: sb zero, 17(a0)
+; RV32-NEXT: sb zero, 18(a0)
+; RV32-NEXT: sb zero, 19(a0)
; RV32-NEXT: sb zero, 12(a0)
-; RV32-NEXT: sb zero, 11(a0)
-; RV32-NEXT: sb zero, 10(a0)
-; RV32-NEXT: sb zero, 9(a0)
+; RV32-NEXT: sb zero, 13(a0)
+; RV32-NEXT: sb zero, 14(a0)
+; RV32-NEXT: sb zero, 15(a0)
; RV32-NEXT: sb zero, 8(a0)
-; RV32-NEXT: sb zero, 7(a0)
-; RV32-NEXT: sb zero, 6(a0)
-; RV32-NEXT: sb zero, 5(a0)
+; RV32-NEXT: sb zero, 9(a0)
+; RV32-NEXT: sb zero, 10(a0)
+; RV32-NEXT: sb zero, 11(a0)
; RV32-NEXT: sb zero, 4(a0)
-; RV32-NEXT: sb zero, 3(a0)
-; RV32-NEXT: sb zero, 2(a0)
-; RV32-NEXT: sb zero, 1(a0)
+; RV32-NEXT: sb zero, 5(a0)
+; RV32-NEXT: sb zero, 6(a0)
+; RV32-NEXT: sb zero, 7(a0)
; RV32-NEXT: sb zero, 0(a0)
+; RV32-NEXT: sb zero, 1(a0)
+; RV32-NEXT: sb zero, 2(a0)
+; RV32-NEXT: sb zero, 3(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: bzero_32:
; RV64: # %bb.0:
-; RV64-NEXT: sb zero, 31(a0)
-; RV64-NEXT: sb zero, 30(a0)
-; RV64-NEXT: sb zero, 29(a0)
; RV64-NEXT: sb zero, 28(a0)
-; RV64-NEXT: sb zero, 27(a0)
-; RV64-NEXT: sb zero, 26(a0)
-; RV64-NEXT: sb zero, 25(a0)
+; RV64-NEXT: sb zero, 29(a0)
+; RV64-NEXT: sb zero, 30(a0)
+; RV64-NEXT: sb zero, 31(a0)
; RV64-NEXT: sb zero, 24(a0)
-; RV64-NEXT: sb zero, 23(a0)
-; RV64-NEXT: sb zero, 22(a0)
-; RV64-NEXT: sb zero, 21(a0)
+; RV64-NEXT: sb zero, 25(a0)
+; RV64-NEXT: sb zero, 26(a0)
+; RV64-NEXT: sb zero, 27(a0)
; RV64-NEXT: sb zero, 20(a0)
-; RV64-NEXT: sb zero, 19(a0)
-; RV64-NEXT: sb zero, 18(a0)
-; RV64-NEXT: sb zero, 17(a0)
+; RV64-NEXT: sb zero, 21(a0)
+; RV64-NEXT: sb zero, 22(a0)
+; RV64-NEXT: sb zero, 23(a0)
; RV64-NEXT: sb zero, 16(a0)
-; RV64-NEXT: sb zero, 15(a0)
-; RV64-NEXT: sb zero, 14(a0)
-; RV64-NEXT: sb zero, 13(a0)
+; RV64-NEXT: sb zero, 17(a0)
+; RV64-NEXT: sb zero, 18(a0)
+; RV64-NEXT: sb zero, 19(a0)
; RV64-NEXT: sb zero, 12(a0)
-; RV64-NEXT: sb zero, 11(a0)
-; RV64-NEXT: sb zero, 10(a0)
-; RV64-NEXT: sb zero, 9(a0)
+; RV64-NEXT: sb zero, 13(a0)
+; RV64-NEXT: sb zero, 14(a0)
+; RV64-NEXT: sb zero, 15(a0)
; RV64-NEXT: sb zero, 8(a0)
-; RV64-NEXT: sb zero, 7(a0)
-; RV64-NEXT: sb zero, 6(a0)
-; RV64-NEXT: sb zero, 5(a0)
+; RV64-NEXT: sb zero, 9(a0)
+; RV64-NEXT: sb zero, 10(a0)
+; RV64-NEXT: sb zero, 11(a0)
; RV64-NEXT: sb zero, 4(a0)
-; RV64-NEXT: sb zero, 3(a0)
-; RV64-NEXT: sb zero, 2(a0)
-; RV64-NEXT: sb zero, 1(a0)
+; RV64-NEXT: sb zero, 5(a0)
+; RV64-NEXT: sb zero, 6(a0)
+; RV64-NEXT: sb zero, 7(a0)
; RV64-NEXT: sb zero, 0(a0)
+; RV64-NEXT: sb zero, 1(a0)
+; RV64-NEXT: sb zero, 2(a0)
+; RV64-NEXT: sb zero, 3(a0)
; RV64-NEXT: ret
;
; RV32-FAST-LABEL: bzero_32:
; RV32-FAST: # %bb.0:
-; RV32-FAST-NEXT: sw zero, 28(a0)
-; RV32-FAST-NEXT: sw zero, 24(a0)
-; RV32-FAST-NEXT: sw zero, 20(a0)
; RV32-FAST-NEXT: sw zero, 16(a0)
-; RV32-FAST-NEXT: sw zero, 12(a0)
-; RV32-FAST-NEXT: sw zero, 8(a0)
-; RV32-FAST-NEXT: sw zero, 4(a0)
+; RV32-FAST-NEXT: sw zero, 20(a0)
+; RV32-FAST-NEXT: sw zero, 24(a0)
+; RV32-FAST-NEXT: sw zero, 28(a0)
; RV32-FAST-NEXT: sw zero, 0(a0)
+; RV32-FAST-NEXT: sw zero, 4(a0)
+; RV32-FAST-NEXT: sw zero, 8(a0)
+; RV32-FAST-NEXT: sw zero, 12(a0)
; RV32-FAST-NEXT: ret
;
; RV64-FAST-LABEL: bzero_32:
; RV64-FAST: # %bb.0:
-; RV64-FAST-NEXT: sd zero, 24(a0)
-; RV64-FAST-NEXT: sd zero, 16(a0)
-; RV64-FAST-NEXT: sd zero, 8(a0)
; RV64-FAST-NEXT: sd zero, 0(a0)
+; RV64-FAST-NEXT: sd zero, 8(a0)
+; RV64-FAST-NEXT: sd zero, 16(a0)
+; RV64-FAST-NEXT: sd zero, 24(a0)
; RV64-FAST-NEXT: ret
tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 0, i64 32, i1 0)
ret void
@@ -950,170 +950,170 @@ define void @bzero_32(ptr %a) nounwind {
define void @bzero_64(ptr %a) nounwind {
; RV32-LABEL: bzero_64:
; RV32: # %bb.0:
-; RV32-NEXT: sb zero, 63(a0)
-; RV32-NEXT: sb zero, 62(a0)
-; RV32-NEXT: sb zero, 61(a0)
; RV32-NEXT: sb zero, 60(a0)
-; RV32-NEXT: sb zero, 59(a0)
-; RV32-NEXT: sb zero, 58(a0)
-; RV32-NEXT: sb zero, 57(a0)
+; RV32-NEXT: sb zero, 61(a0)
+; RV32-NEXT: sb zero, 62(a0)
+; RV32-NEXT: sb zero, 63(a0)
; RV32-NEXT: sb zero, 56(a0)
-; RV32-NEXT: sb zero, 55(a0)
-; RV32-NEXT: sb zero, 54(a0)
-; RV32-NEXT: sb zero, 53(a0)
+; RV32-NEXT: sb zero, 57(a0)
+; RV32-NEXT: sb zero, 58(a0)
+; RV32-NEXT: sb zero, 59(a0)
; RV32-NEXT: sb zero, 52(a0)
-; RV32-NEXT: sb zero, 51(a0)
-; RV32-NEXT: sb zero, 50(a0)
-; RV32-NEXT: sb zero, 49(a0)
+; RV32-NEXT: sb zero, 53(a0)
+; RV32-NEXT: sb zero, 54(a0)
+; RV32-NEXT: sb zero, 55(a0)
; RV32-NEXT: sb zero, 48(a0)
-; RV32-NEXT: sb zero, 47(a0)
-; RV32-NEXT: sb zero, 46(a0)
-; RV32-NEXT: sb zero, 45(a0)
+; RV32-NEXT: sb zero, 49(a0)
+; RV32-NEXT: sb zero, 50(a0)
+; RV32-NEXT: sb zero, 51(a0)
; RV32-NEXT: sb zero, 44(a0)
-; RV32-NEXT: sb zero, 43(a0)
-; RV32-NEXT: sb zero, 42(a0)
-; RV32-NEXT: sb zero, 41(a0)
+; RV32-NEXT: sb zero, 45(a0)
+; RV32-NEXT: sb zero, 46(a0)
+; RV32-NEXT: sb zero, 47(a0)
; RV32-NEXT: sb zero, 40(a0)
-; RV32-NEXT: sb zero, 39(a0)
-; RV32-NEXT: sb zero, 38(a0)
-; RV32-NEXT: sb zero, 37(a0)
+; RV32-NEXT: sb zero, 41(a0)
+; RV32-NEXT: sb zero, 42(a0)
+; RV32-NEXT: sb zero, 43(a0)
; RV32-NEXT: sb zero, 36(a0)
-; RV32-NEXT: sb zero, 35(a0)
-; RV32-NEXT: sb zero, 34(a0)
-; RV32-NEXT: sb zero, 33(a0)
+; RV32-NEXT: sb zero, 37(a0)
+; RV32-NEXT: sb zero, 38(a0)
+; RV32-NEXT: sb zero, 39(a0)
; RV32-NEXT: sb zero, 32(a0)
-; RV32-NEXT: sb zero, 31(a0)
-; RV32-NEXT: sb zero, 30(a0)
-; RV32-NEXT: sb zero, 29(a0)
+; RV32-NEXT: sb zero, 33(a0)
+; RV32-NEXT: sb zero, 34(a0)
+; RV32-NEXT: sb zero, 35(a0)
; RV32-NEXT: sb zero, 28(a0)
-; RV32-NEXT: sb zero, 27(a0)
-; RV32-NEXT: sb zero, 26(a0)
-; RV32-NEXT: sb zero, 25(a0)
+; RV32-NEXT: sb zero, 29(a0)
+; RV32-NEXT: sb zero, 30(a0)
+; RV32-NEXT: sb zero, 31(a0)
; RV32-NEXT: sb zero, 24(a0)
-; RV32-NEXT: sb zero, 23(a0)
-; RV32-NEXT: sb zero, 22(a0)
-; RV32-NEXT: sb zero, 21(a0)
+; RV32-NEXT: sb zero, 25(a0)
+; RV32-NEXT: sb zero, 26(a0)
+; RV32-NEXT: sb zero, 27(a0)
; RV32-NEXT: sb zero, 20(a0)
-; RV32-NEXT: sb zero, 19(a0)
-; RV32-NEXT: sb zero, 18(a0)
-; RV32-NEXT: sb zero, 17(a0)
+; RV32-NEXT: sb zero, 21(a0)
+; RV32-NEXT: sb zero, 22(a0)
+; RV32-NEXT: sb zero, 23(a0)
; RV32-NEXT: sb zero, 16(a0)
-; RV32-NEXT: sb zero, 15(a0)
-; RV32-NEXT: sb zero, 14(a0)
-; RV32-NEXT: sb zero, 13(a0)
+; RV32-NEXT: sb zero, 17(a0)
+; RV32-NEXT: sb zero, 18(a0)
+; RV32-NEXT: sb zero, 19(a0)
; RV32-NEXT: sb zero, 12(a0)
-; RV32-NEXT: sb zero, 11(a0)
-; RV32-NEXT: sb zero, 10(a0)
-; RV32-NEXT: sb zero, 9(a0)
+; RV32-NEXT: sb zero, 13(a0)
+; RV32-NEXT: sb zero, 14(a0)
+; RV32-NEXT: sb zero, 15(a0)
; RV32-NEXT: sb zero, 8(a0)
-; RV32-NEXT: sb zero, 7(a0)
-; RV32-NEXT: sb zero, 6(a0)
-; RV32-NEXT: sb zero, 5(a0)
+; RV32-NEXT: sb zero, 9(a0)
+; RV32-NEXT: sb zero, 10(a0)
+; RV32-NEXT: sb zero, 11(a0)
; RV32-NEXT: sb zero, 4(a0)
-; RV32-NEXT: sb zero, 3(a0)
-; RV32-NEXT: sb zero, 2(a0)
-; RV32-NEXT: sb zero, 1(a0)
+; RV32-NEXT: sb zero, 5(a0)
+; RV32-NEXT: sb zero, 6(a0)
+; RV32-NEXT: sb zero, 7(a0)
; RV32-NEXT: sb zero, 0(a0)
+; RV32-NEXT: sb zero, 1(a0)
+; RV32-NEXT: sb zero, 2(a0)
+; RV32-NEXT: sb zero, 3(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: bzero_64:
; RV64: # %bb.0:
-; RV64-NEXT: sb zero, 63(a0)
-; RV64-NEXT: sb zero, 62(a0)
-; RV64-NEXT: sb zero, 61(a0)
; RV64-NEXT: sb zero, 60(a0)
-; RV64-NEXT: sb zero, 59(a0)
-; RV64-NEXT: sb zero, 58(a0)
-; RV64-NEXT: sb zero, 57(a0)
+; RV64-NEXT: sb zero, 61(a0)
+; RV64-NEXT: sb zero, 62(a0)
+; RV64-NEXT: sb zero, 63(a0)
; RV64-NEXT: sb zero, 56(a0)
-; RV64-NEXT: sb zero, 55(a0)
-; RV64-NEXT: sb zero, 54(a0)
-; RV64-NEXT: sb zero, 53(a0)
+; RV64-NEXT: sb zero, 57(a0)
+; RV64-NEXT: sb zero, 58(a0)
+; RV64-NEXT: sb zero, 59(a0)
; RV64-NEXT: sb zero, 52(a0)
-; RV64-NEXT: sb zero, 51(a0)
-; RV64-NEXT: sb zero, 50(a0)
-; RV64-NEXT: sb zero, 49(a0)
+; RV64-NEXT: sb zero, 53(a0)
+; RV64-NEXT: sb zero, 54(a0)
+; RV64-NEXT: sb zero, 55(a0)
; RV64-NEXT: sb zero, 48(a0)
-; RV64-NEXT: sb zero, 47(a0)
-; RV64-NEXT: sb zero, 46(a0)
-; RV64-NEXT: sb zero, 45(a0)
+; RV64-NEXT: sb zero, 49(a0)
+; RV64-NEXT: sb zero, 50(a0)
+; RV64-NEXT: sb zero, 51(a0)
; RV64-NEXT: sb zero, 44(a0)
-; RV64-NEXT: sb zero, 43(a0)
-; RV64-NEXT: sb zero, 42(a0)
-; RV64-NEXT: sb zero, 41(a0)
+; RV64-NEXT: sb zero, 45(a0)
+; RV64-NEXT: sb zero, 46(a0)
+; RV64-NEXT: sb zero, 47(a0)
; RV64-NEXT: sb zero, 40(a0)
-; RV64-NEXT: sb zero, 39(a0)
-; RV64-NEXT: sb zero, 38(a0)
-; RV64-NEXT: sb zero, 37(a0)
+; RV64-NEXT: sb zero, 41(a0)
+; RV64-NEXT: sb zero, 42(a0)
+; RV64-NEXT: sb zero, 43(a0)
; RV64-NEXT: sb zero, 36(a0)
-; RV64-NEXT: sb zero, 35(a0)
-; RV64-NEXT: sb zero, 34(a0)
-; RV64-NEXT: sb zero, 33(a0)
+; RV64-NEXT: sb zero, 37(a0)
+; RV64-NEXT: sb zero, 38(a0)
+; RV64-NEXT: sb zero, 39(a0)
; RV64-NEXT: sb zero, 32(a0)
-; RV64-NEXT: sb zero, 31(a0)
-; RV64-NEXT: sb zero, 30(a0)
-; RV64-NEXT: sb zero, 29(a0)
+; RV64-NEXT: sb zero, 33(a0)
+; RV64-NEXT: sb zero, 34(a0)
+; RV64-NEXT: sb zero, 35(a0)
; RV64-NEXT: sb zero, 28(a0)
-; RV64-NEXT: sb zero, 27(a0)
-; RV64-NEXT: sb zero, 26(a0)
-; RV64-NEXT: sb zero, 25(a0)
+; RV64-NEXT: sb zero, 29(a0)
+; RV64-NEXT: sb zero, 30(a0)
+; RV64-NEXT: sb zero, 31(a0)
; RV64-NEXT: sb zero, 24(a0)
-; RV64-NEXT: sb zero, 23(a0)
-; RV64-NEXT: sb zero, 22(a0)
-; RV64-NEXT: sb zero, 21(a0)
+; RV64-NEXT: sb zero, 25(a0)
+; RV64-NEXT: sb zero, 26(a0)
+; RV64-NEXT: sb zero, 27(a0)
; RV64-NEXT: sb zero, 20(a0)
-; RV64-NEXT: sb zero, 19(a0)
-; RV64-NEXT: sb zero, 18(a0)
-; RV64-NEXT: sb zero, 17(a0)
+; RV64-NEXT: sb zero, 21(a0)
+; RV64-NEXT: sb zero, 22(a0)
+; RV64-NEXT: sb zero, 23(a0)
; RV64-NEXT: sb zero, 16(a0)
-; RV64-NEXT: sb zero, 15(a0)
-; RV64-NEXT: sb zero, 14(a0)
-; RV64-NEXT: sb zero, 13(a0)
+; RV64-NEXT: sb zero, 17(a0)
+; RV64-NEXT: sb zero, 18(a0)
+; RV64-NEXT: sb zero, 19(a0)
; RV64-NEXT: sb zero, 12(a0)
-; RV64-NEXT: sb zero, 11(a0)
-; RV64-NEXT: sb zero, 10(a0)
-; RV64-NEXT: sb zero, 9(a0)
+; RV64-NEXT: sb zero, 13(a0)
+; RV64-NEXT: sb zero, 14(a0)
+; RV64-NEXT: sb zero, 15(a0)
; RV64-NEXT: sb zero, 8(a0)
-; RV64-NEXT: sb zero, 7(a0)
-; RV64-NEXT: sb zero, 6(a0)
-; RV64-NEXT: sb zero, 5(a0)
+; RV64-NEXT: sb zero, 9(a0)
+; RV64-NEXT: sb zero, 10(a0)
+; RV64-NEXT: sb zero, 11(a0)
; RV64-NEXT: sb zero, 4(a0)
-; RV64-NEXT: sb zero, 3(a0)
-; RV64-NEXT: sb zero, 2(a0)
-; RV64-NEXT: sb zero, 1(a0)
+; RV64-NEXT: sb zero, 5(a0)
+; RV64-NEXT: sb zero, 6(a0)
+; RV64-NEXT: sb zero, 7(a0)
; RV64-NEXT: sb zero, 0(a0)
+; RV64-NEXT: sb zero, 1(a0)
+; RV64-NEXT: sb zero, 2(a0)
+; RV64-NEXT: sb zero, 3(a0)
; RV64-NEXT: ret
;
; RV32-FAST-LABEL: bzero_64:
; RV32-FAST: # %bb.0:
-; RV32-FAST-NEXT: sw zero, 60(a0)
-; RV32-FAST-NEXT: sw zero, 56(a0)
-; RV32-FAST-NEXT: sw zero, 52(a0)
; RV32-FAST-NEXT: sw zero, 48(a0)
-; RV32-FAST-NEXT: sw zero, 44(a0)
-; RV32-FAST-NEXT: sw zero, 40(a0)
-; RV32-FAST-NEXT: sw zero, 36(a0)
+; RV32-FAST-NEXT: sw zero, 52(a0)
+; RV32-FAST-NEXT: sw zero, 56(a0)
+; RV32-FAST-NEXT: sw zero, 60(a0)
; RV32-FAST-NEXT: sw zero, 32(a0)
-; RV32-FAST-NEXT: sw zero, 28(a0)
-; RV32-FAST-NEXT: sw zero, 24(a0)
-; RV32-FAST-NEXT: sw zero, 20(a0)
+; RV32-FAST-NEXT: sw zero, 36(a0)
+; RV32-FAST-NEXT: sw zero, 40(a0)
+; RV32-FAST-NEXT: sw zero, 44(a0)
; RV32-FAST-NEXT: sw zero, 16(a0)
-; RV32-FAST-NEXT: sw zero, 12(a0)
-; RV32-FAST-NEXT: sw zero, 8(a0)
-; RV32-FAST-NEXT: sw zero, 4(a0)
+; RV32-FAST-NEXT: sw zero, 20(a0)
+; RV32-FAST-NEXT: sw zero, 24(a0)
+; RV32-FAST-NEXT: sw zero, 28(a0)
; RV32-FAST-NEXT: sw zero, 0(a0)
+; RV32-FAST-NEXT: sw zero, 4(a0)
+; RV32-FAST-NEXT: sw zero, 8(a0)
+; RV32-FAST-NEXT: sw zero, 12(a0)
; RV32-FAST-NEXT: ret
;
; RV64-FAST-LABEL: bzero_64:
; RV64-FAST: # %bb.0:
-; RV64-FAST-NEXT: sd zero, 56(a0)
-; RV64-FAST-NEXT: sd zero, 48(a0)
-; RV64-FAST-NEXT: sd zero, 40(a0)
; RV64-FAST-NEXT: sd zero, 32(a0)
-; RV64-FAST-NEXT: sd zero, 24(a0)
-; RV64-FAST-NEXT: sd zero, 16(a0)
-; RV64-FAST-NEXT: sd zero, 8(a0)
+; RV64-FAST-NEXT: sd zero, 40(a0)
+; RV64-FAST-NEXT: sd zero, 48(a0)
+; RV64-FAST-NEXT: sd zero, 56(a0)
; RV64-FAST-NEXT: sd zero, 0(a0)
+; RV64-FAST-NEXT: sd zero, 8(a0)
+; RV64-FAST-NEXT: sd zero, 16(a0)
+; RV64-FAST-NEXT: sd zero, 24(a0)
; RV64-FAST-NEXT: ret
tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 0, i64 64, i1 0)
ret void
@@ -1152,8 +1152,8 @@ define void @aligned_bzero_4(ptr %a) nounwind {
define void @aligned_bzero_8(ptr %a) nounwind {
; RV32-BOTH-LABEL: aligned_bzero_8:
; RV32-BOTH: # %bb.0:
-; RV32-BOTH-NEXT: sw zero, 4(a0)
; RV32-BOTH-NEXT: sw zero, 0(a0)
+; RV32-BOTH-NEXT: sw zero, 4(a0)
; RV32-BOTH-NEXT: ret
;
; RV64-BOTH-LABEL: aligned_bzero_8:
@@ -1168,16 +1168,16 @@ define void @aligned_bzero_8(ptr %a) nounwind {
define void @aligned_bzero_16(ptr %a) nounwind {
; RV32-BOTH-LABEL: aligned_bzero_16:
; RV32-BOTH: # %bb.0:
-; RV32-BOTH-NEXT: sw zero, 12(a0)
-; RV32-BOTH-NEXT: sw zero, 8(a0)
-; RV32-BOTH-NEXT: sw zero, 4(a0)
; RV32-BOTH-NEXT: sw zero, 0(a0)
+; RV32-BOTH-NEXT: sw zero, 4(a0)
+; RV32-BOTH-NEXT: sw zero, 8(a0)
+; RV32-BOTH-NEXT: sw zero, 12(a0)
; RV32-BOTH-NEXT: ret
;
; RV64-BOTH-LABEL: aligned_bzero_16:
; RV64-BOTH: # %bb.0:
-; RV64-BOTH-NEXT: sd zero, 8(a0)
; RV64-BOTH-NEXT: sd zero, 0(a0)
+; RV64-BOTH-NEXT: sd zero, 8(a0)
; RV64-BOTH-NEXT: ret
tail call void @llvm.memset.inline.p0.i64(ptr align 16 %a, i8 0, i64 16, i1 0)
ret void
@@ -1186,22 +1186,22 @@ define void @aligned_bzero_16(ptr %a) nounwind {
define void @aligned_bzero_32(ptr %a) nounwind {
; RV32-BOTH-LABEL: aligned_bzero_32:
; RV32-BOTH: # %bb.0:
-; RV32-BOTH-NEXT: sw zero, 28(a0)
-; RV32-BOTH-NEXT: sw zero, 24(a0)
-; RV32-BOTH-NEXT: sw zero, 20(a0)
; RV32-BOTH-NEXT: sw zero, 16(a0)
-; RV32-BOTH-NEXT: sw zero, 12(a0)
-; RV32-BOTH-NEXT: sw zero, 8(a0)
-; RV32-BOTH-NEXT: sw zero, 4(a0)
+; RV32-BOTH-NEXT: sw zero, 20(a0)
+; RV32-BOTH-NEXT: sw zero, 24(a0)
+; RV32-BOTH-NEXT: sw zero, 28(a0)
; RV32-BOTH-NEXT: sw zero, 0(a0)
+; RV32-BOTH-NEXT: sw zero, 4(a0)
+; RV32-BOTH-NEXT: sw zero, 8(a0)
+; RV32-BOTH-NEXT: sw zero, 12(a0)
; RV32-BOTH-NEXT: ret
;
; RV64-BOTH-LABEL: aligned_bzero_32:
; RV64-BOTH: # %bb.0:
-; RV64-BOTH-NEXT: sd zero, 24(a0)
-; RV64-BOTH-NEXT: sd zero, 16(a0)
-; RV64-BOTH-NEXT: sd zero, 8(a0)
; RV64-BOTH-NEXT: sd zero, 0(a0)
+; RV64-BOTH-NEXT: sd zero, 8(a0)
+; RV64-BOTH-NEXT: sd zero, 16(a0)
+; RV64-BOTH-NEXT: sd zero, 24(a0)
; RV64-BOTH-NEXT: ret
tail call void @llvm.memset.inline.p0.i64(ptr align 32 %a, i8 0, i64 32, i1 0)
ret void
@@ -1210,34 +1210,34 @@ define void @aligned_bzero_32(ptr %a) nounwind {
define void @aligned_bzero_64(ptr %a) nounwind {
; RV32-BOTH-LABEL: aligned_bzero_64:
; RV32-BOTH: # %bb.0:
-; RV32-BOTH-NEXT: sw zero, 60(a0)
-; RV32-BOTH-NEXT: sw zero, 56(a0)
-; RV32-BOTH-NEXT: sw zero, 52(a0)
; RV32-BOTH-NEXT: sw zero, 48(a0)
-; RV32-BOTH-NEXT: sw zero, 44(a0)
-; RV32-BOTH-NEXT: sw zero, 40(a0)
-; RV32-BOTH-NEXT: sw zero, 36(a0)
+; RV32-BOTH-NEXT: sw zero, 52(a0)
+; RV32-BOTH-NEXT: sw zero, 56(a0)
+; RV32-BOTH-NEXT: sw zero, 60(a0)
; RV32-BOTH-NEXT: sw zero, 32(a0)
-; RV32-BOTH-NEXT: sw zero, 28(a0)
-; RV32-BOTH-NEXT: sw zero, 24(a0)
-; RV32-BOTH-NEXT: sw zero, 20(a0)
+; RV32-BOTH-NEXT: sw zero, 36(a0)
+; RV32-BOTH-NEXT: sw zero, 40(a0)
+; RV32-BOTH-NEXT: sw zero, 44(a0)
; RV32-BOTH-NEXT: sw zero, 16(a0)
-; RV32-BOTH-NEXT: sw zero, 12(a0)
-; RV32-BOTH-NEXT: sw zero, 8(a0)
-; RV32-BOTH-NEXT: sw zero, 4(a0)
+; RV32-BOTH-NEXT: sw zero, 20(a0)
+; RV32-BOTH-NEXT: sw zero, 24(a0)
+; RV32-BOTH-NEXT: sw zero, 28(a0)
; RV32-BOTH-NEXT: sw zero, 0(a0)
+; RV32-BOTH-NEXT: sw zero, 4(a0)
+; RV32-BOTH-NEXT: sw zero, 8(a0)
+; RV32-BOTH-NEXT: sw zero, 12(a0)
; RV32-BOTH-NEXT: ret
;
; RV64-BOTH-LABEL: aligned_bzero_64:
; RV64-BOTH: # %bb.0:
-; RV64-BOTH-NEXT: sd zero, 56(a0)
-; RV64-BOTH-NEXT: sd zero, 48(a0)
-; RV64-BOTH-NEXT: sd zero, 40(a0)
; RV64-BOTH-NEXT: sd zero, 32(a0)
-; RV64-BOTH-NEXT: sd zero, 24(a0)
-; RV64-BOTH-NEXT: sd zero, 16(a0)
-; RV64-BOTH-NEXT: sd zero, 8(a0)
+; RV64-BOTH-NEXT: sd zero, 40(a0)
+; RV64-BOTH-NEXT: sd zero, 48(a0)
+; RV64-BOTH-NEXT: sd zero, 56(a0)
; RV64-BOTH-NEXT: sd zero, 0(a0)
+; RV64-BOTH-NEXT: sd zero, 8(a0)
+; RV64-BOTH-NEXT: sd zero, 16(a0)
+; RV64-BOTH-NEXT: sd zero, 24(a0)
; RV64-BOTH-NEXT: ret
tail call void @llvm.memset.inline.p0.i64(ptr align 64 %a, i8 0, i64 64, i1 0)
ret void
@@ -1250,16 +1250,16 @@ define void @aligned_bzero_64(ptr %a) nounwind {
define void @aligned_bzero_7(ptr %a) nounwind {
; RV32-LABEL: aligned_bzero_7:
; RV32: # %bb.0:
-; RV32-NEXT: sb zero, 6(a0)
-; RV32-NEXT: sh zero, 4(a0)
; RV32-NEXT: sw zero, 0(a0)
+; RV32-NEXT: sh zero, 4(a0)
+; RV32-NEXT: sb zero, 6(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: aligned_bzero_7:
; RV64: # %bb.0:
-; RV64-NEXT: sb zero, 6(a0)
-; RV64-NEXT: sh zero, 4(a0)
; RV64-NEXT: sw zero, 0(a0)
+; RV64-NEXT: sh zero, 4(a0)
+; RV64-NEXT: sb zero, 6(a0)
; RV64-NEXT: ret
;
; RV32-FAST-LABEL: aligned_bzero_7:
@@ -1281,26 +1281,26 @@ define void @aligned_bzero_15(ptr %a) nounwind {
; RV32-LABEL: aligned_bzero_15:
; RV32: # %bb.0:
; RV32-NEXT: sb zero, 14(a0)
-; RV32-NEXT: sh zero, 12(a0)
-; RV32-NEXT: sw zero, 8(a0)
-; RV32-NEXT: sw zero, 4(a0)
; RV32-NEXT: sw zero, 0(a0)
+; RV32-NEXT: sw zero, 4(a0)
+; RV32-NEXT: sw zero, 8(a0)
+; RV32-NEXT: sh zero, 12(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: aligned_bzero_15:
; RV64: # %bb.0:
-; RV64-NEXT: sb zero, 14(a0)
-; RV64-NEXT: sh zero, 12(a0)
-; RV64-NEXT: sw zero, 8(a0)
; RV64-NEXT: sd zero, 0(a0)
+; RV64-NEXT: sw zero, 8(a0)
+; RV64-NEXT: sh zero, 12(a0)
+; RV64-NEXT: sb zero, 14(a0)
; RV64-NEXT: ret
;
; RV32-FAST-LABEL: aligned_bzero_15:
; RV32-FAST: # %bb.0:
; RV32-FAST-NEXT: sw zero, 11(a0)
-; RV32-FAST-NEXT: sw zero, 8(a0)
-; RV32-FAST-NEXT: sw zero, 4(a0)
; RV32-FAST-NEXT: sw zero, 0(a0)
+; RV32-FAST-NEXT: sw zero, 4(a0)
+; RV32-FAST-NEXT: sw zero, 8(a0)
; RV32-FAST-NEXT: ret
;
; RV64-FAST-LABEL: aligned_bzero_15:
diff --git a/llvm/test/CodeGen/RISCV/neg-abs.ll b/llvm/test/CodeGen/RISCV/neg-abs.ll
index 6f301882b452c0..676b4134461163 100644
--- a/llvm/test/CodeGen/RISCV/neg-abs.ll
+++ b/llvm/test/CodeGen/RISCV/neg-abs.ll
@@ -208,12 +208,13 @@ define i64 @neg_abs64_multiuse(i64 %x, ptr %y) {
; RV32I-NEXT: sub a1, a1, a3
; RV32I-NEXT: neg a0, a0
; RV32I-NEXT: .LBB5_2:
-; RV32I-NEXT: sw a0, 0(a2)
; RV32I-NEXT: snez a3, a0
; RV32I-NEXT: neg a4, a1
; RV32I-NEXT: sub a3, a4, a3
-; RV32I-NEXT: neg a0, a0
+; RV32I-NEXT: neg a4, a0
+; RV32I-NEXT: sw a0, 0(a2)
; RV32I-NEXT: sw a1, 4(a2)
+; RV32I-NEXT: mv a0, a4
; RV32I-NEXT: mv a1, a3
; RV32I-NEXT: ret
;
@@ -226,12 +227,13 @@ define i64 @neg_abs64_multiuse(i64 %x, ptr %y) {
; RV32ZBB-NEXT: sub a1, a1, a3
; RV32ZBB-NEXT: neg a0, a0
; RV32ZBB-NEXT: .LBB5_2:
-; RV32ZBB-NEXT: sw a0, 0(a2)
; RV32ZBB-NEXT: snez a3, a0
; RV32ZBB-NEXT: neg a4, a1
; RV32ZBB-NEXT: sub a3, a4, a3
-; RV32ZBB-NEXT: neg a0, a0
+; RV32ZBB-NEXT: neg a4, a0
+; RV32ZBB-NEXT: sw a0, 0(a2)
; RV32ZBB-NEXT: sw a1, 4(a2)
+; RV32ZBB-NEXT: mv a0, a4
; RV32ZBB-NEXT: mv a1, a3
; RV32ZBB-NEXT: ret
;
diff --git a/llvm/test/CodeGen/RISCV/nontemporal.ll b/llvm/test/CodeGen/RISCV/nontemporal.ll
index 55bd32e4857345..ada0758831a529 100644
--- a/llvm/test/CodeGen/RISCV/nontemporal.ll
+++ b/llvm/test/CodeGen/RISCV/nontemporal.ll
@@ -327,53 +327,53 @@ define <16 x i8> @test_nontemporal_load_v16i8(ptr %p) {
; CHECK-RV64-LABEL: test_nontemporal_load_v16i8:
; CHECK-RV64: # %bb.0:
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: ld a2, 8(a1)
+; CHECK-RV64-NEXT: ld a2, 0(a1)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: ld a1, 0(a1)
-; CHECK-RV64-NEXT: sd a2, 8(a0)
-; CHECK-RV64-NEXT: sd a1, 0(a0)
+; CHECK-RV64-NEXT: ld a1, 8(a1)
+; CHECK-RV64-NEXT: sd a2, 0(a0)
+; CHECK-RV64-NEXT: sd a1, 8(a0)
; CHECK-RV64-NEXT: ret
;
; CHECK-RV32-LABEL: test_nontemporal_load_v16i8:
; CHECK-RV32: # %bb.0:
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: lw a2, 12(a1)
+; CHECK-RV32-NEXT: lw a2, 0(a1)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: lw a3, 8(a1)
+; CHECK-RV32-NEXT: lw a3, 4(a1)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: lw a4, 4(a1)
+; CHECK-RV32-NEXT: lw a4, 8(a1)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: lw a1, 0(a1)
-; CHECK-RV32-NEXT: sw a2, 12(a0)
-; CHECK-RV32-NEXT: sw a3, 8(a0)
-; CHECK-RV32-NEXT: sw a4, 4(a0)
-; CHECK-RV32-NEXT: sw a1, 0(a0)
+; CHECK-RV32-NEXT: lw a1, 12(a1)
+; CHECK-RV32-NEXT: sw a2, 0(a0)
+; CHECK-RV32-NEXT: sw a3, 4(a0)
+; CHECK-RV32-NEXT: sw a4, 8(a0)
+; CHECK-RV32-NEXT: sw a1, 12(a0)
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64C-LABEL: test_nontemporal_load_v16i8:
; CHECK-RV64C: # %bb.0:
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: ld a2, 8(a1)
+; CHECK-RV64C-NEXT: ld a2, 0(a1)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: ld a1, 0(a1)
-; CHECK-RV64C-NEXT: sd a2, 8(a0)
-; CHECK-RV64C-NEXT: sd a1, 0(a0)
+; CHECK-RV64C-NEXT: ld a1, 8(a1)
+; CHECK-RV64C-NEXT: sd a2, 0(a0)
+; CHECK-RV64C-NEXT: sd a1, 8(a0)
; CHECK-RV64C-NEXT: ret
;
; CHECK-RV32C-LABEL: test_nontemporal_load_v16i8:
; CHECK-RV32C: # %bb.0:
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: lw a2, 12(a1)
+; CHECK-RV32C-NEXT: lw a2, 0(a1)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: lw a3, 8(a1)
+; CHECK-RV32C-NEXT: lw a3, 4(a1)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: lw a4, 4(a1)
+; CHECK-RV32C-NEXT: lw a4, 8(a1)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: lw a1, 0(a1)
-; CHECK-RV32C-NEXT: sw a2, 12(a0)
-; CHECK-RV32C-NEXT: sw a3, 8(a0)
-; CHECK-RV32C-NEXT: sw a4, 4(a0)
-; CHECK-RV32C-NEXT: sw a1, 0(a0)
+; CHECK-RV32C-NEXT: lw a1, 12(a1)
+; CHECK-RV32C-NEXT: sw a2, 0(a0)
+; CHECK-RV32C-NEXT: sw a3, 4(a0)
+; CHECK-RV32C-NEXT: sw a4, 8(a0)
+; CHECK-RV32C-NEXT: sw a1, 12(a0)
; CHECK-RV32C-NEXT: ret
;
; CHECK-RV64V-LABEL: test_nontemporal_load_v16i8:
@@ -398,53 +398,53 @@ define <8 x i16> @test_nontemporal_load_v8i16(ptr %p) {
; CHECK-RV64-LABEL: test_nontemporal_load_v8i16:
; CHECK-RV64: # %bb.0:
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: ld a2, 8(a1)
+; CHECK-RV64-NEXT: ld a2, 0(a1)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: ld a1, 0(a1)
-; CHECK-RV64-NEXT: sd a2, 8(a0)
-; CHECK-RV64-NEXT: sd a1, 0(a0)
+; CHECK-RV64-NEXT: ld a1, 8(a1)
+; CHECK-RV64-NEXT: sd a2, 0(a0)
+; CHECK-RV64-NEXT: sd a1, 8(a0)
; CHECK-RV64-NEXT: ret
;
; CHECK-RV32-LABEL: test_nontemporal_load_v8i16:
; CHECK-RV32: # %bb.0:
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: lw a2, 12(a1)
+; CHECK-RV32-NEXT: lw a2, 0(a1)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: lw a3, 8(a1)
+; CHECK-RV32-NEXT: lw a3, 4(a1)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: lw a4, 4(a1)
+; CHECK-RV32-NEXT: lw a4, 8(a1)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: lw a1, 0(a1)
-; CHECK-RV32-NEXT: sw a2, 12(a0)
-; CHECK-RV32-NEXT: sw a3, 8(a0)
-; CHECK-RV32-NEXT: sw a4, 4(a0)
-; CHECK-RV32-NEXT: sw a1, 0(a0)
+; CHECK-RV32-NEXT: lw a1, 12(a1)
+; CHECK-RV32-NEXT: sw a2, 0(a0)
+; CHECK-RV32-NEXT: sw a3, 4(a0)
+; CHECK-RV32-NEXT: sw a4, 8(a0)
+; CHECK-RV32-NEXT: sw a1, 12(a0)
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64C-LABEL: test_nontemporal_load_v8i16:
; CHECK-RV64C: # %bb.0:
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: ld a2, 8(a1)
+; CHECK-RV64C-NEXT: ld a2, 0(a1)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: ld a1, 0(a1)
-; CHECK-RV64C-NEXT: sd a2, 8(a0)
-; CHECK-RV64C-NEXT: sd a1, 0(a0)
+; CHECK-RV64C-NEXT: ld a1, 8(a1)
+; CHECK-RV64C-NEXT: sd a2, 0(a0)
+; CHECK-RV64C-NEXT: sd a1, 8(a0)
; CHECK-RV64C-NEXT: ret
;
; CHECK-RV32C-LABEL: test_nontemporal_load_v8i16:
; CHECK-RV32C: # %bb.0:
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: lw a2, 12(a1)
+; CHECK-RV32C-NEXT: lw a2, 0(a1)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: lw a3, 8(a1)
+; CHECK-RV32C-NEXT: lw a3, 4(a1)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: lw a4, 4(a1)
+; CHECK-RV32C-NEXT: lw a4, 8(a1)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: lw a1, 0(a1)
-; CHECK-RV32C-NEXT: sw a2, 12(a0)
-; CHECK-RV32C-NEXT: sw a3, 8(a0)
-; CHECK-RV32C-NEXT: sw a4, 4(a0)
-; CHECK-RV32C-NEXT: sw a1, 0(a0)
+; CHECK-RV32C-NEXT: lw a1, 12(a1)
+; CHECK-RV32C-NEXT: sw a2, 0(a0)
+; CHECK-RV32C-NEXT: sw a3, 4(a0)
+; CHECK-RV32C-NEXT: sw a4, 8(a0)
+; CHECK-RV32C-NEXT: sw a1, 12(a0)
; CHECK-RV32C-NEXT: ret
;
; CHECK-RV64V-LABEL: test_nontemporal_load_v8i16:
@@ -469,53 +469,53 @@ define <4 x i32> @test_nontemporal_load_v4i32(ptr %p) {
; CHECK-RV64-LABEL: test_nontemporal_load_v4i32:
; CHECK-RV64: # %bb.0:
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: ld a2, 8(a1)
+; CHECK-RV64-NEXT: ld a2, 0(a1)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: ld a1, 0(a1)
-; CHECK-RV64-NEXT: sd a2, 8(a0)
-; CHECK-RV64-NEXT: sd a1, 0(a0)
+; CHECK-RV64-NEXT: ld a1, 8(a1)
+; CHECK-RV64-NEXT: sd a2, 0(a0)
+; CHECK-RV64-NEXT: sd a1, 8(a0)
; CHECK-RV64-NEXT: ret
;
; CHECK-RV32-LABEL: test_nontemporal_load_v4i32:
; CHECK-RV32: # %bb.0:
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: lw a2, 12(a1)
+; CHECK-RV32-NEXT: lw a2, 0(a1)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: lw a3, 8(a1)
+; CHECK-RV32-NEXT: lw a3, 4(a1)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: lw a4, 4(a1)
+; CHECK-RV32-NEXT: lw a4, 8(a1)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: lw a1, 0(a1)
-; CHECK-RV32-NEXT: sw a2, 12(a0)
-; CHECK-RV32-NEXT: sw a3, 8(a0)
-; CHECK-RV32-NEXT: sw a4, 4(a0)
-; CHECK-RV32-NEXT: sw a1, 0(a0)
+; CHECK-RV32-NEXT: lw a1, 12(a1)
+; CHECK-RV32-NEXT: sw a2, 0(a0)
+; CHECK-RV32-NEXT: sw a3, 4(a0)
+; CHECK-RV32-NEXT: sw a4, 8(a0)
+; CHECK-RV32-NEXT: sw a1, 12(a0)
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64C-LABEL: test_nontemporal_load_v4i32:
; CHECK-RV64C: # %bb.0:
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: ld a2, 8(a1)
+; CHECK-RV64C-NEXT: ld a2, 0(a1)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: ld a1, 0(a1)
-; CHECK-RV64C-NEXT: sd a2, 8(a0)
-; CHECK-RV64C-NEXT: sd a1, 0(a0)
+; CHECK-RV64C-NEXT: ld a1, 8(a1)
+; CHECK-RV64C-NEXT: sd a2, 0(a0)
+; CHECK-RV64C-NEXT: sd a1, 8(a0)
; CHECK-RV64C-NEXT: ret
;
; CHECK-RV32C-LABEL: test_nontemporal_load_v4i32:
; CHECK-RV32C: # %bb.0:
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: lw a2, 12(a1)
+; CHECK-RV32C-NEXT: lw a2, 0(a1)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: lw a3, 8(a1)
+; CHECK-RV32C-NEXT: lw a3, 4(a1)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: lw a4, 4(a1)
+; CHECK-RV32C-NEXT: lw a4, 8(a1)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: lw a1, 0(a1)
-; CHECK-RV32C-NEXT: sw a2, 12(a0)
-; CHECK-RV32C-NEXT: sw a3, 8(a0)
-; CHECK-RV32C-NEXT: sw a4, 4(a0)
-; CHECK-RV32C-NEXT: sw a1, 0(a0)
+; CHECK-RV32C-NEXT: lw a1, 12(a1)
+; CHECK-RV32C-NEXT: sw a2, 0(a0)
+; CHECK-RV32C-NEXT: sw a3, 4(a0)
+; CHECK-RV32C-NEXT: sw a4, 8(a0)
+; CHECK-RV32C-NEXT: sw a1, 12(a0)
; CHECK-RV32C-NEXT: ret
;
; CHECK-RV64V-LABEL: test_nontemporal_load_v4i32:
@@ -549,17 +549,17 @@ define <2 x i64> @test_nontemporal_load_v2i64(ptr %p) {
; CHECK-RV32-LABEL: test_nontemporal_load_v2i64:
; CHECK-RV32: # %bb.0:
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: lw a2, 12(a1)
+; CHECK-RV32-NEXT: lw a2, 0(a1)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: lw a3, 8(a1)
+; CHECK-RV32-NEXT: lw a3, 4(a1)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: lw a4, 4(a1)
+; CHECK-RV32-NEXT: lw a4, 8(a1)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: lw a1, 0(a1)
-; CHECK-RV32-NEXT: sw a2, 12(a0)
-; CHECK-RV32-NEXT: sw a3, 8(a0)
-; CHECK-RV32-NEXT: sw a4, 4(a0)
-; CHECK-RV32-NEXT: sw a1, 0(a0)
+; CHECK-RV32-NEXT: lw a1, 12(a1)
+; CHECK-RV32-NEXT: sw a2, 0(a0)
+; CHECK-RV32-NEXT: sw a3, 4(a0)
+; CHECK-RV32-NEXT: sw a4, 8(a0)
+; CHECK-RV32-NEXT: sw a1, 12(a0)
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64C-LABEL: test_nontemporal_load_v2i64:
@@ -574,17 +574,17 @@ define <2 x i64> @test_nontemporal_load_v2i64(ptr %p) {
; CHECK-RV32C-LABEL: test_nontemporal_load_v2i64:
; CHECK-RV32C: # %bb.0:
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: lw a2, 12(a1)
+; CHECK-RV32C-NEXT: lw a2, 0(a1)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: lw a3, 8(a1)
+; CHECK-RV32C-NEXT: lw a3, 4(a1)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: lw a4, 4(a1)
+; CHECK-RV32C-NEXT: lw a4, 8(a1)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: lw a1, 0(a1)
-; CHECK-RV32C-NEXT: sw a2, 12(a0)
-; CHECK-RV32C-NEXT: sw a3, 8(a0)
-; CHECK-RV32C-NEXT: sw a4, 4(a0)
-; CHECK-RV32C-NEXT: sw a1, 0(a0)
+; CHECK-RV32C-NEXT: lw a1, 12(a1)
+; CHECK-RV32C-NEXT: sw a2, 0(a0)
+; CHECK-RV32C-NEXT: sw a3, 4(a0)
+; CHECK-RV32C-NEXT: sw a4, 8(a0)
+; CHECK-RV32C-NEXT: sw a1, 12(a0)
; CHECK-RV32C-NEXT: ret
;
; CHECK-RV64V-LABEL: test_nontemporal_load_v2i64:
@@ -615,9 +615,9 @@ define void @test_nontemporal_store_i64(ptr %p, i64 %v) {
; CHECK-RV32-LABEL: test_nontemporal_store_i64:
; CHECK-RV32: # %bb.0:
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sw a2, 4(a0)
-; CHECK-RV32-NEXT: ntl.all
; CHECK-RV32-NEXT: sw a1, 0(a0)
+; CHECK-RV32-NEXT: ntl.all
+; CHECK-RV32-NEXT: sw a2, 4(a0)
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64C-LABEL: test_nontemporal_store_i64:
@@ -629,9 +629,9 @@ define void @test_nontemporal_store_i64(ptr %p, i64 %v) {
; CHECK-RV32C-LABEL: test_nontemporal_store_i64:
; CHECK-RV32C: # %bb.0:
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sw a2, 4(a0)
-; CHECK-RV32C-NEXT: c.ntl.all
; CHECK-RV32C-NEXT: sw a1, 0(a0)
+; CHECK-RV32C-NEXT: c.ntl.all
+; CHECK-RV32C-NEXT: sw a2, 4(a0)
; CHECK-RV32C-NEXT: ret
;
; CHECK-RV64V-LABEL: test_nontemporal_store_i64:
@@ -643,9 +643,9 @@ define void @test_nontemporal_store_i64(ptr %p, i64 %v) {
; CHECK-RV32V-LABEL: test_nontemporal_store_i64:
; CHECK-RV32V: # %bb.0:
; CHECK-RV32V-NEXT: ntl.all
-; CHECK-RV32V-NEXT: sw a2, 4(a0)
-; CHECK-RV32V-NEXT: ntl.all
; CHECK-RV32V-NEXT: sw a1, 0(a0)
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: sw a2, 4(a0)
; CHECK-RV32V-NEXT: ret
store i64 %v, ptr %p, !nontemporal !0
@@ -915,46 +915,46 @@ define void @test_nontemporal_store_v16i8(ptr %p, <16 x i8> %v) {
; CHECK-RV64-NEXT: lbu a7, 40(a1)
; CHECK-RV64-NEXT: lbu t0, 48(a1)
; CHECK-RV64-NEXT: lbu t1, 56(a1)
-; CHECK-RV64-NEXT: lbu t2, 96(a1)
-; CHECK-RV64-NEXT: lbu t3, 104(a1)
-; CHECK-RV64-NEXT: lbu t4, 112(a1)
-; CHECK-RV64-NEXT: lbu t5, 120(a1)
-; CHECK-RV64-NEXT: lbu t6, 64(a1)
-; CHECK-RV64-NEXT: lbu s0, 72(a1)
-; CHECK-RV64-NEXT: lbu s1, 80(a1)
-; CHECK-RV64-NEXT: lbu a1, 88(a1)
+; CHECK-RV64-NEXT: lbu t2, 64(a1)
+; CHECK-RV64-NEXT: lbu t3, 72(a1)
+; CHECK-RV64-NEXT: lbu t4, 80(a1)
+; CHECK-RV64-NEXT: lbu t5, 88(a1)
+; CHECK-RV64-NEXT: lbu t6, 96(a1)
+; CHECK-RV64-NEXT: lbu s0, 104(a1)
+; CHECK-RV64-NEXT: lbu s1, 112(a1)
+; CHECK-RV64-NEXT: lbu a1, 120(a1)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sb t5, 15(a0)
+; CHECK-RV64-NEXT: sb t6, 12(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sb t4, 14(a0)
+; CHECK-RV64-NEXT: sb s0, 13(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sb t3, 13(a0)
+; CHECK-RV64-NEXT: sb s1, 14(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sb t2, 12(a0)
+; CHECK-RV64-NEXT: sb a1, 15(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sb a1, 11(a0)
+; CHECK-RV64-NEXT: sb t2, 8(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sb s1, 10(a0)
+; CHECK-RV64-NEXT: sb t3, 9(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sb s0, 9(a0)
+; CHECK-RV64-NEXT: sb t4, 10(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sb t6, 8(a0)
+; CHECK-RV64-NEXT: sb t5, 11(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sb t1, 7(a0)
-; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sb t0, 6(a0)
+; CHECK-RV64-NEXT: sb a6, 4(a0)
; CHECK-RV64-NEXT: ntl.all
; CHECK-RV64-NEXT: sb a7, 5(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sb a6, 4(a0)
+; CHECK-RV64-NEXT: sb t0, 6(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sb a5, 3(a0)
+; CHECK-RV64-NEXT: sb t1, 7(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sb a4, 2(a0)
+; CHECK-RV64-NEXT: sb a2, 0(a0)
; CHECK-RV64-NEXT: ntl.all
; CHECK-RV64-NEXT: sb a3, 1(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sb a2, 0(a0)
+; CHECK-RV64-NEXT: sb a4, 2(a0)
+; CHECK-RV64-NEXT: ntl.all
+; CHECK-RV64-NEXT: sb a5, 3(a0)
; CHECK-RV64-NEXT: ld s0, 8(sp) # 8-byte Folded Reload
; CHECK-RV64-NEXT: ld s1, 0(sp) # 8-byte Folded Reload
; CHECK-RV64-NEXT: addi sp, sp, 16
@@ -976,46 +976,46 @@ define void @test_nontemporal_store_v16i8(ptr %p, <16 x i8> %v) {
; CHECK-RV32-NEXT: lbu a7, 20(a1)
; CHECK-RV32-NEXT: lbu t0, 24(a1)
; CHECK-RV32-NEXT: lbu t1, 28(a1)
-; CHECK-RV32-NEXT: lbu t2, 48(a1)
-; CHECK-RV32-NEXT: lbu t3, 52(a1)
-; CHECK-RV32-NEXT: lbu t4, 56(a1)
-; CHECK-RV32-NEXT: lbu t5, 60(a1)
-; CHECK-RV32-NEXT: lbu t6, 32(a1)
-; CHECK-RV32-NEXT: lbu s0, 36(a1)
-; CHECK-RV32-NEXT: lbu s1, 40(a1)
-; CHECK-RV32-NEXT: lbu a1, 44(a1)
+; CHECK-RV32-NEXT: lbu t2, 32(a1)
+; CHECK-RV32-NEXT: lbu t3, 36(a1)
+; CHECK-RV32-NEXT: lbu t4, 40(a1)
+; CHECK-RV32-NEXT: lbu t5, 44(a1)
+; CHECK-RV32-NEXT: lbu t6, 48(a1)
+; CHECK-RV32-NEXT: lbu s0, 52(a1)
+; CHECK-RV32-NEXT: lbu s1, 56(a1)
+; CHECK-RV32-NEXT: lbu a1, 60(a1)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sb t5, 15(a0)
+; CHECK-RV32-NEXT: sb t6, 12(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sb t4, 14(a0)
+; CHECK-RV32-NEXT: sb s0, 13(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sb t3, 13(a0)
+; CHECK-RV32-NEXT: sb s1, 14(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sb t2, 12(a0)
+; CHECK-RV32-NEXT: sb a1, 15(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sb a1, 11(a0)
+; CHECK-RV32-NEXT: sb t2, 8(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sb s1, 10(a0)
+; CHECK-RV32-NEXT: sb t3, 9(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sb s0, 9(a0)
+; CHECK-RV32-NEXT: sb t4, 10(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sb t6, 8(a0)
-; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sb t1, 7(a0)
+; CHECK-RV32-NEXT: sb t5, 11(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sb t0, 6(a0)
+; CHECK-RV32-NEXT: sb a6, 4(a0)
; CHECK-RV32-NEXT: ntl.all
; CHECK-RV32-NEXT: sb a7, 5(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sb a6, 4(a0)
+; CHECK-RV32-NEXT: sb t0, 6(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sb a5, 3(a0)
+; CHECK-RV32-NEXT: sb t1, 7(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sb a4, 2(a0)
+; CHECK-RV32-NEXT: sb a2, 0(a0)
; CHECK-RV32-NEXT: ntl.all
; CHECK-RV32-NEXT: sb a3, 1(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sb a2, 0(a0)
+; CHECK-RV32-NEXT: sb a4, 2(a0)
+; CHECK-RV32-NEXT: ntl.all
+; CHECK-RV32-NEXT: sb a5, 3(a0)
; CHECK-RV32-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; CHECK-RV32-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
; CHECK-RV32-NEXT: addi sp, sp, 16
@@ -1037,46 +1037,46 @@ define void @test_nontemporal_store_v16i8(ptr %p, <16 x i8> %v) {
; CHECK-RV64C-NEXT: lbu t3, 40(a1)
; CHECK-RV64C-NEXT: lbu t4, 48(a1)
; CHECK-RV64C-NEXT: lbu t5, 56(a1)
-; CHECK-RV64C-NEXT: lbu a2, 96(a1)
-; CHECK-RV64C-NEXT: lbu a3, 104(a1)
-; CHECK-RV64C-NEXT: lbu a4, 112(a1)
-; CHECK-RV64C-NEXT: lbu a5, 120(a1)
; CHECK-RV64C-NEXT: lbu t6, 64(a1)
-; CHECK-RV64C-NEXT: lbu s0, 72(a1)
-; CHECK-RV64C-NEXT: lbu s1, 80(a1)
-; CHECK-RV64C-NEXT: lbu a1, 88(a1)
-; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sb a5, 15(a0)
-; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sb a4, 14(a0)
-; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sb a3, 13(a0)
+; CHECK-RV64C-NEXT: lbu a3, 72(a1)
+; CHECK-RV64C-NEXT: lbu a4, 80(a1)
+; CHECK-RV64C-NEXT: lbu a5, 88(a1)
+; CHECK-RV64C-NEXT: lbu a2, 96(a1)
+; CHECK-RV64C-NEXT: lbu s0, 104(a1)
+; CHECK-RV64C-NEXT: lbu s1, 112(a1)
+; CHECK-RV64C-NEXT: lbu a1, 120(a1)
; CHECK-RV64C-NEXT: c.ntl.all
; CHECK-RV64C-NEXT: sb a2, 12(a0)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sb a1, 11(a0)
+; CHECK-RV64C-NEXT: sb s0, 13(a0)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sb s1, 10(a0)
+; CHECK-RV64C-NEXT: sb s1, 14(a0)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sb s0, 9(a0)
+; CHECK-RV64C-NEXT: sb a1, 15(a0)
; CHECK-RV64C-NEXT: c.ntl.all
; CHECK-RV64C-NEXT: sb t6, 8(a0)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sb t5, 7(a0)
+; CHECK-RV64C-NEXT: sb a3, 9(a0)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sb t4, 6(a0)
+; CHECK-RV64C-NEXT: sb a4, 10(a0)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sb t3, 5(a0)
+; CHECK-RV64C-NEXT: sb a5, 11(a0)
; CHECK-RV64C-NEXT: c.ntl.all
; CHECK-RV64C-NEXT: sb t2, 4(a0)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sb t1, 3(a0)
+; CHECK-RV64C-NEXT: sb t3, 5(a0)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sb t0, 2(a0)
+; CHECK-RV64C-NEXT: sb t4, 6(a0)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sb a7, 1(a0)
+; CHECK-RV64C-NEXT: sb t5, 7(a0)
; CHECK-RV64C-NEXT: c.ntl.all
; CHECK-RV64C-NEXT: sb a6, 0(a0)
+; CHECK-RV64C-NEXT: c.ntl.all
+; CHECK-RV64C-NEXT: sb a7, 1(a0)
+; CHECK-RV64C-NEXT: c.ntl.all
+; CHECK-RV64C-NEXT: sb t0, 2(a0)
+; CHECK-RV64C-NEXT: c.ntl.all
+; CHECK-RV64C-NEXT: sb t1, 3(a0)
; CHECK-RV64C-NEXT: ld s0, 8(sp) # 8-byte Folded Reload
; CHECK-RV64C-NEXT: ld s1, 0(sp) # 8-byte Folded Reload
; CHECK-RV64C-NEXT: addi sp, sp, 16
@@ -1098,46 +1098,46 @@ define void @test_nontemporal_store_v16i8(ptr %p, <16 x i8> %v) {
; CHECK-RV32C-NEXT: lbu t3, 20(a1)
; CHECK-RV32C-NEXT: lbu t4, 24(a1)
; CHECK-RV32C-NEXT: lbu t5, 28(a1)
-; CHECK-RV32C-NEXT: lbu a2, 48(a1)
-; CHECK-RV32C-NEXT: lbu a3, 52(a1)
-; CHECK-RV32C-NEXT: lbu a4, 56(a1)
-; CHECK-RV32C-NEXT: lbu a5, 60(a1)
; CHECK-RV32C-NEXT: lbu t6, 32(a1)
-; CHECK-RV32C-NEXT: lbu s0, 36(a1)
-; CHECK-RV32C-NEXT: lbu s1, 40(a1)
-; CHECK-RV32C-NEXT: lbu a1, 44(a1)
-; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sb a5, 15(a0)
-; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sb a4, 14(a0)
-; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sb a3, 13(a0)
+; CHECK-RV32C-NEXT: lbu a3, 36(a1)
+; CHECK-RV32C-NEXT: lbu a4, 40(a1)
+; CHECK-RV32C-NEXT: lbu a5, 44(a1)
+; CHECK-RV32C-NEXT: lbu a2, 48(a1)
+; CHECK-RV32C-NEXT: lbu s0, 52(a1)
+; CHECK-RV32C-NEXT: lbu s1, 56(a1)
+; CHECK-RV32C-NEXT: lbu a1, 60(a1)
; CHECK-RV32C-NEXT: c.ntl.all
; CHECK-RV32C-NEXT: sb a2, 12(a0)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sb a1, 11(a0)
+; CHECK-RV32C-NEXT: sb s0, 13(a0)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sb s1, 10(a0)
+; CHECK-RV32C-NEXT: sb s1, 14(a0)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sb s0, 9(a0)
+; CHECK-RV32C-NEXT: sb a1, 15(a0)
; CHECK-RV32C-NEXT: c.ntl.all
; CHECK-RV32C-NEXT: sb t6, 8(a0)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sb t5, 7(a0)
+; CHECK-RV32C-NEXT: sb a3, 9(a0)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sb t4, 6(a0)
+; CHECK-RV32C-NEXT: sb a4, 10(a0)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sb t3, 5(a0)
+; CHECK-RV32C-NEXT: sb a5, 11(a0)
; CHECK-RV32C-NEXT: c.ntl.all
; CHECK-RV32C-NEXT: sb t2, 4(a0)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sb t1, 3(a0)
+; CHECK-RV32C-NEXT: sb t3, 5(a0)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sb t0, 2(a0)
+; CHECK-RV32C-NEXT: sb t4, 6(a0)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sb a7, 1(a0)
+; CHECK-RV32C-NEXT: sb t5, 7(a0)
; CHECK-RV32C-NEXT: c.ntl.all
; CHECK-RV32C-NEXT: sb a6, 0(a0)
+; CHECK-RV32C-NEXT: c.ntl.all
+; CHECK-RV32C-NEXT: sb a7, 1(a0)
+; CHECK-RV32C-NEXT: c.ntl.all
+; CHECK-RV32C-NEXT: sb t0, 2(a0)
+; CHECK-RV32C-NEXT: c.ntl.all
+; CHECK-RV32C-NEXT: sb t1, 3(a0)
; CHECK-RV32C-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; CHECK-RV32C-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
; CHECK-RV32C-NEXT: addi sp, sp, 16
@@ -1163,114 +1163,114 @@ define void @test_nontemporal_store_v16i8(ptr %p, <16 x i8> %v) {
define void @test_nontemporal_store_v8i16(ptr %p, <8 x i16> %v) {
; CHECK-RV64-LABEL: test_nontemporal_store_v8i16:
; CHECK-RV64: # %bb.0:
-; CHECK-RV64-NEXT: lh a2, 32(a1)
-; CHECK-RV64-NEXT: lh a3, 40(a1)
-; CHECK-RV64-NEXT: lh a4, 48(a1)
-; CHECK-RV64-NEXT: lh a5, 56(a1)
-; CHECK-RV64-NEXT: lh a6, 0(a1)
-; CHECK-RV64-NEXT: lh a7, 8(a1)
-; CHECK-RV64-NEXT: lh t0, 16(a1)
-; CHECK-RV64-NEXT: lh a1, 24(a1)
+; CHECK-RV64-NEXT: lh a2, 0(a1)
+; CHECK-RV64-NEXT: lh a3, 8(a1)
+; CHECK-RV64-NEXT: lh a4, 16(a1)
+; CHECK-RV64-NEXT: lh a5, 24(a1)
+; CHECK-RV64-NEXT: lh a6, 32(a1)
+; CHECK-RV64-NEXT: lh a7, 40(a1)
+; CHECK-RV64-NEXT: lh t0, 48(a1)
+; CHECK-RV64-NEXT: lh a1, 56(a1)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sh a5, 14(a0)
+; CHECK-RV64-NEXT: sh a6, 8(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sh a4, 12(a0)
+; CHECK-RV64-NEXT: sh a7, 10(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sh a3, 10(a0)
+; CHECK-RV64-NEXT: sh t0, 12(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sh a2, 8(a0)
+; CHECK-RV64-NEXT: sh a1, 14(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sh a1, 6(a0)
+; CHECK-RV64-NEXT: sh a2, 0(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sh t0, 4(a0)
+; CHECK-RV64-NEXT: sh a3, 2(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sh a7, 2(a0)
+; CHECK-RV64-NEXT: sh a4, 4(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sh a6, 0(a0)
+; CHECK-RV64-NEXT: sh a5, 6(a0)
; CHECK-RV64-NEXT: ret
;
; CHECK-RV32-LABEL: test_nontemporal_store_v8i16:
; CHECK-RV32: # %bb.0:
-; CHECK-RV32-NEXT: lh a2, 16(a1)
-; CHECK-RV32-NEXT: lh a3, 20(a1)
-; CHECK-RV32-NEXT: lh a4, 24(a1)
-; CHECK-RV32-NEXT: lh a5, 28(a1)
-; CHECK-RV32-NEXT: lh a6, 0(a1)
-; CHECK-RV32-NEXT: lh a7, 4(a1)
-; CHECK-RV32-NEXT: lh t0, 8(a1)
-; CHECK-RV32-NEXT: lh a1, 12(a1)
+; CHECK-RV32-NEXT: lh a2, 0(a1)
+; CHECK-RV32-NEXT: lh a3, 4(a1)
+; CHECK-RV32-NEXT: lh a4, 8(a1)
+; CHECK-RV32-NEXT: lh a5, 12(a1)
+; CHECK-RV32-NEXT: lh a6, 16(a1)
+; CHECK-RV32-NEXT: lh a7, 20(a1)
+; CHECK-RV32-NEXT: lh t0, 24(a1)
+; CHECK-RV32-NEXT: lh a1, 28(a1)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sh a5, 14(a0)
+; CHECK-RV32-NEXT: sh a6, 8(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sh a4, 12(a0)
+; CHECK-RV32-NEXT: sh a7, 10(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sh a3, 10(a0)
+; CHECK-RV32-NEXT: sh t0, 12(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sh a2, 8(a0)
+; CHECK-RV32-NEXT: sh a1, 14(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sh a1, 6(a0)
+; CHECK-RV32-NEXT: sh a2, 0(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sh t0, 4(a0)
+; CHECK-RV32-NEXT: sh a3, 2(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sh a7, 2(a0)
+; CHECK-RV32-NEXT: sh a4, 4(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sh a6, 0(a0)
+; CHECK-RV32-NEXT: sh a5, 6(a0)
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64C-LABEL: test_nontemporal_store_v8i16:
; CHECK-RV64C: # %bb.0:
-; CHECK-RV64C-NEXT: lh a7, 32(a1)
+; CHECK-RV64C-NEXT: lh a6, 0(a1)
+; CHECK-RV64C-NEXT: lh a7, 8(a1)
+; CHECK-RV64C-NEXT: lh t0, 16(a1)
+; CHECK-RV64C-NEXT: lh a5, 24(a1)
+; CHECK-RV64C-NEXT: lh a2, 32(a1)
; CHECK-RV64C-NEXT: lh a3, 40(a1)
; CHECK-RV64C-NEXT: lh a4, 48(a1)
-; CHECK-RV64C-NEXT: lh a5, 56(a1)
-; CHECK-RV64C-NEXT: lh a6, 0(a1)
-; CHECK-RV64C-NEXT: lh t0, 8(a1)
-; CHECK-RV64C-NEXT: lh a2, 16(a1)
-; CHECK-RV64C-NEXT: lh a1, 24(a1)
+; CHECK-RV64C-NEXT: lh a1, 56(a1)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sh a5, 14(a0)
-; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sh a4, 12(a0)
+; CHECK-RV64C-NEXT: sh a2, 8(a0)
; CHECK-RV64C-NEXT: c.ntl.all
; CHECK-RV64C-NEXT: sh a3, 10(a0)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sh a7, 8(a0)
+; CHECK-RV64C-NEXT: sh a4, 12(a0)
+; CHECK-RV64C-NEXT: c.ntl.all
+; CHECK-RV64C-NEXT: sh a1, 14(a0)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sh a1, 6(a0)
+; CHECK-RV64C-NEXT: sh a6, 0(a0)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sh a2, 4(a0)
+; CHECK-RV64C-NEXT: sh a7, 2(a0)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sh t0, 2(a0)
+; CHECK-RV64C-NEXT: sh t0, 4(a0)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sh a6, 0(a0)
+; CHECK-RV64C-NEXT: sh a5, 6(a0)
; CHECK-RV64C-NEXT: ret
;
; CHECK-RV32C-LABEL: test_nontemporal_store_v8i16:
; CHECK-RV32C: # %bb.0:
-; CHECK-RV32C-NEXT: lh a7, 16(a1)
+; CHECK-RV32C-NEXT: lh a6, 0(a1)
+; CHECK-RV32C-NEXT: lh a7, 4(a1)
+; CHECK-RV32C-NEXT: lh t0, 8(a1)
+; CHECK-RV32C-NEXT: lh a5, 12(a1)
+; CHECK-RV32C-NEXT: lh a2, 16(a1)
; CHECK-RV32C-NEXT: lh a3, 20(a1)
; CHECK-RV32C-NEXT: lh a4, 24(a1)
-; CHECK-RV32C-NEXT: lh a5, 28(a1)
-; CHECK-RV32C-NEXT: lh a6, 0(a1)
-; CHECK-RV32C-NEXT: lh t0, 4(a1)
-; CHECK-RV32C-NEXT: lh a2, 8(a1)
-; CHECK-RV32C-NEXT: lh a1, 12(a1)
+; CHECK-RV32C-NEXT: lh a1, 28(a1)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sh a5, 14(a0)
-; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sh a4, 12(a0)
+; CHECK-RV32C-NEXT: sh a2, 8(a0)
; CHECK-RV32C-NEXT: c.ntl.all
; CHECK-RV32C-NEXT: sh a3, 10(a0)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sh a7, 8(a0)
+; CHECK-RV32C-NEXT: sh a4, 12(a0)
+; CHECK-RV32C-NEXT: c.ntl.all
+; CHECK-RV32C-NEXT: sh a1, 14(a0)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sh a1, 6(a0)
+; CHECK-RV32C-NEXT: sh a6, 0(a0)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sh a2, 4(a0)
+; CHECK-RV32C-NEXT: sh a7, 2(a0)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sh t0, 2(a0)
+; CHECK-RV32C-NEXT: sh t0, 4(a0)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sh a6, 0(a0)
+; CHECK-RV32C-NEXT: sh a5, 6(a0)
; CHECK-RV32C-NEXT: ret
;
; CHECK-RV64V-LABEL: test_nontemporal_store_v8i16:
@@ -1293,66 +1293,66 @@ define void @test_nontemporal_store_v8i16(ptr %p, <8 x i16> %v) {
define void @test_nontemporal_store_v4i32(ptr %p, <4 x i32> %v) {
; CHECK-RV64-LABEL: test_nontemporal_store_v4i32:
; CHECK-RV64: # %bb.0:
-; CHECK-RV64-NEXT: lw a2, 24(a1)
-; CHECK-RV64-NEXT: lw a3, 16(a1)
-; CHECK-RV64-NEXT: lw a4, 8(a1)
-; CHECK-RV64-NEXT: lw a1, 0(a1)
+; CHECK-RV64-NEXT: lw a2, 0(a1)
+; CHECK-RV64-NEXT: lw a3, 8(a1)
+; CHECK-RV64-NEXT: lw a4, 16(a1)
+; CHECK-RV64-NEXT: lw a1, 24(a1)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sw a2, 12(a0)
+; CHECK-RV64-NEXT: sw a2, 0(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sw a3, 8(a0)
+; CHECK-RV64-NEXT: sw a3, 4(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sw a4, 4(a0)
+; CHECK-RV64-NEXT: sw a4, 8(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sw a1, 0(a0)
+; CHECK-RV64-NEXT: sw a1, 12(a0)
; CHECK-RV64-NEXT: ret
;
; CHECK-RV32-LABEL: test_nontemporal_store_v4i32:
; CHECK-RV32: # %bb.0:
-; CHECK-RV32-NEXT: lw a2, 12(a1)
-; CHECK-RV32-NEXT: lw a3, 8(a1)
-; CHECK-RV32-NEXT: lw a4, 4(a1)
-; CHECK-RV32-NEXT: lw a1, 0(a1)
+; CHECK-RV32-NEXT: lw a2, 0(a1)
+; CHECK-RV32-NEXT: lw a3, 4(a1)
+; CHECK-RV32-NEXT: lw a4, 8(a1)
+; CHECK-RV32-NEXT: lw a1, 12(a1)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sw a2, 12(a0)
+; CHECK-RV32-NEXT: sw a2, 0(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sw a3, 8(a0)
+; CHECK-RV32-NEXT: sw a3, 4(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sw a4, 4(a0)
+; CHECK-RV32-NEXT: sw a4, 8(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sw a1, 0(a0)
+; CHECK-RV32-NEXT: sw a1, 12(a0)
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64C-LABEL: test_nontemporal_store_v4i32:
; CHECK-RV64C: # %bb.0:
-; CHECK-RV64C-NEXT: lw a2, 24(a1)
-; CHECK-RV64C-NEXT: lw a3, 16(a1)
-; CHECK-RV64C-NEXT: lw a4, 8(a1)
-; CHECK-RV64C-NEXT: lw a1, 0(a1)
+; CHECK-RV64C-NEXT: lw a2, 0(a1)
+; CHECK-RV64C-NEXT: lw a3, 8(a1)
+; CHECK-RV64C-NEXT: lw a4, 16(a1)
+; CHECK-RV64C-NEXT: lw a1, 24(a1)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sw a2, 12(a0)
+; CHECK-RV64C-NEXT: sw a2, 0(a0)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sw a3, 8(a0)
+; CHECK-RV64C-NEXT: sw a3, 4(a0)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sw a4, 4(a0)
+; CHECK-RV64C-NEXT: sw a4, 8(a0)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sw a1, 0(a0)
+; CHECK-RV64C-NEXT: sw a1, 12(a0)
; CHECK-RV64C-NEXT: ret
;
; CHECK-RV32C-LABEL: test_nontemporal_store_v4i32:
; CHECK-RV32C: # %bb.0:
-; CHECK-RV32C-NEXT: lw a2, 12(a1)
-; CHECK-RV32C-NEXT: lw a3, 8(a1)
-; CHECK-RV32C-NEXT: lw a4, 4(a1)
-; CHECK-RV32C-NEXT: lw a1, 0(a1)
+; CHECK-RV32C-NEXT: lw a2, 0(a1)
+; CHECK-RV32C-NEXT: lw a3, 4(a1)
+; CHECK-RV32C-NEXT: lw a4, 8(a1)
+; CHECK-RV32C-NEXT: lw a1, 12(a1)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sw a2, 12(a0)
+; CHECK-RV32C-NEXT: sw a2, 0(a0)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sw a3, 8(a0)
+; CHECK-RV32C-NEXT: sw a3, 4(a0)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sw a4, 4(a0)
+; CHECK-RV32C-NEXT: sw a4, 8(a0)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sw a1, 0(a0)
+; CHECK-RV32C-NEXT: sw a1, 12(a0)
; CHECK-RV32C-NEXT: ret
;
; CHECK-RV64V-LABEL: test_nontemporal_store_v4i32:
@@ -1376,49 +1376,49 @@ define void @test_nontemporal_store_v2i64(ptr %p, <2 x i64> %v) {
; CHECK-RV64-LABEL: test_nontemporal_store_v2i64:
; CHECK-RV64: # %bb.0:
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sd a2, 8(a0)
-; CHECK-RV64-NEXT: ntl.all
; CHECK-RV64-NEXT: sd a1, 0(a0)
+; CHECK-RV64-NEXT: ntl.all
+; CHECK-RV64-NEXT: sd a2, 8(a0)
; CHECK-RV64-NEXT: ret
;
; CHECK-RV32-LABEL: test_nontemporal_store_v2i64:
; CHECK-RV32: # %bb.0:
-; CHECK-RV32-NEXT: lw a2, 12(a1)
-; CHECK-RV32-NEXT: lw a3, 8(a1)
-; CHECK-RV32-NEXT: lw a4, 4(a1)
-; CHECK-RV32-NEXT: lw a1, 0(a1)
+; CHECK-RV32-NEXT: lw a2, 0(a1)
+; CHECK-RV32-NEXT: lw a3, 4(a1)
+; CHECK-RV32-NEXT: lw a4, 8(a1)
+; CHECK-RV32-NEXT: lw a1, 12(a1)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sw a2, 12(a0)
+; CHECK-RV32-NEXT: sw a2, 0(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sw a3, 8(a0)
+; CHECK-RV32-NEXT: sw a3, 4(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sw a4, 4(a0)
+; CHECK-RV32-NEXT: sw a4, 8(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sw a1, 0(a0)
+; CHECK-RV32-NEXT: sw a1, 12(a0)
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64C-LABEL: test_nontemporal_store_v2i64:
; CHECK-RV64C: # %bb.0:
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sd a2, 8(a0)
-; CHECK-RV64C-NEXT: c.ntl.all
; CHECK-RV64C-NEXT: sd a1, 0(a0)
+; CHECK-RV64C-NEXT: c.ntl.all
+; CHECK-RV64C-NEXT: sd a2, 8(a0)
; CHECK-RV64C-NEXT: ret
;
; CHECK-RV32C-LABEL: test_nontemporal_store_v2i64:
; CHECK-RV32C: # %bb.0:
-; CHECK-RV32C-NEXT: lw a2, 12(a1)
-; CHECK-RV32C-NEXT: lw a3, 8(a1)
-; CHECK-RV32C-NEXT: lw a4, 4(a1)
-; CHECK-RV32C-NEXT: lw a1, 0(a1)
+; CHECK-RV32C-NEXT: lw a2, 0(a1)
+; CHECK-RV32C-NEXT: lw a3, 4(a1)
+; CHECK-RV32C-NEXT: lw a4, 8(a1)
+; CHECK-RV32C-NEXT: lw a1, 12(a1)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sw a2, 12(a0)
+; CHECK-RV32C-NEXT: sw a2, 0(a0)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sw a3, 8(a0)
+; CHECK-RV32C-NEXT: sw a3, 4(a0)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sw a4, 4(a0)
+; CHECK-RV32C-NEXT: sw a4, 8(a0)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sw a1, 0(a0)
+; CHECK-RV32C-NEXT: sw a1, 12(a0)
; CHECK-RV32C-NEXT: ret
;
; CHECK-RV64V-LABEL: test_nontemporal_store_v2i64:
@@ -1752,53 +1752,53 @@ define <16 x i8> @test_nontemporal_P1_load_v16i8(ptr %p) {
; CHECK-RV64-LABEL: test_nontemporal_P1_load_v16i8:
; CHECK-RV64: # %bb.0:
; CHECK-RV64-NEXT: ntl.p1
-; CHECK-RV64-NEXT: ld a2, 8(a1)
+; CHECK-RV64-NEXT: ld a2, 0(a1)
; CHECK-RV64-NEXT: ntl.p1
-; CHECK-RV64-NEXT: ld a1, 0(a1)
-; CHECK-RV64-NEXT: sd a2, 8(a0)
-; CHECK-RV64-NEXT: sd a1, 0(a0)
+; CHECK-RV64-NEXT: ld a1, 8(a1)
+; CHECK-RV64-NEXT: sd a2, 0(a0)
+; CHECK-RV64-NEXT: sd a1, 8(a0)
; CHECK-RV64-NEXT: ret
;
; CHECK-RV32-LABEL: test_nontemporal_P1_load_v16i8:
; CHECK-RV32: # %bb.0:
; CHECK-RV32-NEXT: ntl.p1
-; CHECK-RV32-NEXT: lw a2, 12(a1)
+; CHECK-RV32-NEXT: lw a2, 0(a1)
; CHECK-RV32-NEXT: ntl.p1
-; CHECK-RV32-NEXT: lw a3, 8(a1)
+; CHECK-RV32-NEXT: lw a3, 4(a1)
; CHECK-RV32-NEXT: ntl.p1
-; CHECK-RV32-NEXT: lw a4, 4(a1)
+; CHECK-RV32-NEXT: lw a4, 8(a1)
; CHECK-RV32-NEXT: ntl.p1
-; CHECK-RV32-NEXT: lw a1, 0(a1)
-; CHECK-RV32-NEXT: sw a2, 12(a0)
-; CHECK-RV32-NEXT: sw a3, 8(a0)
-; CHECK-RV32-NEXT: sw a4, 4(a0)
-; CHECK-RV32-NEXT: sw a1, 0(a0)
+; CHECK-RV32-NEXT: lw a1, 12(a1)
+; CHECK-RV32-NEXT: sw a2, 0(a0)
+; CHECK-RV32-NEXT: sw a3, 4(a0)
+; CHECK-RV32-NEXT: sw a4, 8(a0)
+; CHECK-RV32-NEXT: sw a1, 12(a0)
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64C-LABEL: test_nontemporal_P1_load_v16i8:
; CHECK-RV64C: # %bb.0:
; CHECK-RV64C-NEXT: c.ntl.p1
-; CHECK-RV64C-NEXT: ld a2, 8(a1)
+; CHECK-RV64C-NEXT: ld a2, 0(a1)
; CHECK-RV64C-NEXT: c.ntl.p1
-; CHECK-RV64C-NEXT: ld a1, 0(a1)
-; CHECK-RV64C-NEXT: sd a2, 8(a0)
-; CHECK-RV64C-NEXT: sd a1, 0(a0)
+; CHECK-RV64C-NEXT: ld a1, 8(a1)
+; CHECK-RV64C-NEXT: sd a2, 0(a0)
+; CHECK-RV64C-NEXT: sd a1, 8(a0)
; CHECK-RV64C-NEXT: ret
;
; CHECK-RV32C-LABEL: test_nontemporal_P1_load_v16i8:
; CHECK-RV32C: # %bb.0:
; CHECK-RV32C-NEXT: c.ntl.p1
-; CHECK-RV32C-NEXT: lw a2, 12(a1)
+; CHECK-RV32C-NEXT: lw a2, 0(a1)
; CHECK-RV32C-NEXT: c.ntl.p1
-; CHECK-RV32C-NEXT: lw a3, 8(a1)
+; CHECK-RV32C-NEXT: lw a3, 4(a1)
; CHECK-RV32C-NEXT: c.ntl.p1
-; CHECK-RV32C-NEXT: lw a4, 4(a1)
+; CHECK-RV32C-NEXT: lw a4, 8(a1)
; CHECK-RV32C-NEXT: c.ntl.p1
-; CHECK-RV32C-NEXT: lw a1, 0(a1)
-; CHECK-RV32C-NEXT: sw a2, 12(a0)
-; CHECK-RV32C-NEXT: sw a3, 8(a0)
-; CHECK-RV32C-NEXT: sw a4, 4(a0)
-; CHECK-RV32C-NEXT: sw a1, 0(a0)
+; CHECK-RV32C-NEXT: lw a1, 12(a1)
+; CHECK-RV32C-NEXT: sw a2, 0(a0)
+; CHECK-RV32C-NEXT: sw a3, 4(a0)
+; CHECK-RV32C-NEXT: sw a4, 8(a0)
+; CHECK-RV32C-NEXT: sw a1, 12(a0)
; CHECK-RV32C-NEXT: ret
;
; CHECK-RV64V-LABEL: test_nontemporal_P1_load_v16i8:
@@ -1822,53 +1822,53 @@ define <8 x i16> @test_nontemporal_P1_load_v8i16(ptr %p) {
; CHECK-RV64-LABEL: test_nontemporal_P1_load_v8i16:
; CHECK-RV64: # %bb.0:
; CHECK-RV64-NEXT: ntl.p1
-; CHECK-RV64-NEXT: ld a2, 8(a1)
+; CHECK-RV64-NEXT: ld a2, 0(a1)
; CHECK-RV64-NEXT: ntl.p1
-; CHECK-RV64-NEXT: ld a1, 0(a1)
-; CHECK-RV64-NEXT: sd a2, 8(a0)
-; CHECK-RV64-NEXT: sd a1, 0(a0)
+; CHECK-RV64-NEXT: ld a1, 8(a1)
+; CHECK-RV64-NEXT: sd a2, 0(a0)
+; CHECK-RV64-NEXT: sd a1, 8(a0)
; CHECK-RV64-NEXT: ret
;
; CHECK-RV32-LABEL: test_nontemporal_P1_load_v8i16:
; CHECK-RV32: # %bb.0:
; CHECK-RV32-NEXT: ntl.p1
-; CHECK-RV32-NEXT: lw a2, 12(a1)
+; CHECK-RV32-NEXT: lw a2, 0(a1)
; CHECK-RV32-NEXT: ntl.p1
-; CHECK-RV32-NEXT: lw a3, 8(a1)
+; CHECK-RV32-NEXT: lw a3, 4(a1)
; CHECK-RV32-NEXT: ntl.p1
-; CHECK-RV32-NEXT: lw a4, 4(a1)
+; CHECK-RV32-NEXT: lw a4, 8(a1)
; CHECK-RV32-NEXT: ntl.p1
-; CHECK-RV32-NEXT: lw a1, 0(a1)
-; CHECK-RV32-NEXT: sw a2, 12(a0)
-; CHECK-RV32-NEXT: sw a3, 8(a0)
-; CHECK-RV32-NEXT: sw a4, 4(a0)
-; CHECK-RV32-NEXT: sw a1, 0(a0)
+; CHECK-RV32-NEXT: lw a1, 12(a1)
+; CHECK-RV32-NEXT: sw a2, 0(a0)
+; CHECK-RV32-NEXT: sw a3, 4(a0)
+; CHECK-RV32-NEXT: sw a4, 8(a0)
+; CHECK-RV32-NEXT: sw a1, 12(a0)
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64C-LABEL: test_nontemporal_P1_load_v8i16:
; CHECK-RV64C: # %bb.0:
; CHECK-RV64C-NEXT: c.ntl.p1
-; CHECK-RV64C-NEXT: ld a2, 8(a1)
+; CHECK-RV64C-NEXT: ld a2, 0(a1)
; CHECK-RV64C-NEXT: c.ntl.p1
-; CHECK-RV64C-NEXT: ld a1, 0(a1)
-; CHECK-RV64C-NEXT: sd a2, 8(a0)
-; CHECK-RV64C-NEXT: sd a1, 0(a0)
+; CHECK-RV64C-NEXT: ld a1, 8(a1)
+; CHECK-RV64C-NEXT: sd a2, 0(a0)
+; CHECK-RV64C-NEXT: sd a1, 8(a0)
; CHECK-RV64C-NEXT: ret
;
; CHECK-RV32C-LABEL: test_nontemporal_P1_load_v8i16:
; CHECK-RV32C: # %bb.0:
; CHECK-RV32C-NEXT: c.ntl.p1
-; CHECK-RV32C-NEXT: lw a2, 12(a1)
+; CHECK-RV32C-NEXT: lw a2, 0(a1)
; CHECK-RV32C-NEXT: c.ntl.p1
-; CHECK-RV32C-NEXT: lw a3, 8(a1)
+; CHECK-RV32C-NEXT: lw a3, 4(a1)
; CHECK-RV32C-NEXT: c.ntl.p1
-; CHECK-RV32C-NEXT: lw a4, 4(a1)
+; CHECK-RV32C-NEXT: lw a4, 8(a1)
; CHECK-RV32C-NEXT: c.ntl.p1
-; CHECK-RV32C-NEXT: lw a1, 0(a1)
-; CHECK-RV32C-NEXT: sw a2, 12(a0)
-; CHECK-RV32C-NEXT: sw a3, 8(a0)
-; CHECK-RV32C-NEXT: sw a4, 4(a0)
-; CHECK-RV32C-NEXT: sw a1, 0(a0)
+; CHECK-RV32C-NEXT: lw a1, 12(a1)
+; CHECK-RV32C-NEXT: sw a2, 0(a0)
+; CHECK-RV32C-NEXT: sw a3, 4(a0)
+; CHECK-RV32C-NEXT: sw a4, 8(a0)
+; CHECK-RV32C-NEXT: sw a1, 12(a0)
; CHECK-RV32C-NEXT: ret
;
; CHECK-RV64V-LABEL: test_nontemporal_P1_load_v8i16:
@@ -1892,53 +1892,53 @@ define <4 x i32> @test_nontemporal_P1_load_v4i32(ptr %p) {
; CHECK-RV64-LABEL: test_nontemporal_P1_load_v4i32:
; CHECK-RV64: # %bb.0:
; CHECK-RV64-NEXT: ntl.p1
-; CHECK-RV64-NEXT: ld a2, 8(a1)
+; CHECK-RV64-NEXT: ld a2, 0(a1)
; CHECK-RV64-NEXT: ntl.p1
-; CHECK-RV64-NEXT: ld a1, 0(a1)
-; CHECK-RV64-NEXT: sd a2, 8(a0)
-; CHECK-RV64-NEXT: sd a1, 0(a0)
+; CHECK-RV64-NEXT: ld a1, 8(a1)
+; CHECK-RV64-NEXT: sd a2, 0(a0)
+; CHECK-RV64-NEXT: sd a1, 8(a0)
; CHECK-RV64-NEXT: ret
;
; CHECK-RV32-LABEL: test_nontemporal_P1_load_v4i32:
; CHECK-RV32: # %bb.0:
; CHECK-RV32-NEXT: ntl.p1
-; CHECK-RV32-NEXT: lw a2, 12(a1)
+; CHECK-RV32-NEXT: lw a2, 0(a1)
; CHECK-RV32-NEXT: ntl.p1
-; CHECK-RV32-NEXT: lw a3, 8(a1)
+; CHECK-RV32-NEXT: lw a3, 4(a1)
; CHECK-RV32-NEXT: ntl.p1
-; CHECK-RV32-NEXT: lw a4, 4(a1)
+; CHECK-RV32-NEXT: lw a4, 8(a1)
; CHECK-RV32-NEXT: ntl.p1
-; CHECK-RV32-NEXT: lw a1, 0(a1)
-; CHECK-RV32-NEXT: sw a2, 12(a0)
-; CHECK-RV32-NEXT: sw a3, 8(a0)
-; CHECK-RV32-NEXT: sw a4, 4(a0)
-; CHECK-RV32-NEXT: sw a1, 0(a0)
+; CHECK-RV32-NEXT: lw a1, 12(a1)
+; CHECK-RV32-NEXT: sw a2, 0(a0)
+; CHECK-RV32-NEXT: sw a3, 4(a0)
+; CHECK-RV32-NEXT: sw a4, 8(a0)
+; CHECK-RV32-NEXT: sw a1, 12(a0)
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64C-LABEL: test_nontemporal_P1_load_v4i32:
; CHECK-RV64C: # %bb.0:
; CHECK-RV64C-NEXT: c.ntl.p1
-; CHECK-RV64C-NEXT: ld a2, 8(a1)
+; CHECK-RV64C-NEXT: ld a2, 0(a1)
; CHECK-RV64C-NEXT: c.ntl.p1
-; CHECK-RV64C-NEXT: ld a1, 0(a1)
-; CHECK-RV64C-NEXT: sd a2, 8(a0)
-; CHECK-RV64C-NEXT: sd a1, 0(a0)
+; CHECK-RV64C-NEXT: ld a1, 8(a1)
+; CHECK-RV64C-NEXT: sd a2, 0(a0)
+; CHECK-RV64C-NEXT: sd a1, 8(a0)
; CHECK-RV64C-NEXT: ret
;
; CHECK-RV32C-LABEL: test_nontemporal_P1_load_v4i32:
; CHECK-RV32C: # %bb.0:
; CHECK-RV32C-NEXT: c.ntl.p1
-; CHECK-RV32C-NEXT: lw a2, 12(a1)
+; CHECK-RV32C-NEXT: lw a2, 0(a1)
; CHECK-RV32C-NEXT: c.ntl.p1
-; CHECK-RV32C-NEXT: lw a3, 8(a1)
+; CHECK-RV32C-NEXT: lw a3, 4(a1)
; CHECK-RV32C-NEXT: c.ntl.p1
-; CHECK-RV32C-NEXT: lw a4, 4(a1)
+; CHECK-RV32C-NEXT: lw a4, 8(a1)
; CHECK-RV32C-NEXT: c.ntl.p1
-; CHECK-RV32C-NEXT: lw a1, 0(a1)
-; CHECK-RV32C-NEXT: sw a2, 12(a0)
-; CHECK-RV32C-NEXT: sw a3, 8(a0)
-; CHECK-RV32C-NEXT: sw a4, 4(a0)
-; CHECK-RV32C-NEXT: sw a1, 0(a0)
+; CHECK-RV32C-NEXT: lw a1, 12(a1)
+; CHECK-RV32C-NEXT: sw a2, 0(a0)
+; CHECK-RV32C-NEXT: sw a3, 4(a0)
+; CHECK-RV32C-NEXT: sw a4, 8(a0)
+; CHECK-RV32C-NEXT: sw a1, 12(a0)
; CHECK-RV32C-NEXT: ret
;
; CHECK-RV64V-LABEL: test_nontemporal_P1_load_v4i32:
@@ -1971,17 +1971,17 @@ define <2 x i64> @test_nontemporal_P1_load_v2i64(ptr %p) {
; CHECK-RV32-LABEL: test_nontemporal_P1_load_v2i64:
; CHECK-RV32: # %bb.0:
; CHECK-RV32-NEXT: ntl.p1
-; CHECK-RV32-NEXT: lw a2, 12(a1)
+; CHECK-RV32-NEXT: lw a2, 0(a1)
; CHECK-RV32-NEXT: ntl.p1
-; CHECK-RV32-NEXT: lw a3, 8(a1)
+; CHECK-RV32-NEXT: lw a3, 4(a1)
; CHECK-RV32-NEXT: ntl.p1
-; CHECK-RV32-NEXT: lw a4, 4(a1)
+; CHECK-RV32-NEXT: lw a4, 8(a1)
; CHECK-RV32-NEXT: ntl.p1
-; CHECK-RV32-NEXT: lw a1, 0(a1)
-; CHECK-RV32-NEXT: sw a2, 12(a0)
-; CHECK-RV32-NEXT: sw a3, 8(a0)
-; CHECK-RV32-NEXT: sw a4, 4(a0)
-; CHECK-RV32-NEXT: sw a1, 0(a0)
+; CHECK-RV32-NEXT: lw a1, 12(a1)
+; CHECK-RV32-NEXT: sw a2, 0(a0)
+; CHECK-RV32-NEXT: sw a3, 4(a0)
+; CHECK-RV32-NEXT: sw a4, 8(a0)
+; CHECK-RV32-NEXT: sw a1, 12(a0)
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64C-LABEL: test_nontemporal_P1_load_v2i64:
@@ -1996,17 +1996,17 @@ define <2 x i64> @test_nontemporal_P1_load_v2i64(ptr %p) {
; CHECK-RV32C-LABEL: test_nontemporal_P1_load_v2i64:
; CHECK-RV32C: # %bb.0:
; CHECK-RV32C-NEXT: c.ntl.p1
-; CHECK-RV32C-NEXT: lw a2, 12(a1)
+; CHECK-RV32C-NEXT: lw a2, 0(a1)
; CHECK-RV32C-NEXT: c.ntl.p1
-; CHECK-RV32C-NEXT: lw a3, 8(a1)
+; CHECK-RV32C-NEXT: lw a3, 4(a1)
; CHECK-RV32C-NEXT: c.ntl.p1
-; CHECK-RV32C-NEXT: lw a4, 4(a1)
+; CHECK-RV32C-NEXT: lw a4, 8(a1)
; CHECK-RV32C-NEXT: c.ntl.p1
-; CHECK-RV32C-NEXT: lw a1, 0(a1)
-; CHECK-RV32C-NEXT: sw a2, 12(a0)
-; CHECK-RV32C-NEXT: sw a3, 8(a0)
-; CHECK-RV32C-NEXT: sw a4, 4(a0)
-; CHECK-RV32C-NEXT: sw a1, 0(a0)
+; CHECK-RV32C-NEXT: lw a1, 12(a1)
+; CHECK-RV32C-NEXT: sw a2, 0(a0)
+; CHECK-RV32C-NEXT: sw a3, 4(a0)
+; CHECK-RV32C-NEXT: sw a4, 8(a0)
+; CHECK-RV32C-NEXT: sw a1, 12(a0)
; CHECK-RV32C-NEXT: ret
;
; CHECK-RV64V-LABEL: test_nontemporal_P1_load_v2i64:
@@ -2036,9 +2036,9 @@ define void @test_nontemporal_P1_store_i64(ptr %p, i64 %v) {
; CHECK-RV32-LABEL: test_nontemporal_P1_store_i64:
; CHECK-RV32: # %bb.0:
; CHECK-RV32-NEXT: ntl.p1
-; CHECK-RV32-NEXT: sw a2, 4(a0)
-; CHECK-RV32-NEXT: ntl.p1
; CHECK-RV32-NEXT: sw a1, 0(a0)
+; CHECK-RV32-NEXT: ntl.p1
+; CHECK-RV32-NEXT: sw a2, 4(a0)
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64C-LABEL: test_nontemporal_P1_store_i64:
@@ -2050,9 +2050,9 @@ define void @test_nontemporal_P1_store_i64(ptr %p, i64 %v) {
; CHECK-RV32C-LABEL: test_nontemporal_P1_store_i64:
; CHECK-RV32C: # %bb.0:
; CHECK-RV32C-NEXT: c.ntl.p1
-; CHECK-RV32C-NEXT: sw a2, 4(a0)
-; CHECK-RV32C-NEXT: c.ntl.p1
; CHECK-RV32C-NEXT: sw a1, 0(a0)
+; CHECK-RV32C-NEXT: c.ntl.p1
+; CHECK-RV32C-NEXT: sw a2, 4(a0)
; CHECK-RV32C-NEXT: ret
;
; CHECK-RV64V-LABEL: test_nontemporal_P1_store_i64:
@@ -2064,9 +2064,9 @@ define void @test_nontemporal_P1_store_i64(ptr %p, i64 %v) {
; CHECK-RV32V-LABEL: test_nontemporal_P1_store_i64:
; CHECK-RV32V: # %bb.0:
; CHECK-RV32V-NEXT: ntl.p1
-; CHECK-RV32V-NEXT: sw a2, 4(a0)
-; CHECK-RV32V-NEXT: ntl.p1
; CHECK-RV32V-NEXT: sw a1, 0(a0)
+; CHECK-RV32V-NEXT: ntl.p1
+; CHECK-RV32V-NEXT: sw a2, 4(a0)
; CHECK-RV32V-NEXT: ret
store i64 %v, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !1
ret void
@@ -2329,46 +2329,46 @@ define void @test_nontemporal_P1_store_v16i8(ptr %p, <16 x i8> %v) {
; CHECK-RV64-NEXT: lbu a7, 40(a1)
; CHECK-RV64-NEXT: lbu t0, 48(a1)
; CHECK-RV64-NEXT: lbu t1, 56(a1)
-; CHECK-RV64-NEXT: lbu t2, 96(a1)
-; CHECK-RV64-NEXT: lbu t3, 104(a1)
-; CHECK-RV64-NEXT: lbu t4, 112(a1)
-; CHECK-RV64-NEXT: lbu t5, 120(a1)
-; CHECK-RV64-NEXT: lbu t6, 64(a1)
-; CHECK-RV64-NEXT: lbu s0, 72(a1)
-; CHECK-RV64-NEXT: lbu s1, 80(a1)
-; CHECK-RV64-NEXT: lbu a1, 88(a1)
+; CHECK-RV64-NEXT: lbu t2, 64(a1)
+; CHECK-RV64-NEXT: lbu t3, 72(a1)
+; CHECK-RV64-NEXT: lbu t4, 80(a1)
+; CHECK-RV64-NEXT: lbu t5, 88(a1)
+; CHECK-RV64-NEXT: lbu t6, 96(a1)
+; CHECK-RV64-NEXT: lbu s0, 104(a1)
+; CHECK-RV64-NEXT: lbu s1, 112(a1)
+; CHECK-RV64-NEXT: lbu a1, 120(a1)
; CHECK-RV64-NEXT: ntl.p1
-; CHECK-RV64-NEXT: sb t5, 15(a0)
+; CHECK-RV64-NEXT: sb t6, 12(a0)
; CHECK-RV64-NEXT: ntl.p1
-; CHECK-RV64-NEXT: sb t4, 14(a0)
+; CHECK-RV64-NEXT: sb s0, 13(a0)
; CHECK-RV64-NEXT: ntl.p1
-; CHECK-RV64-NEXT: sb t3, 13(a0)
+; CHECK-RV64-NEXT: sb s1, 14(a0)
; CHECK-RV64-NEXT: ntl.p1
-; CHECK-RV64-NEXT: sb t2, 12(a0)
+; CHECK-RV64-NEXT: sb a1, 15(a0)
; CHECK-RV64-NEXT: ntl.p1
-; CHECK-RV64-NEXT: sb a1, 11(a0)
+; CHECK-RV64-NEXT: sb t2, 8(a0)
; CHECK-RV64-NEXT: ntl.p1
-; CHECK-RV64-NEXT: sb s1, 10(a0)
+; CHECK-RV64-NEXT: sb t3, 9(a0)
; CHECK-RV64-NEXT: ntl.p1
-; CHECK-RV64-NEXT: sb s0, 9(a0)
+; CHECK-RV64-NEXT: sb t4, 10(a0)
; CHECK-RV64-NEXT: ntl.p1
-; CHECK-RV64-NEXT: sb t6, 8(a0)
-; CHECK-RV64-NEXT: ntl.p1
-; CHECK-RV64-NEXT: sb t1, 7(a0)
+; CHECK-RV64-NEXT: sb t5, 11(a0)
; CHECK-RV64-NEXT: ntl.p1
-; CHECK-RV64-NEXT: sb t0, 6(a0)
+; CHECK-RV64-NEXT: sb a6, 4(a0)
; CHECK-RV64-NEXT: ntl.p1
; CHECK-RV64-NEXT: sb a7, 5(a0)
; CHECK-RV64-NEXT: ntl.p1
-; CHECK-RV64-NEXT: sb a6, 4(a0)
+; CHECK-RV64-NEXT: sb t0, 6(a0)
; CHECK-RV64-NEXT: ntl.p1
-; CHECK-RV64-NEXT: sb a5, 3(a0)
+; CHECK-RV64-NEXT: sb t1, 7(a0)
; CHECK-RV64-NEXT: ntl.p1
-; CHECK-RV64-NEXT: sb a4, 2(a0)
+; CHECK-RV64-NEXT: sb a2, 0(a0)
; CHECK-RV64-NEXT: ntl.p1
; CHECK-RV64-NEXT: sb a3, 1(a0)
; CHECK-RV64-NEXT: ntl.p1
-; CHECK-RV64-NEXT: sb a2, 0(a0)
+; CHECK-RV64-NEXT: sb a4, 2(a0)
+; CHECK-RV64-NEXT: ntl.p1
+; CHECK-RV64-NEXT: sb a5, 3(a0)
; CHECK-RV64-NEXT: ld s0, 8(sp) # 8-byte Folded Reload
; CHECK-RV64-NEXT: ld s1, 0(sp) # 8-byte Folded Reload
; CHECK-RV64-NEXT: addi sp, sp, 16
@@ -2390,46 +2390,46 @@ define void @test_nontemporal_P1_store_v16i8(ptr %p, <16 x i8> %v) {
; CHECK-RV32-NEXT: lbu a7, 20(a1)
; CHECK-RV32-NEXT: lbu t0, 24(a1)
; CHECK-RV32-NEXT: lbu t1, 28(a1)
-; CHECK-RV32-NEXT: lbu t2, 48(a1)
-; CHECK-RV32-NEXT: lbu t3, 52(a1)
-; CHECK-RV32-NEXT: lbu t4, 56(a1)
-; CHECK-RV32-NEXT: lbu t5, 60(a1)
-; CHECK-RV32-NEXT: lbu t6, 32(a1)
-; CHECK-RV32-NEXT: lbu s0, 36(a1)
-; CHECK-RV32-NEXT: lbu s1, 40(a1)
-; CHECK-RV32-NEXT: lbu a1, 44(a1)
+; CHECK-RV32-NEXT: lbu t2, 32(a1)
+; CHECK-RV32-NEXT: lbu t3, 36(a1)
+; CHECK-RV32-NEXT: lbu t4, 40(a1)
+; CHECK-RV32-NEXT: lbu t5, 44(a1)
+; CHECK-RV32-NEXT: lbu t6, 48(a1)
+; CHECK-RV32-NEXT: lbu s0, 52(a1)
+; CHECK-RV32-NEXT: lbu s1, 56(a1)
+; CHECK-RV32-NEXT: lbu a1, 60(a1)
; CHECK-RV32-NEXT: ntl.p1
-; CHECK-RV32-NEXT: sb t5, 15(a0)
+; CHECK-RV32-NEXT: sb t6, 12(a0)
; CHECK-RV32-NEXT: ntl.p1
-; CHECK-RV32-NEXT: sb t4, 14(a0)
+; CHECK-RV32-NEXT: sb s0, 13(a0)
; CHECK-RV32-NEXT: ntl.p1
-; CHECK-RV32-NEXT: sb t3, 13(a0)
+; CHECK-RV32-NEXT: sb s1, 14(a0)
; CHECK-RV32-NEXT: ntl.p1
-; CHECK-RV32-NEXT: sb t2, 12(a0)
+; CHECK-RV32-NEXT: sb a1, 15(a0)
; CHECK-RV32-NEXT: ntl.p1
-; CHECK-RV32-NEXT: sb a1, 11(a0)
+; CHECK-RV32-NEXT: sb t2, 8(a0)
; CHECK-RV32-NEXT: ntl.p1
-; CHECK-RV32-NEXT: sb s1, 10(a0)
+; CHECK-RV32-NEXT: sb t3, 9(a0)
; CHECK-RV32-NEXT: ntl.p1
-; CHECK-RV32-NEXT: sb s0, 9(a0)
+; CHECK-RV32-NEXT: sb t4, 10(a0)
; CHECK-RV32-NEXT: ntl.p1
-; CHECK-RV32-NEXT: sb t6, 8(a0)
+; CHECK-RV32-NEXT: sb t5, 11(a0)
; CHECK-RV32-NEXT: ntl.p1
-; CHECK-RV32-NEXT: sb t1, 7(a0)
-; CHECK-RV32-NEXT: ntl.p1
-; CHECK-RV32-NEXT: sb t0, 6(a0)
+; CHECK-RV32-NEXT: sb a6, 4(a0)
; CHECK-RV32-NEXT: ntl.p1
; CHECK-RV32-NEXT: sb a7, 5(a0)
; CHECK-RV32-NEXT: ntl.p1
-; CHECK-RV32-NEXT: sb a6, 4(a0)
+; CHECK-RV32-NEXT: sb t0, 6(a0)
; CHECK-RV32-NEXT: ntl.p1
-; CHECK-RV32-NEXT: sb a5, 3(a0)
+; CHECK-RV32-NEXT: sb t1, 7(a0)
; CHECK-RV32-NEXT: ntl.p1
-; CHECK-RV32-NEXT: sb a4, 2(a0)
+; CHECK-RV32-NEXT: sb a2, 0(a0)
; CHECK-RV32-NEXT: ntl.p1
; CHECK-RV32-NEXT: sb a3, 1(a0)
; CHECK-RV32-NEXT: ntl.p1
-; CHECK-RV32-NEXT: sb a2, 0(a0)
+; CHECK-RV32-NEXT: sb a4, 2(a0)
+; CHECK-RV32-NEXT: ntl.p1
+; CHECK-RV32-NEXT: sb a5, 3(a0)
; CHECK-RV32-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; CHECK-RV32-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
; CHECK-RV32-NEXT: addi sp, sp, 16
@@ -2451,46 +2451,46 @@ define void @test_nontemporal_P1_store_v16i8(ptr %p, <16 x i8> %v) {
; CHECK-RV64C-NEXT: lbu t3, 40(a1)
; CHECK-RV64C-NEXT: lbu t4, 48(a1)
; CHECK-RV64C-NEXT: lbu t5, 56(a1)
-; CHECK-RV64C-NEXT: lbu a2, 96(a1)
-; CHECK-RV64C-NEXT: lbu a3, 104(a1)
-; CHECK-RV64C-NEXT: lbu a4, 112(a1)
-; CHECK-RV64C-NEXT: lbu a5, 120(a1)
; CHECK-RV64C-NEXT: lbu t6, 64(a1)
-; CHECK-RV64C-NEXT: lbu s0, 72(a1)
-; CHECK-RV64C-NEXT: lbu s1, 80(a1)
-; CHECK-RV64C-NEXT: lbu a1, 88(a1)
-; CHECK-RV64C-NEXT: c.ntl.p1
-; CHECK-RV64C-NEXT: sb a5, 15(a0)
-; CHECK-RV64C-NEXT: c.ntl.p1
-; CHECK-RV64C-NEXT: sb a4, 14(a0)
-; CHECK-RV64C-NEXT: c.ntl.p1
-; CHECK-RV64C-NEXT: sb a3, 13(a0)
+; CHECK-RV64C-NEXT: lbu a3, 72(a1)
+; CHECK-RV64C-NEXT: lbu a4, 80(a1)
+; CHECK-RV64C-NEXT: lbu a5, 88(a1)
+; CHECK-RV64C-NEXT: lbu a2, 96(a1)
+; CHECK-RV64C-NEXT: lbu s0, 104(a1)
+; CHECK-RV64C-NEXT: lbu s1, 112(a1)
+; CHECK-RV64C-NEXT: lbu a1, 120(a1)
; CHECK-RV64C-NEXT: c.ntl.p1
; CHECK-RV64C-NEXT: sb a2, 12(a0)
; CHECK-RV64C-NEXT: c.ntl.p1
-; CHECK-RV64C-NEXT: sb a1, 11(a0)
+; CHECK-RV64C-NEXT: sb s0, 13(a0)
; CHECK-RV64C-NEXT: c.ntl.p1
-; CHECK-RV64C-NEXT: sb s1, 10(a0)
+; CHECK-RV64C-NEXT: sb s1, 14(a0)
; CHECK-RV64C-NEXT: c.ntl.p1
-; CHECK-RV64C-NEXT: sb s0, 9(a0)
+; CHECK-RV64C-NEXT: sb a1, 15(a0)
; CHECK-RV64C-NEXT: c.ntl.p1
; CHECK-RV64C-NEXT: sb t6, 8(a0)
; CHECK-RV64C-NEXT: c.ntl.p1
-; CHECK-RV64C-NEXT: sb t5, 7(a0)
+; CHECK-RV64C-NEXT: sb a3, 9(a0)
; CHECK-RV64C-NEXT: c.ntl.p1
-; CHECK-RV64C-NEXT: sb t4, 6(a0)
+; CHECK-RV64C-NEXT: sb a4, 10(a0)
; CHECK-RV64C-NEXT: c.ntl.p1
-; CHECK-RV64C-NEXT: sb t3, 5(a0)
+; CHECK-RV64C-NEXT: sb a5, 11(a0)
; CHECK-RV64C-NEXT: c.ntl.p1
; CHECK-RV64C-NEXT: sb t2, 4(a0)
; CHECK-RV64C-NEXT: c.ntl.p1
-; CHECK-RV64C-NEXT: sb t1, 3(a0)
+; CHECK-RV64C-NEXT: sb t3, 5(a0)
; CHECK-RV64C-NEXT: c.ntl.p1
-; CHECK-RV64C-NEXT: sb t0, 2(a0)
+; CHECK-RV64C-NEXT: sb t4, 6(a0)
; CHECK-RV64C-NEXT: c.ntl.p1
-; CHECK-RV64C-NEXT: sb a7, 1(a0)
+; CHECK-RV64C-NEXT: sb t5, 7(a0)
; CHECK-RV64C-NEXT: c.ntl.p1
; CHECK-RV64C-NEXT: sb a6, 0(a0)
+; CHECK-RV64C-NEXT: c.ntl.p1
+; CHECK-RV64C-NEXT: sb a7, 1(a0)
+; CHECK-RV64C-NEXT: c.ntl.p1
+; CHECK-RV64C-NEXT: sb t0, 2(a0)
+; CHECK-RV64C-NEXT: c.ntl.p1
+; CHECK-RV64C-NEXT: sb t1, 3(a0)
; CHECK-RV64C-NEXT: ld s0, 8(sp) # 8-byte Folded Reload
; CHECK-RV64C-NEXT: ld s1, 0(sp) # 8-byte Folded Reload
; CHECK-RV64C-NEXT: addi sp, sp, 16
@@ -2512,46 +2512,46 @@ define void @test_nontemporal_P1_store_v16i8(ptr %p, <16 x i8> %v) {
; CHECK-RV32C-NEXT: lbu t3, 20(a1)
; CHECK-RV32C-NEXT: lbu t4, 24(a1)
; CHECK-RV32C-NEXT: lbu t5, 28(a1)
-; CHECK-RV32C-NEXT: lbu a2, 48(a1)
-; CHECK-RV32C-NEXT: lbu a3, 52(a1)
-; CHECK-RV32C-NEXT: lbu a4, 56(a1)
-; CHECK-RV32C-NEXT: lbu a5, 60(a1)
; CHECK-RV32C-NEXT: lbu t6, 32(a1)
-; CHECK-RV32C-NEXT: lbu s0, 36(a1)
-; CHECK-RV32C-NEXT: lbu s1, 40(a1)
-; CHECK-RV32C-NEXT: lbu a1, 44(a1)
-; CHECK-RV32C-NEXT: c.ntl.p1
-; CHECK-RV32C-NEXT: sb a5, 15(a0)
-; CHECK-RV32C-NEXT: c.ntl.p1
-; CHECK-RV32C-NEXT: sb a4, 14(a0)
-; CHECK-RV32C-NEXT: c.ntl.p1
-; CHECK-RV32C-NEXT: sb a3, 13(a0)
+; CHECK-RV32C-NEXT: lbu a3, 36(a1)
+; CHECK-RV32C-NEXT: lbu a4, 40(a1)
+; CHECK-RV32C-NEXT: lbu a5, 44(a1)
+; CHECK-RV32C-NEXT: lbu a2, 48(a1)
+; CHECK-RV32C-NEXT: lbu s0, 52(a1)
+; CHECK-RV32C-NEXT: lbu s1, 56(a1)
+; CHECK-RV32C-NEXT: lbu a1, 60(a1)
; CHECK-RV32C-NEXT: c.ntl.p1
; CHECK-RV32C-NEXT: sb a2, 12(a0)
; CHECK-RV32C-NEXT: c.ntl.p1
-; CHECK-RV32C-NEXT: sb a1, 11(a0)
+; CHECK-RV32C-NEXT: sb s0, 13(a0)
; CHECK-RV32C-NEXT: c.ntl.p1
-; CHECK-RV32C-NEXT: sb s1, 10(a0)
+; CHECK-RV32C-NEXT: sb s1, 14(a0)
; CHECK-RV32C-NEXT: c.ntl.p1
-; CHECK-RV32C-NEXT: sb s0, 9(a0)
+; CHECK-RV32C-NEXT: sb a1, 15(a0)
; CHECK-RV32C-NEXT: c.ntl.p1
; CHECK-RV32C-NEXT: sb t6, 8(a0)
; CHECK-RV32C-NEXT: c.ntl.p1
-; CHECK-RV32C-NEXT: sb t5, 7(a0)
+; CHECK-RV32C-NEXT: sb a3, 9(a0)
; CHECK-RV32C-NEXT: c.ntl.p1
-; CHECK-RV32C-NEXT: sb t4, 6(a0)
+; CHECK-RV32C-NEXT: sb a4, 10(a0)
; CHECK-RV32C-NEXT: c.ntl.p1
-; CHECK-RV32C-NEXT: sb t3, 5(a0)
+; CHECK-RV32C-NEXT: sb a5, 11(a0)
; CHECK-RV32C-NEXT: c.ntl.p1
; CHECK-RV32C-NEXT: sb t2, 4(a0)
; CHECK-RV32C-NEXT: c.ntl.p1
-; CHECK-RV32C-NEXT: sb t1, 3(a0)
+; CHECK-RV32C-NEXT: sb t3, 5(a0)
; CHECK-RV32C-NEXT: c.ntl.p1
-; CHECK-RV32C-NEXT: sb t0, 2(a0)
+; CHECK-RV32C-NEXT: sb t4, 6(a0)
; CHECK-RV32C-NEXT: c.ntl.p1
-; CHECK-RV32C-NEXT: sb a7, 1(a0)
+; CHECK-RV32C-NEXT: sb t5, 7(a0)
; CHECK-RV32C-NEXT: c.ntl.p1
; CHECK-RV32C-NEXT: sb a6, 0(a0)
+; CHECK-RV32C-NEXT: c.ntl.p1
+; CHECK-RV32C-NEXT: sb a7, 1(a0)
+; CHECK-RV32C-NEXT: c.ntl.p1
+; CHECK-RV32C-NEXT: sb t0, 2(a0)
+; CHECK-RV32C-NEXT: c.ntl.p1
+; CHECK-RV32C-NEXT: sb t1, 3(a0)
; CHECK-RV32C-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; CHECK-RV32C-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
; CHECK-RV32C-NEXT: addi sp, sp, 16
@@ -2577,114 +2577,114 @@ define void @test_nontemporal_P1_store_v16i8(ptr %p, <16 x i8> %v) {
define void @test_nontemporal_P1_store_v8i16(ptr %p, <8 x i16> %v) {
; CHECK-RV64-LABEL: test_nontemporal_P1_store_v8i16:
; CHECK-RV64: # %bb.0:
-; CHECK-RV64-NEXT: lh a2, 32(a1)
-; CHECK-RV64-NEXT: lh a3, 40(a1)
-; CHECK-RV64-NEXT: lh a4, 48(a1)
-; CHECK-RV64-NEXT: lh a5, 56(a1)
-; CHECK-RV64-NEXT: lh a6, 0(a1)
-; CHECK-RV64-NEXT: lh a7, 8(a1)
-; CHECK-RV64-NEXT: lh t0, 16(a1)
-; CHECK-RV64-NEXT: lh a1, 24(a1)
+; CHECK-RV64-NEXT: lh a2, 0(a1)
+; CHECK-RV64-NEXT: lh a3, 8(a1)
+; CHECK-RV64-NEXT: lh a4, 16(a1)
+; CHECK-RV64-NEXT: lh a5, 24(a1)
+; CHECK-RV64-NEXT: lh a6, 32(a1)
+; CHECK-RV64-NEXT: lh a7, 40(a1)
+; CHECK-RV64-NEXT: lh t0, 48(a1)
+; CHECK-RV64-NEXT: lh a1, 56(a1)
; CHECK-RV64-NEXT: ntl.p1
-; CHECK-RV64-NEXT: sh a5, 14(a0)
+; CHECK-RV64-NEXT: sh a6, 8(a0)
; CHECK-RV64-NEXT: ntl.p1
-; CHECK-RV64-NEXT: sh a4, 12(a0)
+; CHECK-RV64-NEXT: sh a7, 10(a0)
; CHECK-RV64-NEXT: ntl.p1
-; CHECK-RV64-NEXT: sh a3, 10(a0)
+; CHECK-RV64-NEXT: sh t0, 12(a0)
; CHECK-RV64-NEXT: ntl.p1
-; CHECK-RV64-NEXT: sh a2, 8(a0)
+; CHECK-RV64-NEXT: sh a1, 14(a0)
; CHECK-RV64-NEXT: ntl.p1
-; CHECK-RV64-NEXT: sh a1, 6(a0)
+; CHECK-RV64-NEXT: sh a2, 0(a0)
; CHECK-RV64-NEXT: ntl.p1
-; CHECK-RV64-NEXT: sh t0, 4(a0)
+; CHECK-RV64-NEXT: sh a3, 2(a0)
; CHECK-RV64-NEXT: ntl.p1
-; CHECK-RV64-NEXT: sh a7, 2(a0)
+; CHECK-RV64-NEXT: sh a4, 4(a0)
; CHECK-RV64-NEXT: ntl.p1
-; CHECK-RV64-NEXT: sh a6, 0(a0)
+; CHECK-RV64-NEXT: sh a5, 6(a0)
; CHECK-RV64-NEXT: ret
;
; CHECK-RV32-LABEL: test_nontemporal_P1_store_v8i16:
; CHECK-RV32: # %bb.0:
-; CHECK-RV32-NEXT: lh a2, 16(a1)
-; CHECK-RV32-NEXT: lh a3, 20(a1)
-; CHECK-RV32-NEXT: lh a4, 24(a1)
-; CHECK-RV32-NEXT: lh a5, 28(a1)
-; CHECK-RV32-NEXT: lh a6, 0(a1)
-; CHECK-RV32-NEXT: lh a7, 4(a1)
-; CHECK-RV32-NEXT: lh t0, 8(a1)
-; CHECK-RV32-NEXT: lh a1, 12(a1)
+; CHECK-RV32-NEXT: lh a2, 0(a1)
+; CHECK-RV32-NEXT: lh a3, 4(a1)
+; CHECK-RV32-NEXT: lh a4, 8(a1)
+; CHECK-RV32-NEXT: lh a5, 12(a1)
+; CHECK-RV32-NEXT: lh a6, 16(a1)
+; CHECK-RV32-NEXT: lh a7, 20(a1)
+; CHECK-RV32-NEXT: lh t0, 24(a1)
+; CHECK-RV32-NEXT: lh a1, 28(a1)
; CHECK-RV32-NEXT: ntl.p1
-; CHECK-RV32-NEXT: sh a5, 14(a0)
+; CHECK-RV32-NEXT: sh a6, 8(a0)
; CHECK-RV32-NEXT: ntl.p1
-; CHECK-RV32-NEXT: sh a4, 12(a0)
+; CHECK-RV32-NEXT: sh a7, 10(a0)
; CHECK-RV32-NEXT: ntl.p1
-; CHECK-RV32-NEXT: sh a3, 10(a0)
+; CHECK-RV32-NEXT: sh t0, 12(a0)
; CHECK-RV32-NEXT: ntl.p1
-; CHECK-RV32-NEXT: sh a2, 8(a0)
+; CHECK-RV32-NEXT: sh a1, 14(a0)
; CHECK-RV32-NEXT: ntl.p1
-; CHECK-RV32-NEXT: sh a1, 6(a0)
+; CHECK-RV32-NEXT: sh a2, 0(a0)
; CHECK-RV32-NEXT: ntl.p1
-; CHECK-RV32-NEXT: sh t0, 4(a0)
+; CHECK-RV32-NEXT: sh a3, 2(a0)
; CHECK-RV32-NEXT: ntl.p1
-; CHECK-RV32-NEXT: sh a7, 2(a0)
+; CHECK-RV32-NEXT: sh a4, 4(a0)
; CHECK-RV32-NEXT: ntl.p1
-; CHECK-RV32-NEXT: sh a6, 0(a0)
+; CHECK-RV32-NEXT: sh a5, 6(a0)
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64C-LABEL: test_nontemporal_P1_store_v8i16:
; CHECK-RV64C: # %bb.0:
-; CHECK-RV64C-NEXT: lh a7, 32(a1)
+; CHECK-RV64C-NEXT: lh a6, 0(a1)
+; CHECK-RV64C-NEXT: lh a7, 8(a1)
+; CHECK-RV64C-NEXT: lh t0, 16(a1)
+; CHECK-RV64C-NEXT: lh a5, 24(a1)
+; CHECK-RV64C-NEXT: lh a2, 32(a1)
; CHECK-RV64C-NEXT: lh a3, 40(a1)
; CHECK-RV64C-NEXT: lh a4, 48(a1)
-; CHECK-RV64C-NEXT: lh a5, 56(a1)
-; CHECK-RV64C-NEXT: lh a6, 0(a1)
-; CHECK-RV64C-NEXT: lh t0, 8(a1)
-; CHECK-RV64C-NEXT: lh a2, 16(a1)
-; CHECK-RV64C-NEXT: lh a1, 24(a1)
+; CHECK-RV64C-NEXT: lh a1, 56(a1)
; CHECK-RV64C-NEXT: c.ntl.p1
-; CHECK-RV64C-NEXT: sh a5, 14(a0)
-; CHECK-RV64C-NEXT: c.ntl.p1
-; CHECK-RV64C-NEXT: sh a4, 12(a0)
+; CHECK-RV64C-NEXT: sh a2, 8(a0)
; CHECK-RV64C-NEXT: c.ntl.p1
; CHECK-RV64C-NEXT: sh a3, 10(a0)
; CHECK-RV64C-NEXT: c.ntl.p1
-; CHECK-RV64C-NEXT: sh a7, 8(a0)
+; CHECK-RV64C-NEXT: sh a4, 12(a0)
+; CHECK-RV64C-NEXT: c.ntl.p1
+; CHECK-RV64C-NEXT: sh a1, 14(a0)
; CHECK-RV64C-NEXT: c.ntl.p1
-; CHECK-RV64C-NEXT: sh a1, 6(a0)
+; CHECK-RV64C-NEXT: sh a6, 0(a0)
; CHECK-RV64C-NEXT: c.ntl.p1
-; CHECK-RV64C-NEXT: sh a2, 4(a0)
+; CHECK-RV64C-NEXT: sh a7, 2(a0)
; CHECK-RV64C-NEXT: c.ntl.p1
-; CHECK-RV64C-NEXT: sh t0, 2(a0)
+; CHECK-RV64C-NEXT: sh t0, 4(a0)
; CHECK-RV64C-NEXT: c.ntl.p1
-; CHECK-RV64C-NEXT: sh a6, 0(a0)
+; CHECK-RV64C-NEXT: sh a5, 6(a0)
; CHECK-RV64C-NEXT: ret
;
; CHECK-RV32C-LABEL: test_nontemporal_P1_store_v8i16:
; CHECK-RV32C: # %bb.0:
-; CHECK-RV32C-NEXT: lh a7, 16(a1)
+; CHECK-RV32C-NEXT: lh a6, 0(a1)
+; CHECK-RV32C-NEXT: lh a7, 4(a1)
+; CHECK-RV32C-NEXT: lh t0, 8(a1)
+; CHECK-RV32C-NEXT: lh a5, 12(a1)
+; CHECK-RV32C-NEXT: lh a2, 16(a1)
; CHECK-RV32C-NEXT: lh a3, 20(a1)
; CHECK-RV32C-NEXT: lh a4, 24(a1)
-; CHECK-RV32C-NEXT: lh a5, 28(a1)
-; CHECK-RV32C-NEXT: lh a6, 0(a1)
-; CHECK-RV32C-NEXT: lh t0, 4(a1)
-; CHECK-RV32C-NEXT: lh a2, 8(a1)
-; CHECK-RV32C-NEXT: lh a1, 12(a1)
+; CHECK-RV32C-NEXT: lh a1, 28(a1)
; CHECK-RV32C-NEXT: c.ntl.p1
-; CHECK-RV32C-NEXT: sh a5, 14(a0)
-; CHECK-RV32C-NEXT: c.ntl.p1
-; CHECK-RV32C-NEXT: sh a4, 12(a0)
+; CHECK-RV32C-NEXT: sh a2, 8(a0)
; CHECK-RV32C-NEXT: c.ntl.p1
; CHECK-RV32C-NEXT: sh a3, 10(a0)
; CHECK-RV32C-NEXT: c.ntl.p1
-; CHECK-RV32C-NEXT: sh a7, 8(a0)
+; CHECK-RV32C-NEXT: sh a4, 12(a0)
+; CHECK-RV32C-NEXT: c.ntl.p1
+; CHECK-RV32C-NEXT: sh a1, 14(a0)
; CHECK-RV32C-NEXT: c.ntl.p1
-; CHECK-RV32C-NEXT: sh a1, 6(a0)
+; CHECK-RV32C-NEXT: sh a6, 0(a0)
; CHECK-RV32C-NEXT: c.ntl.p1
-; CHECK-RV32C-NEXT: sh a2, 4(a0)
+; CHECK-RV32C-NEXT: sh a7, 2(a0)
; CHECK-RV32C-NEXT: c.ntl.p1
-; CHECK-RV32C-NEXT: sh t0, 2(a0)
+; CHECK-RV32C-NEXT: sh t0, 4(a0)
; CHECK-RV32C-NEXT: c.ntl.p1
-; CHECK-RV32C-NEXT: sh a6, 0(a0)
+; CHECK-RV32C-NEXT: sh a5, 6(a0)
; CHECK-RV32C-NEXT: ret
;
; CHECK-RV64V-LABEL: test_nontemporal_P1_store_v8i16:
@@ -2707,66 +2707,66 @@ define void @test_nontemporal_P1_store_v8i16(ptr %p, <8 x i16> %v) {
define void @test_nontemporal_P1_store_v4i32(ptr %p, <4 x i32> %v) {
; CHECK-RV64-LABEL: test_nontemporal_P1_store_v4i32:
; CHECK-RV64: # %bb.0:
-; CHECK-RV64-NEXT: lw a2, 24(a1)
-; CHECK-RV64-NEXT: lw a3, 16(a1)
-; CHECK-RV64-NEXT: lw a4, 8(a1)
-; CHECK-RV64-NEXT: lw a1, 0(a1)
+; CHECK-RV64-NEXT: lw a2, 0(a1)
+; CHECK-RV64-NEXT: lw a3, 8(a1)
+; CHECK-RV64-NEXT: lw a4, 16(a1)
+; CHECK-RV64-NEXT: lw a1, 24(a1)
; CHECK-RV64-NEXT: ntl.p1
-; CHECK-RV64-NEXT: sw a2, 12(a0)
+; CHECK-RV64-NEXT: sw a2, 0(a0)
; CHECK-RV64-NEXT: ntl.p1
-; CHECK-RV64-NEXT: sw a3, 8(a0)
+; CHECK-RV64-NEXT: sw a3, 4(a0)
; CHECK-RV64-NEXT: ntl.p1
-; CHECK-RV64-NEXT: sw a4, 4(a0)
+; CHECK-RV64-NEXT: sw a4, 8(a0)
; CHECK-RV64-NEXT: ntl.p1
-; CHECK-RV64-NEXT: sw a1, 0(a0)
+; CHECK-RV64-NEXT: sw a1, 12(a0)
; CHECK-RV64-NEXT: ret
;
; CHECK-RV32-LABEL: test_nontemporal_P1_store_v4i32:
; CHECK-RV32: # %bb.0:
-; CHECK-RV32-NEXT: lw a2, 12(a1)
-; CHECK-RV32-NEXT: lw a3, 8(a1)
-; CHECK-RV32-NEXT: lw a4, 4(a1)
-; CHECK-RV32-NEXT: lw a1, 0(a1)
+; CHECK-RV32-NEXT: lw a2, 0(a1)
+; CHECK-RV32-NEXT: lw a3, 4(a1)
+; CHECK-RV32-NEXT: lw a4, 8(a1)
+; CHECK-RV32-NEXT: lw a1, 12(a1)
; CHECK-RV32-NEXT: ntl.p1
-; CHECK-RV32-NEXT: sw a2, 12(a0)
+; CHECK-RV32-NEXT: sw a2, 0(a0)
; CHECK-RV32-NEXT: ntl.p1
-; CHECK-RV32-NEXT: sw a3, 8(a0)
+; CHECK-RV32-NEXT: sw a3, 4(a0)
; CHECK-RV32-NEXT: ntl.p1
-; CHECK-RV32-NEXT: sw a4, 4(a0)
+; CHECK-RV32-NEXT: sw a4, 8(a0)
; CHECK-RV32-NEXT: ntl.p1
-; CHECK-RV32-NEXT: sw a1, 0(a0)
+; CHECK-RV32-NEXT: sw a1, 12(a0)
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64C-LABEL: test_nontemporal_P1_store_v4i32:
; CHECK-RV64C: # %bb.0:
-; CHECK-RV64C-NEXT: lw a2, 24(a1)
-; CHECK-RV64C-NEXT: lw a3, 16(a1)
-; CHECK-RV64C-NEXT: lw a4, 8(a1)
-; CHECK-RV64C-NEXT: lw a1, 0(a1)
+; CHECK-RV64C-NEXT: lw a2, 0(a1)
+; CHECK-RV64C-NEXT: lw a3, 8(a1)
+; CHECK-RV64C-NEXT: lw a4, 16(a1)
+; CHECK-RV64C-NEXT: lw a1, 24(a1)
; CHECK-RV64C-NEXT: c.ntl.p1
-; CHECK-RV64C-NEXT: sw a2, 12(a0)
+; CHECK-RV64C-NEXT: sw a2, 0(a0)
; CHECK-RV64C-NEXT: c.ntl.p1
-; CHECK-RV64C-NEXT: sw a3, 8(a0)
+; CHECK-RV64C-NEXT: sw a3, 4(a0)
; CHECK-RV64C-NEXT: c.ntl.p1
-; CHECK-RV64C-NEXT: sw a4, 4(a0)
+; CHECK-RV64C-NEXT: sw a4, 8(a0)
; CHECK-RV64C-NEXT: c.ntl.p1
-; CHECK-RV64C-NEXT: sw a1, 0(a0)
+; CHECK-RV64C-NEXT: sw a1, 12(a0)
; CHECK-RV64C-NEXT: ret
;
; CHECK-RV32C-LABEL: test_nontemporal_P1_store_v4i32:
; CHECK-RV32C: # %bb.0:
-; CHECK-RV32C-NEXT: lw a2, 12(a1)
-; CHECK-RV32C-NEXT: lw a3, 8(a1)
-; CHECK-RV32C-NEXT: lw a4, 4(a1)
-; CHECK-RV32C-NEXT: lw a1, 0(a1)
+; CHECK-RV32C-NEXT: lw a2, 0(a1)
+; CHECK-RV32C-NEXT: lw a3, 4(a1)
+; CHECK-RV32C-NEXT: lw a4, 8(a1)
+; CHECK-RV32C-NEXT: lw a1, 12(a1)
; CHECK-RV32C-NEXT: c.ntl.p1
-; CHECK-RV32C-NEXT: sw a2, 12(a0)
+; CHECK-RV32C-NEXT: sw a2, 0(a0)
; CHECK-RV32C-NEXT: c.ntl.p1
-; CHECK-RV32C-NEXT: sw a3, 8(a0)
+; CHECK-RV32C-NEXT: sw a3, 4(a0)
; CHECK-RV32C-NEXT: c.ntl.p1
-; CHECK-RV32C-NEXT: sw a4, 4(a0)
+; CHECK-RV32C-NEXT: sw a4, 8(a0)
; CHECK-RV32C-NEXT: c.ntl.p1
-; CHECK-RV32C-NEXT: sw a1, 0(a0)
+; CHECK-RV32C-NEXT: sw a1, 12(a0)
; CHECK-RV32C-NEXT: ret
;
; CHECK-RV64V-LABEL: test_nontemporal_P1_store_v4i32:
@@ -2790,49 +2790,49 @@ define void @test_nontemporal_P1_store_v2i64(ptr %p, <2 x i64> %v) {
; CHECK-RV64-LABEL: test_nontemporal_P1_store_v2i64:
; CHECK-RV64: # %bb.0:
; CHECK-RV64-NEXT: ntl.p1
-; CHECK-RV64-NEXT: sd a2, 8(a0)
-; CHECK-RV64-NEXT: ntl.p1
; CHECK-RV64-NEXT: sd a1, 0(a0)
+; CHECK-RV64-NEXT: ntl.p1
+; CHECK-RV64-NEXT: sd a2, 8(a0)
; CHECK-RV64-NEXT: ret
;
; CHECK-RV32-LABEL: test_nontemporal_P1_store_v2i64:
; CHECK-RV32: # %bb.0:
-; CHECK-RV32-NEXT: lw a2, 12(a1)
-; CHECK-RV32-NEXT: lw a3, 8(a1)
-; CHECK-RV32-NEXT: lw a4, 4(a1)
-; CHECK-RV32-NEXT: lw a1, 0(a1)
+; CHECK-RV32-NEXT: lw a2, 0(a1)
+; CHECK-RV32-NEXT: lw a3, 4(a1)
+; CHECK-RV32-NEXT: lw a4, 8(a1)
+; CHECK-RV32-NEXT: lw a1, 12(a1)
; CHECK-RV32-NEXT: ntl.p1
-; CHECK-RV32-NEXT: sw a2, 12(a0)
+; CHECK-RV32-NEXT: sw a2, 0(a0)
; CHECK-RV32-NEXT: ntl.p1
-; CHECK-RV32-NEXT: sw a3, 8(a0)
+; CHECK-RV32-NEXT: sw a3, 4(a0)
; CHECK-RV32-NEXT: ntl.p1
-; CHECK-RV32-NEXT: sw a4, 4(a0)
+; CHECK-RV32-NEXT: sw a4, 8(a0)
; CHECK-RV32-NEXT: ntl.p1
-; CHECK-RV32-NEXT: sw a1, 0(a0)
+; CHECK-RV32-NEXT: sw a1, 12(a0)
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64C-LABEL: test_nontemporal_P1_store_v2i64:
; CHECK-RV64C: # %bb.0:
; CHECK-RV64C-NEXT: c.ntl.p1
-; CHECK-RV64C-NEXT: sd a2, 8(a0)
-; CHECK-RV64C-NEXT: c.ntl.p1
; CHECK-RV64C-NEXT: sd a1, 0(a0)
+; CHECK-RV64C-NEXT: c.ntl.p1
+; CHECK-RV64C-NEXT: sd a2, 8(a0)
; CHECK-RV64C-NEXT: ret
;
; CHECK-RV32C-LABEL: test_nontemporal_P1_store_v2i64:
; CHECK-RV32C: # %bb.0:
-; CHECK-RV32C-NEXT: lw a2, 12(a1)
-; CHECK-RV32C-NEXT: lw a3, 8(a1)
-; CHECK-RV32C-NEXT: lw a4, 4(a1)
-; CHECK-RV32C-NEXT: lw a1, 0(a1)
+; CHECK-RV32C-NEXT: lw a2, 0(a1)
+; CHECK-RV32C-NEXT: lw a3, 4(a1)
+; CHECK-RV32C-NEXT: lw a4, 8(a1)
+; CHECK-RV32C-NEXT: lw a1, 12(a1)
; CHECK-RV32C-NEXT: c.ntl.p1
-; CHECK-RV32C-NEXT: sw a2, 12(a0)
+; CHECK-RV32C-NEXT: sw a2, 0(a0)
; CHECK-RV32C-NEXT: c.ntl.p1
-; CHECK-RV32C-NEXT: sw a3, 8(a0)
+; CHECK-RV32C-NEXT: sw a3, 4(a0)
; CHECK-RV32C-NEXT: c.ntl.p1
-; CHECK-RV32C-NEXT: sw a4, 4(a0)
+; CHECK-RV32C-NEXT: sw a4, 8(a0)
; CHECK-RV32C-NEXT: c.ntl.p1
-; CHECK-RV32C-NEXT: sw a1, 0(a0)
+; CHECK-RV32C-NEXT: sw a1, 12(a0)
; CHECK-RV32C-NEXT: ret
;
; CHECK-RV64V-LABEL: test_nontemporal_P1_store_v2i64:
@@ -3166,53 +3166,53 @@ define <16 x i8> @test_nontemporal_PALL_load_v16i8(ptr %p) {
; CHECK-RV64-LABEL: test_nontemporal_PALL_load_v16i8:
; CHECK-RV64: # %bb.0:
; CHECK-RV64-NEXT: ntl.pall
-; CHECK-RV64-NEXT: ld a2, 8(a1)
+; CHECK-RV64-NEXT: ld a2, 0(a1)
; CHECK-RV64-NEXT: ntl.pall
-; CHECK-RV64-NEXT: ld a1, 0(a1)
-; CHECK-RV64-NEXT: sd a2, 8(a0)
-; CHECK-RV64-NEXT: sd a1, 0(a0)
+; CHECK-RV64-NEXT: ld a1, 8(a1)
+; CHECK-RV64-NEXT: sd a2, 0(a0)
+; CHECK-RV64-NEXT: sd a1, 8(a0)
; CHECK-RV64-NEXT: ret
;
; CHECK-RV32-LABEL: test_nontemporal_PALL_load_v16i8:
; CHECK-RV32: # %bb.0:
; CHECK-RV32-NEXT: ntl.pall
-; CHECK-RV32-NEXT: lw a2, 12(a1)
+; CHECK-RV32-NEXT: lw a2, 0(a1)
; CHECK-RV32-NEXT: ntl.pall
-; CHECK-RV32-NEXT: lw a3, 8(a1)
+; CHECK-RV32-NEXT: lw a3, 4(a1)
; CHECK-RV32-NEXT: ntl.pall
-; CHECK-RV32-NEXT: lw a4, 4(a1)
+; CHECK-RV32-NEXT: lw a4, 8(a1)
; CHECK-RV32-NEXT: ntl.pall
-; CHECK-RV32-NEXT: lw a1, 0(a1)
-; CHECK-RV32-NEXT: sw a2, 12(a0)
-; CHECK-RV32-NEXT: sw a3, 8(a0)
-; CHECK-RV32-NEXT: sw a4, 4(a0)
-; CHECK-RV32-NEXT: sw a1, 0(a0)
+; CHECK-RV32-NEXT: lw a1, 12(a1)
+; CHECK-RV32-NEXT: sw a2, 0(a0)
+; CHECK-RV32-NEXT: sw a3, 4(a0)
+; CHECK-RV32-NEXT: sw a4, 8(a0)
+; CHECK-RV32-NEXT: sw a1, 12(a0)
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64C-LABEL: test_nontemporal_PALL_load_v16i8:
; CHECK-RV64C: # %bb.0:
; CHECK-RV64C-NEXT: c.ntl.pall
-; CHECK-RV64C-NEXT: ld a2, 8(a1)
+; CHECK-RV64C-NEXT: ld a2, 0(a1)
; CHECK-RV64C-NEXT: c.ntl.pall
-; CHECK-RV64C-NEXT: ld a1, 0(a1)
-; CHECK-RV64C-NEXT: sd a2, 8(a0)
-; CHECK-RV64C-NEXT: sd a1, 0(a0)
+; CHECK-RV64C-NEXT: ld a1, 8(a1)
+; CHECK-RV64C-NEXT: sd a2, 0(a0)
+; CHECK-RV64C-NEXT: sd a1, 8(a0)
; CHECK-RV64C-NEXT: ret
;
; CHECK-RV32C-LABEL: test_nontemporal_PALL_load_v16i8:
; CHECK-RV32C: # %bb.0:
; CHECK-RV32C-NEXT: c.ntl.pall
-; CHECK-RV32C-NEXT: lw a2, 12(a1)
+; CHECK-RV32C-NEXT: lw a2, 0(a1)
; CHECK-RV32C-NEXT: c.ntl.pall
-; CHECK-RV32C-NEXT: lw a3, 8(a1)
+; CHECK-RV32C-NEXT: lw a3, 4(a1)
; CHECK-RV32C-NEXT: c.ntl.pall
-; CHECK-RV32C-NEXT: lw a4, 4(a1)
+; CHECK-RV32C-NEXT: lw a4, 8(a1)
; CHECK-RV32C-NEXT: c.ntl.pall
-; CHECK-RV32C-NEXT: lw a1, 0(a1)
-; CHECK-RV32C-NEXT: sw a2, 12(a0)
-; CHECK-RV32C-NEXT: sw a3, 8(a0)
-; CHECK-RV32C-NEXT: sw a4, 4(a0)
-; CHECK-RV32C-NEXT: sw a1, 0(a0)
+; CHECK-RV32C-NEXT: lw a1, 12(a1)
+; CHECK-RV32C-NEXT: sw a2, 0(a0)
+; CHECK-RV32C-NEXT: sw a3, 4(a0)
+; CHECK-RV32C-NEXT: sw a4, 8(a0)
+; CHECK-RV32C-NEXT: sw a1, 12(a0)
; CHECK-RV32C-NEXT: ret
;
; CHECK-RV64V-LABEL: test_nontemporal_PALL_load_v16i8:
@@ -3236,53 +3236,53 @@ define <8 x i16> @test_nontemporal_PALL_load_v8i16(ptr %p) {
; CHECK-RV64-LABEL: test_nontemporal_PALL_load_v8i16:
; CHECK-RV64: # %bb.0:
; CHECK-RV64-NEXT: ntl.pall
-; CHECK-RV64-NEXT: ld a2, 8(a1)
+; CHECK-RV64-NEXT: ld a2, 0(a1)
; CHECK-RV64-NEXT: ntl.pall
-; CHECK-RV64-NEXT: ld a1, 0(a1)
-; CHECK-RV64-NEXT: sd a2, 8(a0)
-; CHECK-RV64-NEXT: sd a1, 0(a0)
+; CHECK-RV64-NEXT: ld a1, 8(a1)
+; CHECK-RV64-NEXT: sd a2, 0(a0)
+; CHECK-RV64-NEXT: sd a1, 8(a0)
; CHECK-RV64-NEXT: ret
;
; CHECK-RV32-LABEL: test_nontemporal_PALL_load_v8i16:
; CHECK-RV32: # %bb.0:
; CHECK-RV32-NEXT: ntl.pall
-; CHECK-RV32-NEXT: lw a2, 12(a1)
+; CHECK-RV32-NEXT: lw a2, 0(a1)
; CHECK-RV32-NEXT: ntl.pall
-; CHECK-RV32-NEXT: lw a3, 8(a1)
+; CHECK-RV32-NEXT: lw a3, 4(a1)
; CHECK-RV32-NEXT: ntl.pall
-; CHECK-RV32-NEXT: lw a4, 4(a1)
+; CHECK-RV32-NEXT: lw a4, 8(a1)
; CHECK-RV32-NEXT: ntl.pall
-; CHECK-RV32-NEXT: lw a1, 0(a1)
-; CHECK-RV32-NEXT: sw a2, 12(a0)
-; CHECK-RV32-NEXT: sw a3, 8(a0)
-; CHECK-RV32-NEXT: sw a4, 4(a0)
-; CHECK-RV32-NEXT: sw a1, 0(a0)
+; CHECK-RV32-NEXT: lw a1, 12(a1)
+; CHECK-RV32-NEXT: sw a2, 0(a0)
+; CHECK-RV32-NEXT: sw a3, 4(a0)
+; CHECK-RV32-NEXT: sw a4, 8(a0)
+; CHECK-RV32-NEXT: sw a1, 12(a0)
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64C-LABEL: test_nontemporal_PALL_load_v8i16:
; CHECK-RV64C: # %bb.0:
; CHECK-RV64C-NEXT: c.ntl.pall
-; CHECK-RV64C-NEXT: ld a2, 8(a1)
+; CHECK-RV64C-NEXT: ld a2, 0(a1)
; CHECK-RV64C-NEXT: c.ntl.pall
-; CHECK-RV64C-NEXT: ld a1, 0(a1)
-; CHECK-RV64C-NEXT: sd a2, 8(a0)
-; CHECK-RV64C-NEXT: sd a1, 0(a0)
+; CHECK-RV64C-NEXT: ld a1, 8(a1)
+; CHECK-RV64C-NEXT: sd a2, 0(a0)
+; CHECK-RV64C-NEXT: sd a1, 8(a0)
; CHECK-RV64C-NEXT: ret
;
; CHECK-RV32C-LABEL: test_nontemporal_PALL_load_v8i16:
; CHECK-RV32C: # %bb.0:
; CHECK-RV32C-NEXT: c.ntl.pall
-; CHECK-RV32C-NEXT: lw a2, 12(a1)
+; CHECK-RV32C-NEXT: lw a2, 0(a1)
; CHECK-RV32C-NEXT: c.ntl.pall
-; CHECK-RV32C-NEXT: lw a3, 8(a1)
+; CHECK-RV32C-NEXT: lw a3, 4(a1)
; CHECK-RV32C-NEXT: c.ntl.pall
-; CHECK-RV32C-NEXT: lw a4, 4(a1)
+; CHECK-RV32C-NEXT: lw a4, 8(a1)
; CHECK-RV32C-NEXT: c.ntl.pall
-; CHECK-RV32C-NEXT: lw a1, 0(a1)
-; CHECK-RV32C-NEXT: sw a2, 12(a0)
-; CHECK-RV32C-NEXT: sw a3, 8(a0)
-; CHECK-RV32C-NEXT: sw a4, 4(a0)
-; CHECK-RV32C-NEXT: sw a1, 0(a0)
+; CHECK-RV32C-NEXT: lw a1, 12(a1)
+; CHECK-RV32C-NEXT: sw a2, 0(a0)
+; CHECK-RV32C-NEXT: sw a3, 4(a0)
+; CHECK-RV32C-NEXT: sw a4, 8(a0)
+; CHECK-RV32C-NEXT: sw a1, 12(a0)
; CHECK-RV32C-NEXT: ret
;
; CHECK-RV64V-LABEL: test_nontemporal_PALL_load_v8i16:
@@ -3306,53 +3306,53 @@ define <4 x i32> @test_nontemporal_PALL_load_v4i32(ptr %p) {
; CHECK-RV64-LABEL: test_nontemporal_PALL_load_v4i32:
; CHECK-RV64: # %bb.0:
; CHECK-RV64-NEXT: ntl.pall
-; CHECK-RV64-NEXT: ld a2, 8(a1)
+; CHECK-RV64-NEXT: ld a2, 0(a1)
; CHECK-RV64-NEXT: ntl.pall
-; CHECK-RV64-NEXT: ld a1, 0(a1)
-; CHECK-RV64-NEXT: sd a2, 8(a0)
-; CHECK-RV64-NEXT: sd a1, 0(a0)
+; CHECK-RV64-NEXT: ld a1, 8(a1)
+; CHECK-RV64-NEXT: sd a2, 0(a0)
+; CHECK-RV64-NEXT: sd a1, 8(a0)
; CHECK-RV64-NEXT: ret
;
; CHECK-RV32-LABEL: test_nontemporal_PALL_load_v4i32:
; CHECK-RV32: # %bb.0:
; CHECK-RV32-NEXT: ntl.pall
-; CHECK-RV32-NEXT: lw a2, 12(a1)
+; CHECK-RV32-NEXT: lw a2, 0(a1)
; CHECK-RV32-NEXT: ntl.pall
-; CHECK-RV32-NEXT: lw a3, 8(a1)
+; CHECK-RV32-NEXT: lw a3, 4(a1)
; CHECK-RV32-NEXT: ntl.pall
-; CHECK-RV32-NEXT: lw a4, 4(a1)
+; CHECK-RV32-NEXT: lw a4, 8(a1)
; CHECK-RV32-NEXT: ntl.pall
-; CHECK-RV32-NEXT: lw a1, 0(a1)
-; CHECK-RV32-NEXT: sw a2, 12(a0)
-; CHECK-RV32-NEXT: sw a3, 8(a0)
-; CHECK-RV32-NEXT: sw a4, 4(a0)
-; CHECK-RV32-NEXT: sw a1, 0(a0)
+; CHECK-RV32-NEXT: lw a1, 12(a1)
+; CHECK-RV32-NEXT: sw a2, 0(a0)
+; CHECK-RV32-NEXT: sw a3, 4(a0)
+; CHECK-RV32-NEXT: sw a4, 8(a0)
+; CHECK-RV32-NEXT: sw a1, 12(a0)
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64C-LABEL: test_nontemporal_PALL_load_v4i32:
; CHECK-RV64C: # %bb.0:
; CHECK-RV64C-NEXT: c.ntl.pall
-; CHECK-RV64C-NEXT: ld a2, 8(a1)
+; CHECK-RV64C-NEXT: ld a2, 0(a1)
; CHECK-RV64C-NEXT: c.ntl.pall
-; CHECK-RV64C-NEXT: ld a1, 0(a1)
-; CHECK-RV64C-NEXT: sd a2, 8(a0)
-; CHECK-RV64C-NEXT: sd a1, 0(a0)
+; CHECK-RV64C-NEXT: ld a1, 8(a1)
+; CHECK-RV64C-NEXT: sd a2, 0(a0)
+; CHECK-RV64C-NEXT: sd a1, 8(a0)
; CHECK-RV64C-NEXT: ret
;
; CHECK-RV32C-LABEL: test_nontemporal_PALL_load_v4i32:
; CHECK-RV32C: # %bb.0:
; CHECK-RV32C-NEXT: c.ntl.pall
-; CHECK-RV32C-NEXT: lw a2, 12(a1)
+; CHECK-RV32C-NEXT: lw a2, 0(a1)
; CHECK-RV32C-NEXT: c.ntl.pall
-; CHECK-RV32C-NEXT: lw a3, 8(a1)
+; CHECK-RV32C-NEXT: lw a3, 4(a1)
; CHECK-RV32C-NEXT: c.ntl.pall
-; CHECK-RV32C-NEXT: lw a4, 4(a1)
+; CHECK-RV32C-NEXT: lw a4, 8(a1)
; CHECK-RV32C-NEXT: c.ntl.pall
-; CHECK-RV32C-NEXT: lw a1, 0(a1)
-; CHECK-RV32C-NEXT: sw a2, 12(a0)
-; CHECK-RV32C-NEXT: sw a3, 8(a0)
-; CHECK-RV32C-NEXT: sw a4, 4(a0)
-; CHECK-RV32C-NEXT: sw a1, 0(a0)
+; CHECK-RV32C-NEXT: lw a1, 12(a1)
+; CHECK-RV32C-NEXT: sw a2, 0(a0)
+; CHECK-RV32C-NEXT: sw a3, 4(a0)
+; CHECK-RV32C-NEXT: sw a4, 8(a0)
+; CHECK-RV32C-NEXT: sw a1, 12(a0)
; CHECK-RV32C-NEXT: ret
;
; CHECK-RV64V-LABEL: test_nontemporal_PALL_load_v4i32:
@@ -3385,17 +3385,17 @@ define <2 x i64> @test_nontemporal_PALL_load_v2i64(ptr %p) {
; CHECK-RV32-LABEL: test_nontemporal_PALL_load_v2i64:
; CHECK-RV32: # %bb.0:
; CHECK-RV32-NEXT: ntl.pall
-; CHECK-RV32-NEXT: lw a2, 12(a1)
+; CHECK-RV32-NEXT: lw a2, 0(a1)
; CHECK-RV32-NEXT: ntl.pall
-; CHECK-RV32-NEXT: lw a3, 8(a1)
+; CHECK-RV32-NEXT: lw a3, 4(a1)
; CHECK-RV32-NEXT: ntl.pall
-; CHECK-RV32-NEXT: lw a4, 4(a1)
+; CHECK-RV32-NEXT: lw a4, 8(a1)
; CHECK-RV32-NEXT: ntl.pall
-; CHECK-RV32-NEXT: lw a1, 0(a1)
-; CHECK-RV32-NEXT: sw a2, 12(a0)
-; CHECK-RV32-NEXT: sw a3, 8(a0)
-; CHECK-RV32-NEXT: sw a4, 4(a0)
-; CHECK-RV32-NEXT: sw a1, 0(a0)
+; CHECK-RV32-NEXT: lw a1, 12(a1)
+; CHECK-RV32-NEXT: sw a2, 0(a0)
+; CHECK-RV32-NEXT: sw a3, 4(a0)
+; CHECK-RV32-NEXT: sw a4, 8(a0)
+; CHECK-RV32-NEXT: sw a1, 12(a0)
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64C-LABEL: test_nontemporal_PALL_load_v2i64:
@@ -3410,17 +3410,17 @@ define <2 x i64> @test_nontemporal_PALL_load_v2i64(ptr %p) {
; CHECK-RV32C-LABEL: test_nontemporal_PALL_load_v2i64:
; CHECK-RV32C: # %bb.0:
; CHECK-RV32C-NEXT: c.ntl.pall
-; CHECK-RV32C-NEXT: lw a2, 12(a1)
+; CHECK-RV32C-NEXT: lw a2, 0(a1)
; CHECK-RV32C-NEXT: c.ntl.pall
-; CHECK-RV32C-NEXT: lw a3, 8(a1)
+; CHECK-RV32C-NEXT: lw a3, 4(a1)
; CHECK-RV32C-NEXT: c.ntl.pall
-; CHECK-RV32C-NEXT: lw a4, 4(a1)
+; CHECK-RV32C-NEXT: lw a4, 8(a1)
; CHECK-RV32C-NEXT: c.ntl.pall
-; CHECK-RV32C-NEXT: lw a1, 0(a1)
-; CHECK-RV32C-NEXT: sw a2, 12(a0)
-; CHECK-RV32C-NEXT: sw a3, 8(a0)
-; CHECK-RV32C-NEXT: sw a4, 4(a0)
-; CHECK-RV32C-NEXT: sw a1, 0(a0)
+; CHECK-RV32C-NEXT: lw a1, 12(a1)
+; CHECK-RV32C-NEXT: sw a2, 0(a0)
+; CHECK-RV32C-NEXT: sw a3, 4(a0)
+; CHECK-RV32C-NEXT: sw a4, 8(a0)
+; CHECK-RV32C-NEXT: sw a1, 12(a0)
; CHECK-RV32C-NEXT: ret
;
; CHECK-RV64V-LABEL: test_nontemporal_PALL_load_v2i64:
@@ -3450,9 +3450,9 @@ define void @test_nontemporal_PALL_store_i64(ptr %p, i64 %v) {
; CHECK-RV32-LABEL: test_nontemporal_PALL_store_i64:
; CHECK-RV32: # %bb.0:
; CHECK-RV32-NEXT: ntl.pall
-; CHECK-RV32-NEXT: sw a2, 4(a0)
-; CHECK-RV32-NEXT: ntl.pall
; CHECK-RV32-NEXT: sw a1, 0(a0)
+; CHECK-RV32-NEXT: ntl.pall
+; CHECK-RV32-NEXT: sw a2, 4(a0)
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64C-LABEL: test_nontemporal_PALL_store_i64:
@@ -3464,9 +3464,9 @@ define void @test_nontemporal_PALL_store_i64(ptr %p, i64 %v) {
; CHECK-RV32C-LABEL: test_nontemporal_PALL_store_i64:
; CHECK-RV32C: # %bb.0:
; CHECK-RV32C-NEXT: c.ntl.pall
-; CHECK-RV32C-NEXT: sw a2, 4(a0)
-; CHECK-RV32C-NEXT: c.ntl.pall
; CHECK-RV32C-NEXT: sw a1, 0(a0)
+; CHECK-RV32C-NEXT: c.ntl.pall
+; CHECK-RV32C-NEXT: sw a2, 4(a0)
; CHECK-RV32C-NEXT: ret
;
; CHECK-RV64V-LABEL: test_nontemporal_PALL_store_i64:
@@ -3478,9 +3478,9 @@ define void @test_nontemporal_PALL_store_i64(ptr %p, i64 %v) {
; CHECK-RV32V-LABEL: test_nontemporal_PALL_store_i64:
; CHECK-RV32V: # %bb.0:
; CHECK-RV32V-NEXT: ntl.pall
-; CHECK-RV32V-NEXT: sw a2, 4(a0)
-; CHECK-RV32V-NEXT: ntl.pall
; CHECK-RV32V-NEXT: sw a1, 0(a0)
+; CHECK-RV32V-NEXT: ntl.pall
+; CHECK-RV32V-NEXT: sw a2, 4(a0)
; CHECK-RV32V-NEXT: ret
store i64 %v, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !2
ret void
@@ -3743,46 +3743,46 @@ define void @test_nontemporal_PALL_store_v16i8(ptr %p, <16 x i8> %v) {
; CHECK-RV64-NEXT: lbu a7, 40(a1)
; CHECK-RV64-NEXT: lbu t0, 48(a1)
; CHECK-RV64-NEXT: lbu t1, 56(a1)
-; CHECK-RV64-NEXT: lbu t2, 96(a1)
-; CHECK-RV64-NEXT: lbu t3, 104(a1)
-; CHECK-RV64-NEXT: lbu t4, 112(a1)
-; CHECK-RV64-NEXT: lbu t5, 120(a1)
-; CHECK-RV64-NEXT: lbu t6, 64(a1)
-; CHECK-RV64-NEXT: lbu s0, 72(a1)
-; CHECK-RV64-NEXT: lbu s1, 80(a1)
-; CHECK-RV64-NEXT: lbu a1, 88(a1)
-; CHECK-RV64-NEXT: ntl.pall
-; CHECK-RV64-NEXT: sb t5, 15(a0)
+; CHECK-RV64-NEXT: lbu t2, 64(a1)
+; CHECK-RV64-NEXT: lbu t3, 72(a1)
+; CHECK-RV64-NEXT: lbu t4, 80(a1)
+; CHECK-RV64-NEXT: lbu t5, 88(a1)
+; CHECK-RV64-NEXT: lbu t6, 96(a1)
+; CHECK-RV64-NEXT: lbu s0, 104(a1)
+; CHECK-RV64-NEXT: lbu s1, 112(a1)
+; CHECK-RV64-NEXT: lbu a1, 120(a1)
; CHECK-RV64-NEXT: ntl.pall
-; CHECK-RV64-NEXT: sb t4, 14(a0)
+; CHECK-RV64-NEXT: sb t6, 12(a0)
; CHECK-RV64-NEXT: ntl.pall
-; CHECK-RV64-NEXT: sb t3, 13(a0)
+; CHECK-RV64-NEXT: sb s0, 13(a0)
; CHECK-RV64-NEXT: ntl.pall
-; CHECK-RV64-NEXT: sb t2, 12(a0)
+; CHECK-RV64-NEXT: sb s1, 14(a0)
; CHECK-RV64-NEXT: ntl.pall
-; CHECK-RV64-NEXT: sb a1, 11(a0)
+; CHECK-RV64-NEXT: sb a1, 15(a0)
; CHECK-RV64-NEXT: ntl.pall
-; CHECK-RV64-NEXT: sb s1, 10(a0)
+; CHECK-RV64-NEXT: sb t2, 8(a0)
; CHECK-RV64-NEXT: ntl.pall
-; CHECK-RV64-NEXT: sb s0, 9(a0)
+; CHECK-RV64-NEXT: sb t3, 9(a0)
; CHECK-RV64-NEXT: ntl.pall
-; CHECK-RV64-NEXT: sb t6, 8(a0)
+; CHECK-RV64-NEXT: sb t4, 10(a0)
; CHECK-RV64-NEXT: ntl.pall
-; CHECK-RV64-NEXT: sb t1, 7(a0)
+; CHECK-RV64-NEXT: sb t5, 11(a0)
; CHECK-RV64-NEXT: ntl.pall
-; CHECK-RV64-NEXT: sb t0, 6(a0)
+; CHECK-RV64-NEXT: sb a6, 4(a0)
; CHECK-RV64-NEXT: ntl.pall
; CHECK-RV64-NEXT: sb a7, 5(a0)
; CHECK-RV64-NEXT: ntl.pall
-; CHECK-RV64-NEXT: sb a6, 4(a0)
+; CHECK-RV64-NEXT: sb t0, 6(a0)
; CHECK-RV64-NEXT: ntl.pall
-; CHECK-RV64-NEXT: sb a5, 3(a0)
+; CHECK-RV64-NEXT: sb t1, 7(a0)
; CHECK-RV64-NEXT: ntl.pall
-; CHECK-RV64-NEXT: sb a4, 2(a0)
+; CHECK-RV64-NEXT: sb a2, 0(a0)
; CHECK-RV64-NEXT: ntl.pall
; CHECK-RV64-NEXT: sb a3, 1(a0)
; CHECK-RV64-NEXT: ntl.pall
-; CHECK-RV64-NEXT: sb a2, 0(a0)
+; CHECK-RV64-NEXT: sb a4, 2(a0)
+; CHECK-RV64-NEXT: ntl.pall
+; CHECK-RV64-NEXT: sb a5, 3(a0)
; CHECK-RV64-NEXT: ld s0, 8(sp) # 8-byte Folded Reload
; CHECK-RV64-NEXT: ld s1, 0(sp) # 8-byte Folded Reload
; CHECK-RV64-NEXT: addi sp, sp, 16
@@ -3804,46 +3804,46 @@ define void @test_nontemporal_PALL_store_v16i8(ptr %p, <16 x i8> %v) {
; CHECK-RV32-NEXT: lbu a7, 20(a1)
; CHECK-RV32-NEXT: lbu t0, 24(a1)
; CHECK-RV32-NEXT: lbu t1, 28(a1)
-; CHECK-RV32-NEXT: lbu t2, 48(a1)
-; CHECK-RV32-NEXT: lbu t3, 52(a1)
-; CHECK-RV32-NEXT: lbu t4, 56(a1)
-; CHECK-RV32-NEXT: lbu t5, 60(a1)
-; CHECK-RV32-NEXT: lbu t6, 32(a1)
-; CHECK-RV32-NEXT: lbu s0, 36(a1)
-; CHECK-RV32-NEXT: lbu s1, 40(a1)
-; CHECK-RV32-NEXT: lbu a1, 44(a1)
+; CHECK-RV32-NEXT: lbu t2, 32(a1)
+; CHECK-RV32-NEXT: lbu t3, 36(a1)
+; CHECK-RV32-NEXT: lbu t4, 40(a1)
+; CHECK-RV32-NEXT: lbu t5, 44(a1)
+; CHECK-RV32-NEXT: lbu t6, 48(a1)
+; CHECK-RV32-NEXT: lbu s0, 52(a1)
+; CHECK-RV32-NEXT: lbu s1, 56(a1)
+; CHECK-RV32-NEXT: lbu a1, 60(a1)
; CHECK-RV32-NEXT: ntl.pall
-; CHECK-RV32-NEXT: sb t5, 15(a0)
+; CHECK-RV32-NEXT: sb t6, 12(a0)
; CHECK-RV32-NEXT: ntl.pall
-; CHECK-RV32-NEXT: sb t4, 14(a0)
+; CHECK-RV32-NEXT: sb s0, 13(a0)
; CHECK-RV32-NEXT: ntl.pall
-; CHECK-RV32-NEXT: sb t3, 13(a0)
+; CHECK-RV32-NEXT: sb s1, 14(a0)
; CHECK-RV32-NEXT: ntl.pall
-; CHECK-RV32-NEXT: sb t2, 12(a0)
+; CHECK-RV32-NEXT: sb a1, 15(a0)
; CHECK-RV32-NEXT: ntl.pall
-; CHECK-RV32-NEXT: sb a1, 11(a0)
+; CHECK-RV32-NEXT: sb t2, 8(a0)
; CHECK-RV32-NEXT: ntl.pall
-; CHECK-RV32-NEXT: sb s1, 10(a0)
+; CHECK-RV32-NEXT: sb t3, 9(a0)
; CHECK-RV32-NEXT: ntl.pall
-; CHECK-RV32-NEXT: sb s0, 9(a0)
+; CHECK-RV32-NEXT: sb t4, 10(a0)
; CHECK-RV32-NEXT: ntl.pall
-; CHECK-RV32-NEXT: sb t6, 8(a0)
+; CHECK-RV32-NEXT: sb t5, 11(a0)
; CHECK-RV32-NEXT: ntl.pall
-; CHECK-RV32-NEXT: sb t1, 7(a0)
-; CHECK-RV32-NEXT: ntl.pall
-; CHECK-RV32-NEXT: sb t0, 6(a0)
+; CHECK-RV32-NEXT: sb a6, 4(a0)
; CHECK-RV32-NEXT: ntl.pall
; CHECK-RV32-NEXT: sb a7, 5(a0)
; CHECK-RV32-NEXT: ntl.pall
-; CHECK-RV32-NEXT: sb a6, 4(a0)
+; CHECK-RV32-NEXT: sb t0, 6(a0)
; CHECK-RV32-NEXT: ntl.pall
-; CHECK-RV32-NEXT: sb a5, 3(a0)
+; CHECK-RV32-NEXT: sb t1, 7(a0)
; CHECK-RV32-NEXT: ntl.pall
-; CHECK-RV32-NEXT: sb a4, 2(a0)
+; CHECK-RV32-NEXT: sb a2, 0(a0)
; CHECK-RV32-NEXT: ntl.pall
; CHECK-RV32-NEXT: sb a3, 1(a0)
; CHECK-RV32-NEXT: ntl.pall
-; CHECK-RV32-NEXT: sb a2, 0(a0)
+; CHECK-RV32-NEXT: sb a4, 2(a0)
+; CHECK-RV32-NEXT: ntl.pall
+; CHECK-RV32-NEXT: sb a5, 3(a0)
; CHECK-RV32-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; CHECK-RV32-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
; CHECK-RV32-NEXT: addi sp, sp, 16
@@ -3865,46 +3865,46 @@ define void @test_nontemporal_PALL_store_v16i8(ptr %p, <16 x i8> %v) {
; CHECK-RV64C-NEXT: lbu t3, 40(a1)
; CHECK-RV64C-NEXT: lbu t4, 48(a1)
; CHECK-RV64C-NEXT: lbu t5, 56(a1)
-; CHECK-RV64C-NEXT: lbu a2, 96(a1)
-; CHECK-RV64C-NEXT: lbu a3, 104(a1)
-; CHECK-RV64C-NEXT: lbu a4, 112(a1)
-; CHECK-RV64C-NEXT: lbu a5, 120(a1)
; CHECK-RV64C-NEXT: lbu t6, 64(a1)
-; CHECK-RV64C-NEXT: lbu s0, 72(a1)
-; CHECK-RV64C-NEXT: lbu s1, 80(a1)
-; CHECK-RV64C-NEXT: lbu a1, 88(a1)
-; CHECK-RV64C-NEXT: c.ntl.pall
-; CHECK-RV64C-NEXT: sb a5, 15(a0)
-; CHECK-RV64C-NEXT: c.ntl.pall
-; CHECK-RV64C-NEXT: sb a4, 14(a0)
-; CHECK-RV64C-NEXT: c.ntl.pall
-; CHECK-RV64C-NEXT: sb a3, 13(a0)
+; CHECK-RV64C-NEXT: lbu a3, 72(a1)
+; CHECK-RV64C-NEXT: lbu a4, 80(a1)
+; CHECK-RV64C-NEXT: lbu a5, 88(a1)
+; CHECK-RV64C-NEXT: lbu a2, 96(a1)
+; CHECK-RV64C-NEXT: lbu s0, 104(a1)
+; CHECK-RV64C-NEXT: lbu s1, 112(a1)
+; CHECK-RV64C-NEXT: lbu a1, 120(a1)
; CHECK-RV64C-NEXT: c.ntl.pall
; CHECK-RV64C-NEXT: sb a2, 12(a0)
; CHECK-RV64C-NEXT: c.ntl.pall
-; CHECK-RV64C-NEXT: sb a1, 11(a0)
+; CHECK-RV64C-NEXT: sb s0, 13(a0)
; CHECK-RV64C-NEXT: c.ntl.pall
-; CHECK-RV64C-NEXT: sb s1, 10(a0)
+; CHECK-RV64C-NEXT: sb s1, 14(a0)
; CHECK-RV64C-NEXT: c.ntl.pall
-; CHECK-RV64C-NEXT: sb s0, 9(a0)
+; CHECK-RV64C-NEXT: sb a1, 15(a0)
; CHECK-RV64C-NEXT: c.ntl.pall
; CHECK-RV64C-NEXT: sb t6, 8(a0)
; CHECK-RV64C-NEXT: c.ntl.pall
-; CHECK-RV64C-NEXT: sb t5, 7(a0)
+; CHECK-RV64C-NEXT: sb a3, 9(a0)
; CHECK-RV64C-NEXT: c.ntl.pall
-; CHECK-RV64C-NEXT: sb t4, 6(a0)
+; CHECK-RV64C-NEXT: sb a4, 10(a0)
; CHECK-RV64C-NEXT: c.ntl.pall
-; CHECK-RV64C-NEXT: sb t3, 5(a0)
+; CHECK-RV64C-NEXT: sb a5, 11(a0)
; CHECK-RV64C-NEXT: c.ntl.pall
; CHECK-RV64C-NEXT: sb t2, 4(a0)
; CHECK-RV64C-NEXT: c.ntl.pall
-; CHECK-RV64C-NEXT: sb t1, 3(a0)
+; CHECK-RV64C-NEXT: sb t3, 5(a0)
; CHECK-RV64C-NEXT: c.ntl.pall
-; CHECK-RV64C-NEXT: sb t0, 2(a0)
+; CHECK-RV64C-NEXT: sb t4, 6(a0)
; CHECK-RV64C-NEXT: c.ntl.pall
-; CHECK-RV64C-NEXT: sb a7, 1(a0)
+; CHECK-RV64C-NEXT: sb t5, 7(a0)
; CHECK-RV64C-NEXT: c.ntl.pall
; CHECK-RV64C-NEXT: sb a6, 0(a0)
+; CHECK-RV64C-NEXT: c.ntl.pall
+; CHECK-RV64C-NEXT: sb a7, 1(a0)
+; CHECK-RV64C-NEXT: c.ntl.pall
+; CHECK-RV64C-NEXT: sb t0, 2(a0)
+; CHECK-RV64C-NEXT: c.ntl.pall
+; CHECK-RV64C-NEXT: sb t1, 3(a0)
; CHECK-RV64C-NEXT: ld s0, 8(sp) # 8-byte Folded Reload
; CHECK-RV64C-NEXT: ld s1, 0(sp) # 8-byte Folded Reload
; CHECK-RV64C-NEXT: addi sp, sp, 16
@@ -3926,46 +3926,46 @@ define void @test_nontemporal_PALL_store_v16i8(ptr %p, <16 x i8> %v) {
; CHECK-RV32C-NEXT: lbu t3, 20(a1)
; CHECK-RV32C-NEXT: lbu t4, 24(a1)
; CHECK-RV32C-NEXT: lbu t5, 28(a1)
-; CHECK-RV32C-NEXT: lbu a2, 48(a1)
-; CHECK-RV32C-NEXT: lbu a3, 52(a1)
-; CHECK-RV32C-NEXT: lbu a4, 56(a1)
-; CHECK-RV32C-NEXT: lbu a5, 60(a1)
; CHECK-RV32C-NEXT: lbu t6, 32(a1)
-; CHECK-RV32C-NEXT: lbu s0, 36(a1)
-; CHECK-RV32C-NEXT: lbu s1, 40(a1)
-; CHECK-RV32C-NEXT: lbu a1, 44(a1)
-; CHECK-RV32C-NEXT: c.ntl.pall
-; CHECK-RV32C-NEXT: sb a5, 15(a0)
-; CHECK-RV32C-NEXT: c.ntl.pall
-; CHECK-RV32C-NEXT: sb a4, 14(a0)
-; CHECK-RV32C-NEXT: c.ntl.pall
-; CHECK-RV32C-NEXT: sb a3, 13(a0)
+; CHECK-RV32C-NEXT: lbu a3, 36(a1)
+; CHECK-RV32C-NEXT: lbu a4, 40(a1)
+; CHECK-RV32C-NEXT: lbu a5, 44(a1)
+; CHECK-RV32C-NEXT: lbu a2, 48(a1)
+; CHECK-RV32C-NEXT: lbu s0, 52(a1)
+; CHECK-RV32C-NEXT: lbu s1, 56(a1)
+; CHECK-RV32C-NEXT: lbu a1, 60(a1)
; CHECK-RV32C-NEXT: c.ntl.pall
; CHECK-RV32C-NEXT: sb a2, 12(a0)
; CHECK-RV32C-NEXT: c.ntl.pall
-; CHECK-RV32C-NEXT: sb a1, 11(a0)
+; CHECK-RV32C-NEXT: sb s0, 13(a0)
; CHECK-RV32C-NEXT: c.ntl.pall
-; CHECK-RV32C-NEXT: sb s1, 10(a0)
+; CHECK-RV32C-NEXT: sb s1, 14(a0)
; CHECK-RV32C-NEXT: c.ntl.pall
-; CHECK-RV32C-NEXT: sb s0, 9(a0)
+; CHECK-RV32C-NEXT: sb a1, 15(a0)
; CHECK-RV32C-NEXT: c.ntl.pall
; CHECK-RV32C-NEXT: sb t6, 8(a0)
; CHECK-RV32C-NEXT: c.ntl.pall
-; CHECK-RV32C-NEXT: sb t5, 7(a0)
+; CHECK-RV32C-NEXT: sb a3, 9(a0)
; CHECK-RV32C-NEXT: c.ntl.pall
-; CHECK-RV32C-NEXT: sb t4, 6(a0)
+; CHECK-RV32C-NEXT: sb a4, 10(a0)
; CHECK-RV32C-NEXT: c.ntl.pall
-; CHECK-RV32C-NEXT: sb t3, 5(a0)
+; CHECK-RV32C-NEXT: sb a5, 11(a0)
; CHECK-RV32C-NEXT: c.ntl.pall
; CHECK-RV32C-NEXT: sb t2, 4(a0)
; CHECK-RV32C-NEXT: c.ntl.pall
-; CHECK-RV32C-NEXT: sb t1, 3(a0)
+; CHECK-RV32C-NEXT: sb t3, 5(a0)
; CHECK-RV32C-NEXT: c.ntl.pall
-; CHECK-RV32C-NEXT: sb t0, 2(a0)
+; CHECK-RV32C-NEXT: sb t4, 6(a0)
; CHECK-RV32C-NEXT: c.ntl.pall
-; CHECK-RV32C-NEXT: sb a7, 1(a0)
+; CHECK-RV32C-NEXT: sb t5, 7(a0)
; CHECK-RV32C-NEXT: c.ntl.pall
; CHECK-RV32C-NEXT: sb a6, 0(a0)
+; CHECK-RV32C-NEXT: c.ntl.pall
+; CHECK-RV32C-NEXT: sb a7, 1(a0)
+; CHECK-RV32C-NEXT: c.ntl.pall
+; CHECK-RV32C-NEXT: sb t0, 2(a0)
+; CHECK-RV32C-NEXT: c.ntl.pall
+; CHECK-RV32C-NEXT: sb t1, 3(a0)
; CHECK-RV32C-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; CHECK-RV32C-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
; CHECK-RV32C-NEXT: addi sp, sp, 16
@@ -3991,114 +3991,114 @@ define void @test_nontemporal_PALL_store_v16i8(ptr %p, <16 x i8> %v) {
define void @test_nontemporal_PALL_store_v8i16(ptr %p, <8 x i16> %v) {
; CHECK-RV64-LABEL: test_nontemporal_PALL_store_v8i16:
; CHECK-RV64: # %bb.0:
-; CHECK-RV64-NEXT: lh a2, 32(a1)
-; CHECK-RV64-NEXT: lh a3, 40(a1)
-; CHECK-RV64-NEXT: lh a4, 48(a1)
-; CHECK-RV64-NEXT: lh a5, 56(a1)
-; CHECK-RV64-NEXT: lh a6, 0(a1)
-; CHECK-RV64-NEXT: lh a7, 8(a1)
-; CHECK-RV64-NEXT: lh t0, 16(a1)
-; CHECK-RV64-NEXT: lh a1, 24(a1)
+; CHECK-RV64-NEXT: lh a2, 0(a1)
+; CHECK-RV64-NEXT: lh a3, 8(a1)
+; CHECK-RV64-NEXT: lh a4, 16(a1)
+; CHECK-RV64-NEXT: lh a5, 24(a1)
+; CHECK-RV64-NEXT: lh a6, 32(a1)
+; CHECK-RV64-NEXT: lh a7, 40(a1)
+; CHECK-RV64-NEXT: lh t0, 48(a1)
+; CHECK-RV64-NEXT: lh a1, 56(a1)
; CHECK-RV64-NEXT: ntl.pall
-; CHECK-RV64-NEXT: sh a5, 14(a0)
+; CHECK-RV64-NEXT: sh a6, 8(a0)
; CHECK-RV64-NEXT: ntl.pall
-; CHECK-RV64-NEXT: sh a4, 12(a0)
+; CHECK-RV64-NEXT: sh a7, 10(a0)
; CHECK-RV64-NEXT: ntl.pall
-; CHECK-RV64-NEXT: sh a3, 10(a0)
+; CHECK-RV64-NEXT: sh t0, 12(a0)
; CHECK-RV64-NEXT: ntl.pall
-; CHECK-RV64-NEXT: sh a2, 8(a0)
+; CHECK-RV64-NEXT: sh a1, 14(a0)
; CHECK-RV64-NEXT: ntl.pall
-; CHECK-RV64-NEXT: sh a1, 6(a0)
+; CHECK-RV64-NEXT: sh a2, 0(a0)
; CHECK-RV64-NEXT: ntl.pall
-; CHECK-RV64-NEXT: sh t0, 4(a0)
+; CHECK-RV64-NEXT: sh a3, 2(a0)
; CHECK-RV64-NEXT: ntl.pall
-; CHECK-RV64-NEXT: sh a7, 2(a0)
+; CHECK-RV64-NEXT: sh a4, 4(a0)
; CHECK-RV64-NEXT: ntl.pall
-; CHECK-RV64-NEXT: sh a6, 0(a0)
+; CHECK-RV64-NEXT: sh a5, 6(a0)
; CHECK-RV64-NEXT: ret
;
; CHECK-RV32-LABEL: test_nontemporal_PALL_store_v8i16:
; CHECK-RV32: # %bb.0:
-; CHECK-RV32-NEXT: lh a2, 16(a1)
-; CHECK-RV32-NEXT: lh a3, 20(a1)
-; CHECK-RV32-NEXT: lh a4, 24(a1)
-; CHECK-RV32-NEXT: lh a5, 28(a1)
-; CHECK-RV32-NEXT: lh a6, 0(a1)
-; CHECK-RV32-NEXT: lh a7, 4(a1)
-; CHECK-RV32-NEXT: lh t0, 8(a1)
-; CHECK-RV32-NEXT: lh a1, 12(a1)
+; CHECK-RV32-NEXT: lh a2, 0(a1)
+; CHECK-RV32-NEXT: lh a3, 4(a1)
+; CHECK-RV32-NEXT: lh a4, 8(a1)
+; CHECK-RV32-NEXT: lh a5, 12(a1)
+; CHECK-RV32-NEXT: lh a6, 16(a1)
+; CHECK-RV32-NEXT: lh a7, 20(a1)
+; CHECK-RV32-NEXT: lh t0, 24(a1)
+; CHECK-RV32-NEXT: lh a1, 28(a1)
; CHECK-RV32-NEXT: ntl.pall
-; CHECK-RV32-NEXT: sh a5, 14(a0)
+; CHECK-RV32-NEXT: sh a6, 8(a0)
; CHECK-RV32-NEXT: ntl.pall
-; CHECK-RV32-NEXT: sh a4, 12(a0)
+; CHECK-RV32-NEXT: sh a7, 10(a0)
; CHECK-RV32-NEXT: ntl.pall
-; CHECK-RV32-NEXT: sh a3, 10(a0)
+; CHECK-RV32-NEXT: sh t0, 12(a0)
; CHECK-RV32-NEXT: ntl.pall
-; CHECK-RV32-NEXT: sh a2, 8(a0)
+; CHECK-RV32-NEXT: sh a1, 14(a0)
; CHECK-RV32-NEXT: ntl.pall
-; CHECK-RV32-NEXT: sh a1, 6(a0)
+; CHECK-RV32-NEXT: sh a2, 0(a0)
; CHECK-RV32-NEXT: ntl.pall
-; CHECK-RV32-NEXT: sh t0, 4(a0)
+; CHECK-RV32-NEXT: sh a3, 2(a0)
; CHECK-RV32-NEXT: ntl.pall
-; CHECK-RV32-NEXT: sh a7, 2(a0)
+; CHECK-RV32-NEXT: sh a4, 4(a0)
; CHECK-RV32-NEXT: ntl.pall
-; CHECK-RV32-NEXT: sh a6, 0(a0)
+; CHECK-RV32-NEXT: sh a5, 6(a0)
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64C-LABEL: test_nontemporal_PALL_store_v8i16:
; CHECK-RV64C: # %bb.0:
-; CHECK-RV64C-NEXT: lh a7, 32(a1)
+; CHECK-RV64C-NEXT: lh a6, 0(a1)
+; CHECK-RV64C-NEXT: lh a7, 8(a1)
+; CHECK-RV64C-NEXT: lh t0, 16(a1)
+; CHECK-RV64C-NEXT: lh a5, 24(a1)
+; CHECK-RV64C-NEXT: lh a2, 32(a1)
; CHECK-RV64C-NEXT: lh a3, 40(a1)
; CHECK-RV64C-NEXT: lh a4, 48(a1)
-; CHECK-RV64C-NEXT: lh a5, 56(a1)
-; CHECK-RV64C-NEXT: lh a6, 0(a1)
-; CHECK-RV64C-NEXT: lh t0, 8(a1)
-; CHECK-RV64C-NEXT: lh a2, 16(a1)
-; CHECK-RV64C-NEXT: lh a1, 24(a1)
+; CHECK-RV64C-NEXT: lh a1, 56(a1)
; CHECK-RV64C-NEXT: c.ntl.pall
-; CHECK-RV64C-NEXT: sh a5, 14(a0)
-; CHECK-RV64C-NEXT: c.ntl.pall
-; CHECK-RV64C-NEXT: sh a4, 12(a0)
+; CHECK-RV64C-NEXT: sh a2, 8(a0)
; CHECK-RV64C-NEXT: c.ntl.pall
; CHECK-RV64C-NEXT: sh a3, 10(a0)
; CHECK-RV64C-NEXT: c.ntl.pall
-; CHECK-RV64C-NEXT: sh a7, 8(a0)
+; CHECK-RV64C-NEXT: sh a4, 12(a0)
+; CHECK-RV64C-NEXT: c.ntl.pall
+; CHECK-RV64C-NEXT: sh a1, 14(a0)
; CHECK-RV64C-NEXT: c.ntl.pall
-; CHECK-RV64C-NEXT: sh a1, 6(a0)
+; CHECK-RV64C-NEXT: sh a6, 0(a0)
; CHECK-RV64C-NEXT: c.ntl.pall
-; CHECK-RV64C-NEXT: sh a2, 4(a0)
+; CHECK-RV64C-NEXT: sh a7, 2(a0)
; CHECK-RV64C-NEXT: c.ntl.pall
-; CHECK-RV64C-NEXT: sh t0, 2(a0)
+; CHECK-RV64C-NEXT: sh t0, 4(a0)
; CHECK-RV64C-NEXT: c.ntl.pall
-; CHECK-RV64C-NEXT: sh a6, 0(a0)
+; CHECK-RV64C-NEXT: sh a5, 6(a0)
; CHECK-RV64C-NEXT: ret
;
; CHECK-RV32C-LABEL: test_nontemporal_PALL_store_v8i16:
; CHECK-RV32C: # %bb.0:
-; CHECK-RV32C-NEXT: lh a7, 16(a1)
+; CHECK-RV32C-NEXT: lh a6, 0(a1)
+; CHECK-RV32C-NEXT: lh a7, 4(a1)
+; CHECK-RV32C-NEXT: lh t0, 8(a1)
+; CHECK-RV32C-NEXT: lh a5, 12(a1)
+; CHECK-RV32C-NEXT: lh a2, 16(a1)
; CHECK-RV32C-NEXT: lh a3, 20(a1)
; CHECK-RV32C-NEXT: lh a4, 24(a1)
-; CHECK-RV32C-NEXT: lh a5, 28(a1)
-; CHECK-RV32C-NEXT: lh a6, 0(a1)
-; CHECK-RV32C-NEXT: lh t0, 4(a1)
-; CHECK-RV32C-NEXT: lh a2, 8(a1)
-; CHECK-RV32C-NEXT: lh a1, 12(a1)
-; CHECK-RV32C-NEXT: c.ntl.pall
-; CHECK-RV32C-NEXT: sh a5, 14(a0)
+; CHECK-RV32C-NEXT: lh a1, 28(a1)
; CHECK-RV32C-NEXT: c.ntl.pall
-; CHECK-RV32C-NEXT: sh a4, 12(a0)
+; CHECK-RV32C-NEXT: sh a2, 8(a0)
; CHECK-RV32C-NEXT: c.ntl.pall
; CHECK-RV32C-NEXT: sh a3, 10(a0)
; CHECK-RV32C-NEXT: c.ntl.pall
-; CHECK-RV32C-NEXT: sh a7, 8(a0)
+; CHECK-RV32C-NEXT: sh a4, 12(a0)
+; CHECK-RV32C-NEXT: c.ntl.pall
+; CHECK-RV32C-NEXT: sh a1, 14(a0)
; CHECK-RV32C-NEXT: c.ntl.pall
-; CHECK-RV32C-NEXT: sh a1, 6(a0)
+; CHECK-RV32C-NEXT: sh a6, 0(a0)
; CHECK-RV32C-NEXT: c.ntl.pall
-; CHECK-RV32C-NEXT: sh a2, 4(a0)
+; CHECK-RV32C-NEXT: sh a7, 2(a0)
; CHECK-RV32C-NEXT: c.ntl.pall
-; CHECK-RV32C-NEXT: sh t0, 2(a0)
+; CHECK-RV32C-NEXT: sh t0, 4(a0)
; CHECK-RV32C-NEXT: c.ntl.pall
-; CHECK-RV32C-NEXT: sh a6, 0(a0)
+; CHECK-RV32C-NEXT: sh a5, 6(a0)
; CHECK-RV32C-NEXT: ret
;
; CHECK-RV64V-LABEL: test_nontemporal_PALL_store_v8i16:
@@ -4121,66 +4121,66 @@ define void @test_nontemporal_PALL_store_v8i16(ptr %p, <8 x i16> %v) {
define void @test_nontemporal_PALL_store_v4i32(ptr %p, <4 x i32> %v) {
; CHECK-RV64-LABEL: test_nontemporal_PALL_store_v4i32:
; CHECK-RV64: # %bb.0:
-; CHECK-RV64-NEXT: lw a2, 24(a1)
-; CHECK-RV64-NEXT: lw a3, 16(a1)
-; CHECK-RV64-NEXT: lw a4, 8(a1)
-; CHECK-RV64-NEXT: lw a1, 0(a1)
+; CHECK-RV64-NEXT: lw a2, 0(a1)
+; CHECK-RV64-NEXT: lw a3, 8(a1)
+; CHECK-RV64-NEXT: lw a4, 16(a1)
+; CHECK-RV64-NEXT: lw a1, 24(a1)
; CHECK-RV64-NEXT: ntl.pall
-; CHECK-RV64-NEXT: sw a2, 12(a0)
+; CHECK-RV64-NEXT: sw a2, 0(a0)
; CHECK-RV64-NEXT: ntl.pall
-; CHECK-RV64-NEXT: sw a3, 8(a0)
+; CHECK-RV64-NEXT: sw a3, 4(a0)
; CHECK-RV64-NEXT: ntl.pall
-; CHECK-RV64-NEXT: sw a4, 4(a0)
+; CHECK-RV64-NEXT: sw a4, 8(a0)
; CHECK-RV64-NEXT: ntl.pall
-; CHECK-RV64-NEXT: sw a1, 0(a0)
+; CHECK-RV64-NEXT: sw a1, 12(a0)
; CHECK-RV64-NEXT: ret
;
; CHECK-RV32-LABEL: test_nontemporal_PALL_store_v4i32:
; CHECK-RV32: # %bb.0:
-; CHECK-RV32-NEXT: lw a2, 12(a1)
-; CHECK-RV32-NEXT: lw a3, 8(a1)
-; CHECK-RV32-NEXT: lw a4, 4(a1)
-; CHECK-RV32-NEXT: lw a1, 0(a1)
+; CHECK-RV32-NEXT: lw a2, 0(a1)
+; CHECK-RV32-NEXT: lw a3, 4(a1)
+; CHECK-RV32-NEXT: lw a4, 8(a1)
+; CHECK-RV32-NEXT: lw a1, 12(a1)
; CHECK-RV32-NEXT: ntl.pall
-; CHECK-RV32-NEXT: sw a2, 12(a0)
+; CHECK-RV32-NEXT: sw a2, 0(a0)
; CHECK-RV32-NEXT: ntl.pall
-; CHECK-RV32-NEXT: sw a3, 8(a0)
+; CHECK-RV32-NEXT: sw a3, 4(a0)
; CHECK-RV32-NEXT: ntl.pall
-; CHECK-RV32-NEXT: sw a4, 4(a0)
+; CHECK-RV32-NEXT: sw a4, 8(a0)
; CHECK-RV32-NEXT: ntl.pall
-; CHECK-RV32-NEXT: sw a1, 0(a0)
+; CHECK-RV32-NEXT: sw a1, 12(a0)
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64C-LABEL: test_nontemporal_PALL_store_v4i32:
; CHECK-RV64C: # %bb.0:
-; CHECK-RV64C-NEXT: lw a2, 24(a1)
-; CHECK-RV64C-NEXT: lw a3, 16(a1)
-; CHECK-RV64C-NEXT: lw a4, 8(a1)
-; CHECK-RV64C-NEXT: lw a1, 0(a1)
+; CHECK-RV64C-NEXT: lw a2, 0(a1)
+; CHECK-RV64C-NEXT: lw a3, 8(a1)
+; CHECK-RV64C-NEXT: lw a4, 16(a1)
+; CHECK-RV64C-NEXT: lw a1, 24(a1)
; CHECK-RV64C-NEXT: c.ntl.pall
-; CHECK-RV64C-NEXT: sw a2, 12(a0)
+; CHECK-RV64C-NEXT: sw a2, 0(a0)
; CHECK-RV64C-NEXT: c.ntl.pall
-; CHECK-RV64C-NEXT: sw a3, 8(a0)
+; CHECK-RV64C-NEXT: sw a3, 4(a0)
; CHECK-RV64C-NEXT: c.ntl.pall
-; CHECK-RV64C-NEXT: sw a4, 4(a0)
+; CHECK-RV64C-NEXT: sw a4, 8(a0)
; CHECK-RV64C-NEXT: c.ntl.pall
-; CHECK-RV64C-NEXT: sw a1, 0(a0)
+; CHECK-RV64C-NEXT: sw a1, 12(a0)
; CHECK-RV64C-NEXT: ret
;
; CHECK-RV32C-LABEL: test_nontemporal_PALL_store_v4i32:
; CHECK-RV32C: # %bb.0:
-; CHECK-RV32C-NEXT: lw a2, 12(a1)
-; CHECK-RV32C-NEXT: lw a3, 8(a1)
-; CHECK-RV32C-NEXT: lw a4, 4(a1)
-; CHECK-RV32C-NEXT: lw a1, 0(a1)
+; CHECK-RV32C-NEXT: lw a2, 0(a1)
+; CHECK-RV32C-NEXT: lw a3, 4(a1)
+; CHECK-RV32C-NEXT: lw a4, 8(a1)
+; CHECK-RV32C-NEXT: lw a1, 12(a1)
; CHECK-RV32C-NEXT: c.ntl.pall
-; CHECK-RV32C-NEXT: sw a2, 12(a0)
+; CHECK-RV32C-NEXT: sw a2, 0(a0)
; CHECK-RV32C-NEXT: c.ntl.pall
-; CHECK-RV32C-NEXT: sw a3, 8(a0)
+; CHECK-RV32C-NEXT: sw a3, 4(a0)
; CHECK-RV32C-NEXT: c.ntl.pall
-; CHECK-RV32C-NEXT: sw a4, 4(a0)
+; CHECK-RV32C-NEXT: sw a4, 8(a0)
; CHECK-RV32C-NEXT: c.ntl.pall
-; CHECK-RV32C-NEXT: sw a1, 0(a0)
+; CHECK-RV32C-NEXT: sw a1, 12(a0)
; CHECK-RV32C-NEXT: ret
;
; CHECK-RV64V-LABEL: test_nontemporal_PALL_store_v4i32:
@@ -4204,49 +4204,49 @@ define void @test_nontemporal_PALL_store_v2i64(ptr %p, <2 x i64> %v) {
; CHECK-RV64-LABEL: test_nontemporal_PALL_store_v2i64:
; CHECK-RV64: # %bb.0:
; CHECK-RV64-NEXT: ntl.pall
-; CHECK-RV64-NEXT: sd a2, 8(a0)
-; CHECK-RV64-NEXT: ntl.pall
; CHECK-RV64-NEXT: sd a1, 0(a0)
+; CHECK-RV64-NEXT: ntl.pall
+; CHECK-RV64-NEXT: sd a2, 8(a0)
; CHECK-RV64-NEXT: ret
;
; CHECK-RV32-LABEL: test_nontemporal_PALL_store_v2i64:
; CHECK-RV32: # %bb.0:
-; CHECK-RV32-NEXT: lw a2, 12(a1)
-; CHECK-RV32-NEXT: lw a3, 8(a1)
-; CHECK-RV32-NEXT: lw a4, 4(a1)
-; CHECK-RV32-NEXT: lw a1, 0(a1)
+; CHECK-RV32-NEXT: lw a2, 0(a1)
+; CHECK-RV32-NEXT: lw a3, 4(a1)
+; CHECK-RV32-NEXT: lw a4, 8(a1)
+; CHECK-RV32-NEXT: lw a1, 12(a1)
; CHECK-RV32-NEXT: ntl.pall
-; CHECK-RV32-NEXT: sw a2, 12(a0)
+; CHECK-RV32-NEXT: sw a2, 0(a0)
; CHECK-RV32-NEXT: ntl.pall
-; CHECK-RV32-NEXT: sw a3, 8(a0)
+; CHECK-RV32-NEXT: sw a3, 4(a0)
; CHECK-RV32-NEXT: ntl.pall
-; CHECK-RV32-NEXT: sw a4, 4(a0)
+; CHECK-RV32-NEXT: sw a4, 8(a0)
; CHECK-RV32-NEXT: ntl.pall
-; CHECK-RV32-NEXT: sw a1, 0(a0)
+; CHECK-RV32-NEXT: sw a1, 12(a0)
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64C-LABEL: test_nontemporal_PALL_store_v2i64:
; CHECK-RV64C: # %bb.0:
; CHECK-RV64C-NEXT: c.ntl.pall
-; CHECK-RV64C-NEXT: sd a2, 8(a0)
-; CHECK-RV64C-NEXT: c.ntl.pall
; CHECK-RV64C-NEXT: sd a1, 0(a0)
+; CHECK-RV64C-NEXT: c.ntl.pall
+; CHECK-RV64C-NEXT: sd a2, 8(a0)
; CHECK-RV64C-NEXT: ret
;
; CHECK-RV32C-LABEL: test_nontemporal_PALL_store_v2i64:
; CHECK-RV32C: # %bb.0:
-; CHECK-RV32C-NEXT: lw a2, 12(a1)
-; CHECK-RV32C-NEXT: lw a3, 8(a1)
-; CHECK-RV32C-NEXT: lw a4, 4(a1)
-; CHECK-RV32C-NEXT: lw a1, 0(a1)
+; CHECK-RV32C-NEXT: lw a2, 0(a1)
+; CHECK-RV32C-NEXT: lw a3, 4(a1)
+; CHECK-RV32C-NEXT: lw a4, 8(a1)
+; CHECK-RV32C-NEXT: lw a1, 12(a1)
; CHECK-RV32C-NEXT: c.ntl.pall
-; CHECK-RV32C-NEXT: sw a2, 12(a0)
+; CHECK-RV32C-NEXT: sw a2, 0(a0)
; CHECK-RV32C-NEXT: c.ntl.pall
-; CHECK-RV32C-NEXT: sw a3, 8(a0)
+; CHECK-RV32C-NEXT: sw a3, 4(a0)
; CHECK-RV32C-NEXT: c.ntl.pall
-; CHECK-RV32C-NEXT: sw a4, 4(a0)
+; CHECK-RV32C-NEXT: sw a4, 8(a0)
; CHECK-RV32C-NEXT: c.ntl.pall
-; CHECK-RV32C-NEXT: sw a1, 0(a0)
+; CHECK-RV32C-NEXT: sw a1, 12(a0)
; CHECK-RV32C-NEXT: ret
;
; CHECK-RV64V-LABEL: test_nontemporal_PALL_store_v2i64:
@@ -4580,53 +4580,53 @@ define <16 x i8> @test_nontemporal_S1_load_v16i8(ptr %p) {
; CHECK-RV64-LABEL: test_nontemporal_S1_load_v16i8:
; CHECK-RV64: # %bb.0:
; CHECK-RV64-NEXT: ntl.s1
-; CHECK-RV64-NEXT: ld a2, 8(a1)
+; CHECK-RV64-NEXT: ld a2, 0(a1)
; CHECK-RV64-NEXT: ntl.s1
-; CHECK-RV64-NEXT: ld a1, 0(a1)
-; CHECK-RV64-NEXT: sd a2, 8(a0)
-; CHECK-RV64-NEXT: sd a1, 0(a0)
+; CHECK-RV64-NEXT: ld a1, 8(a1)
+; CHECK-RV64-NEXT: sd a2, 0(a0)
+; CHECK-RV64-NEXT: sd a1, 8(a0)
; CHECK-RV64-NEXT: ret
;
; CHECK-RV32-LABEL: test_nontemporal_S1_load_v16i8:
; CHECK-RV32: # %bb.0:
; CHECK-RV32-NEXT: ntl.s1
-; CHECK-RV32-NEXT: lw a2, 12(a1)
+; CHECK-RV32-NEXT: lw a2, 0(a1)
; CHECK-RV32-NEXT: ntl.s1
-; CHECK-RV32-NEXT: lw a3, 8(a1)
+; CHECK-RV32-NEXT: lw a3, 4(a1)
; CHECK-RV32-NEXT: ntl.s1
-; CHECK-RV32-NEXT: lw a4, 4(a1)
+; CHECK-RV32-NEXT: lw a4, 8(a1)
; CHECK-RV32-NEXT: ntl.s1
-; CHECK-RV32-NEXT: lw a1, 0(a1)
-; CHECK-RV32-NEXT: sw a2, 12(a0)
-; CHECK-RV32-NEXT: sw a3, 8(a0)
-; CHECK-RV32-NEXT: sw a4, 4(a0)
-; CHECK-RV32-NEXT: sw a1, 0(a0)
+; CHECK-RV32-NEXT: lw a1, 12(a1)
+; CHECK-RV32-NEXT: sw a2, 0(a0)
+; CHECK-RV32-NEXT: sw a3, 4(a0)
+; CHECK-RV32-NEXT: sw a4, 8(a0)
+; CHECK-RV32-NEXT: sw a1, 12(a0)
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64C-LABEL: test_nontemporal_S1_load_v16i8:
; CHECK-RV64C: # %bb.0:
; CHECK-RV64C-NEXT: c.ntl.s1
-; CHECK-RV64C-NEXT: ld a2, 8(a1)
+; CHECK-RV64C-NEXT: ld a2, 0(a1)
; CHECK-RV64C-NEXT: c.ntl.s1
-; CHECK-RV64C-NEXT: ld a1, 0(a1)
-; CHECK-RV64C-NEXT: sd a2, 8(a0)
-; CHECK-RV64C-NEXT: sd a1, 0(a0)
+; CHECK-RV64C-NEXT: ld a1, 8(a1)
+; CHECK-RV64C-NEXT: sd a2, 0(a0)
+; CHECK-RV64C-NEXT: sd a1, 8(a0)
; CHECK-RV64C-NEXT: ret
;
; CHECK-RV32C-LABEL: test_nontemporal_S1_load_v16i8:
; CHECK-RV32C: # %bb.0:
; CHECK-RV32C-NEXT: c.ntl.s1
-; CHECK-RV32C-NEXT: lw a2, 12(a1)
+; CHECK-RV32C-NEXT: lw a2, 0(a1)
; CHECK-RV32C-NEXT: c.ntl.s1
-; CHECK-RV32C-NEXT: lw a3, 8(a1)
+; CHECK-RV32C-NEXT: lw a3, 4(a1)
; CHECK-RV32C-NEXT: c.ntl.s1
-; CHECK-RV32C-NEXT: lw a4, 4(a1)
+; CHECK-RV32C-NEXT: lw a4, 8(a1)
; CHECK-RV32C-NEXT: c.ntl.s1
-; CHECK-RV32C-NEXT: lw a1, 0(a1)
-; CHECK-RV32C-NEXT: sw a2, 12(a0)
-; CHECK-RV32C-NEXT: sw a3, 8(a0)
-; CHECK-RV32C-NEXT: sw a4, 4(a0)
-; CHECK-RV32C-NEXT: sw a1, 0(a0)
+; CHECK-RV32C-NEXT: lw a1, 12(a1)
+; CHECK-RV32C-NEXT: sw a2, 0(a0)
+; CHECK-RV32C-NEXT: sw a3, 4(a0)
+; CHECK-RV32C-NEXT: sw a4, 8(a0)
+; CHECK-RV32C-NEXT: sw a1, 12(a0)
; CHECK-RV32C-NEXT: ret
;
; CHECK-RV64V-LABEL: test_nontemporal_S1_load_v16i8:
@@ -4650,53 +4650,53 @@ define <8 x i16> @test_nontemporal_S1_load_v8i16(ptr %p) {
; CHECK-RV64-LABEL: test_nontemporal_S1_load_v8i16:
; CHECK-RV64: # %bb.0:
; CHECK-RV64-NEXT: ntl.s1
-; CHECK-RV64-NEXT: ld a2, 8(a1)
+; CHECK-RV64-NEXT: ld a2, 0(a1)
; CHECK-RV64-NEXT: ntl.s1
-; CHECK-RV64-NEXT: ld a1, 0(a1)
-; CHECK-RV64-NEXT: sd a2, 8(a0)
-; CHECK-RV64-NEXT: sd a1, 0(a0)
+; CHECK-RV64-NEXT: ld a1, 8(a1)
+; CHECK-RV64-NEXT: sd a2, 0(a0)
+; CHECK-RV64-NEXT: sd a1, 8(a0)
; CHECK-RV64-NEXT: ret
;
; CHECK-RV32-LABEL: test_nontemporal_S1_load_v8i16:
; CHECK-RV32: # %bb.0:
; CHECK-RV32-NEXT: ntl.s1
-; CHECK-RV32-NEXT: lw a2, 12(a1)
+; CHECK-RV32-NEXT: lw a2, 0(a1)
; CHECK-RV32-NEXT: ntl.s1
-; CHECK-RV32-NEXT: lw a3, 8(a1)
+; CHECK-RV32-NEXT: lw a3, 4(a1)
; CHECK-RV32-NEXT: ntl.s1
-; CHECK-RV32-NEXT: lw a4, 4(a1)
+; CHECK-RV32-NEXT: lw a4, 8(a1)
; CHECK-RV32-NEXT: ntl.s1
-; CHECK-RV32-NEXT: lw a1, 0(a1)
-; CHECK-RV32-NEXT: sw a2, 12(a0)
-; CHECK-RV32-NEXT: sw a3, 8(a0)
-; CHECK-RV32-NEXT: sw a4, 4(a0)
-; CHECK-RV32-NEXT: sw a1, 0(a0)
+; CHECK-RV32-NEXT: lw a1, 12(a1)
+; CHECK-RV32-NEXT: sw a2, 0(a0)
+; CHECK-RV32-NEXT: sw a3, 4(a0)
+; CHECK-RV32-NEXT: sw a4, 8(a0)
+; CHECK-RV32-NEXT: sw a1, 12(a0)
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64C-LABEL: test_nontemporal_S1_load_v8i16:
; CHECK-RV64C: # %bb.0:
; CHECK-RV64C-NEXT: c.ntl.s1
-; CHECK-RV64C-NEXT: ld a2, 8(a1)
+; CHECK-RV64C-NEXT: ld a2, 0(a1)
; CHECK-RV64C-NEXT: c.ntl.s1
-; CHECK-RV64C-NEXT: ld a1, 0(a1)
-; CHECK-RV64C-NEXT: sd a2, 8(a0)
-; CHECK-RV64C-NEXT: sd a1, 0(a0)
+; CHECK-RV64C-NEXT: ld a1, 8(a1)
+; CHECK-RV64C-NEXT: sd a2, 0(a0)
+; CHECK-RV64C-NEXT: sd a1, 8(a0)
; CHECK-RV64C-NEXT: ret
;
; CHECK-RV32C-LABEL: test_nontemporal_S1_load_v8i16:
; CHECK-RV32C: # %bb.0:
; CHECK-RV32C-NEXT: c.ntl.s1
-; CHECK-RV32C-NEXT: lw a2, 12(a1)
+; CHECK-RV32C-NEXT: lw a2, 0(a1)
; CHECK-RV32C-NEXT: c.ntl.s1
-; CHECK-RV32C-NEXT: lw a3, 8(a1)
+; CHECK-RV32C-NEXT: lw a3, 4(a1)
; CHECK-RV32C-NEXT: c.ntl.s1
-; CHECK-RV32C-NEXT: lw a4, 4(a1)
+; CHECK-RV32C-NEXT: lw a4, 8(a1)
; CHECK-RV32C-NEXT: c.ntl.s1
-; CHECK-RV32C-NEXT: lw a1, 0(a1)
-; CHECK-RV32C-NEXT: sw a2, 12(a0)
-; CHECK-RV32C-NEXT: sw a3, 8(a0)
-; CHECK-RV32C-NEXT: sw a4, 4(a0)
-; CHECK-RV32C-NEXT: sw a1, 0(a0)
+; CHECK-RV32C-NEXT: lw a1, 12(a1)
+; CHECK-RV32C-NEXT: sw a2, 0(a0)
+; CHECK-RV32C-NEXT: sw a3, 4(a0)
+; CHECK-RV32C-NEXT: sw a4, 8(a0)
+; CHECK-RV32C-NEXT: sw a1, 12(a0)
; CHECK-RV32C-NEXT: ret
;
; CHECK-RV64V-LABEL: test_nontemporal_S1_load_v8i16:
@@ -4720,53 +4720,53 @@ define <4 x i32> @test_nontemporal_S1_load_v4i32(ptr %p) {
; CHECK-RV64-LABEL: test_nontemporal_S1_load_v4i32:
; CHECK-RV64: # %bb.0:
; CHECK-RV64-NEXT: ntl.s1
-; CHECK-RV64-NEXT: ld a2, 8(a1)
+; CHECK-RV64-NEXT: ld a2, 0(a1)
; CHECK-RV64-NEXT: ntl.s1
-; CHECK-RV64-NEXT: ld a1, 0(a1)
-; CHECK-RV64-NEXT: sd a2, 8(a0)
-; CHECK-RV64-NEXT: sd a1, 0(a0)
+; CHECK-RV64-NEXT: ld a1, 8(a1)
+; CHECK-RV64-NEXT: sd a2, 0(a0)
+; CHECK-RV64-NEXT: sd a1, 8(a0)
; CHECK-RV64-NEXT: ret
;
; CHECK-RV32-LABEL: test_nontemporal_S1_load_v4i32:
; CHECK-RV32: # %bb.0:
; CHECK-RV32-NEXT: ntl.s1
-; CHECK-RV32-NEXT: lw a2, 12(a1)
+; CHECK-RV32-NEXT: lw a2, 0(a1)
; CHECK-RV32-NEXT: ntl.s1
-; CHECK-RV32-NEXT: lw a3, 8(a1)
+; CHECK-RV32-NEXT: lw a3, 4(a1)
; CHECK-RV32-NEXT: ntl.s1
-; CHECK-RV32-NEXT: lw a4, 4(a1)
+; CHECK-RV32-NEXT: lw a4, 8(a1)
; CHECK-RV32-NEXT: ntl.s1
-; CHECK-RV32-NEXT: lw a1, 0(a1)
-; CHECK-RV32-NEXT: sw a2, 12(a0)
-; CHECK-RV32-NEXT: sw a3, 8(a0)
-; CHECK-RV32-NEXT: sw a4, 4(a0)
-; CHECK-RV32-NEXT: sw a1, 0(a0)
+; CHECK-RV32-NEXT: lw a1, 12(a1)
+; CHECK-RV32-NEXT: sw a2, 0(a0)
+; CHECK-RV32-NEXT: sw a3, 4(a0)
+; CHECK-RV32-NEXT: sw a4, 8(a0)
+; CHECK-RV32-NEXT: sw a1, 12(a0)
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64C-LABEL: test_nontemporal_S1_load_v4i32:
; CHECK-RV64C: # %bb.0:
; CHECK-RV64C-NEXT: c.ntl.s1
-; CHECK-RV64C-NEXT: ld a2, 8(a1)
+; CHECK-RV64C-NEXT: ld a2, 0(a1)
; CHECK-RV64C-NEXT: c.ntl.s1
-; CHECK-RV64C-NEXT: ld a1, 0(a1)
-; CHECK-RV64C-NEXT: sd a2, 8(a0)
-; CHECK-RV64C-NEXT: sd a1, 0(a0)
+; CHECK-RV64C-NEXT: ld a1, 8(a1)
+; CHECK-RV64C-NEXT: sd a2, 0(a0)
+; CHECK-RV64C-NEXT: sd a1, 8(a0)
; CHECK-RV64C-NEXT: ret
;
; CHECK-RV32C-LABEL: test_nontemporal_S1_load_v4i32:
; CHECK-RV32C: # %bb.0:
; CHECK-RV32C-NEXT: c.ntl.s1
-; CHECK-RV32C-NEXT: lw a2, 12(a1)
+; CHECK-RV32C-NEXT: lw a2, 0(a1)
; CHECK-RV32C-NEXT: c.ntl.s1
-; CHECK-RV32C-NEXT: lw a3, 8(a1)
+; CHECK-RV32C-NEXT: lw a3, 4(a1)
; CHECK-RV32C-NEXT: c.ntl.s1
-; CHECK-RV32C-NEXT: lw a4, 4(a1)
+; CHECK-RV32C-NEXT: lw a4, 8(a1)
; CHECK-RV32C-NEXT: c.ntl.s1
-; CHECK-RV32C-NEXT: lw a1, 0(a1)
-; CHECK-RV32C-NEXT: sw a2, 12(a0)
-; CHECK-RV32C-NEXT: sw a3, 8(a0)
-; CHECK-RV32C-NEXT: sw a4, 4(a0)
-; CHECK-RV32C-NEXT: sw a1, 0(a0)
+; CHECK-RV32C-NEXT: lw a1, 12(a1)
+; CHECK-RV32C-NEXT: sw a2, 0(a0)
+; CHECK-RV32C-NEXT: sw a3, 4(a0)
+; CHECK-RV32C-NEXT: sw a4, 8(a0)
+; CHECK-RV32C-NEXT: sw a1, 12(a0)
; CHECK-RV32C-NEXT: ret
;
; CHECK-RV64V-LABEL: test_nontemporal_S1_load_v4i32:
@@ -4799,17 +4799,17 @@ define <2 x i64> @test_nontemporal_S1_load_v2i64(ptr %p) {
; CHECK-RV32-LABEL: test_nontemporal_S1_load_v2i64:
; CHECK-RV32: # %bb.0:
; CHECK-RV32-NEXT: ntl.s1
-; CHECK-RV32-NEXT: lw a2, 12(a1)
+; CHECK-RV32-NEXT: lw a2, 0(a1)
; CHECK-RV32-NEXT: ntl.s1
-; CHECK-RV32-NEXT: lw a3, 8(a1)
+; CHECK-RV32-NEXT: lw a3, 4(a1)
; CHECK-RV32-NEXT: ntl.s1
-; CHECK-RV32-NEXT: lw a4, 4(a1)
+; CHECK-RV32-NEXT: lw a4, 8(a1)
; CHECK-RV32-NEXT: ntl.s1
-; CHECK-RV32-NEXT: lw a1, 0(a1)
-; CHECK-RV32-NEXT: sw a2, 12(a0)
-; CHECK-RV32-NEXT: sw a3, 8(a0)
-; CHECK-RV32-NEXT: sw a4, 4(a0)
-; CHECK-RV32-NEXT: sw a1, 0(a0)
+; CHECK-RV32-NEXT: lw a1, 12(a1)
+; CHECK-RV32-NEXT: sw a2, 0(a0)
+; CHECK-RV32-NEXT: sw a3, 4(a0)
+; CHECK-RV32-NEXT: sw a4, 8(a0)
+; CHECK-RV32-NEXT: sw a1, 12(a0)
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64C-LABEL: test_nontemporal_S1_load_v2i64:
@@ -4824,17 +4824,17 @@ define <2 x i64> @test_nontemporal_S1_load_v2i64(ptr %p) {
; CHECK-RV32C-LABEL: test_nontemporal_S1_load_v2i64:
; CHECK-RV32C: # %bb.0:
; CHECK-RV32C-NEXT: c.ntl.s1
-; CHECK-RV32C-NEXT: lw a2, 12(a1)
+; CHECK-RV32C-NEXT: lw a2, 0(a1)
; CHECK-RV32C-NEXT: c.ntl.s1
-; CHECK-RV32C-NEXT: lw a3, 8(a1)
+; CHECK-RV32C-NEXT: lw a3, 4(a1)
; CHECK-RV32C-NEXT: c.ntl.s1
-; CHECK-RV32C-NEXT: lw a4, 4(a1)
+; CHECK-RV32C-NEXT: lw a4, 8(a1)
; CHECK-RV32C-NEXT: c.ntl.s1
-; CHECK-RV32C-NEXT: lw a1, 0(a1)
-; CHECK-RV32C-NEXT: sw a2, 12(a0)
-; CHECK-RV32C-NEXT: sw a3, 8(a0)
-; CHECK-RV32C-NEXT: sw a4, 4(a0)
-; CHECK-RV32C-NEXT: sw a1, 0(a0)
+; CHECK-RV32C-NEXT: lw a1, 12(a1)
+; CHECK-RV32C-NEXT: sw a2, 0(a0)
+; CHECK-RV32C-NEXT: sw a3, 4(a0)
+; CHECK-RV32C-NEXT: sw a4, 8(a0)
+; CHECK-RV32C-NEXT: sw a1, 12(a0)
; CHECK-RV32C-NEXT: ret
;
; CHECK-RV64V-LABEL: test_nontemporal_S1_load_v2i64:
@@ -4864,9 +4864,9 @@ define void @test_nontemporal_S1_store_i64(ptr %p, i64 %v) {
; CHECK-RV32-LABEL: test_nontemporal_S1_store_i64:
; CHECK-RV32: # %bb.0:
; CHECK-RV32-NEXT: ntl.s1
-; CHECK-RV32-NEXT: sw a2, 4(a0)
-; CHECK-RV32-NEXT: ntl.s1
; CHECK-RV32-NEXT: sw a1, 0(a0)
+; CHECK-RV32-NEXT: ntl.s1
+; CHECK-RV32-NEXT: sw a2, 4(a0)
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64C-LABEL: test_nontemporal_S1_store_i64:
@@ -4878,9 +4878,9 @@ define void @test_nontemporal_S1_store_i64(ptr %p, i64 %v) {
; CHECK-RV32C-LABEL: test_nontemporal_S1_store_i64:
; CHECK-RV32C: # %bb.0:
; CHECK-RV32C-NEXT: c.ntl.s1
-; CHECK-RV32C-NEXT: sw a2, 4(a0)
-; CHECK-RV32C-NEXT: c.ntl.s1
; CHECK-RV32C-NEXT: sw a1, 0(a0)
+; CHECK-RV32C-NEXT: c.ntl.s1
+; CHECK-RV32C-NEXT: sw a2, 4(a0)
; CHECK-RV32C-NEXT: ret
;
; CHECK-RV64V-LABEL: test_nontemporal_S1_store_i64:
@@ -4892,9 +4892,9 @@ define void @test_nontemporal_S1_store_i64(ptr %p, i64 %v) {
; CHECK-RV32V-LABEL: test_nontemporal_S1_store_i64:
; CHECK-RV32V: # %bb.0:
; CHECK-RV32V-NEXT: ntl.s1
-; CHECK-RV32V-NEXT: sw a2, 4(a0)
-; CHECK-RV32V-NEXT: ntl.s1
; CHECK-RV32V-NEXT: sw a1, 0(a0)
+; CHECK-RV32V-NEXT: ntl.s1
+; CHECK-RV32V-NEXT: sw a2, 4(a0)
; CHECK-RV32V-NEXT: ret
store i64 %v, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !3
ret void
@@ -5157,46 +5157,46 @@ define void @test_nontemporal_S1_store_v16i8(ptr %p, <16 x i8> %v) {
; CHECK-RV64-NEXT: lbu a7, 40(a1)
; CHECK-RV64-NEXT: lbu t0, 48(a1)
; CHECK-RV64-NEXT: lbu t1, 56(a1)
-; CHECK-RV64-NEXT: lbu t2, 96(a1)
-; CHECK-RV64-NEXT: lbu t3, 104(a1)
-; CHECK-RV64-NEXT: lbu t4, 112(a1)
-; CHECK-RV64-NEXT: lbu t5, 120(a1)
-; CHECK-RV64-NEXT: lbu t6, 64(a1)
-; CHECK-RV64-NEXT: lbu s0, 72(a1)
-; CHECK-RV64-NEXT: lbu s1, 80(a1)
-; CHECK-RV64-NEXT: lbu a1, 88(a1)
-; CHECK-RV64-NEXT: ntl.s1
-; CHECK-RV64-NEXT: sb t5, 15(a0)
+; CHECK-RV64-NEXT: lbu t2, 64(a1)
+; CHECK-RV64-NEXT: lbu t3, 72(a1)
+; CHECK-RV64-NEXT: lbu t4, 80(a1)
+; CHECK-RV64-NEXT: lbu t5, 88(a1)
+; CHECK-RV64-NEXT: lbu t6, 96(a1)
+; CHECK-RV64-NEXT: lbu s0, 104(a1)
+; CHECK-RV64-NEXT: lbu s1, 112(a1)
+; CHECK-RV64-NEXT: lbu a1, 120(a1)
; CHECK-RV64-NEXT: ntl.s1
-; CHECK-RV64-NEXT: sb t4, 14(a0)
+; CHECK-RV64-NEXT: sb t6, 12(a0)
; CHECK-RV64-NEXT: ntl.s1
-; CHECK-RV64-NEXT: sb t3, 13(a0)
+; CHECK-RV64-NEXT: sb s0, 13(a0)
; CHECK-RV64-NEXT: ntl.s1
-; CHECK-RV64-NEXT: sb t2, 12(a0)
+; CHECK-RV64-NEXT: sb s1, 14(a0)
; CHECK-RV64-NEXT: ntl.s1
-; CHECK-RV64-NEXT: sb a1, 11(a0)
+; CHECK-RV64-NEXT: sb a1, 15(a0)
; CHECK-RV64-NEXT: ntl.s1
-; CHECK-RV64-NEXT: sb s1, 10(a0)
+; CHECK-RV64-NEXT: sb t2, 8(a0)
; CHECK-RV64-NEXT: ntl.s1
-; CHECK-RV64-NEXT: sb s0, 9(a0)
+; CHECK-RV64-NEXT: sb t3, 9(a0)
; CHECK-RV64-NEXT: ntl.s1
-; CHECK-RV64-NEXT: sb t6, 8(a0)
+; CHECK-RV64-NEXT: sb t4, 10(a0)
; CHECK-RV64-NEXT: ntl.s1
-; CHECK-RV64-NEXT: sb t1, 7(a0)
+; CHECK-RV64-NEXT: sb t5, 11(a0)
; CHECK-RV64-NEXT: ntl.s1
-; CHECK-RV64-NEXT: sb t0, 6(a0)
+; CHECK-RV64-NEXT: sb a6, 4(a0)
; CHECK-RV64-NEXT: ntl.s1
; CHECK-RV64-NEXT: sb a7, 5(a0)
; CHECK-RV64-NEXT: ntl.s1
-; CHECK-RV64-NEXT: sb a6, 4(a0)
+; CHECK-RV64-NEXT: sb t0, 6(a0)
; CHECK-RV64-NEXT: ntl.s1
-; CHECK-RV64-NEXT: sb a5, 3(a0)
+; CHECK-RV64-NEXT: sb t1, 7(a0)
; CHECK-RV64-NEXT: ntl.s1
-; CHECK-RV64-NEXT: sb a4, 2(a0)
+; CHECK-RV64-NEXT: sb a2, 0(a0)
; CHECK-RV64-NEXT: ntl.s1
; CHECK-RV64-NEXT: sb a3, 1(a0)
; CHECK-RV64-NEXT: ntl.s1
-; CHECK-RV64-NEXT: sb a2, 0(a0)
+; CHECK-RV64-NEXT: sb a4, 2(a0)
+; CHECK-RV64-NEXT: ntl.s1
+; CHECK-RV64-NEXT: sb a5, 3(a0)
; CHECK-RV64-NEXT: ld s0, 8(sp) # 8-byte Folded Reload
; CHECK-RV64-NEXT: ld s1, 0(sp) # 8-byte Folded Reload
; CHECK-RV64-NEXT: addi sp, sp, 16
@@ -5218,46 +5218,46 @@ define void @test_nontemporal_S1_store_v16i8(ptr %p, <16 x i8> %v) {
; CHECK-RV32-NEXT: lbu a7, 20(a1)
; CHECK-RV32-NEXT: lbu t0, 24(a1)
; CHECK-RV32-NEXT: lbu t1, 28(a1)
-; CHECK-RV32-NEXT: lbu t2, 48(a1)
-; CHECK-RV32-NEXT: lbu t3, 52(a1)
-; CHECK-RV32-NEXT: lbu t4, 56(a1)
-; CHECK-RV32-NEXT: lbu t5, 60(a1)
-; CHECK-RV32-NEXT: lbu t6, 32(a1)
-; CHECK-RV32-NEXT: lbu s0, 36(a1)
-; CHECK-RV32-NEXT: lbu s1, 40(a1)
-; CHECK-RV32-NEXT: lbu a1, 44(a1)
-; CHECK-RV32-NEXT: ntl.s1
-; CHECK-RV32-NEXT: sb t5, 15(a0)
+; CHECK-RV32-NEXT: lbu t2, 32(a1)
+; CHECK-RV32-NEXT: lbu t3, 36(a1)
+; CHECK-RV32-NEXT: lbu t4, 40(a1)
+; CHECK-RV32-NEXT: lbu t5, 44(a1)
+; CHECK-RV32-NEXT: lbu t6, 48(a1)
+; CHECK-RV32-NEXT: lbu s0, 52(a1)
+; CHECK-RV32-NEXT: lbu s1, 56(a1)
+; CHECK-RV32-NEXT: lbu a1, 60(a1)
; CHECK-RV32-NEXT: ntl.s1
-; CHECK-RV32-NEXT: sb t4, 14(a0)
+; CHECK-RV32-NEXT: sb t6, 12(a0)
; CHECK-RV32-NEXT: ntl.s1
-; CHECK-RV32-NEXT: sb t3, 13(a0)
+; CHECK-RV32-NEXT: sb s0, 13(a0)
; CHECK-RV32-NEXT: ntl.s1
-; CHECK-RV32-NEXT: sb t2, 12(a0)
+; CHECK-RV32-NEXT: sb s1, 14(a0)
; CHECK-RV32-NEXT: ntl.s1
-; CHECK-RV32-NEXT: sb a1, 11(a0)
+; CHECK-RV32-NEXT: sb a1, 15(a0)
; CHECK-RV32-NEXT: ntl.s1
-; CHECK-RV32-NEXT: sb s1, 10(a0)
+; CHECK-RV32-NEXT: sb t2, 8(a0)
; CHECK-RV32-NEXT: ntl.s1
-; CHECK-RV32-NEXT: sb s0, 9(a0)
+; CHECK-RV32-NEXT: sb t3, 9(a0)
; CHECK-RV32-NEXT: ntl.s1
-; CHECK-RV32-NEXT: sb t6, 8(a0)
+; CHECK-RV32-NEXT: sb t4, 10(a0)
; CHECK-RV32-NEXT: ntl.s1
-; CHECK-RV32-NEXT: sb t1, 7(a0)
+; CHECK-RV32-NEXT: sb t5, 11(a0)
; CHECK-RV32-NEXT: ntl.s1
-; CHECK-RV32-NEXT: sb t0, 6(a0)
+; CHECK-RV32-NEXT: sb a6, 4(a0)
; CHECK-RV32-NEXT: ntl.s1
; CHECK-RV32-NEXT: sb a7, 5(a0)
; CHECK-RV32-NEXT: ntl.s1
-; CHECK-RV32-NEXT: sb a6, 4(a0)
+; CHECK-RV32-NEXT: sb t0, 6(a0)
; CHECK-RV32-NEXT: ntl.s1
-; CHECK-RV32-NEXT: sb a5, 3(a0)
+; CHECK-RV32-NEXT: sb t1, 7(a0)
; CHECK-RV32-NEXT: ntl.s1
-; CHECK-RV32-NEXT: sb a4, 2(a0)
+; CHECK-RV32-NEXT: sb a2, 0(a0)
; CHECK-RV32-NEXT: ntl.s1
; CHECK-RV32-NEXT: sb a3, 1(a0)
; CHECK-RV32-NEXT: ntl.s1
-; CHECK-RV32-NEXT: sb a2, 0(a0)
+; CHECK-RV32-NEXT: sb a4, 2(a0)
+; CHECK-RV32-NEXT: ntl.s1
+; CHECK-RV32-NEXT: sb a5, 3(a0)
; CHECK-RV32-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; CHECK-RV32-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
; CHECK-RV32-NEXT: addi sp, sp, 16
@@ -5279,46 +5279,46 @@ define void @test_nontemporal_S1_store_v16i8(ptr %p, <16 x i8> %v) {
; CHECK-RV64C-NEXT: lbu t3, 40(a1)
; CHECK-RV64C-NEXT: lbu t4, 48(a1)
; CHECK-RV64C-NEXT: lbu t5, 56(a1)
-; CHECK-RV64C-NEXT: lbu a2, 96(a1)
-; CHECK-RV64C-NEXT: lbu a3, 104(a1)
-; CHECK-RV64C-NEXT: lbu a4, 112(a1)
-; CHECK-RV64C-NEXT: lbu a5, 120(a1)
; CHECK-RV64C-NEXT: lbu t6, 64(a1)
-; CHECK-RV64C-NEXT: lbu s0, 72(a1)
-; CHECK-RV64C-NEXT: lbu s1, 80(a1)
-; CHECK-RV64C-NEXT: lbu a1, 88(a1)
-; CHECK-RV64C-NEXT: c.ntl.s1
-; CHECK-RV64C-NEXT: sb a5, 15(a0)
-; CHECK-RV64C-NEXT: c.ntl.s1
-; CHECK-RV64C-NEXT: sb a4, 14(a0)
-; CHECK-RV64C-NEXT: c.ntl.s1
-; CHECK-RV64C-NEXT: sb a3, 13(a0)
+; CHECK-RV64C-NEXT: lbu a3, 72(a1)
+; CHECK-RV64C-NEXT: lbu a4, 80(a1)
+; CHECK-RV64C-NEXT: lbu a5, 88(a1)
+; CHECK-RV64C-NEXT: lbu a2, 96(a1)
+; CHECK-RV64C-NEXT: lbu s0, 104(a1)
+; CHECK-RV64C-NEXT: lbu s1, 112(a1)
+; CHECK-RV64C-NEXT: lbu a1, 120(a1)
; CHECK-RV64C-NEXT: c.ntl.s1
; CHECK-RV64C-NEXT: sb a2, 12(a0)
; CHECK-RV64C-NEXT: c.ntl.s1
-; CHECK-RV64C-NEXT: sb a1, 11(a0)
+; CHECK-RV64C-NEXT: sb s0, 13(a0)
; CHECK-RV64C-NEXT: c.ntl.s1
-; CHECK-RV64C-NEXT: sb s1, 10(a0)
+; CHECK-RV64C-NEXT: sb s1, 14(a0)
; CHECK-RV64C-NEXT: c.ntl.s1
-; CHECK-RV64C-NEXT: sb s0, 9(a0)
+; CHECK-RV64C-NEXT: sb a1, 15(a0)
; CHECK-RV64C-NEXT: c.ntl.s1
; CHECK-RV64C-NEXT: sb t6, 8(a0)
; CHECK-RV64C-NEXT: c.ntl.s1
-; CHECK-RV64C-NEXT: sb t5, 7(a0)
+; CHECK-RV64C-NEXT: sb a3, 9(a0)
; CHECK-RV64C-NEXT: c.ntl.s1
-; CHECK-RV64C-NEXT: sb t4, 6(a0)
+; CHECK-RV64C-NEXT: sb a4, 10(a0)
; CHECK-RV64C-NEXT: c.ntl.s1
-; CHECK-RV64C-NEXT: sb t3, 5(a0)
+; CHECK-RV64C-NEXT: sb a5, 11(a0)
; CHECK-RV64C-NEXT: c.ntl.s1
; CHECK-RV64C-NEXT: sb t2, 4(a0)
; CHECK-RV64C-NEXT: c.ntl.s1
-; CHECK-RV64C-NEXT: sb t1, 3(a0)
+; CHECK-RV64C-NEXT: sb t3, 5(a0)
; CHECK-RV64C-NEXT: c.ntl.s1
-; CHECK-RV64C-NEXT: sb t0, 2(a0)
+; CHECK-RV64C-NEXT: sb t4, 6(a0)
; CHECK-RV64C-NEXT: c.ntl.s1
-; CHECK-RV64C-NEXT: sb a7, 1(a0)
+; CHECK-RV64C-NEXT: sb t5, 7(a0)
; CHECK-RV64C-NEXT: c.ntl.s1
; CHECK-RV64C-NEXT: sb a6, 0(a0)
+; CHECK-RV64C-NEXT: c.ntl.s1
+; CHECK-RV64C-NEXT: sb a7, 1(a0)
+; CHECK-RV64C-NEXT: c.ntl.s1
+; CHECK-RV64C-NEXT: sb t0, 2(a0)
+; CHECK-RV64C-NEXT: c.ntl.s1
+; CHECK-RV64C-NEXT: sb t1, 3(a0)
; CHECK-RV64C-NEXT: ld s0, 8(sp) # 8-byte Folded Reload
; CHECK-RV64C-NEXT: ld s1, 0(sp) # 8-byte Folded Reload
; CHECK-RV64C-NEXT: addi sp, sp, 16
@@ -5340,46 +5340,46 @@ define void @test_nontemporal_S1_store_v16i8(ptr %p, <16 x i8> %v) {
; CHECK-RV32C-NEXT: lbu t3, 20(a1)
; CHECK-RV32C-NEXT: lbu t4, 24(a1)
; CHECK-RV32C-NEXT: lbu t5, 28(a1)
-; CHECK-RV32C-NEXT: lbu a2, 48(a1)
-; CHECK-RV32C-NEXT: lbu a3, 52(a1)
-; CHECK-RV32C-NEXT: lbu a4, 56(a1)
-; CHECK-RV32C-NEXT: lbu a5, 60(a1)
; CHECK-RV32C-NEXT: lbu t6, 32(a1)
-; CHECK-RV32C-NEXT: lbu s0, 36(a1)
-; CHECK-RV32C-NEXT: lbu s1, 40(a1)
-; CHECK-RV32C-NEXT: lbu a1, 44(a1)
-; CHECK-RV32C-NEXT: c.ntl.s1
-; CHECK-RV32C-NEXT: sb a5, 15(a0)
-; CHECK-RV32C-NEXT: c.ntl.s1
-; CHECK-RV32C-NEXT: sb a4, 14(a0)
-; CHECK-RV32C-NEXT: c.ntl.s1
-; CHECK-RV32C-NEXT: sb a3, 13(a0)
+; CHECK-RV32C-NEXT: lbu a3, 36(a1)
+; CHECK-RV32C-NEXT: lbu a4, 40(a1)
+; CHECK-RV32C-NEXT: lbu a5, 44(a1)
+; CHECK-RV32C-NEXT: lbu a2, 48(a1)
+; CHECK-RV32C-NEXT: lbu s0, 52(a1)
+; CHECK-RV32C-NEXT: lbu s1, 56(a1)
+; CHECK-RV32C-NEXT: lbu a1, 60(a1)
; CHECK-RV32C-NEXT: c.ntl.s1
; CHECK-RV32C-NEXT: sb a2, 12(a0)
; CHECK-RV32C-NEXT: c.ntl.s1
-; CHECK-RV32C-NEXT: sb a1, 11(a0)
+; CHECK-RV32C-NEXT: sb s0, 13(a0)
; CHECK-RV32C-NEXT: c.ntl.s1
-; CHECK-RV32C-NEXT: sb s1, 10(a0)
+; CHECK-RV32C-NEXT: sb s1, 14(a0)
; CHECK-RV32C-NEXT: c.ntl.s1
-; CHECK-RV32C-NEXT: sb s0, 9(a0)
+; CHECK-RV32C-NEXT: sb a1, 15(a0)
; CHECK-RV32C-NEXT: c.ntl.s1
; CHECK-RV32C-NEXT: sb t6, 8(a0)
; CHECK-RV32C-NEXT: c.ntl.s1
-; CHECK-RV32C-NEXT: sb t5, 7(a0)
+; CHECK-RV32C-NEXT: sb a3, 9(a0)
; CHECK-RV32C-NEXT: c.ntl.s1
-; CHECK-RV32C-NEXT: sb t4, 6(a0)
+; CHECK-RV32C-NEXT: sb a4, 10(a0)
; CHECK-RV32C-NEXT: c.ntl.s1
-; CHECK-RV32C-NEXT: sb t3, 5(a0)
+; CHECK-RV32C-NEXT: sb a5, 11(a0)
; CHECK-RV32C-NEXT: c.ntl.s1
; CHECK-RV32C-NEXT: sb t2, 4(a0)
; CHECK-RV32C-NEXT: c.ntl.s1
-; CHECK-RV32C-NEXT: sb t1, 3(a0)
+; CHECK-RV32C-NEXT: sb t3, 5(a0)
; CHECK-RV32C-NEXT: c.ntl.s1
-; CHECK-RV32C-NEXT: sb t0, 2(a0)
+; CHECK-RV32C-NEXT: sb t4, 6(a0)
; CHECK-RV32C-NEXT: c.ntl.s1
-; CHECK-RV32C-NEXT: sb a7, 1(a0)
+; CHECK-RV32C-NEXT: sb t5, 7(a0)
; CHECK-RV32C-NEXT: c.ntl.s1
; CHECK-RV32C-NEXT: sb a6, 0(a0)
+; CHECK-RV32C-NEXT: c.ntl.s1
+; CHECK-RV32C-NEXT: sb a7, 1(a0)
+; CHECK-RV32C-NEXT: c.ntl.s1
+; CHECK-RV32C-NEXT: sb t0, 2(a0)
+; CHECK-RV32C-NEXT: c.ntl.s1
+; CHECK-RV32C-NEXT: sb t1, 3(a0)
; CHECK-RV32C-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; CHECK-RV32C-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
; CHECK-RV32C-NEXT: addi sp, sp, 16
@@ -5405,114 +5405,114 @@ define void @test_nontemporal_S1_store_v16i8(ptr %p, <16 x i8> %v) {
define void @test_nontemporal_S1_store_v8i16(ptr %p, <8 x i16> %v) {
; CHECK-RV64-LABEL: test_nontemporal_S1_store_v8i16:
; CHECK-RV64: # %bb.0:
-; CHECK-RV64-NEXT: lh a2, 32(a1)
-; CHECK-RV64-NEXT: lh a3, 40(a1)
-; CHECK-RV64-NEXT: lh a4, 48(a1)
-; CHECK-RV64-NEXT: lh a5, 56(a1)
-; CHECK-RV64-NEXT: lh a6, 0(a1)
-; CHECK-RV64-NEXT: lh a7, 8(a1)
-; CHECK-RV64-NEXT: lh t0, 16(a1)
-; CHECK-RV64-NEXT: lh a1, 24(a1)
+; CHECK-RV64-NEXT: lh a2, 0(a1)
+; CHECK-RV64-NEXT: lh a3, 8(a1)
+; CHECK-RV64-NEXT: lh a4, 16(a1)
+; CHECK-RV64-NEXT: lh a5, 24(a1)
+; CHECK-RV64-NEXT: lh a6, 32(a1)
+; CHECK-RV64-NEXT: lh a7, 40(a1)
+; CHECK-RV64-NEXT: lh t0, 48(a1)
+; CHECK-RV64-NEXT: lh a1, 56(a1)
; CHECK-RV64-NEXT: ntl.s1
-; CHECK-RV64-NEXT: sh a5, 14(a0)
+; CHECK-RV64-NEXT: sh a6, 8(a0)
; CHECK-RV64-NEXT: ntl.s1
-; CHECK-RV64-NEXT: sh a4, 12(a0)
+; CHECK-RV64-NEXT: sh a7, 10(a0)
; CHECK-RV64-NEXT: ntl.s1
-; CHECK-RV64-NEXT: sh a3, 10(a0)
+; CHECK-RV64-NEXT: sh t0, 12(a0)
; CHECK-RV64-NEXT: ntl.s1
-; CHECK-RV64-NEXT: sh a2, 8(a0)
+; CHECK-RV64-NEXT: sh a1, 14(a0)
; CHECK-RV64-NEXT: ntl.s1
-; CHECK-RV64-NEXT: sh a1, 6(a0)
+; CHECK-RV64-NEXT: sh a2, 0(a0)
; CHECK-RV64-NEXT: ntl.s1
-; CHECK-RV64-NEXT: sh t0, 4(a0)
+; CHECK-RV64-NEXT: sh a3, 2(a0)
; CHECK-RV64-NEXT: ntl.s1
-; CHECK-RV64-NEXT: sh a7, 2(a0)
+; CHECK-RV64-NEXT: sh a4, 4(a0)
; CHECK-RV64-NEXT: ntl.s1
-; CHECK-RV64-NEXT: sh a6, 0(a0)
+; CHECK-RV64-NEXT: sh a5, 6(a0)
; CHECK-RV64-NEXT: ret
;
; CHECK-RV32-LABEL: test_nontemporal_S1_store_v8i16:
; CHECK-RV32: # %bb.0:
-; CHECK-RV32-NEXT: lh a2, 16(a1)
-; CHECK-RV32-NEXT: lh a3, 20(a1)
-; CHECK-RV32-NEXT: lh a4, 24(a1)
-; CHECK-RV32-NEXT: lh a5, 28(a1)
-; CHECK-RV32-NEXT: lh a6, 0(a1)
-; CHECK-RV32-NEXT: lh a7, 4(a1)
-; CHECK-RV32-NEXT: lh t0, 8(a1)
-; CHECK-RV32-NEXT: lh a1, 12(a1)
+; CHECK-RV32-NEXT: lh a2, 0(a1)
+; CHECK-RV32-NEXT: lh a3, 4(a1)
+; CHECK-RV32-NEXT: lh a4, 8(a1)
+; CHECK-RV32-NEXT: lh a5, 12(a1)
+; CHECK-RV32-NEXT: lh a6, 16(a1)
+; CHECK-RV32-NEXT: lh a7, 20(a1)
+; CHECK-RV32-NEXT: lh t0, 24(a1)
+; CHECK-RV32-NEXT: lh a1, 28(a1)
; CHECK-RV32-NEXT: ntl.s1
-; CHECK-RV32-NEXT: sh a5, 14(a0)
+; CHECK-RV32-NEXT: sh a6, 8(a0)
; CHECK-RV32-NEXT: ntl.s1
-; CHECK-RV32-NEXT: sh a4, 12(a0)
+; CHECK-RV32-NEXT: sh a7, 10(a0)
; CHECK-RV32-NEXT: ntl.s1
-; CHECK-RV32-NEXT: sh a3, 10(a0)
+; CHECK-RV32-NEXT: sh t0, 12(a0)
; CHECK-RV32-NEXT: ntl.s1
-; CHECK-RV32-NEXT: sh a2, 8(a0)
+; CHECK-RV32-NEXT: sh a1, 14(a0)
; CHECK-RV32-NEXT: ntl.s1
-; CHECK-RV32-NEXT: sh a1, 6(a0)
+; CHECK-RV32-NEXT: sh a2, 0(a0)
; CHECK-RV32-NEXT: ntl.s1
-; CHECK-RV32-NEXT: sh t0, 4(a0)
+; CHECK-RV32-NEXT: sh a3, 2(a0)
; CHECK-RV32-NEXT: ntl.s1
-; CHECK-RV32-NEXT: sh a7, 2(a0)
+; CHECK-RV32-NEXT: sh a4, 4(a0)
; CHECK-RV32-NEXT: ntl.s1
-; CHECK-RV32-NEXT: sh a6, 0(a0)
+; CHECK-RV32-NEXT: sh a5, 6(a0)
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64C-LABEL: test_nontemporal_S1_store_v8i16:
; CHECK-RV64C: # %bb.0:
-; CHECK-RV64C-NEXT: lh a7, 32(a1)
+; CHECK-RV64C-NEXT: lh a6, 0(a1)
+; CHECK-RV64C-NEXT: lh a7, 8(a1)
+; CHECK-RV64C-NEXT: lh t0, 16(a1)
+; CHECK-RV64C-NEXT: lh a5, 24(a1)
+; CHECK-RV64C-NEXT: lh a2, 32(a1)
; CHECK-RV64C-NEXT: lh a3, 40(a1)
; CHECK-RV64C-NEXT: lh a4, 48(a1)
-; CHECK-RV64C-NEXT: lh a5, 56(a1)
-; CHECK-RV64C-NEXT: lh a6, 0(a1)
-; CHECK-RV64C-NEXT: lh t0, 8(a1)
-; CHECK-RV64C-NEXT: lh a2, 16(a1)
-; CHECK-RV64C-NEXT: lh a1, 24(a1)
-; CHECK-RV64C-NEXT: c.ntl.s1
-; CHECK-RV64C-NEXT: sh a5, 14(a0)
+; CHECK-RV64C-NEXT: lh a1, 56(a1)
; CHECK-RV64C-NEXT: c.ntl.s1
-; CHECK-RV64C-NEXT: sh a4, 12(a0)
+; CHECK-RV64C-NEXT: sh a2, 8(a0)
; CHECK-RV64C-NEXT: c.ntl.s1
; CHECK-RV64C-NEXT: sh a3, 10(a0)
; CHECK-RV64C-NEXT: c.ntl.s1
-; CHECK-RV64C-NEXT: sh a7, 8(a0)
+; CHECK-RV64C-NEXT: sh a4, 12(a0)
; CHECK-RV64C-NEXT: c.ntl.s1
-; CHECK-RV64C-NEXT: sh a1, 6(a0)
+; CHECK-RV64C-NEXT: sh a1, 14(a0)
; CHECK-RV64C-NEXT: c.ntl.s1
-; CHECK-RV64C-NEXT: sh a2, 4(a0)
+; CHECK-RV64C-NEXT: sh a6, 0(a0)
; CHECK-RV64C-NEXT: c.ntl.s1
-; CHECK-RV64C-NEXT: sh t0, 2(a0)
+; CHECK-RV64C-NEXT: sh a7, 2(a0)
; CHECK-RV64C-NEXT: c.ntl.s1
-; CHECK-RV64C-NEXT: sh a6, 0(a0)
+; CHECK-RV64C-NEXT: sh t0, 4(a0)
+; CHECK-RV64C-NEXT: c.ntl.s1
+; CHECK-RV64C-NEXT: sh a5, 6(a0)
; CHECK-RV64C-NEXT: ret
;
; CHECK-RV32C-LABEL: test_nontemporal_S1_store_v8i16:
; CHECK-RV32C: # %bb.0:
-; CHECK-RV32C-NEXT: lh a7, 16(a1)
+; CHECK-RV32C-NEXT: lh a6, 0(a1)
+; CHECK-RV32C-NEXT: lh a7, 4(a1)
+; CHECK-RV32C-NEXT: lh t0, 8(a1)
+; CHECK-RV32C-NEXT: lh a5, 12(a1)
+; CHECK-RV32C-NEXT: lh a2, 16(a1)
; CHECK-RV32C-NEXT: lh a3, 20(a1)
; CHECK-RV32C-NEXT: lh a4, 24(a1)
-; CHECK-RV32C-NEXT: lh a5, 28(a1)
-; CHECK-RV32C-NEXT: lh a6, 0(a1)
-; CHECK-RV32C-NEXT: lh t0, 4(a1)
-; CHECK-RV32C-NEXT: lh a2, 8(a1)
-; CHECK-RV32C-NEXT: lh a1, 12(a1)
+; CHECK-RV32C-NEXT: lh a1, 28(a1)
; CHECK-RV32C-NEXT: c.ntl.s1
-; CHECK-RV32C-NEXT: sh a5, 14(a0)
-; CHECK-RV32C-NEXT: c.ntl.s1
-; CHECK-RV32C-NEXT: sh a4, 12(a0)
+; CHECK-RV32C-NEXT: sh a2, 8(a0)
; CHECK-RV32C-NEXT: c.ntl.s1
; CHECK-RV32C-NEXT: sh a3, 10(a0)
; CHECK-RV32C-NEXT: c.ntl.s1
-; CHECK-RV32C-NEXT: sh a7, 8(a0)
+; CHECK-RV32C-NEXT: sh a4, 12(a0)
; CHECK-RV32C-NEXT: c.ntl.s1
-; CHECK-RV32C-NEXT: sh a1, 6(a0)
+; CHECK-RV32C-NEXT: sh a1, 14(a0)
; CHECK-RV32C-NEXT: c.ntl.s1
-; CHECK-RV32C-NEXT: sh a2, 4(a0)
+; CHECK-RV32C-NEXT: sh a6, 0(a0)
; CHECK-RV32C-NEXT: c.ntl.s1
-; CHECK-RV32C-NEXT: sh t0, 2(a0)
+; CHECK-RV32C-NEXT: sh a7, 2(a0)
; CHECK-RV32C-NEXT: c.ntl.s1
-; CHECK-RV32C-NEXT: sh a6, 0(a0)
+; CHECK-RV32C-NEXT: sh t0, 4(a0)
+; CHECK-RV32C-NEXT: c.ntl.s1
+; CHECK-RV32C-NEXT: sh a5, 6(a0)
; CHECK-RV32C-NEXT: ret
;
; CHECK-RV64V-LABEL: test_nontemporal_S1_store_v8i16:
@@ -5535,66 +5535,66 @@ define void @test_nontemporal_S1_store_v8i16(ptr %p, <8 x i16> %v) {
define void @test_nontemporal_S1_store_v4i32(ptr %p, <4 x i32> %v) {
; CHECK-RV64-LABEL: test_nontemporal_S1_store_v4i32:
; CHECK-RV64: # %bb.0:
-; CHECK-RV64-NEXT: lw a2, 24(a1)
-; CHECK-RV64-NEXT: lw a3, 16(a1)
-; CHECK-RV64-NEXT: lw a4, 8(a1)
-; CHECK-RV64-NEXT: lw a1, 0(a1)
+; CHECK-RV64-NEXT: lw a2, 0(a1)
+; CHECK-RV64-NEXT: lw a3, 8(a1)
+; CHECK-RV64-NEXT: lw a4, 16(a1)
+; CHECK-RV64-NEXT: lw a1, 24(a1)
; CHECK-RV64-NEXT: ntl.s1
-; CHECK-RV64-NEXT: sw a2, 12(a0)
+; CHECK-RV64-NEXT: sw a2, 0(a0)
; CHECK-RV64-NEXT: ntl.s1
-; CHECK-RV64-NEXT: sw a3, 8(a0)
+; CHECK-RV64-NEXT: sw a3, 4(a0)
; CHECK-RV64-NEXT: ntl.s1
-; CHECK-RV64-NEXT: sw a4, 4(a0)
+; CHECK-RV64-NEXT: sw a4, 8(a0)
; CHECK-RV64-NEXT: ntl.s1
-; CHECK-RV64-NEXT: sw a1, 0(a0)
+; CHECK-RV64-NEXT: sw a1, 12(a0)
; CHECK-RV64-NEXT: ret
;
; CHECK-RV32-LABEL: test_nontemporal_S1_store_v4i32:
; CHECK-RV32: # %bb.0:
-; CHECK-RV32-NEXT: lw a2, 12(a1)
-; CHECK-RV32-NEXT: lw a3, 8(a1)
-; CHECK-RV32-NEXT: lw a4, 4(a1)
-; CHECK-RV32-NEXT: lw a1, 0(a1)
+; CHECK-RV32-NEXT: lw a2, 0(a1)
+; CHECK-RV32-NEXT: lw a3, 4(a1)
+; CHECK-RV32-NEXT: lw a4, 8(a1)
+; CHECK-RV32-NEXT: lw a1, 12(a1)
; CHECK-RV32-NEXT: ntl.s1
-; CHECK-RV32-NEXT: sw a2, 12(a0)
+; CHECK-RV32-NEXT: sw a2, 0(a0)
; CHECK-RV32-NEXT: ntl.s1
-; CHECK-RV32-NEXT: sw a3, 8(a0)
+; CHECK-RV32-NEXT: sw a3, 4(a0)
; CHECK-RV32-NEXT: ntl.s1
-; CHECK-RV32-NEXT: sw a4, 4(a0)
+; CHECK-RV32-NEXT: sw a4, 8(a0)
; CHECK-RV32-NEXT: ntl.s1
-; CHECK-RV32-NEXT: sw a1, 0(a0)
+; CHECK-RV32-NEXT: sw a1, 12(a0)
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64C-LABEL: test_nontemporal_S1_store_v4i32:
; CHECK-RV64C: # %bb.0:
-; CHECK-RV64C-NEXT: lw a2, 24(a1)
-; CHECK-RV64C-NEXT: lw a3, 16(a1)
-; CHECK-RV64C-NEXT: lw a4, 8(a1)
-; CHECK-RV64C-NEXT: lw a1, 0(a1)
+; CHECK-RV64C-NEXT: lw a2, 0(a1)
+; CHECK-RV64C-NEXT: lw a3, 8(a1)
+; CHECK-RV64C-NEXT: lw a4, 16(a1)
+; CHECK-RV64C-NEXT: lw a1, 24(a1)
; CHECK-RV64C-NEXT: c.ntl.s1
-; CHECK-RV64C-NEXT: sw a2, 12(a0)
+; CHECK-RV64C-NEXT: sw a2, 0(a0)
; CHECK-RV64C-NEXT: c.ntl.s1
-; CHECK-RV64C-NEXT: sw a3, 8(a0)
+; CHECK-RV64C-NEXT: sw a3, 4(a0)
; CHECK-RV64C-NEXT: c.ntl.s1
-; CHECK-RV64C-NEXT: sw a4, 4(a0)
+; CHECK-RV64C-NEXT: sw a4, 8(a0)
; CHECK-RV64C-NEXT: c.ntl.s1
-; CHECK-RV64C-NEXT: sw a1, 0(a0)
+; CHECK-RV64C-NEXT: sw a1, 12(a0)
; CHECK-RV64C-NEXT: ret
;
; CHECK-RV32C-LABEL: test_nontemporal_S1_store_v4i32:
; CHECK-RV32C: # %bb.0:
-; CHECK-RV32C-NEXT: lw a2, 12(a1)
-; CHECK-RV32C-NEXT: lw a3, 8(a1)
-; CHECK-RV32C-NEXT: lw a4, 4(a1)
-; CHECK-RV32C-NEXT: lw a1, 0(a1)
+; CHECK-RV32C-NEXT: lw a2, 0(a1)
+; CHECK-RV32C-NEXT: lw a3, 4(a1)
+; CHECK-RV32C-NEXT: lw a4, 8(a1)
+; CHECK-RV32C-NEXT: lw a1, 12(a1)
; CHECK-RV32C-NEXT: c.ntl.s1
-; CHECK-RV32C-NEXT: sw a2, 12(a0)
+; CHECK-RV32C-NEXT: sw a2, 0(a0)
; CHECK-RV32C-NEXT: c.ntl.s1
-; CHECK-RV32C-NEXT: sw a3, 8(a0)
+; CHECK-RV32C-NEXT: sw a3, 4(a0)
; CHECK-RV32C-NEXT: c.ntl.s1
-; CHECK-RV32C-NEXT: sw a4, 4(a0)
+; CHECK-RV32C-NEXT: sw a4, 8(a0)
; CHECK-RV32C-NEXT: c.ntl.s1
-; CHECK-RV32C-NEXT: sw a1, 0(a0)
+; CHECK-RV32C-NEXT: sw a1, 12(a0)
; CHECK-RV32C-NEXT: ret
;
; CHECK-RV64V-LABEL: test_nontemporal_S1_store_v4i32:
@@ -5618,49 +5618,49 @@ define void @test_nontemporal_S1_store_v2i64(ptr %p, <2 x i64> %v) {
; CHECK-RV64-LABEL: test_nontemporal_S1_store_v2i64:
; CHECK-RV64: # %bb.0:
; CHECK-RV64-NEXT: ntl.s1
-; CHECK-RV64-NEXT: sd a2, 8(a0)
-; CHECK-RV64-NEXT: ntl.s1
; CHECK-RV64-NEXT: sd a1, 0(a0)
+; CHECK-RV64-NEXT: ntl.s1
+; CHECK-RV64-NEXT: sd a2, 8(a0)
; CHECK-RV64-NEXT: ret
;
; CHECK-RV32-LABEL: test_nontemporal_S1_store_v2i64:
; CHECK-RV32: # %bb.0:
-; CHECK-RV32-NEXT: lw a2, 12(a1)
-; CHECK-RV32-NEXT: lw a3, 8(a1)
-; CHECK-RV32-NEXT: lw a4, 4(a1)
-; CHECK-RV32-NEXT: lw a1, 0(a1)
+; CHECK-RV32-NEXT: lw a2, 0(a1)
+; CHECK-RV32-NEXT: lw a3, 4(a1)
+; CHECK-RV32-NEXT: lw a4, 8(a1)
+; CHECK-RV32-NEXT: lw a1, 12(a1)
; CHECK-RV32-NEXT: ntl.s1
-; CHECK-RV32-NEXT: sw a2, 12(a0)
+; CHECK-RV32-NEXT: sw a2, 0(a0)
; CHECK-RV32-NEXT: ntl.s1
-; CHECK-RV32-NEXT: sw a3, 8(a0)
+; CHECK-RV32-NEXT: sw a3, 4(a0)
; CHECK-RV32-NEXT: ntl.s1
-; CHECK-RV32-NEXT: sw a4, 4(a0)
+; CHECK-RV32-NEXT: sw a4, 8(a0)
; CHECK-RV32-NEXT: ntl.s1
-; CHECK-RV32-NEXT: sw a1, 0(a0)
+; CHECK-RV32-NEXT: sw a1, 12(a0)
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64C-LABEL: test_nontemporal_S1_store_v2i64:
; CHECK-RV64C: # %bb.0:
; CHECK-RV64C-NEXT: c.ntl.s1
-; CHECK-RV64C-NEXT: sd a2, 8(a0)
-; CHECK-RV64C-NEXT: c.ntl.s1
; CHECK-RV64C-NEXT: sd a1, 0(a0)
+; CHECK-RV64C-NEXT: c.ntl.s1
+; CHECK-RV64C-NEXT: sd a2, 8(a0)
; CHECK-RV64C-NEXT: ret
;
; CHECK-RV32C-LABEL: test_nontemporal_S1_store_v2i64:
; CHECK-RV32C: # %bb.0:
-; CHECK-RV32C-NEXT: lw a2, 12(a1)
-; CHECK-RV32C-NEXT: lw a3, 8(a1)
-; CHECK-RV32C-NEXT: lw a4, 4(a1)
-; CHECK-RV32C-NEXT: lw a1, 0(a1)
+; CHECK-RV32C-NEXT: lw a2, 0(a1)
+; CHECK-RV32C-NEXT: lw a3, 4(a1)
+; CHECK-RV32C-NEXT: lw a4, 8(a1)
+; CHECK-RV32C-NEXT: lw a1, 12(a1)
; CHECK-RV32C-NEXT: c.ntl.s1
-; CHECK-RV32C-NEXT: sw a2, 12(a0)
+; CHECK-RV32C-NEXT: sw a2, 0(a0)
; CHECK-RV32C-NEXT: c.ntl.s1
-; CHECK-RV32C-NEXT: sw a3, 8(a0)
+; CHECK-RV32C-NEXT: sw a3, 4(a0)
; CHECK-RV32C-NEXT: c.ntl.s1
-; CHECK-RV32C-NEXT: sw a4, 4(a0)
+; CHECK-RV32C-NEXT: sw a4, 8(a0)
; CHECK-RV32C-NEXT: c.ntl.s1
-; CHECK-RV32C-NEXT: sw a1, 0(a0)
+; CHECK-RV32C-NEXT: sw a1, 12(a0)
; CHECK-RV32C-NEXT: ret
;
; CHECK-RV64V-LABEL: test_nontemporal_S1_store_v2i64:
@@ -5994,53 +5994,53 @@ define <16 x i8> @test_nontemporal_ALL_load_v16i8(ptr %p) {
; CHECK-RV64-LABEL: test_nontemporal_ALL_load_v16i8:
; CHECK-RV64: # %bb.0:
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: ld a2, 8(a1)
+; CHECK-RV64-NEXT: ld a2, 0(a1)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: ld a1, 0(a1)
-; CHECK-RV64-NEXT: sd a2, 8(a0)
-; CHECK-RV64-NEXT: sd a1, 0(a0)
+; CHECK-RV64-NEXT: ld a1, 8(a1)
+; CHECK-RV64-NEXT: sd a2, 0(a0)
+; CHECK-RV64-NEXT: sd a1, 8(a0)
; CHECK-RV64-NEXT: ret
;
; CHECK-RV32-LABEL: test_nontemporal_ALL_load_v16i8:
; CHECK-RV32: # %bb.0:
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: lw a2, 12(a1)
+; CHECK-RV32-NEXT: lw a2, 0(a1)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: lw a3, 8(a1)
+; CHECK-RV32-NEXT: lw a3, 4(a1)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: lw a4, 4(a1)
+; CHECK-RV32-NEXT: lw a4, 8(a1)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: lw a1, 0(a1)
-; CHECK-RV32-NEXT: sw a2, 12(a0)
-; CHECK-RV32-NEXT: sw a3, 8(a0)
-; CHECK-RV32-NEXT: sw a4, 4(a0)
-; CHECK-RV32-NEXT: sw a1, 0(a0)
+; CHECK-RV32-NEXT: lw a1, 12(a1)
+; CHECK-RV32-NEXT: sw a2, 0(a0)
+; CHECK-RV32-NEXT: sw a3, 4(a0)
+; CHECK-RV32-NEXT: sw a4, 8(a0)
+; CHECK-RV32-NEXT: sw a1, 12(a0)
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64C-LABEL: test_nontemporal_ALL_load_v16i8:
; CHECK-RV64C: # %bb.0:
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: ld a2, 8(a1)
+; CHECK-RV64C-NEXT: ld a2, 0(a1)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: ld a1, 0(a1)
-; CHECK-RV64C-NEXT: sd a2, 8(a0)
-; CHECK-RV64C-NEXT: sd a1, 0(a0)
+; CHECK-RV64C-NEXT: ld a1, 8(a1)
+; CHECK-RV64C-NEXT: sd a2, 0(a0)
+; CHECK-RV64C-NEXT: sd a1, 8(a0)
; CHECK-RV64C-NEXT: ret
;
; CHECK-RV32C-LABEL: test_nontemporal_ALL_load_v16i8:
; CHECK-RV32C: # %bb.0:
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: lw a2, 12(a1)
+; CHECK-RV32C-NEXT: lw a2, 0(a1)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: lw a3, 8(a1)
+; CHECK-RV32C-NEXT: lw a3, 4(a1)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: lw a4, 4(a1)
+; CHECK-RV32C-NEXT: lw a4, 8(a1)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: lw a1, 0(a1)
-; CHECK-RV32C-NEXT: sw a2, 12(a0)
-; CHECK-RV32C-NEXT: sw a3, 8(a0)
-; CHECK-RV32C-NEXT: sw a4, 4(a0)
-; CHECK-RV32C-NEXT: sw a1, 0(a0)
+; CHECK-RV32C-NEXT: lw a1, 12(a1)
+; CHECK-RV32C-NEXT: sw a2, 0(a0)
+; CHECK-RV32C-NEXT: sw a3, 4(a0)
+; CHECK-RV32C-NEXT: sw a4, 8(a0)
+; CHECK-RV32C-NEXT: sw a1, 12(a0)
; CHECK-RV32C-NEXT: ret
;
; CHECK-RV64V-LABEL: test_nontemporal_ALL_load_v16i8:
@@ -6064,53 +6064,53 @@ define <8 x i16> @test_nontemporal_ALL_load_v8i16(ptr %p) {
; CHECK-RV64-LABEL: test_nontemporal_ALL_load_v8i16:
; CHECK-RV64: # %bb.0:
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: ld a2, 8(a1)
+; CHECK-RV64-NEXT: ld a2, 0(a1)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: ld a1, 0(a1)
-; CHECK-RV64-NEXT: sd a2, 8(a0)
-; CHECK-RV64-NEXT: sd a1, 0(a0)
+; CHECK-RV64-NEXT: ld a1, 8(a1)
+; CHECK-RV64-NEXT: sd a2, 0(a0)
+; CHECK-RV64-NEXT: sd a1, 8(a0)
; CHECK-RV64-NEXT: ret
;
; CHECK-RV32-LABEL: test_nontemporal_ALL_load_v8i16:
; CHECK-RV32: # %bb.0:
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: lw a2, 12(a1)
+; CHECK-RV32-NEXT: lw a2, 0(a1)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: lw a3, 8(a1)
+; CHECK-RV32-NEXT: lw a3, 4(a1)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: lw a4, 4(a1)
+; CHECK-RV32-NEXT: lw a4, 8(a1)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: lw a1, 0(a1)
-; CHECK-RV32-NEXT: sw a2, 12(a0)
-; CHECK-RV32-NEXT: sw a3, 8(a0)
-; CHECK-RV32-NEXT: sw a4, 4(a0)
-; CHECK-RV32-NEXT: sw a1, 0(a0)
+; CHECK-RV32-NEXT: lw a1, 12(a1)
+; CHECK-RV32-NEXT: sw a2, 0(a0)
+; CHECK-RV32-NEXT: sw a3, 4(a0)
+; CHECK-RV32-NEXT: sw a4, 8(a0)
+; CHECK-RV32-NEXT: sw a1, 12(a0)
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64C-LABEL: test_nontemporal_ALL_load_v8i16:
; CHECK-RV64C: # %bb.0:
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: ld a2, 8(a1)
+; CHECK-RV64C-NEXT: ld a2, 0(a1)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: ld a1, 0(a1)
-; CHECK-RV64C-NEXT: sd a2, 8(a0)
-; CHECK-RV64C-NEXT: sd a1, 0(a0)
+; CHECK-RV64C-NEXT: ld a1, 8(a1)
+; CHECK-RV64C-NEXT: sd a2, 0(a0)
+; CHECK-RV64C-NEXT: sd a1, 8(a0)
; CHECK-RV64C-NEXT: ret
;
; CHECK-RV32C-LABEL: test_nontemporal_ALL_load_v8i16:
; CHECK-RV32C: # %bb.0:
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: lw a2, 12(a1)
+; CHECK-RV32C-NEXT: lw a2, 0(a1)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: lw a3, 8(a1)
+; CHECK-RV32C-NEXT: lw a3, 4(a1)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: lw a4, 4(a1)
+; CHECK-RV32C-NEXT: lw a4, 8(a1)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: lw a1, 0(a1)
-; CHECK-RV32C-NEXT: sw a2, 12(a0)
-; CHECK-RV32C-NEXT: sw a3, 8(a0)
-; CHECK-RV32C-NEXT: sw a4, 4(a0)
-; CHECK-RV32C-NEXT: sw a1, 0(a0)
+; CHECK-RV32C-NEXT: lw a1, 12(a1)
+; CHECK-RV32C-NEXT: sw a2, 0(a0)
+; CHECK-RV32C-NEXT: sw a3, 4(a0)
+; CHECK-RV32C-NEXT: sw a4, 8(a0)
+; CHECK-RV32C-NEXT: sw a1, 12(a0)
; CHECK-RV32C-NEXT: ret
;
; CHECK-RV64V-LABEL: test_nontemporal_ALL_load_v8i16:
@@ -6134,53 +6134,53 @@ define <4 x i32> @test_nontemporal_ALL_load_v4i32(ptr %p) {
; CHECK-RV64-LABEL: test_nontemporal_ALL_load_v4i32:
; CHECK-RV64: # %bb.0:
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: ld a2, 8(a1)
+; CHECK-RV64-NEXT: ld a2, 0(a1)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: ld a1, 0(a1)
-; CHECK-RV64-NEXT: sd a2, 8(a0)
-; CHECK-RV64-NEXT: sd a1, 0(a0)
+; CHECK-RV64-NEXT: ld a1, 8(a1)
+; CHECK-RV64-NEXT: sd a2, 0(a0)
+; CHECK-RV64-NEXT: sd a1, 8(a0)
; CHECK-RV64-NEXT: ret
;
; CHECK-RV32-LABEL: test_nontemporal_ALL_load_v4i32:
; CHECK-RV32: # %bb.0:
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: lw a2, 12(a1)
+; CHECK-RV32-NEXT: lw a2, 0(a1)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: lw a3, 8(a1)
+; CHECK-RV32-NEXT: lw a3, 4(a1)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: lw a4, 4(a1)
+; CHECK-RV32-NEXT: lw a4, 8(a1)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: lw a1, 0(a1)
-; CHECK-RV32-NEXT: sw a2, 12(a0)
-; CHECK-RV32-NEXT: sw a3, 8(a0)
-; CHECK-RV32-NEXT: sw a4, 4(a0)
-; CHECK-RV32-NEXT: sw a1, 0(a0)
+; CHECK-RV32-NEXT: lw a1, 12(a1)
+; CHECK-RV32-NEXT: sw a2, 0(a0)
+; CHECK-RV32-NEXT: sw a3, 4(a0)
+; CHECK-RV32-NEXT: sw a4, 8(a0)
+; CHECK-RV32-NEXT: sw a1, 12(a0)
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64C-LABEL: test_nontemporal_ALL_load_v4i32:
; CHECK-RV64C: # %bb.0:
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: ld a2, 8(a1)
+; CHECK-RV64C-NEXT: ld a2, 0(a1)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: ld a1, 0(a1)
-; CHECK-RV64C-NEXT: sd a2, 8(a0)
-; CHECK-RV64C-NEXT: sd a1, 0(a0)
+; CHECK-RV64C-NEXT: ld a1, 8(a1)
+; CHECK-RV64C-NEXT: sd a2, 0(a0)
+; CHECK-RV64C-NEXT: sd a1, 8(a0)
; CHECK-RV64C-NEXT: ret
;
; CHECK-RV32C-LABEL: test_nontemporal_ALL_load_v4i32:
; CHECK-RV32C: # %bb.0:
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: lw a2, 12(a1)
+; CHECK-RV32C-NEXT: lw a2, 0(a1)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: lw a3, 8(a1)
+; CHECK-RV32C-NEXT: lw a3, 4(a1)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: lw a4, 4(a1)
+; CHECK-RV32C-NEXT: lw a4, 8(a1)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: lw a1, 0(a1)
-; CHECK-RV32C-NEXT: sw a2, 12(a0)
-; CHECK-RV32C-NEXT: sw a3, 8(a0)
-; CHECK-RV32C-NEXT: sw a4, 4(a0)
-; CHECK-RV32C-NEXT: sw a1, 0(a0)
+; CHECK-RV32C-NEXT: lw a1, 12(a1)
+; CHECK-RV32C-NEXT: sw a2, 0(a0)
+; CHECK-RV32C-NEXT: sw a3, 4(a0)
+; CHECK-RV32C-NEXT: sw a4, 8(a0)
+; CHECK-RV32C-NEXT: sw a1, 12(a0)
; CHECK-RV32C-NEXT: ret
;
; CHECK-RV64V-LABEL: test_nontemporal_ALL_load_v4i32:
@@ -6213,17 +6213,17 @@ define <2 x i64> @test_nontemporal_ALL_load_v2i64(ptr %p) {
; CHECK-RV32-LABEL: test_nontemporal_ALL_load_v2i64:
; CHECK-RV32: # %bb.0:
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: lw a2, 12(a1)
+; CHECK-RV32-NEXT: lw a2, 0(a1)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: lw a3, 8(a1)
+; CHECK-RV32-NEXT: lw a3, 4(a1)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: lw a4, 4(a1)
+; CHECK-RV32-NEXT: lw a4, 8(a1)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: lw a1, 0(a1)
-; CHECK-RV32-NEXT: sw a2, 12(a0)
-; CHECK-RV32-NEXT: sw a3, 8(a0)
-; CHECK-RV32-NEXT: sw a4, 4(a0)
-; CHECK-RV32-NEXT: sw a1, 0(a0)
+; CHECK-RV32-NEXT: lw a1, 12(a1)
+; CHECK-RV32-NEXT: sw a2, 0(a0)
+; CHECK-RV32-NEXT: sw a3, 4(a0)
+; CHECK-RV32-NEXT: sw a4, 8(a0)
+; CHECK-RV32-NEXT: sw a1, 12(a0)
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64C-LABEL: test_nontemporal_ALL_load_v2i64:
@@ -6238,17 +6238,17 @@ define <2 x i64> @test_nontemporal_ALL_load_v2i64(ptr %p) {
; CHECK-RV32C-LABEL: test_nontemporal_ALL_load_v2i64:
; CHECK-RV32C: # %bb.0:
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: lw a2, 12(a1)
+; CHECK-RV32C-NEXT: lw a2, 0(a1)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: lw a3, 8(a1)
+; CHECK-RV32C-NEXT: lw a3, 4(a1)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: lw a4, 4(a1)
+; CHECK-RV32C-NEXT: lw a4, 8(a1)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: lw a1, 0(a1)
-; CHECK-RV32C-NEXT: sw a2, 12(a0)
-; CHECK-RV32C-NEXT: sw a3, 8(a0)
-; CHECK-RV32C-NEXT: sw a4, 4(a0)
-; CHECK-RV32C-NEXT: sw a1, 0(a0)
+; CHECK-RV32C-NEXT: lw a1, 12(a1)
+; CHECK-RV32C-NEXT: sw a2, 0(a0)
+; CHECK-RV32C-NEXT: sw a3, 4(a0)
+; CHECK-RV32C-NEXT: sw a4, 8(a0)
+; CHECK-RV32C-NEXT: sw a1, 12(a0)
; CHECK-RV32C-NEXT: ret
;
; CHECK-RV64V-LABEL: test_nontemporal_ALL_load_v2i64:
@@ -6278,9 +6278,9 @@ define void @test_nontemporal_ALL_store_i64(ptr %p, i64 %v) {
; CHECK-RV32-LABEL: test_nontemporal_ALL_store_i64:
; CHECK-RV32: # %bb.0:
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sw a2, 4(a0)
-; CHECK-RV32-NEXT: ntl.all
; CHECK-RV32-NEXT: sw a1, 0(a0)
+; CHECK-RV32-NEXT: ntl.all
+; CHECK-RV32-NEXT: sw a2, 4(a0)
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64C-LABEL: test_nontemporal_ALL_store_i64:
@@ -6292,9 +6292,9 @@ define void @test_nontemporal_ALL_store_i64(ptr %p, i64 %v) {
; CHECK-RV32C-LABEL: test_nontemporal_ALL_store_i64:
; CHECK-RV32C: # %bb.0:
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sw a2, 4(a0)
-; CHECK-RV32C-NEXT: c.ntl.all
; CHECK-RV32C-NEXT: sw a1, 0(a0)
+; CHECK-RV32C-NEXT: c.ntl.all
+; CHECK-RV32C-NEXT: sw a2, 4(a0)
; CHECK-RV32C-NEXT: ret
;
; CHECK-RV64V-LABEL: test_nontemporal_ALL_store_i64:
@@ -6306,9 +6306,9 @@ define void @test_nontemporal_ALL_store_i64(ptr %p, i64 %v) {
; CHECK-RV32V-LABEL: test_nontemporal_ALL_store_i64:
; CHECK-RV32V: # %bb.0:
; CHECK-RV32V-NEXT: ntl.all
-; CHECK-RV32V-NEXT: sw a2, 4(a0)
-; CHECK-RV32V-NEXT: ntl.all
; CHECK-RV32V-NEXT: sw a1, 0(a0)
+; CHECK-RV32V-NEXT: ntl.all
+; CHECK-RV32V-NEXT: sw a2, 4(a0)
; CHECK-RV32V-NEXT: ret
store i64 %v, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !4
ret void
@@ -6571,46 +6571,46 @@ define void @test_nontemporal_ALL_store_v16i8(ptr %p, <16 x i8> %v) {
; CHECK-RV64-NEXT: lbu a7, 40(a1)
; CHECK-RV64-NEXT: lbu t0, 48(a1)
; CHECK-RV64-NEXT: lbu t1, 56(a1)
-; CHECK-RV64-NEXT: lbu t2, 96(a1)
-; CHECK-RV64-NEXT: lbu t3, 104(a1)
-; CHECK-RV64-NEXT: lbu t4, 112(a1)
-; CHECK-RV64-NEXT: lbu t5, 120(a1)
-; CHECK-RV64-NEXT: lbu t6, 64(a1)
-; CHECK-RV64-NEXT: lbu s0, 72(a1)
-; CHECK-RV64-NEXT: lbu s1, 80(a1)
-; CHECK-RV64-NEXT: lbu a1, 88(a1)
+; CHECK-RV64-NEXT: lbu t2, 64(a1)
+; CHECK-RV64-NEXT: lbu t3, 72(a1)
+; CHECK-RV64-NEXT: lbu t4, 80(a1)
+; CHECK-RV64-NEXT: lbu t5, 88(a1)
+; CHECK-RV64-NEXT: lbu t6, 96(a1)
+; CHECK-RV64-NEXT: lbu s0, 104(a1)
+; CHECK-RV64-NEXT: lbu s1, 112(a1)
+; CHECK-RV64-NEXT: lbu a1, 120(a1)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sb t5, 15(a0)
+; CHECK-RV64-NEXT: sb t6, 12(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sb t4, 14(a0)
+; CHECK-RV64-NEXT: sb s0, 13(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sb t3, 13(a0)
+; CHECK-RV64-NEXT: sb s1, 14(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sb t2, 12(a0)
+; CHECK-RV64-NEXT: sb a1, 15(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sb a1, 11(a0)
+; CHECK-RV64-NEXT: sb t2, 8(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sb s1, 10(a0)
+; CHECK-RV64-NEXT: sb t3, 9(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sb s0, 9(a0)
+; CHECK-RV64-NEXT: sb t4, 10(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sb t6, 8(a0)
+; CHECK-RV64-NEXT: sb t5, 11(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sb t1, 7(a0)
-; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sb t0, 6(a0)
+; CHECK-RV64-NEXT: sb a6, 4(a0)
; CHECK-RV64-NEXT: ntl.all
; CHECK-RV64-NEXT: sb a7, 5(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sb a6, 4(a0)
+; CHECK-RV64-NEXT: sb t0, 6(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sb a5, 3(a0)
+; CHECK-RV64-NEXT: sb t1, 7(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sb a4, 2(a0)
+; CHECK-RV64-NEXT: sb a2, 0(a0)
; CHECK-RV64-NEXT: ntl.all
; CHECK-RV64-NEXT: sb a3, 1(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sb a2, 0(a0)
+; CHECK-RV64-NEXT: sb a4, 2(a0)
+; CHECK-RV64-NEXT: ntl.all
+; CHECK-RV64-NEXT: sb a5, 3(a0)
; CHECK-RV64-NEXT: ld s0, 8(sp) # 8-byte Folded Reload
; CHECK-RV64-NEXT: ld s1, 0(sp) # 8-byte Folded Reload
; CHECK-RV64-NEXT: addi sp, sp, 16
@@ -6632,46 +6632,46 @@ define void @test_nontemporal_ALL_store_v16i8(ptr %p, <16 x i8> %v) {
; CHECK-RV32-NEXT: lbu a7, 20(a1)
; CHECK-RV32-NEXT: lbu t0, 24(a1)
; CHECK-RV32-NEXT: lbu t1, 28(a1)
-; CHECK-RV32-NEXT: lbu t2, 48(a1)
-; CHECK-RV32-NEXT: lbu t3, 52(a1)
-; CHECK-RV32-NEXT: lbu t4, 56(a1)
-; CHECK-RV32-NEXT: lbu t5, 60(a1)
-; CHECK-RV32-NEXT: lbu t6, 32(a1)
-; CHECK-RV32-NEXT: lbu s0, 36(a1)
-; CHECK-RV32-NEXT: lbu s1, 40(a1)
-; CHECK-RV32-NEXT: lbu a1, 44(a1)
-; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sb t5, 15(a0)
+; CHECK-RV32-NEXT: lbu t2, 32(a1)
+; CHECK-RV32-NEXT: lbu t3, 36(a1)
+; CHECK-RV32-NEXT: lbu t4, 40(a1)
+; CHECK-RV32-NEXT: lbu t5, 44(a1)
+; CHECK-RV32-NEXT: lbu t6, 48(a1)
+; CHECK-RV32-NEXT: lbu s0, 52(a1)
+; CHECK-RV32-NEXT: lbu s1, 56(a1)
+; CHECK-RV32-NEXT: lbu a1, 60(a1)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sb t4, 14(a0)
+; CHECK-RV32-NEXT: sb t6, 12(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sb t3, 13(a0)
+; CHECK-RV32-NEXT: sb s0, 13(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sb t2, 12(a0)
+; CHECK-RV32-NEXT: sb s1, 14(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sb a1, 11(a0)
+; CHECK-RV32-NEXT: sb a1, 15(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sb s1, 10(a0)
+; CHECK-RV32-NEXT: sb t2, 8(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sb s0, 9(a0)
+; CHECK-RV32-NEXT: sb t3, 9(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sb t6, 8(a0)
+; CHECK-RV32-NEXT: sb t4, 10(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sb t1, 7(a0)
+; CHECK-RV32-NEXT: sb t5, 11(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sb t0, 6(a0)
+; CHECK-RV32-NEXT: sb a6, 4(a0)
; CHECK-RV32-NEXT: ntl.all
; CHECK-RV32-NEXT: sb a7, 5(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sb a6, 4(a0)
+; CHECK-RV32-NEXT: sb t0, 6(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sb a5, 3(a0)
+; CHECK-RV32-NEXT: sb t1, 7(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sb a4, 2(a0)
+; CHECK-RV32-NEXT: sb a2, 0(a0)
; CHECK-RV32-NEXT: ntl.all
; CHECK-RV32-NEXT: sb a3, 1(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sb a2, 0(a0)
+; CHECK-RV32-NEXT: sb a4, 2(a0)
+; CHECK-RV32-NEXT: ntl.all
+; CHECK-RV32-NEXT: sb a5, 3(a0)
; CHECK-RV32-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; CHECK-RV32-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
; CHECK-RV32-NEXT: addi sp, sp, 16
@@ -6693,46 +6693,46 @@ define void @test_nontemporal_ALL_store_v16i8(ptr %p, <16 x i8> %v) {
; CHECK-RV64C-NEXT: lbu t3, 40(a1)
; CHECK-RV64C-NEXT: lbu t4, 48(a1)
; CHECK-RV64C-NEXT: lbu t5, 56(a1)
-; CHECK-RV64C-NEXT: lbu a2, 96(a1)
-; CHECK-RV64C-NEXT: lbu a3, 104(a1)
-; CHECK-RV64C-NEXT: lbu a4, 112(a1)
-; CHECK-RV64C-NEXT: lbu a5, 120(a1)
; CHECK-RV64C-NEXT: lbu t6, 64(a1)
-; CHECK-RV64C-NEXT: lbu s0, 72(a1)
-; CHECK-RV64C-NEXT: lbu s1, 80(a1)
-; CHECK-RV64C-NEXT: lbu a1, 88(a1)
-; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sb a5, 15(a0)
-; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sb a4, 14(a0)
-; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sb a3, 13(a0)
+; CHECK-RV64C-NEXT: lbu a3, 72(a1)
+; CHECK-RV64C-NEXT: lbu a4, 80(a1)
+; CHECK-RV64C-NEXT: lbu a5, 88(a1)
+; CHECK-RV64C-NEXT: lbu a2, 96(a1)
+; CHECK-RV64C-NEXT: lbu s0, 104(a1)
+; CHECK-RV64C-NEXT: lbu s1, 112(a1)
+; CHECK-RV64C-NEXT: lbu a1, 120(a1)
; CHECK-RV64C-NEXT: c.ntl.all
; CHECK-RV64C-NEXT: sb a2, 12(a0)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sb a1, 11(a0)
+; CHECK-RV64C-NEXT: sb s0, 13(a0)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sb s1, 10(a0)
+; CHECK-RV64C-NEXT: sb s1, 14(a0)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sb s0, 9(a0)
+; CHECK-RV64C-NEXT: sb a1, 15(a0)
; CHECK-RV64C-NEXT: c.ntl.all
; CHECK-RV64C-NEXT: sb t6, 8(a0)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sb t5, 7(a0)
+; CHECK-RV64C-NEXT: sb a3, 9(a0)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sb t4, 6(a0)
+; CHECK-RV64C-NEXT: sb a4, 10(a0)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sb t3, 5(a0)
+; CHECK-RV64C-NEXT: sb a5, 11(a0)
; CHECK-RV64C-NEXT: c.ntl.all
; CHECK-RV64C-NEXT: sb t2, 4(a0)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sb t1, 3(a0)
+; CHECK-RV64C-NEXT: sb t3, 5(a0)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sb t0, 2(a0)
+; CHECK-RV64C-NEXT: sb t4, 6(a0)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sb a7, 1(a0)
+; CHECK-RV64C-NEXT: sb t5, 7(a0)
; CHECK-RV64C-NEXT: c.ntl.all
; CHECK-RV64C-NEXT: sb a6, 0(a0)
+; CHECK-RV64C-NEXT: c.ntl.all
+; CHECK-RV64C-NEXT: sb a7, 1(a0)
+; CHECK-RV64C-NEXT: c.ntl.all
+; CHECK-RV64C-NEXT: sb t0, 2(a0)
+; CHECK-RV64C-NEXT: c.ntl.all
+; CHECK-RV64C-NEXT: sb t1, 3(a0)
; CHECK-RV64C-NEXT: ld s0, 8(sp) # 8-byte Folded Reload
; CHECK-RV64C-NEXT: ld s1, 0(sp) # 8-byte Folded Reload
; CHECK-RV64C-NEXT: addi sp, sp, 16
@@ -6754,46 +6754,46 @@ define void @test_nontemporal_ALL_store_v16i8(ptr %p, <16 x i8> %v) {
; CHECK-RV32C-NEXT: lbu t3, 20(a1)
; CHECK-RV32C-NEXT: lbu t4, 24(a1)
; CHECK-RV32C-NEXT: lbu t5, 28(a1)
-; CHECK-RV32C-NEXT: lbu a2, 48(a1)
-; CHECK-RV32C-NEXT: lbu a3, 52(a1)
-; CHECK-RV32C-NEXT: lbu a4, 56(a1)
-; CHECK-RV32C-NEXT: lbu a5, 60(a1)
; CHECK-RV32C-NEXT: lbu t6, 32(a1)
-; CHECK-RV32C-NEXT: lbu s0, 36(a1)
-; CHECK-RV32C-NEXT: lbu s1, 40(a1)
-; CHECK-RV32C-NEXT: lbu a1, 44(a1)
-; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sb a5, 15(a0)
-; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sb a4, 14(a0)
-; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sb a3, 13(a0)
+; CHECK-RV32C-NEXT: lbu a3, 36(a1)
+; CHECK-RV32C-NEXT: lbu a4, 40(a1)
+; CHECK-RV32C-NEXT: lbu a5, 44(a1)
+; CHECK-RV32C-NEXT: lbu a2, 48(a1)
+; CHECK-RV32C-NEXT: lbu s0, 52(a1)
+; CHECK-RV32C-NEXT: lbu s1, 56(a1)
+; CHECK-RV32C-NEXT: lbu a1, 60(a1)
; CHECK-RV32C-NEXT: c.ntl.all
; CHECK-RV32C-NEXT: sb a2, 12(a0)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sb a1, 11(a0)
+; CHECK-RV32C-NEXT: sb s0, 13(a0)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sb s1, 10(a0)
+; CHECK-RV32C-NEXT: sb s1, 14(a0)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sb s0, 9(a0)
+; CHECK-RV32C-NEXT: sb a1, 15(a0)
; CHECK-RV32C-NEXT: c.ntl.all
; CHECK-RV32C-NEXT: sb t6, 8(a0)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sb t5, 7(a0)
+; CHECK-RV32C-NEXT: sb a3, 9(a0)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sb t4, 6(a0)
+; CHECK-RV32C-NEXT: sb a4, 10(a0)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sb t3, 5(a0)
+; CHECK-RV32C-NEXT: sb a5, 11(a0)
; CHECK-RV32C-NEXT: c.ntl.all
; CHECK-RV32C-NEXT: sb t2, 4(a0)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sb t1, 3(a0)
+; CHECK-RV32C-NEXT: sb t3, 5(a0)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sb t0, 2(a0)
+; CHECK-RV32C-NEXT: sb t4, 6(a0)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sb a7, 1(a0)
+; CHECK-RV32C-NEXT: sb t5, 7(a0)
; CHECK-RV32C-NEXT: c.ntl.all
; CHECK-RV32C-NEXT: sb a6, 0(a0)
+; CHECK-RV32C-NEXT: c.ntl.all
+; CHECK-RV32C-NEXT: sb a7, 1(a0)
+; CHECK-RV32C-NEXT: c.ntl.all
+; CHECK-RV32C-NEXT: sb t0, 2(a0)
+; CHECK-RV32C-NEXT: c.ntl.all
+; CHECK-RV32C-NEXT: sb t1, 3(a0)
; CHECK-RV32C-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; CHECK-RV32C-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
; CHECK-RV32C-NEXT: addi sp, sp, 16
@@ -6819,114 +6819,114 @@ define void @test_nontemporal_ALL_store_v16i8(ptr %p, <16 x i8> %v) {
define void @test_nontemporal_ALL_store_v8i16(ptr %p, <8 x i16> %v) {
; CHECK-RV64-LABEL: test_nontemporal_ALL_store_v8i16:
; CHECK-RV64: # %bb.0:
-; CHECK-RV64-NEXT: lh a2, 32(a1)
-; CHECK-RV64-NEXT: lh a3, 40(a1)
-; CHECK-RV64-NEXT: lh a4, 48(a1)
-; CHECK-RV64-NEXT: lh a5, 56(a1)
-; CHECK-RV64-NEXT: lh a6, 0(a1)
-; CHECK-RV64-NEXT: lh a7, 8(a1)
-; CHECK-RV64-NEXT: lh t0, 16(a1)
-; CHECK-RV64-NEXT: lh a1, 24(a1)
+; CHECK-RV64-NEXT: lh a2, 0(a1)
+; CHECK-RV64-NEXT: lh a3, 8(a1)
+; CHECK-RV64-NEXT: lh a4, 16(a1)
+; CHECK-RV64-NEXT: lh a5, 24(a1)
+; CHECK-RV64-NEXT: lh a6, 32(a1)
+; CHECK-RV64-NEXT: lh a7, 40(a1)
+; CHECK-RV64-NEXT: lh t0, 48(a1)
+; CHECK-RV64-NEXT: lh a1, 56(a1)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sh a5, 14(a0)
+; CHECK-RV64-NEXT: sh a6, 8(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sh a4, 12(a0)
+; CHECK-RV64-NEXT: sh a7, 10(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sh a3, 10(a0)
+; CHECK-RV64-NEXT: sh t0, 12(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sh a2, 8(a0)
+; CHECK-RV64-NEXT: sh a1, 14(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sh a1, 6(a0)
+; CHECK-RV64-NEXT: sh a2, 0(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sh t0, 4(a0)
+; CHECK-RV64-NEXT: sh a3, 2(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sh a7, 2(a0)
+; CHECK-RV64-NEXT: sh a4, 4(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sh a6, 0(a0)
+; CHECK-RV64-NEXT: sh a5, 6(a0)
; CHECK-RV64-NEXT: ret
;
; CHECK-RV32-LABEL: test_nontemporal_ALL_store_v8i16:
; CHECK-RV32: # %bb.0:
-; CHECK-RV32-NEXT: lh a2, 16(a1)
-; CHECK-RV32-NEXT: lh a3, 20(a1)
-; CHECK-RV32-NEXT: lh a4, 24(a1)
-; CHECK-RV32-NEXT: lh a5, 28(a1)
-; CHECK-RV32-NEXT: lh a6, 0(a1)
-; CHECK-RV32-NEXT: lh a7, 4(a1)
-; CHECK-RV32-NEXT: lh t0, 8(a1)
-; CHECK-RV32-NEXT: lh a1, 12(a1)
+; CHECK-RV32-NEXT: lh a2, 0(a1)
+; CHECK-RV32-NEXT: lh a3, 4(a1)
+; CHECK-RV32-NEXT: lh a4, 8(a1)
+; CHECK-RV32-NEXT: lh a5, 12(a1)
+; CHECK-RV32-NEXT: lh a6, 16(a1)
+; CHECK-RV32-NEXT: lh a7, 20(a1)
+; CHECK-RV32-NEXT: lh t0, 24(a1)
+; CHECK-RV32-NEXT: lh a1, 28(a1)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sh a5, 14(a0)
+; CHECK-RV32-NEXT: sh a6, 8(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sh a4, 12(a0)
+; CHECK-RV32-NEXT: sh a7, 10(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sh a3, 10(a0)
+; CHECK-RV32-NEXT: sh t0, 12(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sh a2, 8(a0)
+; CHECK-RV32-NEXT: sh a1, 14(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sh a1, 6(a0)
+; CHECK-RV32-NEXT: sh a2, 0(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sh t0, 4(a0)
+; CHECK-RV32-NEXT: sh a3, 2(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sh a7, 2(a0)
+; CHECK-RV32-NEXT: sh a4, 4(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sh a6, 0(a0)
+; CHECK-RV32-NEXT: sh a5, 6(a0)
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64C-LABEL: test_nontemporal_ALL_store_v8i16:
; CHECK-RV64C: # %bb.0:
-; CHECK-RV64C-NEXT: lh a7, 32(a1)
+; CHECK-RV64C-NEXT: lh a6, 0(a1)
+; CHECK-RV64C-NEXT: lh a7, 8(a1)
+; CHECK-RV64C-NEXT: lh t0, 16(a1)
+; CHECK-RV64C-NEXT: lh a5, 24(a1)
+; CHECK-RV64C-NEXT: lh a2, 32(a1)
; CHECK-RV64C-NEXT: lh a3, 40(a1)
; CHECK-RV64C-NEXT: lh a4, 48(a1)
-; CHECK-RV64C-NEXT: lh a5, 56(a1)
-; CHECK-RV64C-NEXT: lh a6, 0(a1)
-; CHECK-RV64C-NEXT: lh t0, 8(a1)
-; CHECK-RV64C-NEXT: lh a2, 16(a1)
-; CHECK-RV64C-NEXT: lh a1, 24(a1)
-; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sh a5, 14(a0)
+; CHECK-RV64C-NEXT: lh a1, 56(a1)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sh a4, 12(a0)
+; CHECK-RV64C-NEXT: sh a2, 8(a0)
; CHECK-RV64C-NEXT: c.ntl.all
; CHECK-RV64C-NEXT: sh a3, 10(a0)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sh a7, 8(a0)
+; CHECK-RV64C-NEXT: sh a4, 12(a0)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sh a1, 6(a0)
+; CHECK-RV64C-NEXT: sh a1, 14(a0)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sh a2, 4(a0)
+; CHECK-RV64C-NEXT: sh a6, 0(a0)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sh t0, 2(a0)
+; CHECK-RV64C-NEXT: sh a7, 2(a0)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sh a6, 0(a0)
+; CHECK-RV64C-NEXT: sh t0, 4(a0)
+; CHECK-RV64C-NEXT: c.ntl.all
+; CHECK-RV64C-NEXT: sh a5, 6(a0)
; CHECK-RV64C-NEXT: ret
;
; CHECK-RV32C-LABEL: test_nontemporal_ALL_store_v8i16:
; CHECK-RV32C: # %bb.0:
-; CHECK-RV32C-NEXT: lh a7, 16(a1)
+; CHECK-RV32C-NEXT: lh a6, 0(a1)
+; CHECK-RV32C-NEXT: lh a7, 4(a1)
+; CHECK-RV32C-NEXT: lh t0, 8(a1)
+; CHECK-RV32C-NEXT: lh a5, 12(a1)
+; CHECK-RV32C-NEXT: lh a2, 16(a1)
; CHECK-RV32C-NEXT: lh a3, 20(a1)
; CHECK-RV32C-NEXT: lh a4, 24(a1)
-; CHECK-RV32C-NEXT: lh a5, 28(a1)
-; CHECK-RV32C-NEXT: lh a6, 0(a1)
-; CHECK-RV32C-NEXT: lh t0, 4(a1)
-; CHECK-RV32C-NEXT: lh a2, 8(a1)
-; CHECK-RV32C-NEXT: lh a1, 12(a1)
-; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sh a5, 14(a0)
+; CHECK-RV32C-NEXT: lh a1, 28(a1)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sh a4, 12(a0)
+; CHECK-RV32C-NEXT: sh a2, 8(a0)
; CHECK-RV32C-NEXT: c.ntl.all
; CHECK-RV32C-NEXT: sh a3, 10(a0)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sh a7, 8(a0)
+; CHECK-RV32C-NEXT: sh a4, 12(a0)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sh a1, 6(a0)
+; CHECK-RV32C-NEXT: sh a1, 14(a0)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sh a2, 4(a0)
+; CHECK-RV32C-NEXT: sh a6, 0(a0)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sh t0, 2(a0)
+; CHECK-RV32C-NEXT: sh a7, 2(a0)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sh a6, 0(a0)
+; CHECK-RV32C-NEXT: sh t0, 4(a0)
+; CHECK-RV32C-NEXT: c.ntl.all
+; CHECK-RV32C-NEXT: sh a5, 6(a0)
; CHECK-RV32C-NEXT: ret
;
; CHECK-RV64V-LABEL: test_nontemporal_ALL_store_v8i16:
@@ -6949,66 +6949,66 @@ define void @test_nontemporal_ALL_store_v8i16(ptr %p, <8 x i16> %v) {
define void @test_nontemporal_ALL_store_v4i32(ptr %p, <4 x i32> %v) {
; CHECK-RV64-LABEL: test_nontemporal_ALL_store_v4i32:
; CHECK-RV64: # %bb.0:
-; CHECK-RV64-NEXT: lw a2, 24(a1)
-; CHECK-RV64-NEXT: lw a3, 16(a1)
-; CHECK-RV64-NEXT: lw a4, 8(a1)
-; CHECK-RV64-NEXT: lw a1, 0(a1)
+; CHECK-RV64-NEXT: lw a2, 0(a1)
+; CHECK-RV64-NEXT: lw a3, 8(a1)
+; CHECK-RV64-NEXT: lw a4, 16(a1)
+; CHECK-RV64-NEXT: lw a1, 24(a1)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sw a2, 12(a0)
+; CHECK-RV64-NEXT: sw a2, 0(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sw a3, 8(a0)
+; CHECK-RV64-NEXT: sw a3, 4(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sw a4, 4(a0)
+; CHECK-RV64-NEXT: sw a4, 8(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sw a1, 0(a0)
+; CHECK-RV64-NEXT: sw a1, 12(a0)
; CHECK-RV64-NEXT: ret
;
; CHECK-RV32-LABEL: test_nontemporal_ALL_store_v4i32:
; CHECK-RV32: # %bb.0:
-; CHECK-RV32-NEXT: lw a2, 12(a1)
-; CHECK-RV32-NEXT: lw a3, 8(a1)
-; CHECK-RV32-NEXT: lw a4, 4(a1)
-; CHECK-RV32-NEXT: lw a1, 0(a1)
+; CHECK-RV32-NEXT: lw a2, 0(a1)
+; CHECK-RV32-NEXT: lw a3, 4(a1)
+; CHECK-RV32-NEXT: lw a4, 8(a1)
+; CHECK-RV32-NEXT: lw a1, 12(a1)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sw a2, 12(a0)
+; CHECK-RV32-NEXT: sw a2, 0(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sw a3, 8(a0)
+; CHECK-RV32-NEXT: sw a3, 4(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sw a4, 4(a0)
+; CHECK-RV32-NEXT: sw a4, 8(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sw a1, 0(a0)
+; CHECK-RV32-NEXT: sw a1, 12(a0)
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64C-LABEL: test_nontemporal_ALL_store_v4i32:
; CHECK-RV64C: # %bb.0:
-; CHECK-RV64C-NEXT: lw a2, 24(a1)
-; CHECK-RV64C-NEXT: lw a3, 16(a1)
-; CHECK-RV64C-NEXT: lw a4, 8(a1)
-; CHECK-RV64C-NEXT: lw a1, 0(a1)
+; CHECK-RV64C-NEXT: lw a2, 0(a1)
+; CHECK-RV64C-NEXT: lw a3, 8(a1)
+; CHECK-RV64C-NEXT: lw a4, 16(a1)
+; CHECK-RV64C-NEXT: lw a1, 24(a1)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sw a2, 12(a0)
+; CHECK-RV64C-NEXT: sw a2, 0(a0)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sw a3, 8(a0)
+; CHECK-RV64C-NEXT: sw a3, 4(a0)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sw a4, 4(a0)
+; CHECK-RV64C-NEXT: sw a4, 8(a0)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sw a1, 0(a0)
+; CHECK-RV64C-NEXT: sw a1, 12(a0)
; CHECK-RV64C-NEXT: ret
;
; CHECK-RV32C-LABEL: test_nontemporal_ALL_store_v4i32:
; CHECK-RV32C: # %bb.0:
-; CHECK-RV32C-NEXT: lw a2, 12(a1)
-; CHECK-RV32C-NEXT: lw a3, 8(a1)
-; CHECK-RV32C-NEXT: lw a4, 4(a1)
-; CHECK-RV32C-NEXT: lw a1, 0(a1)
+; CHECK-RV32C-NEXT: lw a2, 0(a1)
+; CHECK-RV32C-NEXT: lw a3, 4(a1)
+; CHECK-RV32C-NEXT: lw a4, 8(a1)
+; CHECK-RV32C-NEXT: lw a1, 12(a1)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sw a2, 12(a0)
+; CHECK-RV32C-NEXT: sw a2, 0(a0)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sw a3, 8(a0)
+; CHECK-RV32C-NEXT: sw a3, 4(a0)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sw a4, 4(a0)
+; CHECK-RV32C-NEXT: sw a4, 8(a0)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sw a1, 0(a0)
+; CHECK-RV32C-NEXT: sw a1, 12(a0)
; CHECK-RV32C-NEXT: ret
;
; CHECK-RV64V-LABEL: test_nontemporal_ALL_store_v4i32:
@@ -7032,49 +7032,49 @@ define void @test_nontemporal_ALL_store_v2i64(ptr %p, <2 x i64> %v) {
; CHECK-RV64-LABEL: test_nontemporal_ALL_store_v2i64:
; CHECK-RV64: # %bb.0:
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sd a2, 8(a0)
-; CHECK-RV64-NEXT: ntl.all
; CHECK-RV64-NEXT: sd a1, 0(a0)
+; CHECK-RV64-NEXT: ntl.all
+; CHECK-RV64-NEXT: sd a2, 8(a0)
; CHECK-RV64-NEXT: ret
;
; CHECK-RV32-LABEL: test_nontemporal_ALL_store_v2i64:
; CHECK-RV32: # %bb.0:
-; CHECK-RV32-NEXT: lw a2, 12(a1)
-; CHECK-RV32-NEXT: lw a3, 8(a1)
-; CHECK-RV32-NEXT: lw a4, 4(a1)
-; CHECK-RV32-NEXT: lw a1, 0(a1)
+; CHECK-RV32-NEXT: lw a2, 0(a1)
+; CHECK-RV32-NEXT: lw a3, 4(a1)
+; CHECK-RV32-NEXT: lw a4, 8(a1)
+; CHECK-RV32-NEXT: lw a1, 12(a1)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sw a2, 12(a0)
+; CHECK-RV32-NEXT: sw a2, 0(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sw a3, 8(a0)
+; CHECK-RV32-NEXT: sw a3, 4(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sw a4, 4(a0)
+; CHECK-RV32-NEXT: sw a4, 8(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sw a1, 0(a0)
+; CHECK-RV32-NEXT: sw a1, 12(a0)
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64C-LABEL: test_nontemporal_ALL_store_v2i64:
; CHECK-RV64C: # %bb.0:
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sd a2, 8(a0)
-; CHECK-RV64C-NEXT: c.ntl.all
; CHECK-RV64C-NEXT: sd a1, 0(a0)
+; CHECK-RV64C-NEXT: c.ntl.all
+; CHECK-RV64C-NEXT: sd a2, 8(a0)
; CHECK-RV64C-NEXT: ret
;
; CHECK-RV32C-LABEL: test_nontemporal_ALL_store_v2i64:
; CHECK-RV32C: # %bb.0:
-; CHECK-RV32C-NEXT: lw a2, 12(a1)
-; CHECK-RV32C-NEXT: lw a3, 8(a1)
-; CHECK-RV32C-NEXT: lw a4, 4(a1)
-; CHECK-RV32C-NEXT: lw a1, 0(a1)
+; CHECK-RV32C-NEXT: lw a2, 0(a1)
+; CHECK-RV32C-NEXT: lw a3, 4(a1)
+; CHECK-RV32C-NEXT: lw a4, 8(a1)
+; CHECK-RV32C-NEXT: lw a1, 12(a1)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sw a2, 12(a0)
+; CHECK-RV32C-NEXT: sw a2, 0(a0)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sw a3, 8(a0)
+; CHECK-RV32C-NEXT: sw a3, 4(a0)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sw a4, 4(a0)
+; CHECK-RV32C-NEXT: sw a4, 8(a0)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sw a1, 0(a0)
+; CHECK-RV32C-NEXT: sw a1, 12(a0)
; CHECK-RV32C-NEXT: ret
;
; CHECK-RV64V-LABEL: test_nontemporal_ALL_store_v2i64:
diff --git a/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll b/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll
index fe602b5b8fc2bc..0d571600a2d813 100644
--- a/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll
@@ -757,8 +757,8 @@ define i1 @uaddo_i42_increment_illegal_type(i42 %x, ptr %p) {
; RV64-NEXT: slli a0, a2, 22
; RV64-NEXT: srli a3, a0, 22
; RV64-NEXT: seqz a0, a3
-; RV64-NEXT: sw a2, 0(a1)
; RV64-NEXT: srli a3, a3, 32
+; RV64-NEXT: sw a2, 0(a1)
; RV64-NEXT: sh a3, 4(a1)
; RV64-NEXT: ret
%a = add i42 %x, 1
@@ -1247,8 +1247,8 @@ define void @PR41129(ptr %p64) {
; RV32-NEXT: beqz a3, .LBB37_2
; RV32-NEXT: # %bb.1: # %false
; RV32-NEXT: andi a1, a1, 7
-; RV32-NEXT: sw zero, 4(a0)
; RV32-NEXT: sw a1, 0(a0)
+; RV32-NEXT: sw zero, 4(a0)
; RV32-NEXT: ret
; RV32-NEXT: .LBB37_2: # %true
; RV32-NEXT: seqz a3, a1
diff --git a/llvm/test/CodeGen/RISCV/pr63816.ll b/llvm/test/CodeGen/RISCV/pr63816.ll
index 2e33a0e994996a..75ddeda3de5071 100644
--- a/llvm/test/CodeGen/RISCV/pr63816.ll
+++ b/llvm/test/CodeGen/RISCV/pr63816.ll
@@ -55,14 +55,14 @@ define void @test(ptr %0, ptr %1) nounwind {
; CHECK-NEXT: fcvt.d.s fs0, fs0
; CHECK-NEXT: call __extendhfsf2
; CHECK-NEXT: fcvt.d.s fa5, fa0
-; CHECK-NEXT: fsd fa5, 56(s0)
-; CHECK-NEXT: fsd fs0, 48(s0)
-; CHECK-NEXT: fsd fs1, 40(s0)
; CHECK-NEXT: fsd fs2, 32(s0)
-; CHECK-NEXT: fsd fs3, 24(s0)
-; CHECK-NEXT: fsd fs4, 16(s0)
-; CHECK-NEXT: fsd fs5, 8(s0)
+; CHECK-NEXT: fsd fs1, 40(s0)
+; CHECK-NEXT: fsd fs0, 48(s0)
+; CHECK-NEXT: fsd fa5, 56(s0)
; CHECK-NEXT: fsd fs6, 0(s0)
+; CHECK-NEXT: fsd fs5, 8(s0)
+; CHECK-NEXT: fsd fs4, 16(s0)
+; CHECK-NEXT: fsd fs3, 24(s0)
; CHECK-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s1, 56(sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/push-pop-popret.ll b/llvm/test/CodeGen/RISCV/push-pop-popret.ll
index 85c2997e268a94..5cb00e617273a6 100644
--- a/llvm/test/CodeGen/RISCV/push-pop-popret.ll
+++ b/llvm/test/CodeGen/RISCV/push-pop-popret.ll
@@ -1000,13 +1000,13 @@ define i32 @varargs(ptr %fmt, ...) nounwind {
; RV32IZCMP: # %bb.0:
; RV32IZCMP-NEXT: addi sp, sp, -48
; RV32IZCMP-NEXT: mv a0, a1
-; RV32IZCMP-NEXT: sw a7, 44(sp)
-; RV32IZCMP-NEXT: sw a6, 40(sp)
; RV32IZCMP-NEXT: sw a5, 36(sp)
-; RV32IZCMP-NEXT: sw a4, 32(sp)
-; RV32IZCMP-NEXT: sw a3, 28(sp)
-; RV32IZCMP-NEXT: sw a2, 24(sp)
+; RV32IZCMP-NEXT: sw a6, 40(sp)
+; RV32IZCMP-NEXT: sw a7, 44(sp)
; RV32IZCMP-NEXT: sw a1, 20(sp)
+; RV32IZCMP-NEXT: sw a2, 24(sp)
+; RV32IZCMP-NEXT: sw a3, 28(sp)
+; RV32IZCMP-NEXT: sw a4, 32(sp)
; RV32IZCMP-NEXT: addi a1, sp, 24
; RV32IZCMP-NEXT: sw a1, 12(sp)
; RV32IZCMP-NEXT: addi sp, sp, 48
@@ -1016,15 +1016,15 @@ define i32 @varargs(ptr %fmt, ...) nounwind {
; RV64IZCMP: # %bb.0:
; RV64IZCMP-NEXT: addi sp, sp, -80
; RV64IZCMP-NEXT: sd a1, 24(sp)
-; RV64IZCMP-NEXT: sd a7, 72(sp)
-; RV64IZCMP-NEXT: sd a6, 64(sp)
; RV64IZCMP-NEXT: addi a0, sp, 28
; RV64IZCMP-NEXT: sd a0, 8(sp)
; RV64IZCMP-NEXT: lw a0, 24(sp)
; RV64IZCMP-NEXT: sd a5, 56(sp)
-; RV64IZCMP-NEXT: sd a4, 48(sp)
-; RV64IZCMP-NEXT: sd a3, 40(sp)
+; RV64IZCMP-NEXT: sd a6, 64(sp)
+; RV64IZCMP-NEXT: sd a7, 72(sp)
; RV64IZCMP-NEXT: sd a2, 32(sp)
+; RV64IZCMP-NEXT: sd a3, 40(sp)
+; RV64IZCMP-NEXT: sd a4, 48(sp)
; RV64IZCMP-NEXT: addi sp, sp, 80
; RV64IZCMP-NEXT: ret
;
@@ -1032,13 +1032,13 @@ define i32 @varargs(ptr %fmt, ...) nounwind {
; RV32IZCMP-SR: # %bb.0:
; RV32IZCMP-SR-NEXT: addi sp, sp, -48
; RV32IZCMP-SR-NEXT: mv a0, a1
-; RV32IZCMP-SR-NEXT: sw a7, 44(sp)
-; RV32IZCMP-SR-NEXT: sw a6, 40(sp)
; RV32IZCMP-SR-NEXT: sw a5, 36(sp)
-; RV32IZCMP-SR-NEXT: sw a4, 32(sp)
-; RV32IZCMP-SR-NEXT: sw a3, 28(sp)
-; RV32IZCMP-SR-NEXT: sw a2, 24(sp)
+; RV32IZCMP-SR-NEXT: sw a6, 40(sp)
+; RV32IZCMP-SR-NEXT: sw a7, 44(sp)
; RV32IZCMP-SR-NEXT: sw a1, 20(sp)
+; RV32IZCMP-SR-NEXT: sw a2, 24(sp)
+; RV32IZCMP-SR-NEXT: sw a3, 28(sp)
+; RV32IZCMP-SR-NEXT: sw a4, 32(sp)
; RV32IZCMP-SR-NEXT: addi a1, sp, 24
; RV32IZCMP-SR-NEXT: sw a1, 12(sp)
; RV32IZCMP-SR-NEXT: addi sp, sp, 48
@@ -1048,15 +1048,15 @@ define i32 @varargs(ptr %fmt, ...) nounwind {
; RV64IZCMP-SR: # %bb.0:
; RV64IZCMP-SR-NEXT: addi sp, sp, -80
; RV64IZCMP-SR-NEXT: sd a1, 24(sp)
-; RV64IZCMP-SR-NEXT: sd a7, 72(sp)
-; RV64IZCMP-SR-NEXT: sd a6, 64(sp)
; RV64IZCMP-SR-NEXT: addi a0, sp, 28
; RV64IZCMP-SR-NEXT: sd a0, 8(sp)
; RV64IZCMP-SR-NEXT: lw a0, 24(sp)
; RV64IZCMP-SR-NEXT: sd a5, 56(sp)
-; RV64IZCMP-SR-NEXT: sd a4, 48(sp)
-; RV64IZCMP-SR-NEXT: sd a3, 40(sp)
+; RV64IZCMP-SR-NEXT: sd a6, 64(sp)
+; RV64IZCMP-SR-NEXT: sd a7, 72(sp)
; RV64IZCMP-SR-NEXT: sd a2, 32(sp)
+; RV64IZCMP-SR-NEXT: sd a3, 40(sp)
+; RV64IZCMP-SR-NEXT: sd a4, 48(sp)
; RV64IZCMP-SR-NEXT: addi sp, sp, 80
; RV64IZCMP-SR-NEXT: ret
;
@@ -1064,13 +1064,13 @@ define i32 @varargs(ptr %fmt, ...) nounwind {
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -48
; RV32I-NEXT: mv a0, a1
-; RV32I-NEXT: sw a7, 44(sp)
-; RV32I-NEXT: sw a6, 40(sp)
; RV32I-NEXT: sw a5, 36(sp)
-; RV32I-NEXT: sw a4, 32(sp)
-; RV32I-NEXT: sw a3, 28(sp)
-; RV32I-NEXT: sw a2, 24(sp)
+; RV32I-NEXT: sw a6, 40(sp)
+; RV32I-NEXT: sw a7, 44(sp)
; RV32I-NEXT: sw a1, 20(sp)
+; RV32I-NEXT: sw a2, 24(sp)
+; RV32I-NEXT: sw a3, 28(sp)
+; RV32I-NEXT: sw a4, 32(sp)
; RV32I-NEXT: addi a1, sp, 24
; RV32I-NEXT: sw a1, 12(sp)
; RV32I-NEXT: addi sp, sp, 48
@@ -1080,15 +1080,15 @@ define i32 @varargs(ptr %fmt, ...) nounwind {
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -80
; RV64I-NEXT: sd a1, 24(sp)
-; RV64I-NEXT: sd a7, 72(sp)
-; RV64I-NEXT: sd a6, 64(sp)
; RV64I-NEXT: addi a0, sp, 28
; RV64I-NEXT: sd a0, 8(sp)
; RV64I-NEXT: lw a0, 24(sp)
; RV64I-NEXT: sd a5, 56(sp)
-; RV64I-NEXT: sd a4, 48(sp)
-; RV64I-NEXT: sd a3, 40(sp)
+; RV64I-NEXT: sd a6, 64(sp)
+; RV64I-NEXT: sd a7, 72(sp)
; RV64I-NEXT: sd a2, 32(sp)
+; RV64I-NEXT: sd a3, 40(sp)
+; RV64I-NEXT: sd a4, 48(sp)
; RV64I-NEXT: addi sp, sp, 80
; RV64I-NEXT: ret
%va = alloca ptr
diff --git a/llvm/test/CodeGen/RISCV/riscv-codegenprepare-asm.ll b/llvm/test/CodeGen/RISCV/riscv-codegenprepare-asm.ll
index 31ca8eab33508c..352184c2d85ada 100644
--- a/llvm/test/CodeGen/RISCV/riscv-codegenprepare-asm.ll
+++ b/llvm/test/CodeGen/RISCV/riscv-codegenprepare-asm.ll
@@ -65,8 +65,8 @@ define void @test2(ptr nocapture noundef %a, i32 noundef signext %n) {
; CHECK-NEXT: lw a5, -4(a4)
; CHECK-NEXT: lw a6, 0(a4)
; CHECK-NEXT: addi a5, a5, 4
-; CHECK-NEXT: sw a5, -4(a4)
; CHECK-NEXT: addi a6, a6, 4
+; CHECK-NEXT: sw a5, -4(a4)
; CHECK-NEXT: sw a6, 0(a4)
; CHECK-NEXT: addi a3, a3, 2
; CHECK-NEXT: addi a4, a4, 8
diff --git a/llvm/test/CodeGen/RISCV/riscv-shifted-extend.ll b/llvm/test/CodeGen/RISCV/riscv-shifted-extend.ll
index 957f44f9f669de..4901e268ec11a0 100644
--- a/llvm/test/CodeGen/RISCV/riscv-shifted-extend.ll
+++ b/llvm/test/CodeGen/RISCV/riscv-shifted-extend.ll
@@ -8,9 +8,9 @@ define void @test(ptr nocapture noundef writeonly %array1, i32 noundef signext %
; RV64-NEXT: addiw a3, a1, 5
; RV64-NEXT: slli a4, a3, 2
; RV64-NEXT: add a4, a0, a4
-; RV64-NEXT: sw a2, 0(a4)
; RV64-NEXT: slli a1, a1, 2
; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: sw a2, 0(a4)
; RV64-NEXT: sw a2, 24(a0)
; RV64-NEXT: sw a3, 140(a0)
; RV64-NEXT: ret
@@ -42,9 +42,9 @@ define void @test1(ptr nocapture noundef %array1, i32 noundef signext %a, i32 no
; RV64-NEXT: # %bb.1: # %entry
; RV64-NEXT: mv a6, a2
; RV64-NEXT: .LBB1_2: # %entry
-; RV64-NEXT: sw a6, 0(a5)
; RV64-NEXT: slli a1, a1, 2
; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: sw a6, 0(a5)
; RV64-NEXT: sw a6, 24(a0)
; RV64-NEXT: sw a4, 140(a0)
; RV64-NEXT: ret
@@ -72,9 +72,9 @@ define void @test2(ptr nocapture noundef writeonly %array1, i64 noundef %a, i64
; RV64-NEXT: addi a3, a1, 5
; RV64-NEXT: slli a4, a3, 3
; RV64-NEXT: add a4, a0, a4
-; RV64-NEXT: sd a2, 0(a4)
; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: sd a2, 0(a4)
; RV64-NEXT: sd a2, 48(a0)
; RV64-NEXT: sd a3, 280(a0)
; RV64-NEXT: ret
@@ -102,9 +102,9 @@ define void @test3(ptr nocapture noundef %array1, i64 noundef %a, i64 noundef %b
; RV64-NEXT: .LBB3_2: # %entry
; RV64-NEXT: slli a2, a4, 3
; RV64-NEXT: add a2, a0, a2
-; RV64-NEXT: sd a5, 0(a2)
; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: sd a5, 0(a2)
; RV64-NEXT: sd a5, 48(a0)
; RV64-NEXT: sd a4, 280(a0)
; RV64-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rv32zbb.ll b/llvm/test/CodeGen/RISCV/rv32zbb.ll
index e24b1b41645cdf..35cf65182a6c65 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbb.ll
@@ -754,10 +754,10 @@ define <2 x i64> @ctpop_v2i64(<2 x i64> %a) nounwind {
; RV32I-NEXT: add a2, a2, a4
; RV32I-NEXT: srli a2, a2, 24
; RV32I-NEXT: add a1, a2, a1
-; RV32I-NEXT: sw zero, 12(a0)
+; RV32I-NEXT: sw a3, 0(a0)
; RV32I-NEXT: sw zero, 4(a0)
; RV32I-NEXT: sw a1, 8(a0)
-; RV32I-NEXT: sw a3, 0(a0)
+; RV32I-NEXT: sw zero, 12(a0)
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: ctpop_v2i64:
@@ -772,10 +772,10 @@ define <2 x i64> @ctpop_v2i64(<2 x i64> %a) nounwind {
; RV32ZBB-NEXT: cpop a1, a1
; RV32ZBB-NEXT: cpop a3, a4
; RV32ZBB-NEXT: add a1, a3, a1
-; RV32ZBB-NEXT: sw zero, 12(a0)
+; RV32ZBB-NEXT: sw a2, 0(a0)
; RV32ZBB-NEXT: sw zero, 4(a0)
; RV32ZBB-NEXT: sw a1, 8(a0)
-; RV32ZBB-NEXT: sw a2, 0(a0)
+; RV32ZBB-NEXT: sw zero, 12(a0)
; RV32ZBB-NEXT: ret
%1 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
ret <2 x i64> %1
diff --git a/llvm/test/CodeGen/RISCV/rvv/binop-splats.ll b/llvm/test/CodeGen/RISCV/rvv/binop-splats.ll
index f26e57b5a0b733..0e426ee309abab 100644
--- a/llvm/test/CodeGen/RISCV/rvv/binop-splats.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/binop-splats.ll
@@ -397,10 +397,10 @@ define <vscale x 1 x i64> @nxv1i64(i64 %x, i64 %y) {
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: add a2, a0, a2
-; RV32-NEXT: sw a2, 8(sp)
; RV32-NEXT: add a1, a1, a3
; RV32-NEXT: sltu a0, a2, a0
; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: sw a2, 8(sp)
; RV32-NEXT: sw a0, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
@@ -428,10 +428,10 @@ define <vscale x 2 x i64> @nxv2i64(i64 %x, i64 %y) {
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: add a2, a0, a2
-; RV32-NEXT: sw a2, 8(sp)
; RV32-NEXT: add a1, a1, a3
; RV32-NEXT: sltu a0, a2, a0
; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: sw a2, 8(sp)
; RV32-NEXT: sw a0, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
@@ -459,10 +459,10 @@ define <vscale x 4 x i64> @nxv4i64(i64 %x, i64 %y) {
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: add a2, a0, a2
-; RV32-NEXT: sw a2, 8(sp)
; RV32-NEXT: add a1, a1, a3
; RV32-NEXT: sltu a0, a2, a0
; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: sw a2, 8(sp)
; RV32-NEXT: sw a0, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
@@ -490,10 +490,10 @@ define <vscale x 8 x i64> @nxv8i64(i64 %x, i64 %y) {
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: add a2, a0, a2
-; RV32-NEXT: sw a2, 8(sp)
; RV32-NEXT: add a1, a1, a3
; RV32-NEXT: sltu a0, a2, a0
; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: sw a2, 8(sp)
; RV32-NEXT: sw a0, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
@@ -580,13 +580,13 @@ define <vscale x 1 x i64> @uaddsatnxv1i64(i64 %x, i64 %y) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v8, (a0), zero
-; RV32-NEXT: sw a3, 4(sp)
; RV32-NEXT: sw a2, 0(sp)
+; RV32-NEXT: sw a3, 4(sp)
; RV32-NEXT: mv a0, sp
; RV32-NEXT: vlse64.v v9, (a0), zero
; RV32-NEXT: vsaddu.vv v8, v8, v9
diff --git a/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll
index 94e945f8032059..a34f06948a762c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll
@@ -706,9 +706,9 @@ define <vscale x 1 x i64> @bitreverse_nxv1i64(<vscale x 1 x i64> %va) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a0, 1044480
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: li a0, 56
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vsrl.vx v9, v8, a0
@@ -846,9 +846,9 @@ define <vscale x 2 x i64> @bitreverse_nxv2i64(<vscale x 2 x i64> %va) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a0, 1044480
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: li a0, 56
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vsrl.vx v10, v8, a0
@@ -986,9 +986,9 @@ define <vscale x 4 x i64> @bitreverse_nxv4i64(<vscale x 4 x i64> %va) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a0, 1044480
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: li a0, 56
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vsrl.vx v12, v8, a0
@@ -1130,9 +1130,9 @@ define <vscale x 8 x i64> @bitreverse_nxv8i64(<vscale x 8 x i64> %va) {
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: sub sp, sp, a0
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a0, 1044480
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: li a0, 56
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vsrl.vx v16, v8, a0
diff --git a/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll b/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll
index 5709de567c18dc..afce04d107e728 100644
--- a/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll
@@ -1437,9 +1437,9 @@ define <vscale x 1 x i64> @vp_bitreverse_nxv1i64(<vscale x 1 x i64> %va, <vscale
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a1, 1044480
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vsll.vx v9, v8, a1, v0.t
@@ -1578,9 +1578,9 @@ define <vscale x 1 x i64> @vp_bitreverse_nxv1i64_unmasked(<vscale x 1 x i64> %va
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a1, 1044480
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vsll.vx v9, v8, a1
@@ -1721,9 +1721,9 @@ define <vscale x 2 x i64> @vp_bitreverse_nxv2i64(<vscale x 2 x i64> %va, <vscale
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a1, 1044480
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vsll.vx v10, v8, a1, v0.t
@@ -1862,9 +1862,9 @@ define <vscale x 2 x i64> @vp_bitreverse_nxv2i64_unmasked(<vscale x 2 x i64> %va
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a1, 1044480
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vsll.vx v10, v8, a1
@@ -2005,9 +2005,9 @@ define <vscale x 4 x i64> @vp_bitreverse_nxv4i64(<vscale x 4 x i64> %va, <vscale
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a1, 1044480
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vsll.vx v12, v8, a1, v0.t
@@ -2146,9 +2146,9 @@ define <vscale x 4 x i64> @vp_bitreverse_nxv4i64_unmasked(<vscale x 4 x i64> %va
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a1, 1044480
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vsll.vx v12, v8, a1
@@ -2294,9 +2294,9 @@ define <vscale x 7 x i64> @vp_bitreverse_nxv7i64(<vscale x 7 x i64> %va, <vscale
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: sub sp, sp, a1
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a1, 1044480
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsll.vx v16, v8, a1, v0.t
@@ -2497,9 +2497,9 @@ define <vscale x 7 x i64> @vp_bitreverse_nxv7i64_unmasked(<vscale x 7 x i64> %va
; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: sub sp, sp, a1
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a1, 1044480
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsll.vx v16, v8, a1
@@ -2655,9 +2655,9 @@ define <vscale x 8 x i64> @vp_bitreverse_nxv8i64(<vscale x 8 x i64> %va, <vscale
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: sub sp, sp, a1
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a1, 1044480
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsll.vx v16, v8, a1, v0.t
@@ -2858,9 +2858,9 @@ define <vscale x 8 x i64> @vp_bitreverse_nxv8i64_unmasked(<vscale x 8 x i64> %va
; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: sub sp, sp, a1
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a1, 1044480
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsll.vx v16, v8, a1
diff --git a/llvm/test/CodeGen/RISCV/rvv/bswap-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/bswap-sdnode.ll
index d3fce3caf8aef9..e8e362b1f042dd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/bswap-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/bswap-sdnode.ll
@@ -258,9 +258,9 @@ define <vscale x 1 x i64> @bswap_nxv1i64(<vscale x 1 x i64> %va) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a0, 1044480
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: li a0, 56
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vsrl.vx v9, v8, a0
@@ -341,9 +341,9 @@ define <vscale x 2 x i64> @bswap_nxv2i64(<vscale x 2 x i64> %va) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a0, 1044480
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: li a0, 56
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vsrl.vx v10, v8, a0
@@ -424,9 +424,9 @@ define <vscale x 4 x i64> @bswap_nxv4i64(<vscale x 4 x i64> %va) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a0, 1044480
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: li a0, 56
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vsrl.vx v12, v8, a0
@@ -511,9 +511,9 @@ define <vscale x 8 x i64> @bswap_nxv8i64(<vscale x 8 x i64> %va) {
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: sub sp, sp, a0
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a0, 1044480
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: li a0, 56
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vsrl.vx v16, v8, a0
diff --git a/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll b/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll
index 6917d7e44a8e6a..171de6c2fddf17 100644
--- a/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll
@@ -513,9 +513,9 @@ define <vscale x 1 x i64> @vp_bswap_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a1, 1044480
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vsll.vx v9, v8, a1, v0.t
@@ -597,9 +597,9 @@ define <vscale x 1 x i64> @vp_bswap_nxv1i64_unmasked(<vscale x 1 x i64> %va, i32
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a1, 1044480
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vsll.vx v9, v8, a1
@@ -683,9 +683,9 @@ define <vscale x 2 x i64> @vp_bswap_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a1, 1044480
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vsll.vx v10, v8, a1, v0.t
@@ -767,9 +767,9 @@ define <vscale x 2 x i64> @vp_bswap_nxv2i64_unmasked(<vscale x 2 x i64> %va, i32
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a1, 1044480
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vsll.vx v10, v8, a1
@@ -853,9 +853,9 @@ define <vscale x 4 x i64> @vp_bswap_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a1, 1044480
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vsll.vx v12, v8, a1, v0.t
@@ -937,9 +937,9 @@ define <vscale x 4 x i64> @vp_bswap_nxv4i64_unmasked(<vscale x 4 x i64> %va, i32
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a1, 1044480
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vsll.vx v12, v8, a1
@@ -1028,9 +1028,9 @@ define <vscale x 7 x i64> @vp_bswap_nxv7i64(<vscale x 7 x i64> %va, <vscale x 7
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: sub sp, sp, a1
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a1, 1044480
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsll.vx v16, v8, a1, v0.t
@@ -1174,9 +1174,9 @@ define <vscale x 7 x i64> @vp_bswap_nxv7i64_unmasked(<vscale x 7 x i64> %va, i32
; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: sub sp, sp, a1
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a1, 1044480
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsll.vx v16, v8, a1
@@ -1274,9 +1274,9 @@ define <vscale x 8 x i64> @vp_bswap_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: sub sp, sp, a1
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a1, 1044480
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsll.vx v16, v8, a1, v0.t
@@ -1420,9 +1420,9 @@ define <vscale x 8 x i64> @vp_bswap_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32
; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: sub sp, sp, a1
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a1, 1044480
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsll.vx v16, v8, a1
@@ -1640,9 +1640,9 @@ define <vscale x 1 x i48> @vp_bswap_nxv1i48(<vscale x 1 x i48> %va, <vscale x 1
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a1, 1044480
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vsll.vx v9, v8, a1, v0.t
diff --git a/llvm/test/CodeGen/RISCV/rvv/combine-store.ll b/llvm/test/CodeGen/RISCV/rvv/combine-store.ll
index 5c4cc00a969580..181ab26c16af78 100644
--- a/llvm/test/CodeGen/RISCV/rvv/combine-store.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/combine-store.ll
@@ -210,10 +210,10 @@ define void @combine_zero_stores_2xi32_unaligned(ptr %p) {
define void @combine_zero_stores_2xi64(ptr %p) {
; RV32-LABEL: combine_zero_stores_2xi64:
; RV32: # %bb.0:
-; RV32-NEXT: sw zero, 4(a0)
; RV32-NEXT: sw zero, 0(a0)
-; RV32-NEXT: sw zero, 12(a0)
+; RV32-NEXT: sw zero, 4(a0)
; RV32-NEXT: sw zero, 8(a0)
+; RV32-NEXT: sw zero, 12(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: combine_zero_stores_2xi64:
@@ -300,10 +300,10 @@ define void @combine_allones_stores_2xi64(ptr %p) {
; RV32-LABEL: combine_allones_stores_2xi64:
; RV32: # %bb.0:
; RV32-NEXT: li a1, -1
-; RV32-NEXT: sw a1, 4(a0)
; RV32-NEXT: sw a1, 0(a0)
-; RV32-NEXT: sw a1, 12(a0)
+; RV32-NEXT: sw a1, 4(a0)
; RV32-NEXT: sw a1, 8(a0)
+; RV32-NEXT: sw a1, 12(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: combine_allones_stores_2xi64:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-binop-splats.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-binop-splats.ll
index 8f40b02423094a..29489be4dcb569 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-binop-splats.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-binop-splats.ll
@@ -462,10 +462,10 @@ define <1 x i64> @v1i64(i64 %x, i64 %y) {
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: add a2, a0, a2
-; RV32-NEXT: sw a2, 8(sp)
; RV32-NEXT: add a1, a1, a3
; RV32-NEXT: sltu a0, a2, a0
; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: sw a2, 8(sp)
; RV32-NEXT: sw a0, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
@@ -493,10 +493,10 @@ define <2 x i64> @v2i64(i64 %x, i64 %y) {
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: add a2, a0, a2
-; RV32-NEXT: sw a2, 8(sp)
; RV32-NEXT: add a1, a1, a3
; RV32-NEXT: sltu a0, a2, a0
; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: sw a2, 8(sp)
; RV32-NEXT: sw a0, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
@@ -525,10 +525,10 @@ define <4 x i64> @v4i64(i64 %x, i64 %y) {
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: add a2, a0, a2
-; RV32-NEXT: sw a2, 8(sp)
; RV32-NEXT: add a1, a1, a3
; RV32-NEXT: sltu a0, a2, a0
; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: sw a2, 8(sp)
; RV32-NEXT: sw a0, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
@@ -557,10 +557,10 @@ define <8 x i64> @v8i64(i64 %x, i64 %y) {
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: add a2, a0, a2
-; RV32-NEXT: sw a2, 8(sp)
; RV32-NEXT: add a1, a1, a3
; RV32-NEXT: sltu a0, a2, a0
; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: sw a2, 8(sp)
; RV32-NEXT: sw a0, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll
index 90bedf87e04d30..54265193b09f6e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll
@@ -835,9 +835,9 @@ define <2 x i64> @vp_bitreverse_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %e
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a1, 1044480
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vsll.vx v9, v8, a1, v0.t
@@ -970,9 +970,9 @@ define <2 x i64> @vp_bitreverse_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl)
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a1, 1044480
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vsll.vx v9, v8, a1
@@ -1107,9 +1107,9 @@ define <4 x i64> @vp_bitreverse_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %e
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a1, 1044480
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vsll.vx v10, v8, a1, v0.t
@@ -1242,9 +1242,9 @@ define <4 x i64> @vp_bitreverse_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl)
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a1, 1044480
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vsll.vx v10, v8, a1
@@ -1379,9 +1379,9 @@ define <8 x i64> @vp_bitreverse_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %e
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a1, 1044480
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vsll.vx v12, v8, a1, v0.t
@@ -1514,9 +1514,9 @@ define <8 x i64> @vp_bitreverse_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl)
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a1, 1044480
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vsll.vx v12, v8, a1
@@ -1656,21 +1656,21 @@ define <15 x i64> @vp_bitreverse_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroex
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: sub sp, sp, a1
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 24 * vlenb
-; RV32-NEXT: sw zero, 20(sp)
; RV32-NEXT: lui a1, 1044480
; RV32-NEXT: sw a1, 16(sp)
+; RV32-NEXT: sw zero, 20(sp)
; RV32-NEXT: lui a1, 61681
; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 44(sp)
; RV32-NEXT: sw a1, 40(sp)
+; RV32-NEXT: sw a1, 44(sp)
; RV32-NEXT: lui a1, 209715
; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 36(sp)
; RV32-NEXT: sw a1, 32(sp)
+; RV32-NEXT: sw a1, 36(sp)
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: sw a1, 24(sp)
+; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsll.vx v16, v8, a1, v0.t
@@ -1862,21 +1862,21 @@ define <15 x i64> @vp_bitreverse_v15i64_unmasked(<15 x i64> %va, i32 zeroext %ev
; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: sub sp, sp, a1
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 8 * vlenb
-; RV32-NEXT: sw zero, 20(sp)
; RV32-NEXT: lui a1, 1044480
; RV32-NEXT: sw a1, 16(sp)
+; RV32-NEXT: sw zero, 20(sp)
; RV32-NEXT: lui a1, 61681
; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 44(sp)
; RV32-NEXT: sw a1, 40(sp)
+; RV32-NEXT: sw a1, 44(sp)
; RV32-NEXT: lui a1, 209715
; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 36(sp)
; RV32-NEXT: sw a1, 32(sp)
+; RV32-NEXT: sw a1, 36(sp)
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: sw a1, 24(sp)
+; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsll.vx v16, v8, a1
@@ -2023,21 +2023,21 @@ define <16 x i64> @vp_bitreverse_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroex
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: sub sp, sp, a1
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 24 * vlenb
-; RV32-NEXT: sw zero, 20(sp)
; RV32-NEXT: lui a1, 1044480
; RV32-NEXT: sw a1, 16(sp)
+; RV32-NEXT: sw zero, 20(sp)
; RV32-NEXT: lui a1, 61681
; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 44(sp)
; RV32-NEXT: sw a1, 40(sp)
+; RV32-NEXT: sw a1, 44(sp)
; RV32-NEXT: lui a1, 209715
; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 36(sp)
; RV32-NEXT: sw a1, 32(sp)
+; RV32-NEXT: sw a1, 36(sp)
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: sw a1, 24(sp)
+; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsll.vx v16, v8, a1, v0.t
@@ -2229,21 +2229,21 @@ define <16 x i64> @vp_bitreverse_v16i64_unmasked(<16 x i64> %va, i32 zeroext %ev
; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: sub sp, sp, a1
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 8 * vlenb
-; RV32-NEXT: sw zero, 20(sp)
; RV32-NEXT: lui a1, 1044480
; RV32-NEXT: sw a1, 16(sp)
+; RV32-NEXT: sw zero, 20(sp)
; RV32-NEXT: lui a1, 61681
; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 44(sp)
; RV32-NEXT: sw a1, 40(sp)
+; RV32-NEXT: sw a1, 44(sp)
; RV32-NEXT: lui a1, 209715
; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 36(sp)
; RV32-NEXT: sw a1, 32(sp)
+; RV32-NEXT: sw a1, 36(sp)
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: sw a1, 24(sp)
+; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsll.vx v16, v8, a1
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll
index 7f211d0f8f9bad..37c22d328a4b1a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll
@@ -113,9 +113,9 @@ define void @bitreverse_v2i64(ptr %x, ptr %y) {
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vle64.v v8, (a0)
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a1, 1044480
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsrl.vx v9, v8, a1
; RV32-NEXT: li a2, 40
@@ -364,9 +364,9 @@ define void @bitreverse_v4i64(ptr %x, ptr %y) {
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vle64.v v8, (a0)
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a1, 1044480
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsrl.vx v10, v8, a1
; RV32-NEXT: li a2, 40
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll
index 6f2e86097d6ff9..b8ddf74c30dbdc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll
@@ -283,9 +283,9 @@ define <2 x i64> @vp_bswap_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a1, 1044480
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vsll.vx v9, v8, a1, v0.t
@@ -361,9 +361,9 @@ define <2 x i64> @vp_bswap_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a1, 1044480
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vsll.vx v9, v8, a1
@@ -441,9 +441,9 @@ define <4 x i64> @vp_bswap_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a1, 1044480
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vsll.vx v10, v8, a1, v0.t
@@ -519,9 +519,9 @@ define <4 x i64> @vp_bswap_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a1, 1044480
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vsll.vx v10, v8, a1
@@ -599,9 +599,9 @@ define <8 x i64> @vp_bswap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a1, 1044480
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vsll.vx v12, v8, a1, v0.t
@@ -677,9 +677,9 @@ define <8 x i64> @vp_bswap_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a1, 1044480
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vsll.vx v12, v8, a1
@@ -762,9 +762,9 @@ define <15 x i64> @vp_bswap_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %ev
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: sub sp, sp, a1
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a1, 1044480
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsll.vx v16, v8, a1, v0.t
@@ -902,9 +902,9 @@ define <15 x i64> @vp_bswap_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) {
; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: sub sp, sp, a1
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a1, 1044480
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsll.vx v16, v8, a1
@@ -996,9 +996,9 @@ define <16 x i64> @vp_bswap_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %ev
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: sub sp, sp, a1
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a1, 1044480
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsll.vx v16, v8, a1, v0.t
@@ -1136,9 +1136,9 @@ define <16 x i64> @vp_bswap_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) {
; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: sub sp, sp, a1
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a1, 1044480
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsll.vx v16, v8, a1
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll
index d5338f9b6c6fc0..217597f6d57d69 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll
@@ -71,9 +71,9 @@ define void @bswap_v2i64(ptr %x, ptr %y) {
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vle64.v v8, (a0)
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a1, 1044480
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsrl.vx v9, v8, a1
; RV32-NEXT: li a2, 40
@@ -223,9 +223,9 @@ define void @bswap_v4i64(ptr %x, ptr %y) {
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vle64.v v8, (a0)
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a1, 1044480
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsrl.vx v10, v8, a1
; RV32-NEXT: li a2, 40
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll
index 9f48fdb3608a0b..d8e294d32cd080 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll
@@ -308,8 +308,7 @@ define fastcc <32 x i32> @pass_vector_arg_direct_stack(<32 x i32> %x, <32 x i32>
; CHECK-NEXT: li a0, 1
; CHECK-NEXT: sd a0, 144(sp)
; CHECK-NEXT: li a0, 13
-; CHECK-NEXT: sd a0, 8(sp)
-; CHECK-NEXT: li a0, 12
+; CHECK-NEXT: li t0, 12
; CHECK-NEXT: li a1, 1
; CHECK-NEXT: li a2, 2
; CHECK-NEXT: li a3, 3
@@ -321,7 +320,8 @@ define fastcc <32 x i32> @pass_vector_arg_direct_stack(<32 x i32> %x, <32 x i32>
; CHECK-NEXT: li t4, 9
; CHECK-NEXT: li t5, 10
; CHECK-NEXT: li t6, 11
-; CHECK-NEXT: sd a0, 0(sp)
+; CHECK-NEXT: sd t0, 0(sp)
+; CHECK-NEXT: sd a0, 8(sp)
; CHECK-NEXT: li a0, 0
; CHECK-NEXT: vmv.v.i v16, 0
; CHECK-NEXT: call vector_arg_direct_stack
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll
index f5e6b929051939..c59b45a1d4f833 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll
@@ -1511,20 +1511,20 @@ define <15 x i64> @vp_ctlz_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 44(sp)
; RV32-NEXT: sw a1, 40(sp)
+; RV32-NEXT: sw a1, 44(sp)
; RV32-NEXT: lui a1, 209715
; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 36(sp)
; RV32-NEXT: sw a1, 32(sp)
+; RV32-NEXT: sw a1, 36(sp)
; RV32-NEXT: lui a1, 61681
; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: sw a1, 24(sp)
+; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: lui a1, 4112
; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: sw a1, 16(sp)
+; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
; RV32-NEXT: vor.vv v8, v8, v16, v0.t
@@ -1653,20 +1653,20 @@ define <15 x i64> @vp_ctlz_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) {
; RV32-NEXT: .cfi_def_cfa_offset 32
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: sw a1, 24(sp)
+; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: lui a1, 209715
; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: sw a1, 16(sp)
+; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: lui a1, 61681
; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: lui a1, 4112
; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
; RV32-NEXT: sw a1, 0(sp)
+; RV32-NEXT: sw a1, 4(sp)
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v16, v8, 1
; RV32-NEXT: vor.vv v8, v8, v16
@@ -1774,20 +1774,20 @@ define <16 x i64> @vp_ctlz_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 44(sp)
; RV32-NEXT: sw a1, 40(sp)
+; RV32-NEXT: sw a1, 44(sp)
; RV32-NEXT: lui a1, 209715
; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 36(sp)
; RV32-NEXT: sw a1, 32(sp)
+; RV32-NEXT: sw a1, 36(sp)
; RV32-NEXT: lui a1, 61681
; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: sw a1, 24(sp)
+; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: lui a1, 4112
; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: sw a1, 16(sp)
+; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
; RV32-NEXT: vor.vv v8, v8, v16, v0.t
@@ -1916,20 +1916,20 @@ define <16 x i64> @vp_ctlz_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) {
; RV32-NEXT: .cfi_def_cfa_offset 32
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: sw a1, 24(sp)
+; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: lui a1, 209715
; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: sw a1, 16(sp)
+; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: lui a1, 61681
; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: lui a1, 4112
; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
; RV32-NEXT: sw a1, 0(sp)
+; RV32-NEXT: sw a1, 4(sp)
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v16, v8, 1
; RV32-NEXT: vor.vv v8, v8, v16
@@ -2045,21 +2045,21 @@ define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl
; RV32-NEXT: vslidedown.vi v24, v0, 2
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 44(sp)
; RV32-NEXT: sw a1, 40(sp)
+; RV32-NEXT: sw a1, 44(sp)
; RV32-NEXT: lui a1, 209715
; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 36(sp)
; RV32-NEXT: sw a1, 32(sp)
+; RV32-NEXT: sw a1, 36(sp)
; RV32-NEXT: lui a1, 61681
; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: sw a1, 24(sp)
+; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: lui a1, 4112
; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: li a3, 16
; RV32-NEXT: sw a1, 16(sp)
+; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: mv a2, a0
; RV32-NEXT: bltu a0, a3, .LBB34_2
; RV32-NEXT: # %bb.1:
@@ -2472,21 +2472,21 @@ define <32 x i64> @vp_ctlz_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) {
; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 44(sp)
; RV32-NEXT: sw a1, 40(sp)
+; RV32-NEXT: sw a1, 44(sp)
; RV32-NEXT: lui a1, 209715
; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 36(sp)
; RV32-NEXT: sw a1, 32(sp)
+; RV32-NEXT: sw a1, 36(sp)
; RV32-NEXT: lui a1, 61681
; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: sw a1, 24(sp)
+; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: lui a1, 4112
; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: li a2, 16
; RV32-NEXT: sw a1, 16(sp)
+; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: mv a1, a0
; RV32-NEXT: bltu a0, a2, .LBB35_2
; RV32-NEXT: # %bb.1:
@@ -4147,20 +4147,20 @@ define <15 x i64> @vp_ctlz_zero_undef_v15i64(<15 x i64> %va, <15 x i1> %m, i32 z
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 44(sp)
; RV32-NEXT: sw a1, 40(sp)
+; RV32-NEXT: sw a1, 44(sp)
; RV32-NEXT: lui a1, 209715
; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 36(sp)
; RV32-NEXT: sw a1, 32(sp)
+; RV32-NEXT: sw a1, 36(sp)
; RV32-NEXT: lui a1, 61681
; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: sw a1, 24(sp)
+; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: lui a1, 4112
; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: sw a1, 16(sp)
+; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
; RV32-NEXT: vor.vv v8, v8, v16, v0.t
@@ -4289,20 +4289,20 @@ define <15 x i64> @vp_ctlz_zero_undef_v15i64_unmasked(<15 x i64> %va, i32 zeroex
; RV32-NEXT: .cfi_def_cfa_offset 32
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: sw a1, 24(sp)
+; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: lui a1, 209715
; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: sw a1, 16(sp)
+; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: lui a1, 61681
; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: lui a1, 4112
; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
; RV32-NEXT: sw a1, 0(sp)
+; RV32-NEXT: sw a1, 4(sp)
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v16, v8, 1
; RV32-NEXT: vor.vv v8, v8, v16
@@ -4408,20 +4408,20 @@ define <16 x i64> @vp_ctlz_zero_undef_v16i64(<16 x i64> %va, <16 x i1> %m, i32 z
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 44(sp)
; RV32-NEXT: sw a1, 40(sp)
+; RV32-NEXT: sw a1, 44(sp)
; RV32-NEXT: lui a1, 209715
; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 36(sp)
; RV32-NEXT: sw a1, 32(sp)
+; RV32-NEXT: sw a1, 36(sp)
; RV32-NEXT: lui a1, 61681
; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: sw a1, 24(sp)
+; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: lui a1, 4112
; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: sw a1, 16(sp)
+; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
; RV32-NEXT: vor.vv v8, v8, v16, v0.t
@@ -4550,20 +4550,20 @@ define <16 x i64> @vp_ctlz_zero_undef_v16i64_unmasked(<16 x i64> %va, i32 zeroex
; RV32-NEXT: .cfi_def_cfa_offset 32
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: sw a1, 24(sp)
+; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: lui a1, 209715
; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: sw a1, 16(sp)
+; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: lui a1, 61681
; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: lui a1, 4112
; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
; RV32-NEXT: sw a1, 0(sp)
+; RV32-NEXT: sw a1, 4(sp)
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v16, v8, 1
; RV32-NEXT: vor.vv v8, v8, v16
@@ -4677,21 +4677,21 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z
; RV32-NEXT: vslidedown.vi v24, v0, 2
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 44(sp)
; RV32-NEXT: sw a1, 40(sp)
+; RV32-NEXT: sw a1, 44(sp)
; RV32-NEXT: lui a1, 209715
; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 36(sp)
; RV32-NEXT: sw a1, 32(sp)
+; RV32-NEXT: sw a1, 36(sp)
; RV32-NEXT: lui a1, 61681
; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: sw a1, 24(sp)
+; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: lui a1, 4112
; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: li a3, 16
; RV32-NEXT: sw a1, 16(sp)
+; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: mv a2, a0
; RV32-NEXT: bltu a0, a3, .LBB70_2
; RV32-NEXT: # %bb.1:
@@ -5104,21 +5104,21 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64_unmasked(<32 x i64> %va, i32 zeroex
; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 44(sp)
; RV32-NEXT: sw a1, 40(sp)
+; RV32-NEXT: sw a1, 44(sp)
; RV32-NEXT: lui a1, 209715
; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 36(sp)
; RV32-NEXT: sw a1, 32(sp)
+; RV32-NEXT: sw a1, 36(sp)
; RV32-NEXT: lui a1, 61681
; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: sw a1, 24(sp)
+; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: lui a1, 4112
; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: li a2, 16
; RV32-NEXT: sw a1, 16(sp)
+; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: mv a1, a0
; RV32-NEXT: bltu a0, a2, .LBB71_2
; RV32-NEXT: # %bb.1:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll
index e90e52fba642b1..ffc1bfd240804e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll
@@ -1128,20 +1128,20 @@ define <15 x i64> @vp_ctpop_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %ev
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 24 * vlenb
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 44(sp)
; RV32-NEXT: sw a1, 40(sp)
+; RV32-NEXT: sw a1, 44(sp)
; RV32-NEXT: lui a1, 209715
; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 36(sp)
; RV32-NEXT: sw a1, 32(sp)
+; RV32-NEXT: sw a1, 36(sp)
; RV32-NEXT: lui a1, 61681
; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: sw a1, 24(sp)
+; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: lui a1, 4112
; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: sw a1, 16(sp)
+; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: addi a1, sp, 40
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a1), zero
@@ -1252,20 +1252,20 @@ define <15 x i64> @vp_ctpop_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) {
; RV32-NEXT: .cfi_def_cfa_offset 32
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: sw a1, 24(sp)
+; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: lui a1, 209715
; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: sw a1, 16(sp)
+; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: lui a1, 61681
; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: lui a1, 4112
; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
; RV32-NEXT: sw a1, 0(sp)
+; RV32-NEXT: sw a1, 4(sp)
; RV32-NEXT: addi a1, sp, 24
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a1), zero
@@ -1345,20 +1345,20 @@ define <16 x i64> @vp_ctpop_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %ev
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 24 * vlenb
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 44(sp)
; RV32-NEXT: sw a1, 40(sp)
+; RV32-NEXT: sw a1, 44(sp)
; RV32-NEXT: lui a1, 209715
; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 36(sp)
; RV32-NEXT: sw a1, 32(sp)
+; RV32-NEXT: sw a1, 36(sp)
; RV32-NEXT: lui a1, 61681
; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: sw a1, 24(sp)
+; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: lui a1, 4112
; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: sw a1, 16(sp)
+; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: addi a1, sp, 40
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a1), zero
@@ -1469,20 +1469,20 @@ define <16 x i64> @vp_ctpop_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) {
; RV32-NEXT: .cfi_def_cfa_offset 32
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: sw a1, 24(sp)
+; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: lui a1, 209715
; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: sw a1, 16(sp)
+; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: lui a1, 61681
; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: lui a1, 4112
; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
; RV32-NEXT: sw a1, 0(sp)
+; RV32-NEXT: sw a1, 4(sp)
; RV32-NEXT: addi a1, sp, 24
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a1), zero
@@ -1561,7 +1561,8 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev
; RV32-NEXT: sub sp, sp, a1
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 48 * vlenb
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 4
+; RV32-NEXT: li a2, 24
+; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 48
; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
@@ -1575,21 +1576,21 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev
; RV32-NEXT: vslidedown.vi v7, v0, 2
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 44(sp)
; RV32-NEXT: sw a1, 40(sp)
+; RV32-NEXT: sw a1, 44(sp)
; RV32-NEXT: lui a1, 209715
; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 36(sp)
; RV32-NEXT: sw a1, 32(sp)
+; RV32-NEXT: sw a1, 36(sp)
; RV32-NEXT: lui a1, 61681
; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: sw a1, 24(sp)
+; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: lui a1, 4112
; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: li a2, 16
; RV32-NEXT: sw a1, 16(sp)
+; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: mv a1, a0
; RV32-NEXT: bltu a0, a2, .LBB34_2
; RV32-NEXT: # %bb.1:
@@ -1606,8 +1607,7 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev
; RV32-NEXT: addi a2, sp, 32
; RV32-NEXT: vlse64.v v16, (a2), zero
; RV32-NEXT: csrr a2, vlenb
-; RV32-NEXT: li a3, 24
-; RV32-NEXT: mul a2, a2, a3
+; RV32-NEXT: slli a2, a2, 4
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 48
; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
@@ -1618,47 +1618,40 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev
; RV32-NEXT: addi a2, a2, 48
; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
-; RV32-NEXT: vsrl.vi v24, v8, 1, v0.t
+; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t
; RV32-NEXT: csrr a2, vlenb
-; RV32-NEXT: slli a2, a2, 5
+; RV32-NEXT: slli a2, a2, 3
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 48
-; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
-; RV32-NEXT: vand.vv v24, v24, v8, v0.t
+; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; RV32-NEXT: csrr a2, vlenb
-; RV32-NEXT: li a3, 40
-; RV32-NEXT: mul a2, a2, a3
+; RV32-NEXT: slli a2, a2, 5
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 48
-; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
-; RV32-NEXT: vsub.vv v24, v8, v24, v0.t
-; RV32-NEXT: vand.vv v8, v24, v16, v0.t
+; RV32-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
; RV32-NEXT: csrr a2, vlenb
-; RV32-NEXT: li a3, 40
-; RV32-NEXT: mul a2, a2, a3
+; RV32-NEXT: slli a2, a2, 3
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 48
-; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
-; RV32-NEXT: vsrl.vi v8, v24, 2, v0.t
-; RV32-NEXT: vand.vv v8, v8, v16, v0.t
+; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
+; RV32-NEXT: vand.vv v24, v8, v24, v0.t
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: li a3, 40
; RV32-NEXT: mul a2, a2, a3
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 48
-; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload
-; RV32-NEXT: vadd.vv v8, v16, v8, v0.t
-; RV32-NEXT: csrr a2, vlenb
-; RV32-NEXT: slli a2, a2, 3
-; RV32-NEXT: add a2, sp, a2
-; RV32-NEXT: addi a2, a2, 48
-; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
+; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
+; RV32-NEXT: vsub.vv v8, v8, v24, v0.t
+; RV32-NEXT: vand.vv v24, v8, v16, v0.t
+; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
+; RV32-NEXT: vand.vv v8, v8, v16, v0.t
+; RV32-NEXT: vadd.vv v16, v24, v8, v0.t
; RV32-NEXT: addi a2, sp, 24
; RV32-NEXT: addi a3, sp, 16
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a2), zero
+; RV32-NEXT: vlse64.v v24, (a2), zero
; RV32-NEXT: addi a2, sp, 48
-; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
+; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
; RV32-NEXT: vlse64.v v8, (a3), zero
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: li a3, 40
@@ -1666,15 +1659,11 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 48
; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
-; RV32-NEXT: csrr a2, vlenb
-; RV32-NEXT: slli a2, a2, 3
-; RV32-NEXT: add a2, sp, a2
-; RV32-NEXT: addi a2, a2, 48
-; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
+; RV32-NEXT: vmv8r.v v8, v16
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
-; RV32-NEXT: vsrl.vi v24, v8, 4, v0.t
-; RV32-NEXT: vadd.vv v8, v8, v24, v0.t
-; RV32-NEXT: vand.vv v16, v8, v16, v0.t
+; RV32-NEXT: vsrl.vi v16, v16, 4, v0.t
+; RV32-NEXT: vadd.vv v16, v8, v16, v0.t
+; RV32-NEXT: vand.vv v16, v16, v24, v0.t
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: li a2, 40
; RV32-NEXT: mul a1, a1, a2
@@ -1695,44 +1684,28 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev
; RV32-NEXT: and a0, a0, a2
; RV32-NEXT: vmv1r.v v0, v7
; RV32-NEXT: csrr a2, vlenb
-; RV32-NEXT: slli a2, a2, 4
+; RV32-NEXT: li a3, 24
+; RV32-NEXT: mul a2, a2, a3
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 48
; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vsrl.vi v24, v16, 1, v0.t
+; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 5
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vand.vv v8, v24, v8, v0.t
+; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vand.vv v8, v8, v24, v0.t
; RV32-NEXT: vsub.vv v8, v16, v8, v0.t
; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: li a2, 24
-; RV32-NEXT: mul a0, a0, a2
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vand.vv v16, v8, v16, v0.t
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 5
+; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vand.vv v16, v8, v24, v0.t
; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: li a2, 24
-; RV32-NEXT: mul a0, a0, a2
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vand.vv v8, v8, v16, v0.t
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 5
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: vand.vv v8, v8, v24, v0.t
; RV32-NEXT: vadd.vv v8, v16, v8, v0.t
; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
@@ -1862,21 +1835,21 @@ define <32 x i64> @vp_ctpop_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) {
; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 44(sp)
; RV32-NEXT: sw a1, 40(sp)
+; RV32-NEXT: sw a1, 44(sp)
; RV32-NEXT: lui a1, 209715
; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 36(sp)
; RV32-NEXT: sw a1, 32(sp)
+; RV32-NEXT: sw a1, 36(sp)
; RV32-NEXT: lui a1, 61681
; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: sw a1, 24(sp)
+; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: lui a1, 4112
; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: li a2, 16
; RV32-NEXT: sw a1, 16(sp)
+; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: mv a1, a0
; RV32-NEXT: bltu a0, a2, .LBB35_2
; RV32-NEXT: # %bb.1:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll
index dfad7881066a27..5b002207729735 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll
@@ -1271,20 +1271,20 @@ define <15 x i64> @vp_cttz_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 44(sp)
; RV32-NEXT: sw a1, 40(sp)
+; RV32-NEXT: sw a1, 44(sp)
; RV32-NEXT: lui a1, 209715
; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 36(sp)
; RV32-NEXT: sw a1, 32(sp)
+; RV32-NEXT: sw a1, 36(sp)
; RV32-NEXT: lui a1, 61681
; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: sw a1, 24(sp)
+; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: lui a1, 4112
; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: sw a1, 16(sp)
+; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: li a1, 1
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsub.vx v16, v8, a1, v0.t
@@ -1393,20 +1393,20 @@ define <15 x i64> @vp_cttz_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) {
; RV32-NEXT: .cfi_def_cfa_offset 32
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: sw a1, 24(sp)
+; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: lui a1, 209715
; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: sw a1, 16(sp)
+; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: lui a1, 61681
; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: lui a1, 4112
; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
; RV32-NEXT: sw a1, 0(sp)
+; RV32-NEXT: sw a1, 4(sp)
; RV32-NEXT: li a1, 1
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsub.vx v16, v8, a1
@@ -1494,20 +1494,20 @@ define <16 x i64> @vp_cttz_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 44(sp)
; RV32-NEXT: sw a1, 40(sp)
+; RV32-NEXT: sw a1, 44(sp)
; RV32-NEXT: lui a1, 209715
; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 36(sp)
; RV32-NEXT: sw a1, 32(sp)
+; RV32-NEXT: sw a1, 36(sp)
; RV32-NEXT: lui a1, 61681
; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: sw a1, 24(sp)
+; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: lui a1, 4112
; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: sw a1, 16(sp)
+; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: li a1, 1
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsub.vx v16, v8, a1, v0.t
@@ -1616,20 +1616,20 @@ define <16 x i64> @vp_cttz_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) {
; RV32-NEXT: .cfi_def_cfa_offset 32
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: sw a1, 24(sp)
+; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: lui a1, 209715
; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: sw a1, 16(sp)
+; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: lui a1, 61681
; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: lui a1, 4112
; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
; RV32-NEXT: sw a1, 0(sp)
+; RV32-NEXT: sw a1, 4(sp)
; RV32-NEXT: li a1, 1
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsub.vx v16, v8, a1
@@ -1725,21 +1725,21 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl
; RV32-NEXT: vslidedown.vi v24, v0, 2
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 44(sp)
; RV32-NEXT: sw a1, 40(sp)
+; RV32-NEXT: sw a1, 44(sp)
; RV32-NEXT: lui a1, 209715
; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 36(sp)
; RV32-NEXT: sw a1, 32(sp)
+; RV32-NEXT: sw a1, 36(sp)
; RV32-NEXT: lui a1, 61681
; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: sw a1, 24(sp)
+; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: lui a1, 4112
; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: li a3, 16
; RV32-NEXT: sw a1, 16(sp)
+; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: mv a2, a0
; RV32-NEXT: bltu a0, a3, .LBB34_2
; RV32-NEXT: # %bb.1:
@@ -2112,21 +2112,21 @@ define <32 x i64> @vp_cttz_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) {
; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 44(sp)
; RV32-NEXT: sw a1, 40(sp)
+; RV32-NEXT: sw a1, 44(sp)
; RV32-NEXT: lui a1, 209715
; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 36(sp)
; RV32-NEXT: sw a1, 32(sp)
+; RV32-NEXT: sw a1, 36(sp)
; RV32-NEXT: lui a1, 61681
; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: sw a1, 24(sp)
+; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: lui a1, 4112
; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: li a2, 16
; RV32-NEXT: sw a1, 16(sp)
+; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: mv a1, a0
; RV32-NEXT: bltu a0, a2, .LBB35_2
; RV32-NEXT: # %bb.1:
@@ -3507,20 +3507,20 @@ define <15 x i64> @vp_cttz_zero_undef_v15i64(<15 x i64> %va, <15 x i1> %m, i32 z
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 44(sp)
; RV32-NEXT: sw a1, 40(sp)
+; RV32-NEXT: sw a1, 44(sp)
; RV32-NEXT: lui a1, 209715
; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 36(sp)
; RV32-NEXT: sw a1, 32(sp)
+; RV32-NEXT: sw a1, 36(sp)
; RV32-NEXT: lui a1, 61681
; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: sw a1, 24(sp)
+; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: lui a1, 4112
; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: sw a1, 16(sp)
+; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: li a1, 1
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsub.vx v16, v8, a1, v0.t
@@ -3629,20 +3629,20 @@ define <15 x i64> @vp_cttz_zero_undef_v15i64_unmasked(<15 x i64> %va, i32 zeroex
; RV32-NEXT: .cfi_def_cfa_offset 32
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: sw a1, 24(sp)
+; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: lui a1, 209715
; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: sw a1, 16(sp)
+; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: lui a1, 61681
; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: lui a1, 4112
; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
; RV32-NEXT: sw a1, 0(sp)
+; RV32-NEXT: sw a1, 4(sp)
; RV32-NEXT: li a1, 1
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsub.vx v16, v8, a1
@@ -3728,20 +3728,20 @@ define <16 x i64> @vp_cttz_zero_undef_v16i64(<16 x i64> %va, <16 x i1> %m, i32 z
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 44(sp)
; RV32-NEXT: sw a1, 40(sp)
+; RV32-NEXT: sw a1, 44(sp)
; RV32-NEXT: lui a1, 209715
; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 36(sp)
; RV32-NEXT: sw a1, 32(sp)
+; RV32-NEXT: sw a1, 36(sp)
; RV32-NEXT: lui a1, 61681
; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: sw a1, 24(sp)
+; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: lui a1, 4112
; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: sw a1, 16(sp)
+; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: li a1, 1
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsub.vx v16, v8, a1, v0.t
@@ -3850,20 +3850,20 @@ define <16 x i64> @vp_cttz_zero_undef_v16i64_unmasked(<16 x i64> %va, i32 zeroex
; RV32-NEXT: .cfi_def_cfa_offset 32
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: sw a1, 24(sp)
+; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: lui a1, 209715
; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: sw a1, 16(sp)
+; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: lui a1, 61681
; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: lui a1, 4112
; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
; RV32-NEXT: sw a1, 0(sp)
+; RV32-NEXT: sw a1, 4(sp)
; RV32-NEXT: li a1, 1
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsub.vx v16, v8, a1
@@ -3957,21 +3957,21 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z
; RV32-NEXT: vslidedown.vi v24, v0, 2
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 44(sp)
; RV32-NEXT: sw a1, 40(sp)
+; RV32-NEXT: sw a1, 44(sp)
; RV32-NEXT: lui a1, 209715
; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 36(sp)
; RV32-NEXT: sw a1, 32(sp)
+; RV32-NEXT: sw a1, 36(sp)
; RV32-NEXT: lui a1, 61681
; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: sw a1, 24(sp)
+; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: lui a1, 4112
; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: li a3, 16
; RV32-NEXT: sw a1, 16(sp)
+; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: mv a2, a0
; RV32-NEXT: bltu a0, a3, .LBB70_2
; RV32-NEXT: # %bb.1:
@@ -4344,21 +4344,21 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64_unmasked(<32 x i64> %va, i32 zeroex
; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 44(sp)
; RV32-NEXT: sw a1, 40(sp)
+; RV32-NEXT: sw a1, 44(sp)
; RV32-NEXT: lui a1, 209715
; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 36(sp)
; RV32-NEXT: sw a1, 32(sp)
+; RV32-NEXT: sw a1, 36(sp)
; RV32-NEXT: lui a1, 61681
; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: sw a1, 24(sp)
+; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: lui a1, 4112
; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: li a2, 16
; RV32-NEXT: sw a1, 16(sp)
+; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: mv a1, a0
; RV32-NEXT: bltu a0, a2, .LBB71_2
; RV32-NEXT: # %bb.1:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-elen.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-elen.ll
index 81e20a29881630..04ebc7ca6b2b89 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-elen.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-elen.ll
@@ -42,10 +42,10 @@ define void @add_v2i64(ptr %x, ptr %y) {
; RV32-NEXT: add t0, a6, t0
; RV32-NEXT: sltu a4, t0, a6
; RV32-NEXT: add a1, a1, a4
-; RV32-NEXT: sw t0, 8(a0)
; RV32-NEXT: sw a2, 0(a0)
-; RV32-NEXT: sw a1, 12(a0)
; RV32-NEXT: sw a3, 4(a0)
+; RV32-NEXT: sw t0, 8(a0)
+; RV32-NEXT: sw a1, 12(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: add_v2i64:
@@ -56,8 +56,8 @@ define void @add_v2i64(ptr %x, ptr %y) {
; RV64-NEXT: ld a1, 8(a1)
; RV64-NEXT: add a2, a2, a4
; RV64-NEXT: add a1, a3, a1
-; RV64-NEXT: sd a1, 8(a0)
; RV64-NEXT: sd a2, 0(a0)
+; RV64-NEXT: sd a1, 8(a0)
; RV64-NEXT: ret
%a = load <2 x i64>, ptr %x
%b = load <2 x i64>, ptr %y
@@ -140,8 +140,8 @@ define void @fadd_v2f64(ptr %x, ptr %y) {
; CHECK-NEXT: fld fa2, 8(a1)
; CHECK-NEXT: fadd.d fa5, fa5, fa3
; CHECK-NEXT: fadd.d fa4, fa4, fa2
-; CHECK-NEXT: fsd fa4, 8(a0)
; CHECK-NEXT: fsd fa5, 0(a0)
+; CHECK-NEXT: fsd fa4, 8(a0)
; CHECK-NEXT: ret
%a = load <2 x double>, ptr %x
%b = load <2 x double>, ptr %y
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-expandload-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-expandload-int.ll
index 5bf8b07efc1daf..d34235127e8389 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-expandload-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-expandload-int.ll
@@ -603,10 +603,10 @@ define <1 x i64> @expandload_v1i64(ptr %base, <1 x i64> %src0, <1 x i1> %mask) {
; RV32-NEXT: # %bb.1: # %cond.load
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: lw a1, 4(a0)
-; RV32-NEXT: lw a0, 0(a0)
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: lw a1, 0(a0)
+; RV32-NEXT: lw a0, 4(a0)
+; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw a0, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v8, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
index 53059a4f28d42b..96b9b2bac2993c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
@@ -502,22 +502,22 @@ define <16 x float> @buildvec_v16f32(float %e0, float %e1, float %e2, float %e3,
; RV32-NEXT: addi s0, sp, 128
; RV32-NEXT: .cfi_def_cfa s0, 0
; RV32-NEXT: andi sp, sp, -64
-; RV32-NEXT: sw a7, 60(sp)
-; RV32-NEXT: sw a6, 56(sp)
-; RV32-NEXT: sw a5, 52(sp)
; RV32-NEXT: sw a4, 48(sp)
-; RV32-NEXT: sw a3, 44(sp)
-; RV32-NEXT: sw a2, 40(sp)
-; RV32-NEXT: sw a1, 36(sp)
+; RV32-NEXT: sw a5, 52(sp)
+; RV32-NEXT: sw a6, 56(sp)
+; RV32-NEXT: sw a7, 60(sp)
; RV32-NEXT: sw a0, 32(sp)
-; RV32-NEXT: fsw fa7, 28(sp)
-; RV32-NEXT: fsw fa6, 24(sp)
-; RV32-NEXT: fsw fa5, 20(sp)
+; RV32-NEXT: sw a1, 36(sp)
+; RV32-NEXT: sw a2, 40(sp)
+; RV32-NEXT: sw a3, 44(sp)
; RV32-NEXT: fsw fa4, 16(sp)
-; RV32-NEXT: fsw fa3, 12(sp)
-; RV32-NEXT: fsw fa2, 8(sp)
-; RV32-NEXT: fsw fa1, 4(sp)
+; RV32-NEXT: fsw fa5, 20(sp)
+; RV32-NEXT: fsw fa6, 24(sp)
+; RV32-NEXT: fsw fa7, 28(sp)
; RV32-NEXT: fsw fa0, 0(sp)
+; RV32-NEXT: fsw fa1, 4(sp)
+; RV32-NEXT: fsw fa2, 8(sp)
+; RV32-NEXT: fsw fa3, 12(sp)
; RV32-NEXT: mv a0, sp
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT: vle32.v v8, (a0)
@@ -546,22 +546,22 @@ define <16 x float> @buildvec_v16f32(float %e0, float %e1, float %e2, float %e3,
; RV64-NEXT: fmv.w.x ft5, a5
; RV64-NEXT: fmv.w.x ft6, a6
; RV64-NEXT: fmv.w.x ft7, a7
-; RV64-NEXT: fsw fa7, 28(sp)
-; RV64-NEXT: fsw fa6, 24(sp)
-; RV64-NEXT: fsw fa5, 20(sp)
; RV64-NEXT: fsw fa4, 16(sp)
-; RV64-NEXT: fsw fa3, 12(sp)
-; RV64-NEXT: fsw fa2, 8(sp)
-; RV64-NEXT: fsw fa1, 4(sp)
+; RV64-NEXT: fsw fa5, 20(sp)
+; RV64-NEXT: fsw fa6, 24(sp)
+; RV64-NEXT: fsw fa7, 28(sp)
; RV64-NEXT: fsw fa0, 0(sp)
-; RV64-NEXT: fsw ft7, 60(sp)
-; RV64-NEXT: fsw ft6, 56(sp)
-; RV64-NEXT: fsw ft5, 52(sp)
+; RV64-NEXT: fsw fa1, 4(sp)
+; RV64-NEXT: fsw fa2, 8(sp)
+; RV64-NEXT: fsw fa3, 12(sp)
; RV64-NEXT: fsw ft4, 48(sp)
-; RV64-NEXT: fsw ft3, 44(sp)
-; RV64-NEXT: fsw ft2, 40(sp)
-; RV64-NEXT: fsw ft1, 36(sp)
+; RV64-NEXT: fsw ft5, 52(sp)
+; RV64-NEXT: fsw ft6, 56(sp)
+; RV64-NEXT: fsw ft7, 60(sp)
; RV64-NEXT: fsw ft0, 32(sp)
+; RV64-NEXT: fsw ft1, 36(sp)
+; RV64-NEXT: fsw ft2, 40(sp)
+; RV64-NEXT: fsw ft3, 44(sp)
; RV64-NEXT: mv a0, sp
; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV64-NEXT: vle32.v v8, (a0)
@@ -621,42 +621,42 @@ define <32 x float> @buildvec_v32f32(float %e0, float %e1, float %e2, float %e3,
; RV32-NEXT: flw ft9, 36(s0)
; RV32-NEXT: flw ft10, 40(s0)
; RV32-NEXT: flw ft11, 44(s0)
-; RV32-NEXT: flw fs0, 60(s0)
-; RV32-NEXT: flw fs1, 56(s0)
-; RV32-NEXT: flw fs2, 52(s0)
-; RV32-NEXT: flw fs3, 48(s0)
-; RV32-NEXT: fsw fs0, 124(sp)
-; RV32-NEXT: fsw fs1, 120(sp)
-; RV32-NEXT: fsw fs2, 116(sp)
-; RV32-NEXT: fsw fs3, 112(sp)
-; RV32-NEXT: fsw ft11, 108(sp)
-; RV32-NEXT: fsw ft10, 104(sp)
-; RV32-NEXT: fsw ft9, 100(sp)
+; RV32-NEXT: flw fs0, 48(s0)
+; RV32-NEXT: flw fs1, 52(s0)
+; RV32-NEXT: flw fs2, 56(s0)
+; RV32-NEXT: flw fs3, 60(s0)
+; RV32-NEXT: fsw fs0, 112(sp)
+; RV32-NEXT: fsw fs1, 116(sp)
+; RV32-NEXT: fsw fs2, 120(sp)
+; RV32-NEXT: fsw fs3, 124(sp)
; RV32-NEXT: fsw ft8, 96(sp)
-; RV32-NEXT: fsw ft7, 92(sp)
-; RV32-NEXT: fsw ft6, 88(sp)
-; RV32-NEXT: fsw ft5, 84(sp)
+; RV32-NEXT: fsw ft9, 100(sp)
+; RV32-NEXT: fsw ft10, 104(sp)
+; RV32-NEXT: fsw ft11, 108(sp)
; RV32-NEXT: fsw ft4, 80(sp)
-; RV32-NEXT: fsw ft3, 76(sp)
-; RV32-NEXT: fsw ft2, 72(sp)
-; RV32-NEXT: fsw ft1, 68(sp)
+; RV32-NEXT: fsw ft5, 84(sp)
+; RV32-NEXT: fsw ft6, 88(sp)
+; RV32-NEXT: fsw ft7, 92(sp)
; RV32-NEXT: fsw ft0, 64(sp)
-; RV32-NEXT: sw a7, 60(sp)
-; RV32-NEXT: sw a6, 56(sp)
-; RV32-NEXT: sw a5, 52(sp)
+; RV32-NEXT: fsw ft1, 68(sp)
+; RV32-NEXT: fsw ft2, 72(sp)
+; RV32-NEXT: fsw ft3, 76(sp)
; RV32-NEXT: sw a4, 48(sp)
-; RV32-NEXT: sw a3, 44(sp)
-; RV32-NEXT: sw a2, 40(sp)
-; RV32-NEXT: sw a1, 36(sp)
+; RV32-NEXT: sw a5, 52(sp)
+; RV32-NEXT: sw a6, 56(sp)
+; RV32-NEXT: sw a7, 60(sp)
; RV32-NEXT: sw a0, 32(sp)
-; RV32-NEXT: fsw fa7, 28(sp)
-; RV32-NEXT: fsw fa6, 24(sp)
-; RV32-NEXT: fsw fa5, 20(sp)
+; RV32-NEXT: sw a1, 36(sp)
+; RV32-NEXT: sw a2, 40(sp)
+; RV32-NEXT: sw a3, 44(sp)
; RV32-NEXT: fsw fa4, 16(sp)
-; RV32-NEXT: fsw fa3, 12(sp)
-; RV32-NEXT: fsw fa2, 8(sp)
-; RV32-NEXT: fsw fa1, 4(sp)
+; RV32-NEXT: fsw fa5, 20(sp)
+; RV32-NEXT: fsw fa6, 24(sp)
+; RV32-NEXT: fsw fa7, 28(sp)
; RV32-NEXT: fsw fa0, 0(sp)
+; RV32-NEXT: fsw fa1, 4(sp)
+; RV32-NEXT: fsw fa2, 8(sp)
+; RV32-NEXT: fsw fa3, 12(sp)
; RV32-NEXT: li a0, 32
; RV32-NEXT: mv a1, sp
; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma
@@ -726,42 +726,42 @@ define <32 x float> @buildvec_v32f32(float %e0, float %e1, float %e2, float %e3,
; RV64-NEXT: flw fs5, 72(s0)
; RV64-NEXT: flw fs6, 80(s0)
; RV64-NEXT: flw fs7, 88(s0)
-; RV64-NEXT: flw fs8, 120(s0)
-; RV64-NEXT: flw fs9, 112(s0)
-; RV64-NEXT: flw fs10, 104(s0)
-; RV64-NEXT: flw fs11, 96(s0)
-; RV64-NEXT: fsw fs8, 124(sp)
-; RV64-NEXT: fsw fs9, 120(sp)
-; RV64-NEXT: fsw fs10, 116(sp)
-; RV64-NEXT: fsw fs11, 112(sp)
-; RV64-NEXT: fsw fs7, 108(sp)
-; RV64-NEXT: fsw fs6, 104(sp)
-; RV64-NEXT: fsw fs5, 100(sp)
+; RV64-NEXT: flw fs8, 96(s0)
+; RV64-NEXT: flw fs9, 104(s0)
+; RV64-NEXT: flw fs10, 112(s0)
+; RV64-NEXT: flw fs11, 120(s0)
+; RV64-NEXT: fsw fs8, 112(sp)
+; RV64-NEXT: fsw fs9, 116(sp)
+; RV64-NEXT: fsw fs10, 120(sp)
+; RV64-NEXT: fsw fs11, 124(sp)
; RV64-NEXT: fsw fs4, 96(sp)
-; RV64-NEXT: fsw fs3, 92(sp)
-; RV64-NEXT: fsw fs2, 88(sp)
-; RV64-NEXT: fsw fs1, 84(sp)
+; RV64-NEXT: fsw fs5, 100(sp)
+; RV64-NEXT: fsw fs6, 104(sp)
+; RV64-NEXT: fsw fs7, 108(sp)
; RV64-NEXT: fsw fs0, 80(sp)
-; RV64-NEXT: fsw ft11, 76(sp)
-; RV64-NEXT: fsw ft10, 72(sp)
-; RV64-NEXT: fsw ft9, 68(sp)
+; RV64-NEXT: fsw fs1, 84(sp)
+; RV64-NEXT: fsw fs2, 88(sp)
+; RV64-NEXT: fsw fs3, 92(sp)
; RV64-NEXT: fsw ft8, 64(sp)
-; RV64-NEXT: fsw fa7, 28(sp)
-; RV64-NEXT: fsw fa6, 24(sp)
-; RV64-NEXT: fsw fa5, 20(sp)
+; RV64-NEXT: fsw ft9, 68(sp)
+; RV64-NEXT: fsw ft10, 72(sp)
+; RV64-NEXT: fsw ft11, 76(sp)
; RV64-NEXT: fsw fa4, 16(sp)
-; RV64-NEXT: fsw fa3, 12(sp)
-; RV64-NEXT: fsw fa2, 8(sp)
-; RV64-NEXT: fsw fa1, 4(sp)
+; RV64-NEXT: fsw fa5, 20(sp)
+; RV64-NEXT: fsw fa6, 24(sp)
+; RV64-NEXT: fsw fa7, 28(sp)
; RV64-NEXT: fsw fa0, 0(sp)
-; RV64-NEXT: fsw ft7, 60(sp)
-; RV64-NEXT: fsw ft6, 56(sp)
-; RV64-NEXT: fsw ft5, 52(sp)
+; RV64-NEXT: fsw fa1, 4(sp)
+; RV64-NEXT: fsw fa2, 8(sp)
+; RV64-NEXT: fsw fa3, 12(sp)
; RV64-NEXT: fsw ft4, 48(sp)
-; RV64-NEXT: fsw ft3, 44(sp)
-; RV64-NEXT: fsw ft2, 40(sp)
-; RV64-NEXT: fsw ft1, 36(sp)
+; RV64-NEXT: fsw ft5, 52(sp)
+; RV64-NEXT: fsw ft6, 56(sp)
+; RV64-NEXT: fsw ft7, 60(sp)
; RV64-NEXT: fsw ft0, 32(sp)
+; RV64-NEXT: fsw ft1, 36(sp)
+; RV64-NEXT: fsw ft2, 40(sp)
+; RV64-NEXT: fsw ft3, 44(sp)
; RV64-NEXT: li a0, 32
; RV64-NEXT: mv a1, sp
; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma
@@ -830,14 +830,14 @@ define <8 x double> @buildvec_v8f64(double %e0, double %e1, double %e2, double %
; RV32-NEXT: addi s0, sp, 128
; RV32-NEXT: .cfi_def_cfa s0, 0
; RV32-NEXT: andi sp, sp, -64
-; RV32-NEXT: fsd fa7, 56(sp)
-; RV32-NEXT: fsd fa6, 48(sp)
-; RV32-NEXT: fsd fa5, 40(sp)
; RV32-NEXT: fsd fa4, 32(sp)
-; RV32-NEXT: fsd fa3, 24(sp)
-; RV32-NEXT: fsd fa2, 16(sp)
-; RV32-NEXT: fsd fa1, 8(sp)
+; RV32-NEXT: fsd fa5, 40(sp)
+; RV32-NEXT: fsd fa6, 48(sp)
+; RV32-NEXT: fsd fa7, 56(sp)
; RV32-NEXT: fsd fa0, 0(sp)
+; RV32-NEXT: fsd fa1, 8(sp)
+; RV32-NEXT: fsd fa2, 16(sp)
+; RV32-NEXT: fsd fa3, 24(sp)
; RV32-NEXT: mv a0, sp
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vle64.v v8, (a0)
@@ -858,14 +858,14 @@ define <8 x double> @buildvec_v8f64(double %e0, double %e1, double %e2, double %
; RV64-NEXT: addi s0, sp, 128
; RV64-NEXT: .cfi_def_cfa s0, 0
; RV64-NEXT: andi sp, sp, -64
-; RV64-NEXT: fsd fa7, 56(sp)
-; RV64-NEXT: fsd fa6, 48(sp)
-; RV64-NEXT: fsd fa5, 40(sp)
; RV64-NEXT: fsd fa4, 32(sp)
-; RV64-NEXT: fsd fa3, 24(sp)
-; RV64-NEXT: fsd fa2, 16(sp)
-; RV64-NEXT: fsd fa1, 8(sp)
+; RV64-NEXT: fsd fa5, 40(sp)
+; RV64-NEXT: fsd fa6, 48(sp)
+; RV64-NEXT: fsd fa7, 56(sp)
; RV64-NEXT: fsd fa0, 0(sp)
+; RV64-NEXT: fsd fa1, 8(sp)
+; RV64-NEXT: fsd fa2, 16(sp)
+; RV64-NEXT: fsd fa3, 24(sp)
; RV64-NEXT: mv a0, sp
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT: vle64.v v8, (a0)
@@ -909,26 +909,26 @@ define <16 x double> @buildvec_v16f64(double %e0, double %e1, double %e2, double
; RV32-NEXT: sw a6, 120(sp)
; RV32-NEXT: sw a7, 124(sp)
; RV32-NEXT: fld ft3, 120(sp)
-; RV32-NEXT: fld ft4, 24(s0)
-; RV32-NEXT: fld ft5, 16(s0)
-; RV32-NEXT: fld ft6, 8(s0)
-; RV32-NEXT: fld ft7, 0(s0)
-; RV32-NEXT: fsd ft4, 248(sp)
-; RV32-NEXT: fsd ft5, 240(sp)
-; RV32-NEXT: fsd ft6, 232(sp)
-; RV32-NEXT: fsd ft7, 224(sp)
-; RV32-NEXT: fsd fa7, 184(sp)
-; RV32-NEXT: fsd fa6, 176(sp)
-; RV32-NEXT: fsd fa5, 168(sp)
+; RV32-NEXT: fld ft4, 0(s0)
+; RV32-NEXT: fld ft5, 8(s0)
+; RV32-NEXT: fld ft6, 16(s0)
+; RV32-NEXT: fld ft7, 24(s0)
+; RV32-NEXT: fsd ft4, 224(sp)
+; RV32-NEXT: fsd ft5, 232(sp)
+; RV32-NEXT: fsd ft6, 240(sp)
+; RV32-NEXT: fsd ft7, 248(sp)
; RV32-NEXT: fsd fa4, 160(sp)
-; RV32-NEXT: fsd fa3, 152(sp)
-; RV32-NEXT: fsd fa2, 144(sp)
-; RV32-NEXT: fsd fa1, 136(sp)
+; RV32-NEXT: fsd fa5, 168(sp)
+; RV32-NEXT: fsd fa6, 176(sp)
+; RV32-NEXT: fsd fa7, 184(sp)
; RV32-NEXT: fsd fa0, 128(sp)
-; RV32-NEXT: fsd ft3, 216(sp)
-; RV32-NEXT: fsd ft2, 208(sp)
-; RV32-NEXT: fsd ft1, 200(sp)
+; RV32-NEXT: fsd fa1, 136(sp)
+; RV32-NEXT: fsd fa2, 144(sp)
+; RV32-NEXT: fsd fa3, 152(sp)
; RV32-NEXT: fsd ft0, 192(sp)
+; RV32-NEXT: fsd ft1, 200(sp)
+; RV32-NEXT: fsd ft2, 208(sp)
+; RV32-NEXT: fsd ft3, 216(sp)
; RV32-NEXT: addi a0, sp, 128
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vle64.v v8, (a0)
@@ -949,22 +949,22 @@ define <16 x double> @buildvec_v16f64(double %e0, double %e1, double %e2, double
; RV64-NEXT: addi s0, sp, 256
; RV64-NEXT: .cfi_def_cfa s0, 0
; RV64-NEXT: andi sp, sp, -128
-; RV64-NEXT: sd a7, 120(sp)
-; RV64-NEXT: sd a6, 112(sp)
-; RV64-NEXT: sd a5, 104(sp)
; RV64-NEXT: sd a4, 96(sp)
-; RV64-NEXT: sd a3, 88(sp)
-; RV64-NEXT: sd a2, 80(sp)
-; RV64-NEXT: sd a1, 72(sp)
+; RV64-NEXT: sd a5, 104(sp)
+; RV64-NEXT: sd a6, 112(sp)
+; RV64-NEXT: sd a7, 120(sp)
; RV64-NEXT: sd a0, 64(sp)
-; RV64-NEXT: fsd fa7, 56(sp)
-; RV64-NEXT: fsd fa6, 48(sp)
-; RV64-NEXT: fsd fa5, 40(sp)
+; RV64-NEXT: sd a1, 72(sp)
+; RV64-NEXT: sd a2, 80(sp)
+; RV64-NEXT: sd a3, 88(sp)
; RV64-NEXT: fsd fa4, 32(sp)
-; RV64-NEXT: fsd fa3, 24(sp)
-; RV64-NEXT: fsd fa2, 16(sp)
-; RV64-NEXT: fsd fa1, 8(sp)
+; RV64-NEXT: fsd fa5, 40(sp)
+; RV64-NEXT: fsd fa6, 48(sp)
+; RV64-NEXT: fsd fa7, 56(sp)
; RV64-NEXT: fsd fa0, 0(sp)
+; RV64-NEXT: fsd fa1, 8(sp)
+; RV64-NEXT: fsd fa2, 16(sp)
+; RV64-NEXT: fsd fa3, 24(sp)
; RV64-NEXT: mv a0, sp
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vle64.v v8, (a0)
@@ -1056,42 +1056,42 @@ define <32 x double> @buildvec_v32f64(double %e0, double %e1, double %e2, double
; RV32-NEXT: fld fs5, 104(s0)
; RV32-NEXT: fld fs6, 112(s0)
; RV32-NEXT: fld fs7, 120(s0)
-; RV32-NEXT: fld fs8, 152(s0)
-; RV32-NEXT: fld fs9, 144(s0)
-; RV32-NEXT: fld fs10, 136(s0)
-; RV32-NEXT: fld fs11, 128(s0)
-; RV32-NEXT: fsd fs8, 248(sp)
-; RV32-NEXT: fsd fs9, 240(sp)
-; RV32-NEXT: fsd fs10, 232(sp)
-; RV32-NEXT: fsd fs11, 224(sp)
-; RV32-NEXT: fsd fs7, 216(sp)
-; RV32-NEXT: fsd fs6, 208(sp)
-; RV32-NEXT: fsd fs5, 200(sp)
+; RV32-NEXT: fld fs8, 128(s0)
+; RV32-NEXT: fld fs9, 136(s0)
+; RV32-NEXT: fld fs10, 144(s0)
+; RV32-NEXT: fld fs11, 152(s0)
+; RV32-NEXT: fsd fs8, 224(sp)
+; RV32-NEXT: fsd fs9, 232(sp)
+; RV32-NEXT: fsd fs10, 240(sp)
+; RV32-NEXT: fsd fs11, 248(sp)
; RV32-NEXT: fsd fs4, 192(sp)
-; RV32-NEXT: fsd fs3, 184(sp)
-; RV32-NEXT: fsd fs2, 176(sp)
-; RV32-NEXT: fsd fs1, 168(sp)
+; RV32-NEXT: fsd fs5, 200(sp)
+; RV32-NEXT: fsd fs6, 208(sp)
+; RV32-NEXT: fsd fs7, 216(sp)
; RV32-NEXT: fsd fs0, 160(sp)
-; RV32-NEXT: fsd ft11, 152(sp)
-; RV32-NEXT: fsd ft10, 144(sp)
-; RV32-NEXT: fsd ft9, 136(sp)
+; RV32-NEXT: fsd fs1, 168(sp)
+; RV32-NEXT: fsd fs2, 176(sp)
+; RV32-NEXT: fsd fs3, 184(sp)
; RV32-NEXT: fsd ft8, 128(sp)
-; RV32-NEXT: fsd ft7, 376(sp)
-; RV32-NEXT: fsd ft6, 368(sp)
-; RV32-NEXT: fsd ft5, 360(sp)
+; RV32-NEXT: fsd ft9, 136(sp)
+; RV32-NEXT: fsd ft10, 144(sp)
+; RV32-NEXT: fsd ft11, 152(sp)
; RV32-NEXT: fsd ft4, 352(sp)
-; RV32-NEXT: fsd fa7, 312(sp)
-; RV32-NEXT: fsd fa6, 304(sp)
-; RV32-NEXT: fsd fa5, 296(sp)
+; RV32-NEXT: fsd ft5, 360(sp)
+; RV32-NEXT: fsd ft6, 368(sp)
+; RV32-NEXT: fsd ft7, 376(sp)
; RV32-NEXT: fsd fa4, 288(sp)
-; RV32-NEXT: fsd fa3, 280(sp)
-; RV32-NEXT: fsd fa2, 272(sp)
-; RV32-NEXT: fsd fa1, 264(sp)
+; RV32-NEXT: fsd fa5, 296(sp)
+; RV32-NEXT: fsd fa6, 304(sp)
+; RV32-NEXT: fsd fa7, 312(sp)
; RV32-NEXT: fsd fa0, 256(sp)
-; RV32-NEXT: fsd ft3, 344(sp)
-; RV32-NEXT: fsd ft2, 336(sp)
-; RV32-NEXT: fsd ft1, 328(sp)
+; RV32-NEXT: fsd fa1, 264(sp)
+; RV32-NEXT: fsd fa2, 272(sp)
+; RV32-NEXT: fsd fa3, 280(sp)
; RV32-NEXT: fsd ft0, 320(sp)
+; RV32-NEXT: fsd ft1, 328(sp)
+; RV32-NEXT: fsd ft2, 336(sp)
+; RV32-NEXT: fsd ft3, 344(sp)
; RV32-NEXT: addi a0, sp, 128
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vle64.v v16, (a0)
@@ -1150,38 +1150,38 @@ define <32 x double> @buildvec_v32f64(double %e0, double %e1, double %e2, double
; RV64-NEXT: fld fs1, 104(s0)
; RV64-NEXT: fld fs2, 112(s0)
; RV64-NEXT: fld fs3, 120(s0)
-; RV64-NEXT: sd a7, 248(sp)
-; RV64-NEXT: sd a6, 240(sp)
-; RV64-NEXT: sd a5, 232(sp)
; RV64-NEXT: sd a4, 224(sp)
-; RV64-NEXT: sd a3, 216(sp)
-; RV64-NEXT: sd a2, 208(sp)
-; RV64-NEXT: sd a1, 200(sp)
+; RV64-NEXT: sd a5, 232(sp)
+; RV64-NEXT: sd a6, 240(sp)
+; RV64-NEXT: sd a7, 248(sp)
; RV64-NEXT: sd a0, 192(sp)
-; RV64-NEXT: fsd fa7, 184(sp)
-; RV64-NEXT: fsd fa6, 176(sp)
-; RV64-NEXT: fsd fa5, 168(sp)
+; RV64-NEXT: sd a1, 200(sp)
+; RV64-NEXT: sd a2, 208(sp)
+; RV64-NEXT: sd a3, 216(sp)
; RV64-NEXT: fsd fa4, 160(sp)
-; RV64-NEXT: fsd fa3, 152(sp)
-; RV64-NEXT: fsd fa2, 144(sp)
-; RV64-NEXT: fsd fa1, 136(sp)
+; RV64-NEXT: fsd fa5, 168(sp)
+; RV64-NEXT: fsd fa6, 176(sp)
+; RV64-NEXT: fsd fa7, 184(sp)
; RV64-NEXT: fsd fa0, 128(sp)
-; RV64-NEXT: fsd fs3, 120(sp)
-; RV64-NEXT: fsd fs2, 112(sp)
-; RV64-NEXT: fsd fs1, 104(sp)
+; RV64-NEXT: fsd fa1, 136(sp)
+; RV64-NEXT: fsd fa2, 144(sp)
+; RV64-NEXT: fsd fa3, 152(sp)
; RV64-NEXT: fsd fs0, 96(sp)
-; RV64-NEXT: fsd ft11, 88(sp)
-; RV64-NEXT: fsd ft10, 80(sp)
-; RV64-NEXT: fsd ft9, 72(sp)
+; RV64-NEXT: fsd fs1, 104(sp)
+; RV64-NEXT: fsd fs2, 112(sp)
+; RV64-NEXT: fsd fs3, 120(sp)
; RV64-NEXT: fsd ft8, 64(sp)
-; RV64-NEXT: fsd ft7, 56(sp)
-; RV64-NEXT: fsd ft6, 48(sp)
-; RV64-NEXT: fsd ft5, 40(sp)
+; RV64-NEXT: fsd ft9, 72(sp)
+; RV64-NEXT: fsd ft10, 80(sp)
+; RV64-NEXT: fsd ft11, 88(sp)
; RV64-NEXT: fsd ft4, 32(sp)
-; RV64-NEXT: fsd ft3, 24(sp)
-; RV64-NEXT: fsd ft2, 16(sp)
-; RV64-NEXT: fsd ft1, 8(sp)
+; RV64-NEXT: fsd ft5, 40(sp)
+; RV64-NEXT: fsd ft6, 48(sp)
+; RV64-NEXT: fsd ft7, 56(sp)
; RV64-NEXT: fsd ft0, 0(sp)
+; RV64-NEXT: fsd ft1, 8(sp)
+; RV64-NEXT: fsd ft2, 16(sp)
+; RV64-NEXT: fsd ft3, 24(sp)
; RV64-NEXT: addi a0, sp, 128
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vle64.v v8, (a0)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll
index bfcc7017178e31..3b72bcbda614b9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll
@@ -126,9 +126,6 @@ define <3 x i15> @fp2si_v3f32_v3i15(<3 x float> %x) {
; ZVFH32-NEXT: vfncvt.rtz.x.f.w v9, v8
; ZVFH32-NEXT: vslidedown.vi v8, v9, 2
; ZVFH32-NEXT: vmv.x.s a1, v8
-; ZVFH32-NEXT: slli a2, a1, 17
-; ZVFH32-NEXT: srli a2, a2, 19
-; ZVFH32-NEXT: sh a2, 4(a0)
; ZVFH32-NEXT: vmv.x.s a2, v9
; ZVFH32-NEXT: lui a3, 8
; ZVFH32-NEXT: addi a3, a3, -1
@@ -136,11 +133,14 @@ define <3 x i15> @fp2si_v3f32_v3i15(<3 x float> %x) {
; ZVFH32-NEXT: vslidedown.vi v8, v9, 1
; ZVFH32-NEXT: vmv.x.s a4, v8
; ZVFH32-NEXT: and a3, a4, a3
+; ZVFH32-NEXT: slli a4, a1, 17
+; ZVFH32-NEXT: srli a4, a4, 19
; ZVFH32-NEXT: slli a3, a3, 15
; ZVFH32-NEXT: slli a1, a1, 30
; ZVFH32-NEXT: or a1, a2, a1
; ZVFH32-NEXT: or a1, a1, a3
; ZVFH32-NEXT: sw a1, 0(a0)
+; ZVFH32-NEXT: sh a4, 4(a0)
; ZVFH32-NEXT: ret
;
; ZVFH64-LABEL: fp2si_v3f32_v3i15:
@@ -160,10 +160,10 @@ define <3 x i15> @fp2si_v3f32_v3i15(<3 x float> %x) {
; ZVFH64-NEXT: slli a3, a3, 30
; ZVFH64-NEXT: or a1, a1, a3
; ZVFH64-NEXT: or a1, a1, a2
+; ZVFH64-NEXT: slli a2, a1, 19
+; ZVFH64-NEXT: srli a2, a2, 51
; ZVFH64-NEXT: sw a1, 0(a0)
-; ZVFH64-NEXT: slli a1, a1, 19
-; ZVFH64-NEXT: srli a1, a1, 51
-; ZVFH64-NEXT: sh a1, 4(a0)
+; ZVFH64-NEXT: sh a2, 4(a0)
; ZVFH64-NEXT: ret
;
; ZVFHMIN32-LABEL: fp2si_v3f32_v3i15:
@@ -172,9 +172,6 @@ define <3 x i15> @fp2si_v3f32_v3i15(<3 x float> %x) {
; ZVFHMIN32-NEXT: vfncvt.rtz.x.f.w v9, v8
; ZVFHMIN32-NEXT: vslidedown.vi v8, v9, 2
; ZVFHMIN32-NEXT: vmv.x.s a1, v8
-; ZVFHMIN32-NEXT: slli a2, a1, 17
-; ZVFHMIN32-NEXT: srli a2, a2, 19
-; ZVFHMIN32-NEXT: sh a2, 4(a0)
; ZVFHMIN32-NEXT: vmv.x.s a2, v9
; ZVFHMIN32-NEXT: lui a3, 8
; ZVFHMIN32-NEXT: addi a3, a3, -1
@@ -182,11 +179,14 @@ define <3 x i15> @fp2si_v3f32_v3i15(<3 x float> %x) {
; ZVFHMIN32-NEXT: vslidedown.vi v8, v9, 1
; ZVFHMIN32-NEXT: vmv.x.s a4, v8
; ZVFHMIN32-NEXT: and a3, a4, a3
+; ZVFHMIN32-NEXT: slli a4, a1, 17
+; ZVFHMIN32-NEXT: srli a4, a4, 19
; ZVFHMIN32-NEXT: slli a3, a3, 15
; ZVFHMIN32-NEXT: slli a1, a1, 30
; ZVFHMIN32-NEXT: or a1, a2, a1
; ZVFHMIN32-NEXT: or a1, a1, a3
; ZVFHMIN32-NEXT: sw a1, 0(a0)
+; ZVFHMIN32-NEXT: sh a4, 4(a0)
; ZVFHMIN32-NEXT: ret
;
; ZVFHMIN64-LABEL: fp2si_v3f32_v3i15:
@@ -206,10 +206,10 @@ define <3 x i15> @fp2si_v3f32_v3i15(<3 x float> %x) {
; ZVFHMIN64-NEXT: slli a3, a3, 30
; ZVFHMIN64-NEXT: or a1, a1, a3
; ZVFHMIN64-NEXT: or a1, a1, a2
+; ZVFHMIN64-NEXT: slli a2, a1, 19
+; ZVFHMIN64-NEXT: srli a2, a2, 51
; ZVFHMIN64-NEXT: sw a1, 0(a0)
-; ZVFHMIN64-NEXT: slli a1, a1, 19
-; ZVFHMIN64-NEXT: srli a1, a1, 51
-; ZVFHMIN64-NEXT: sh a1, 4(a0)
+; ZVFHMIN64-NEXT: sh a2, 4(a0)
; ZVFHMIN64-NEXT: ret
%z = fptosi <3 x float> %x to <3 x i15>
ret <3 x i15> %z
@@ -223,9 +223,6 @@ define <3 x i15> @fp2ui_v3f32_v3i15(<3 x float> %x) {
; ZVFH32-NEXT: vfncvt.rtz.x.f.w v9, v8
; ZVFH32-NEXT: vslidedown.vi v8, v9, 2
; ZVFH32-NEXT: vmv.x.s a1, v8
-; ZVFH32-NEXT: slli a2, a1, 17
-; ZVFH32-NEXT: srli a2, a2, 19
-; ZVFH32-NEXT: sh a2, 4(a0)
; ZVFH32-NEXT: vmv.x.s a2, v9
; ZVFH32-NEXT: lui a3, 16
; ZVFH32-NEXT: addi a3, a3, -1
@@ -233,11 +230,14 @@ define <3 x i15> @fp2ui_v3f32_v3i15(<3 x float> %x) {
; ZVFH32-NEXT: vslidedown.vi v8, v9, 1
; ZVFH32-NEXT: vmv.x.s a4, v8
; ZVFH32-NEXT: and a3, a4, a3
+; ZVFH32-NEXT: slli a4, a1, 17
+; ZVFH32-NEXT: srli a4, a4, 19
; ZVFH32-NEXT: slli a3, a3, 15
; ZVFH32-NEXT: slli a1, a1, 30
; ZVFH32-NEXT: or a1, a2, a1
; ZVFH32-NEXT: or a1, a1, a3
; ZVFH32-NEXT: sw a1, 0(a0)
+; ZVFH32-NEXT: sh a4, 4(a0)
; ZVFH32-NEXT: ret
;
; ZVFH64-LABEL: fp2ui_v3f32_v3i15:
@@ -257,10 +257,10 @@ define <3 x i15> @fp2ui_v3f32_v3i15(<3 x float> %x) {
; ZVFH64-NEXT: slli a3, a3, 30
; ZVFH64-NEXT: or a1, a1, a3
; ZVFH64-NEXT: or a1, a1, a2
+; ZVFH64-NEXT: slli a2, a1, 19
+; ZVFH64-NEXT: srli a2, a2, 51
; ZVFH64-NEXT: sw a1, 0(a0)
-; ZVFH64-NEXT: slli a1, a1, 19
-; ZVFH64-NEXT: srli a1, a1, 51
-; ZVFH64-NEXT: sh a1, 4(a0)
+; ZVFH64-NEXT: sh a2, 4(a0)
; ZVFH64-NEXT: ret
;
; ZVFHMIN32-LABEL: fp2ui_v3f32_v3i15:
@@ -269,9 +269,6 @@ define <3 x i15> @fp2ui_v3f32_v3i15(<3 x float> %x) {
; ZVFHMIN32-NEXT: vfncvt.rtz.x.f.w v9, v8
; ZVFHMIN32-NEXT: vslidedown.vi v8, v9, 2
; ZVFHMIN32-NEXT: vmv.x.s a1, v8
-; ZVFHMIN32-NEXT: slli a2, a1, 17
-; ZVFHMIN32-NEXT: srli a2, a2, 19
-; ZVFHMIN32-NEXT: sh a2, 4(a0)
; ZVFHMIN32-NEXT: vmv.x.s a2, v9
; ZVFHMIN32-NEXT: lui a3, 16
; ZVFHMIN32-NEXT: addi a3, a3, -1
@@ -279,11 +276,14 @@ define <3 x i15> @fp2ui_v3f32_v3i15(<3 x float> %x) {
; ZVFHMIN32-NEXT: vslidedown.vi v8, v9, 1
; ZVFHMIN32-NEXT: vmv.x.s a4, v8
; ZVFHMIN32-NEXT: and a3, a4, a3
+; ZVFHMIN32-NEXT: slli a4, a1, 17
+; ZVFHMIN32-NEXT: srli a4, a4, 19
; ZVFHMIN32-NEXT: slli a3, a3, 15
; ZVFHMIN32-NEXT: slli a1, a1, 30
; ZVFHMIN32-NEXT: or a1, a2, a1
; ZVFHMIN32-NEXT: or a1, a1, a3
; ZVFHMIN32-NEXT: sw a1, 0(a0)
+; ZVFHMIN32-NEXT: sh a4, 4(a0)
; ZVFHMIN32-NEXT: ret
;
; ZVFHMIN64-LABEL: fp2ui_v3f32_v3i15:
@@ -303,10 +303,10 @@ define <3 x i15> @fp2ui_v3f32_v3i15(<3 x float> %x) {
; ZVFHMIN64-NEXT: slli a3, a3, 30
; ZVFHMIN64-NEXT: or a1, a1, a3
; ZVFHMIN64-NEXT: or a1, a1, a2
+; ZVFHMIN64-NEXT: slli a2, a1, 19
+; ZVFHMIN64-NEXT: srli a2, a2, 51
; ZVFHMIN64-NEXT: sw a1, 0(a0)
-; ZVFHMIN64-NEXT: slli a1, a1, 19
-; ZVFHMIN64-NEXT: srli a1, a1, 51
-; ZVFHMIN64-NEXT: sh a1, 4(a0)
+; ZVFHMIN64-NEXT: sh a2, 4(a0)
; ZVFHMIN64-NEXT: ret
%z = fptoui <3 x float> %x to <3 x i15>
ret <3 x i15> %z
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
index 81fb86cd81cd35..7853e91ca53a39 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
@@ -195,8 +195,8 @@ define <4 x i64> @insertelt_v4i64(<4 x i64> %a, i64 %y) {
define void @insertelt_v4i64_store(ptr %x, i64 %y) {
; RV32-LABEL: insertelt_v4i64_store:
; RV32: # %bb.0:
-; RV32-NEXT: sw a2, 28(a0)
; RV32-NEXT: sw a1, 24(a0)
+; RV32-NEXT: sw a2, 28(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: insertelt_v4i64_store:
@@ -253,8 +253,8 @@ define <3 x i64> @insertelt_v3i64(<3 x i64> %a, i64 %y) {
define void @insertelt_v3i64_store(ptr %x, i64 %y) {
; RV32-LABEL: insertelt_v3i64_store:
; RV32: # %bb.0:
-; RV32-NEXT: sw a2, 20(a0)
; RV32-NEXT: sw a1, 16(a0)
+; RV32-NEXT: sw a2, 20(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: insertelt_v3i64_store:
@@ -382,8 +382,8 @@ define void @insertelt_v8i64_0_store(ptr %x) {
; RV32-LABEL: insertelt_v8i64_0_store:
; RV32: # %bb.0:
; RV32-NEXT: li a1, -1
-; RV32-NEXT: sw a1, 4(a0)
; RV32-NEXT: sw a1, 0(a0)
+; RV32-NEXT: sw a1, 4(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: insertelt_v8i64_0_store:
@@ -428,8 +428,8 @@ define void @insertelt_v8i64_store(ptr %x, i32 %idx) {
; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: add a0, a0, a1
; RV32-NEXT: li a1, -1
-; RV32-NEXT: sw a1, 4(a0)
; RV32-NEXT: sw a1, 0(a0)
+; RV32-NEXT: sw a1, 4(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: insertelt_v8i64_store:
@@ -460,9 +460,9 @@ define <8 x i64> @insertelt_c6_v8i64_0(<8 x i64> %a, ptr %x) {
define void @insertelt_c6_v8i64_0_store(ptr %x) {
; RV32-LABEL: insertelt_c6_v8i64_0_store:
; RV32: # %bb.0:
-; RV32-NEXT: sw zero, 4(a0)
; RV32-NEXT: li a1, 6
; RV32-NEXT: sw a1, 0(a0)
+; RV32-NEXT: sw zero, 4(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: insertelt_c6_v8i64_0_store:
@@ -506,9 +506,9 @@ define void @insertelt_c6_v8i64_store(ptr %x, i32 %idx) {
; RV32-NEXT: andi a1, a1, 7
; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: add a0, a0, a1
-; RV32-NEXT: sw zero, 4(a0)
; RV32-NEXT: li a1, 6
; RV32-NEXT: sw a1, 0(a0)
+; RV32-NEXT: sw zero, 4(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: insertelt_c6_v8i64_store:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
index 43cee6610e7872..7bf47d42de3b95 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -310,12 +310,12 @@ define <4 x i64> @buildvec_vid_step1_add0_v4i64() {
; RV64ZVE32-LABEL: buildvec_vid_step1_add0_v4i64:
; RV64ZVE32: # %bb.0:
; RV64ZVE32-NEXT: li a1, 3
-; RV64ZVE32-NEXT: sd a1, 24(a0)
-; RV64ZVE32-NEXT: li a1, 2
-; RV64ZVE32-NEXT: sd a1, 16(a0)
-; RV64ZVE32-NEXT: li a1, 1
-; RV64ZVE32-NEXT: sd a1, 8(a0)
+; RV64ZVE32-NEXT: li a2, 2
+; RV64ZVE32-NEXT: li a3, 1
; RV64ZVE32-NEXT: sd zero, 0(a0)
+; RV64ZVE32-NEXT: sd a3, 8(a0)
+; RV64ZVE32-NEXT: sd a2, 16(a0)
+; RV64ZVE32-NEXT: sd a1, 24(a0)
; RV64ZVE32-NEXT: ret
ret <4 x i64> <i64 0, i64 1, i64 2, i64 3>
}
@@ -340,12 +340,12 @@ define <4 x i64> @buildvec_vid_step2_add0_v4i64() {
; RV64ZVE32-LABEL: buildvec_vid_step2_add0_v4i64:
; RV64ZVE32: # %bb.0:
; RV64ZVE32-NEXT: li a1, 6
-; RV64ZVE32-NEXT: sd a1, 24(a0)
-; RV64ZVE32-NEXT: li a1, 4
-; RV64ZVE32-NEXT: sd a1, 16(a0)
-; RV64ZVE32-NEXT: li a1, 2
-; RV64ZVE32-NEXT: sd a1, 8(a0)
+; RV64ZVE32-NEXT: li a2, 4
+; RV64ZVE32-NEXT: li a3, 2
; RV64ZVE32-NEXT: sd zero, 0(a0)
+; RV64ZVE32-NEXT: sd a3, 8(a0)
+; RV64ZVE32-NEXT: sd a2, 16(a0)
+; RV64ZVE32-NEXT: sd a1, 24(a0)
; RV64ZVE32-NEXT: ret
ret <4 x i64> <i64 0, i64 2, i64 4, i64 6>
}
@@ -517,8 +517,8 @@ define void @buildvec_dominant0_v2i32(ptr %x) {
; RV64ZVE32-NEXT: lui a1, %hi(.LCPI38_0)
; RV64ZVE32-NEXT: ld a1, %lo(.LCPI38_0)(a1)
; RV64ZVE32-NEXT: li a2, -1
-; RV64ZVE32-NEXT: sd a2, 8(a0)
; RV64ZVE32-NEXT: sd a1, 0(a0)
+; RV64ZVE32-NEXT: sd a2, 8(a0)
; RV64ZVE32-NEXT: ret
store <2 x i64> <i64 2049638230412172402, i64 -1>, ptr %x
ret void
@@ -548,8 +548,8 @@ define void @buildvec_dominant1_optsize_v2i32(ptr %x) optsize {
; RV64ZVE32-NEXT: lui a1, %hi(.LCPI39_0)
; RV64ZVE32-NEXT: ld a1, %lo(.LCPI39_0)(a1)
; RV64ZVE32-NEXT: li a2, -1
-; RV64ZVE32-NEXT: sd a2, 8(a0)
; RV64ZVE32-NEXT: sd a1, 0(a0)
+; RV64ZVE32-NEXT: sd a2, 8(a0)
; RV64ZVE32-NEXT: ret
store <2 x i64> <i64 2049638230412172402, i64 -1>, ptr %x
ret void
@@ -1130,10 +1130,10 @@ define <4 x i64> @v4xi64_exact(i64 %a, i64 %b, i64 %c, i64 %d) vscale_range(2,2)
;
; RV64ZVE32-LABEL: v4xi64_exact:
; RV64ZVE32: # %bb.0:
-; RV64ZVE32-NEXT: sd a4, 24(a0)
-; RV64ZVE32-NEXT: sd a3, 16(a0)
-; RV64ZVE32-NEXT: sd a2, 8(a0)
; RV64ZVE32-NEXT: sd a1, 0(a0)
+; RV64ZVE32-NEXT: sd a2, 8(a0)
+; RV64ZVE32-NEXT: sd a3, 16(a0)
+; RV64ZVE32-NEXT: sd a4, 24(a0)
; RV64ZVE32-NEXT: ret
%v1 = insertelement <4 x i64> poison, i64 %a, i32 0
%v2 = insertelement <4 x i64> %v1, i64 %b, i32 1
@@ -1194,14 +1194,14 @@ define <8 x i64> @v8xi64_exact(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i
; RV64ZVE32-LABEL: v8xi64_exact:
; RV64ZVE32: # %bb.0:
; RV64ZVE32-NEXT: ld t0, 0(sp)
-; RV64ZVE32-NEXT: sd t0, 56(a0)
-; RV64ZVE32-NEXT: sd a7, 48(a0)
-; RV64ZVE32-NEXT: sd a6, 40(a0)
; RV64ZVE32-NEXT: sd a5, 32(a0)
-; RV64ZVE32-NEXT: sd a4, 24(a0)
-; RV64ZVE32-NEXT: sd a3, 16(a0)
-; RV64ZVE32-NEXT: sd a2, 8(a0)
+; RV64ZVE32-NEXT: sd a6, 40(a0)
+; RV64ZVE32-NEXT: sd a7, 48(a0)
+; RV64ZVE32-NEXT: sd t0, 56(a0)
; RV64ZVE32-NEXT: sd a1, 0(a0)
+; RV64ZVE32-NEXT: sd a2, 8(a0)
+; RV64ZVE32-NEXT: sd a3, 16(a0)
+; RV64ZVE32-NEXT: sd a4, 24(a0)
; RV64ZVE32-NEXT: ret
%v1 = insertelement <8 x i64> poison, i64 %a, i32 0
%v2 = insertelement <8 x i64> %v1, i64 %b, i32 1
@@ -1243,14 +1243,14 @@ define <8 x i64> @v8xi64_exact_equal_halves(i64 %a, i64 %b, i64 %c, i64 %d) vsca
;
; RV64ZVE32-LABEL: v8xi64_exact_equal_halves:
; RV64ZVE32: # %bb.0:
-; RV64ZVE32-NEXT: sd a4, 56(a0)
-; RV64ZVE32-NEXT: sd a3, 48(a0)
-; RV64ZVE32-NEXT: sd a2, 40(a0)
; RV64ZVE32-NEXT: sd a1, 32(a0)
-; RV64ZVE32-NEXT: sd a4, 24(a0)
-; RV64ZVE32-NEXT: sd a3, 16(a0)
-; RV64ZVE32-NEXT: sd a2, 8(a0)
+; RV64ZVE32-NEXT: sd a2, 40(a0)
+; RV64ZVE32-NEXT: sd a3, 48(a0)
+; RV64ZVE32-NEXT: sd a4, 56(a0)
; RV64ZVE32-NEXT: sd a1, 0(a0)
+; RV64ZVE32-NEXT: sd a2, 8(a0)
+; RV64ZVE32-NEXT: sd a3, 16(a0)
+; RV64ZVE32-NEXT: sd a4, 24(a0)
; RV64ZVE32-NEXT: ret
%v1 = insertelement <8 x i64> poison, i64 %a, i32 0
%v2 = insertelement <8 x i64> %v1, i64 %b, i32 1
@@ -1288,10 +1288,10 @@ define <8 x i64> @v8xi64_exact_undef_suffix(i64 %a, i64 %b, i64 %c, i64 %d) vsca
;
; RV64ZVE32-LABEL: v8xi64_exact_undef_suffix:
; RV64ZVE32: # %bb.0:
-; RV64ZVE32-NEXT: sd a4, 24(a0)
-; RV64ZVE32-NEXT: sd a3, 16(a0)
-; RV64ZVE32-NEXT: sd a2, 8(a0)
; RV64ZVE32-NEXT: sd a1, 0(a0)
+; RV64ZVE32-NEXT: sd a2, 8(a0)
+; RV64ZVE32-NEXT: sd a3, 16(a0)
+; RV64ZVE32-NEXT: sd a4, 24(a0)
; RV64ZVE32-NEXT: ret
%v1 = insertelement <8 x i64> poison, i64 %a, i32 0
%v2 = insertelement <8 x i64> %v1, i64 %b, i32 1
@@ -1325,10 +1325,10 @@ define <8 x i64> @v8xi64_exact_undef_prefix(i64 %a, i64 %b, i64 %c, i64 %d) vsca
;
; RV64ZVE32-LABEL: v8xi64_exact_undef_prefix:
; RV64ZVE32: # %bb.0:
-; RV64ZVE32-NEXT: sd a4, 56(a0)
-; RV64ZVE32-NEXT: sd a3, 48(a0)
-; RV64ZVE32-NEXT: sd a2, 40(a0)
; RV64ZVE32-NEXT: sd a1, 32(a0)
+; RV64ZVE32-NEXT: sd a2, 40(a0)
+; RV64ZVE32-NEXT: sd a3, 48(a0)
+; RV64ZVE32-NEXT: sd a4, 56(a0)
; RV64ZVE32-NEXT: ret
%v1 = insertelement <8 x i64> poison, i64 %a, i32 4
%v2 = insertelement <8 x i64> %v1, i64 %b, i32 5
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll
index 336a64b1b89ca8..f6354d3e2f619a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll
@@ -46,8 +46,8 @@ define void @splat_v2i64(ptr %x, i64 %y) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a2, 12(sp)
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw a2, 12(sp)
; RV32-NEXT: addi a1, sp, 8
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v8, (a1), zero
@@ -112,8 +112,8 @@ define void @splat_v4i64(ptr %x, i64 %y) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a2, 12(sp)
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw a2, 12(sp)
; RV32-NEXT: addi a1, sp, 8
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v8, (a1), zero
@@ -405,8 +405,8 @@ define void @vadd_vx_v16i64(ptr %a, i64 %b, ptr %c) {
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vle64.v v8, (a0)
-; RV32-NEXT: sw a2, 12(sp)
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw a2, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vlse64.v v16, (a0), zero
; RV32-NEXT: vadd.vv v8, v8, v16
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
index 70bda8c2da0f27..f9992a5de793d0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
@@ -5527,9 +5527,9 @@ define void @mulhu_vx_v2i64(ptr %x) {
; RV32-NEXT: vle64.v v8, (a0)
; RV32-NEXT: lui a1, 699051
; RV32-NEXT: addi a2, a1, -1366
-; RV32-NEXT: sw a2, 12(sp)
; RV32-NEXT: addi a1, a1, -1365
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw a2, 12(sp)
; RV32-NEXT: addi a1, sp, 8
; RV32-NEXT: vlse64.v v9, (a1), zero
; RV32-NEXT: vmulhu.vv v8, v8, v9
@@ -5632,9 +5632,9 @@ define void @mulhs_vx_v2i64(ptr %x) {
; RV32-NEXT: vle64.v v8, (a0)
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a2, a1, 1365
-; RV32-NEXT: sw a2, 12(sp)
; RV32-NEXT: addi a1, a1, 1366
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw a2, 12(sp)
; RV32-NEXT: addi a1, sp, 8
; RV32-NEXT: vlse64.v v9, (a1), zero
; RV32-NEXT: vmulh.vv v8, v8, v9
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll
index 805b371f1e3d56..7f1493544eabcf 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll
@@ -14,8 +14,8 @@ define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) {
; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT: vfmv.f.s fa0, v8
; RV32-NEXT: call llrintf
-; RV32-NEXT: sw a1, 4(sp)
; RV32-NEXT: sw a0, 0(sp)
+; RV32-NEXT: sw a1, 4(sp)
; RV32-NEXT: mv a0, sp
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v8, (a0), zero
@@ -332,64 +332,64 @@ define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) {
; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT: vfmv.f.s fa0, v8
; RV32-NEXT: call llrintf
-; RV32-NEXT: sw a1, 68(sp)
; RV32-NEXT: sw a0, 64(sp)
+; RV32-NEXT: sw a1, 68(sp)
; RV32-NEXT: addi a0, sp, 192
; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32-NEXT: vslidedown.vi v8, v8, 7
; RV32-NEXT: vfmv.f.s fa0, v8
; RV32-NEXT: call llrintf
-; RV32-NEXT: sw a1, 124(sp)
; RV32-NEXT: sw a0, 120(sp)
+; RV32-NEXT: sw a1, 124(sp)
; RV32-NEXT: addi a0, sp, 192
; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32-NEXT: vslidedown.vi v8, v8, 6
; RV32-NEXT: vfmv.f.s fa0, v8
; RV32-NEXT: call llrintf
-; RV32-NEXT: sw a1, 116(sp)
; RV32-NEXT: sw a0, 112(sp)
+; RV32-NEXT: sw a1, 116(sp)
; RV32-NEXT: addi a0, sp, 192
; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32-NEXT: vslidedown.vi v8, v8, 5
; RV32-NEXT: vfmv.f.s fa0, v8
; RV32-NEXT: call llrintf
-; RV32-NEXT: sw a1, 108(sp)
; RV32-NEXT: sw a0, 104(sp)
+; RV32-NEXT: sw a1, 108(sp)
; RV32-NEXT: addi a0, sp, 192
; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32-NEXT: vslidedown.vi v8, v8, 4
; RV32-NEXT: vfmv.f.s fa0, v8
; RV32-NEXT: call llrintf
-; RV32-NEXT: sw a1, 100(sp)
; RV32-NEXT: sw a0, 96(sp)
+; RV32-NEXT: sw a1, 100(sp)
; RV32-NEXT: addi a0, sp, 192
; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT: vslidedown.vi v8, v8, 3
; RV32-NEXT: vfmv.f.s fa0, v8
; RV32-NEXT: call llrintf
-; RV32-NEXT: sw a1, 92(sp)
; RV32-NEXT: sw a0, 88(sp)
+; RV32-NEXT: sw a1, 92(sp)
; RV32-NEXT: addi a0, sp, 192
; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT: vslidedown.vi v8, v8, 2
; RV32-NEXT: vfmv.f.s fa0, v8
; RV32-NEXT: call llrintf
-; RV32-NEXT: sw a1, 84(sp)
; RV32-NEXT: sw a0, 80(sp)
+; RV32-NEXT: sw a1, 84(sp)
; RV32-NEXT: addi a0, sp, 192
; RV32-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT: vslidedown.vi v8, v8, 1
; RV32-NEXT: vfmv.f.s fa0, v8
; RV32-NEXT: call llrintf
-; RV32-NEXT: sw a1, 76(sp)
; RV32-NEXT: sw a0, 72(sp)
+; RV32-NEXT: sw a1, 76(sp)
; RV32-NEXT: addi a0, sp, 64
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT: vle32.v v8, (a0)
@@ -413,36 +413,36 @@ define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) {
; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64-NEXT: vfmv.f.s fa5, v8
; RV64-NEXT: fcvt.l.s a0, fa5
-; RV64-NEXT: sd a0, 0(sp)
; RV64-NEXT: vslidedown.vi v10, v8, 7
; RV64-NEXT: vfmv.f.s fa5, v10
-; RV64-NEXT: fcvt.l.s a0, fa5
-; RV64-NEXT: sd a0, 56(sp)
+; RV64-NEXT: fcvt.l.s a1, fa5
; RV64-NEXT: vslidedown.vi v10, v8, 6
; RV64-NEXT: vfmv.f.s fa5, v10
-; RV64-NEXT: fcvt.l.s a0, fa5
-; RV64-NEXT: sd a0, 48(sp)
+; RV64-NEXT: fcvt.l.s a2, fa5
; RV64-NEXT: vslidedown.vi v10, v8, 5
; RV64-NEXT: vfmv.f.s fa5, v10
-; RV64-NEXT: fcvt.l.s a0, fa5
-; RV64-NEXT: sd a0, 40(sp)
+; RV64-NEXT: fcvt.l.s a3, fa5
; RV64-NEXT: vslidedown.vi v10, v8, 4
; RV64-NEXT: vfmv.f.s fa5, v10
-; RV64-NEXT: fcvt.l.s a0, fa5
-; RV64-NEXT: sd a0, 32(sp)
+; RV64-NEXT: fcvt.l.s a4, fa5
+; RV64-NEXT: sd a4, 32(sp)
+; RV64-NEXT: sd a3, 40(sp)
+; RV64-NEXT: sd a2, 48(sp)
+; RV64-NEXT: sd a1, 56(sp)
; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vslidedown.vi v9, v8, 3
; RV64-NEXT: vfmv.f.s fa5, v9
-; RV64-NEXT: fcvt.l.s a0, fa5
-; RV64-NEXT: sd a0, 24(sp)
+; RV64-NEXT: fcvt.l.s a1, fa5
; RV64-NEXT: vslidedown.vi v9, v8, 2
; RV64-NEXT: vfmv.f.s fa5, v9
-; RV64-NEXT: fcvt.l.s a0, fa5
-; RV64-NEXT: sd a0, 16(sp)
+; RV64-NEXT: fcvt.l.s a2, fa5
; RV64-NEXT: vslidedown.vi v8, v8, 1
; RV64-NEXT: vfmv.f.s fa5, v8
-; RV64-NEXT: fcvt.l.s a0, fa5
-; RV64-NEXT: sd a0, 8(sp)
+; RV64-NEXT: fcvt.l.s a3, fa5
+; RV64-NEXT: sd a0, 0(sp)
+; RV64-NEXT: sd a3, 8(sp)
+; RV64-NEXT: sd a2, 16(sp)
+; RV64-NEXT: sd a1, 24(sp)
; RV64-NEXT: mv a0, sp
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT: vle64.v v8, (a0)
@@ -478,99 +478,99 @@ define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) {
; RV32-NEXT: vse32.v v8, (a0)
; RV32-NEXT: flw fa0, 124(sp)
; RV32-NEXT: call llrintf
-; RV32-NEXT: sw a1, 252(sp)
; RV32-NEXT: sw a0, 248(sp)
+; RV32-NEXT: sw a1, 252(sp)
; RV32-NEXT: flw fa0, 120(sp)
; RV32-NEXT: call llrintf
-; RV32-NEXT: sw a1, 244(sp)
; RV32-NEXT: sw a0, 240(sp)
+; RV32-NEXT: sw a1, 244(sp)
; RV32-NEXT: flw fa0, 116(sp)
; RV32-NEXT: call llrintf
-; RV32-NEXT: sw a1, 236(sp)
; RV32-NEXT: sw a0, 232(sp)
+; RV32-NEXT: sw a1, 236(sp)
; RV32-NEXT: flw fa0, 112(sp)
; RV32-NEXT: call llrintf
-; RV32-NEXT: sw a1, 228(sp)
; RV32-NEXT: sw a0, 224(sp)
+; RV32-NEXT: sw a1, 228(sp)
; RV32-NEXT: flw fa0, 108(sp)
; RV32-NEXT: call llrintf
-; RV32-NEXT: sw a1, 220(sp)
; RV32-NEXT: sw a0, 216(sp)
+; RV32-NEXT: sw a1, 220(sp)
; RV32-NEXT: flw fa0, 104(sp)
; RV32-NEXT: call llrintf
-; RV32-NEXT: sw a1, 212(sp)
; RV32-NEXT: sw a0, 208(sp)
+; RV32-NEXT: sw a1, 212(sp)
; RV32-NEXT: flw fa0, 100(sp)
; RV32-NEXT: call llrintf
-; RV32-NEXT: sw a1, 204(sp)
; RV32-NEXT: sw a0, 200(sp)
+; RV32-NEXT: sw a1, 204(sp)
; RV32-NEXT: flw fa0, 96(sp)
; RV32-NEXT: call llrintf
-; RV32-NEXT: sw a1, 196(sp)
; RV32-NEXT: sw a0, 192(sp)
+; RV32-NEXT: sw a1, 196(sp)
; RV32-NEXT: addi a0, sp, 384
; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT: vfmv.f.s fa0, v8
; RV32-NEXT: call llrintf
-; RV32-NEXT: sw a1, 132(sp)
; RV32-NEXT: sw a0, 128(sp)
+; RV32-NEXT: sw a1, 132(sp)
; RV32-NEXT: addi a0, sp, 384
; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT: vslidedown.vi v8, v8, 3
; RV32-NEXT: vfmv.f.s fa0, v8
; RV32-NEXT: call llrintf
-; RV32-NEXT: sw a1, 156(sp)
; RV32-NEXT: sw a0, 152(sp)
+; RV32-NEXT: sw a1, 156(sp)
; RV32-NEXT: addi a0, sp, 384
; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT: vslidedown.vi v8, v8, 2
; RV32-NEXT: vfmv.f.s fa0, v8
; RV32-NEXT: call llrintf
-; RV32-NEXT: sw a1, 148(sp)
; RV32-NEXT: sw a0, 144(sp)
+; RV32-NEXT: sw a1, 148(sp)
; RV32-NEXT: addi a0, sp, 384
; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT: vslidedown.vi v8, v8, 1
; RV32-NEXT: vfmv.f.s fa0, v8
; RV32-NEXT: call llrintf
-; RV32-NEXT: sw a1, 140(sp)
; RV32-NEXT: sw a0, 136(sp)
+; RV32-NEXT: sw a1, 140(sp)
; RV32-NEXT: addi a0, sp, 384
; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32-NEXT: vslidedown.vi v8, v8, 7
; RV32-NEXT: vfmv.f.s fa0, v8
; RV32-NEXT: call llrintf
-; RV32-NEXT: sw a1, 188(sp)
; RV32-NEXT: sw a0, 184(sp)
+; RV32-NEXT: sw a1, 188(sp)
; RV32-NEXT: addi a0, sp, 384
; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32-NEXT: vslidedown.vi v8, v8, 6
; RV32-NEXT: vfmv.f.s fa0, v8
; RV32-NEXT: call llrintf
-; RV32-NEXT: sw a1, 180(sp)
; RV32-NEXT: sw a0, 176(sp)
+; RV32-NEXT: sw a1, 180(sp)
; RV32-NEXT: addi a0, sp, 384
; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32-NEXT: vslidedown.vi v8, v8, 5
; RV32-NEXT: vfmv.f.s fa0, v8
; RV32-NEXT: call llrintf
-; RV32-NEXT: sw a1, 172(sp)
; RV32-NEXT: sw a0, 168(sp)
+; RV32-NEXT: sw a1, 172(sp)
; RV32-NEXT: addi a0, sp, 384
; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32-NEXT: vslidedown.vi v8, v8, 4
; RV32-NEXT: vfmv.f.s fa0, v8
; RV32-NEXT: call llrintf
-; RV32-NEXT: sw a1, 164(sp)
; RV32-NEXT: sw a0, 160(sp)
+; RV32-NEXT: sw a1, 164(sp)
; RV32-NEXT: li a0, 32
; RV32-NEXT: addi a1, sp, 128
; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma
@@ -621,37 +621,37 @@ define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) {
; RV64-NEXT: sd a0, 192(sp)
; RV64-NEXT: vfmv.f.s fa5, v8
; RV64-NEXT: fcvt.l.s a0, fa5
-; RV64-NEXT: sd a0, 128(sp)
; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vslidedown.vi v10, v8, 3
; RV64-NEXT: vfmv.f.s fa5, v10
-; RV64-NEXT: fcvt.l.s a0, fa5
-; RV64-NEXT: sd a0, 152(sp)
+; RV64-NEXT: fcvt.l.s a1, fa5
; RV64-NEXT: vslidedown.vi v10, v8, 2
; RV64-NEXT: vfmv.f.s fa5, v10
-; RV64-NEXT: fcvt.l.s a0, fa5
-; RV64-NEXT: sd a0, 144(sp)
+; RV64-NEXT: fcvt.l.s a2, fa5
; RV64-NEXT: vslidedown.vi v10, v8, 1
; RV64-NEXT: vfmv.f.s fa5, v10
-; RV64-NEXT: fcvt.l.s a0, fa5
-; RV64-NEXT: sd a0, 136(sp)
+; RV64-NEXT: fcvt.l.s a3, fa5
+; RV64-NEXT: sd a0, 128(sp)
+; RV64-NEXT: sd a3, 136(sp)
+; RV64-NEXT: sd a2, 144(sp)
+; RV64-NEXT: sd a1, 152(sp)
; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64-NEXT: vslidedown.vi v10, v8, 7
; RV64-NEXT: vfmv.f.s fa5, v10
; RV64-NEXT: fcvt.l.s a0, fa5
-; RV64-NEXT: sd a0, 184(sp)
; RV64-NEXT: vslidedown.vi v10, v8, 6
; RV64-NEXT: vfmv.f.s fa5, v10
-; RV64-NEXT: fcvt.l.s a0, fa5
-; RV64-NEXT: sd a0, 176(sp)
+; RV64-NEXT: fcvt.l.s a1, fa5
; RV64-NEXT: vslidedown.vi v10, v8, 5
; RV64-NEXT: vfmv.f.s fa5, v10
-; RV64-NEXT: fcvt.l.s a0, fa5
-; RV64-NEXT: sd a0, 168(sp)
+; RV64-NEXT: fcvt.l.s a2, fa5
; RV64-NEXT: vslidedown.vi v8, v8, 4
; RV64-NEXT: vfmv.f.s fa5, v8
-; RV64-NEXT: fcvt.l.s a0, fa5
-; RV64-NEXT: sd a0, 160(sp)
+; RV64-NEXT: fcvt.l.s a3, fa5
+; RV64-NEXT: sd a3, 160(sp)
+; RV64-NEXT: sd a2, 168(sp)
+; RV64-NEXT: sd a1, 176(sp)
+; RV64-NEXT: sd a0, 184(sp)
; RV64-NEXT: addi a0, sp, 128
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vle64.v v8, (a0)
@@ -675,8 +675,8 @@ define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) {
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vfmv.f.s fa0, v8
; RV32-NEXT: call llrint
-; RV32-NEXT: sw a1, 4(sp)
; RV32-NEXT: sw a0, 0(sp)
+; RV32-NEXT: sw a1, 4(sp)
; RV32-NEXT: mv a0, sp
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v8, (a0), zero
@@ -879,51 +879,51 @@ define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) {
; RV32-NEXT: vse64.v v8, (a0)
; RV32-NEXT: fld fa0, 120(sp)
; RV32-NEXT: call llrint
-; RV32-NEXT: sw a1, 188(sp)
; RV32-NEXT: sw a0, 184(sp)
+; RV32-NEXT: sw a1, 188(sp)
; RV32-NEXT: fld fa0, 112(sp)
; RV32-NEXT: call llrint
-; RV32-NEXT: sw a1, 180(sp)
; RV32-NEXT: sw a0, 176(sp)
+; RV32-NEXT: sw a1, 180(sp)
; RV32-NEXT: fld fa0, 104(sp)
; RV32-NEXT: call llrint
-; RV32-NEXT: sw a1, 172(sp)
; RV32-NEXT: sw a0, 168(sp)
+; RV32-NEXT: sw a1, 172(sp)
; RV32-NEXT: fld fa0, 96(sp)
; RV32-NEXT: call llrint
-; RV32-NEXT: sw a1, 164(sp)
; RV32-NEXT: sw a0, 160(sp)
+; RV32-NEXT: sw a1, 164(sp)
; RV32-NEXT: addi a0, sp, 256
; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vfmv.f.s fa0, v8
; RV32-NEXT: call llrint
-; RV32-NEXT: sw a1, 132(sp)
; RV32-NEXT: sw a0, 128(sp)
+; RV32-NEXT: sw a1, 132(sp)
; RV32-NEXT: addi a0, sp, 256
; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vslidedown.vi v8, v8, 1
; RV32-NEXT: vfmv.f.s fa0, v8
; RV32-NEXT: call llrint
-; RV32-NEXT: sw a1, 140(sp)
; RV32-NEXT: sw a0, 136(sp)
+; RV32-NEXT: sw a1, 140(sp)
; RV32-NEXT: addi a0, sp, 256
; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
; RV32-NEXT: vslidedown.vi v8, v8, 3
; RV32-NEXT: vfmv.f.s fa0, v8
; RV32-NEXT: call llrint
-; RV32-NEXT: sw a1, 156(sp)
; RV32-NEXT: sw a0, 152(sp)
+; RV32-NEXT: sw a1, 156(sp)
; RV32-NEXT: addi a0, sp, 256
; RV32-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
; RV32-NEXT: vslidedown.vi v8, v8, 2
; RV32-NEXT: vfmv.f.s fa0, v8
; RV32-NEXT: call llrint
-; RV32-NEXT: sw a1, 148(sp)
; RV32-NEXT: sw a0, 144(sp)
+; RV32-NEXT: sw a1, 148(sp)
; RV32-NEXT: addi a0, sp, 128
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT: vle32.v v8, (a0)
@@ -961,21 +961,21 @@ define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) {
; RV64-NEXT: sd a0, 96(sp)
; RV64-NEXT: vfmv.f.s fa5, v8
; RV64-NEXT: fcvt.l.d a0, fa5
-; RV64-NEXT: sd a0, 64(sp)
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vslidedown.vi v10, v8, 1
; RV64-NEXT: vfmv.f.s fa5, v10
-; RV64-NEXT: fcvt.l.d a0, fa5
-; RV64-NEXT: sd a0, 72(sp)
+; RV64-NEXT: fcvt.l.d a1, fa5
; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
; RV64-NEXT: vslidedown.vi v10, v8, 3
; RV64-NEXT: vfmv.f.s fa5, v10
-; RV64-NEXT: fcvt.l.d a0, fa5
-; RV64-NEXT: sd a0, 88(sp)
+; RV64-NEXT: fcvt.l.d a2, fa5
; RV64-NEXT: vslidedown.vi v8, v8, 2
; RV64-NEXT: vfmv.f.s fa5, v8
-; RV64-NEXT: fcvt.l.d a0, fa5
-; RV64-NEXT: sd a0, 80(sp)
+; RV64-NEXT: fcvt.l.d a3, fa5
+; RV64-NEXT: sd a0, 64(sp)
+; RV64-NEXT: sd a1, 72(sp)
+; RV64-NEXT: sd a3, 80(sp)
+; RV64-NEXT: sd a2, 88(sp)
; RV64-NEXT: addi a0, sp, 64
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT: vle64.v v8, (a0)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll
index 43184a28ba3238..0b1c5ea56cec8e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll
@@ -315,36 +315,36 @@ define <8 x iXLen> @lrint_v8f32(<8 x float> %x) {
; RV64-i64-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64-i64-NEXT: vfmv.f.s fa5, v8
; RV64-i64-NEXT: fcvt.l.s a0, fa5
-; RV64-i64-NEXT: sd a0, 0(sp)
; RV64-i64-NEXT: vslidedown.vi v10, v8, 7
; RV64-i64-NEXT: vfmv.f.s fa5, v10
-; RV64-i64-NEXT: fcvt.l.s a0, fa5
-; RV64-i64-NEXT: sd a0, 56(sp)
+; RV64-i64-NEXT: fcvt.l.s a1, fa5
; RV64-i64-NEXT: vslidedown.vi v10, v8, 6
; RV64-i64-NEXT: vfmv.f.s fa5, v10
-; RV64-i64-NEXT: fcvt.l.s a0, fa5
-; RV64-i64-NEXT: sd a0, 48(sp)
+; RV64-i64-NEXT: fcvt.l.s a2, fa5
; RV64-i64-NEXT: vslidedown.vi v10, v8, 5
; RV64-i64-NEXT: vfmv.f.s fa5, v10
-; RV64-i64-NEXT: fcvt.l.s a0, fa5
-; RV64-i64-NEXT: sd a0, 40(sp)
+; RV64-i64-NEXT: fcvt.l.s a3, fa5
; RV64-i64-NEXT: vslidedown.vi v10, v8, 4
; RV64-i64-NEXT: vfmv.f.s fa5, v10
-; RV64-i64-NEXT: fcvt.l.s a0, fa5
-; RV64-i64-NEXT: sd a0, 32(sp)
+; RV64-i64-NEXT: fcvt.l.s a4, fa5
+; RV64-i64-NEXT: sd a4, 32(sp)
+; RV64-i64-NEXT: sd a3, 40(sp)
+; RV64-i64-NEXT: sd a2, 48(sp)
+; RV64-i64-NEXT: sd a1, 56(sp)
; RV64-i64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-i64-NEXT: vslidedown.vi v9, v8, 3
; RV64-i64-NEXT: vfmv.f.s fa5, v9
-; RV64-i64-NEXT: fcvt.l.s a0, fa5
-; RV64-i64-NEXT: sd a0, 24(sp)
+; RV64-i64-NEXT: fcvt.l.s a1, fa5
; RV64-i64-NEXT: vslidedown.vi v9, v8, 2
; RV64-i64-NEXT: vfmv.f.s fa5, v9
-; RV64-i64-NEXT: fcvt.l.s a0, fa5
-; RV64-i64-NEXT: sd a0, 16(sp)
+; RV64-i64-NEXT: fcvt.l.s a2, fa5
; RV64-i64-NEXT: vslidedown.vi v8, v8, 1
; RV64-i64-NEXT: vfmv.f.s fa5, v8
-; RV64-i64-NEXT: fcvt.l.s a0, fa5
-; RV64-i64-NEXT: sd a0, 8(sp)
+; RV64-i64-NEXT: fcvt.l.s a3, fa5
+; RV64-i64-NEXT: sd a0, 0(sp)
+; RV64-i64-NEXT: sd a3, 8(sp)
+; RV64-i64-NEXT: sd a2, 16(sp)
+; RV64-i64-NEXT: sd a1, 24(sp)
; RV64-i64-NEXT: mv a0, sp
; RV64-i64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-i64-NEXT: vle64.v v8, (a0)
@@ -399,37 +399,37 @@ define <16 x iXLen> @lrint_v16f32(<16 x float> %x) {
; RV32-NEXT: sw a0, 96(sp)
; RV32-NEXT: vfmv.f.s fa5, v8
; RV32-NEXT: fcvt.w.s a0, fa5
-; RV32-NEXT: sw a0, 64(sp)
; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT: vslidedown.vi v10, v8, 3
; RV32-NEXT: vfmv.f.s fa5, v10
-; RV32-NEXT: fcvt.w.s a0, fa5
-; RV32-NEXT: sw a0, 76(sp)
+; RV32-NEXT: fcvt.w.s a1, fa5
; RV32-NEXT: vslidedown.vi v10, v8, 2
; RV32-NEXT: vfmv.f.s fa5, v10
-; RV32-NEXT: fcvt.w.s a0, fa5
-; RV32-NEXT: sw a0, 72(sp)
+; RV32-NEXT: fcvt.w.s a2, fa5
; RV32-NEXT: vslidedown.vi v10, v8, 1
; RV32-NEXT: vfmv.f.s fa5, v10
-; RV32-NEXT: fcvt.w.s a0, fa5
-; RV32-NEXT: sw a0, 68(sp)
+; RV32-NEXT: fcvt.w.s a3, fa5
+; RV32-NEXT: sw a0, 64(sp)
+; RV32-NEXT: sw a3, 68(sp)
+; RV32-NEXT: sw a2, 72(sp)
+; RV32-NEXT: sw a1, 76(sp)
; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32-NEXT: vslidedown.vi v10, v8, 7
; RV32-NEXT: vfmv.f.s fa5, v10
; RV32-NEXT: fcvt.w.s a0, fa5
-; RV32-NEXT: sw a0, 92(sp)
; RV32-NEXT: vslidedown.vi v10, v8, 6
; RV32-NEXT: vfmv.f.s fa5, v10
-; RV32-NEXT: fcvt.w.s a0, fa5
-; RV32-NEXT: sw a0, 88(sp)
+; RV32-NEXT: fcvt.w.s a1, fa5
; RV32-NEXT: vslidedown.vi v10, v8, 5
; RV32-NEXT: vfmv.f.s fa5, v10
-; RV32-NEXT: fcvt.w.s a0, fa5
-; RV32-NEXT: sw a0, 84(sp)
+; RV32-NEXT: fcvt.w.s a2, fa5
; RV32-NEXT: vslidedown.vi v8, v8, 4
; RV32-NEXT: vfmv.f.s fa5, v8
-; RV32-NEXT: fcvt.w.s a0, fa5
-; RV32-NEXT: sw a0, 80(sp)
+; RV32-NEXT: fcvt.w.s a3, fa5
+; RV32-NEXT: sw a3, 80(sp)
+; RV32-NEXT: sw a2, 84(sp)
+; RV32-NEXT: sw a1, 88(sp)
+; RV32-NEXT: sw a0, 92(sp)
; RV32-NEXT: addi a0, sp, 64
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT: vle32.v v8, (a0)
@@ -479,37 +479,37 @@ define <16 x iXLen> @lrint_v16f32(<16 x float> %x) {
; RV64-i32-NEXT: sw a0, 96(sp)
; RV64-i32-NEXT: vfmv.f.s fa5, v8
; RV64-i32-NEXT: fcvt.l.s a0, fa5
-; RV64-i32-NEXT: sw a0, 64(sp)
; RV64-i32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-i32-NEXT: vslidedown.vi v10, v8, 3
; RV64-i32-NEXT: vfmv.f.s fa5, v10
-; RV64-i32-NEXT: fcvt.l.s a0, fa5
-; RV64-i32-NEXT: sw a0, 76(sp)
+; RV64-i32-NEXT: fcvt.l.s a1, fa5
; RV64-i32-NEXT: vslidedown.vi v10, v8, 2
; RV64-i32-NEXT: vfmv.f.s fa5, v10
-; RV64-i32-NEXT: fcvt.l.s a0, fa5
-; RV64-i32-NEXT: sw a0, 72(sp)
+; RV64-i32-NEXT: fcvt.l.s a2, fa5
; RV64-i32-NEXT: vslidedown.vi v10, v8, 1
; RV64-i32-NEXT: vfmv.f.s fa5, v10
-; RV64-i32-NEXT: fcvt.l.s a0, fa5
-; RV64-i32-NEXT: sw a0, 68(sp)
+; RV64-i32-NEXT: fcvt.l.s a3, fa5
+; RV64-i32-NEXT: sw a0, 64(sp)
+; RV64-i32-NEXT: sw a3, 68(sp)
+; RV64-i32-NEXT: sw a2, 72(sp)
+; RV64-i32-NEXT: sw a1, 76(sp)
; RV64-i32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64-i32-NEXT: vslidedown.vi v10, v8, 7
; RV64-i32-NEXT: vfmv.f.s fa5, v10
; RV64-i32-NEXT: fcvt.l.s a0, fa5
-; RV64-i32-NEXT: sw a0, 92(sp)
; RV64-i32-NEXT: vslidedown.vi v10, v8, 6
; RV64-i32-NEXT: vfmv.f.s fa5, v10
-; RV64-i32-NEXT: fcvt.l.s a0, fa5
-; RV64-i32-NEXT: sw a0, 88(sp)
+; RV64-i32-NEXT: fcvt.l.s a1, fa5
; RV64-i32-NEXT: vslidedown.vi v10, v8, 5
; RV64-i32-NEXT: vfmv.f.s fa5, v10
-; RV64-i32-NEXT: fcvt.l.s a0, fa5
-; RV64-i32-NEXT: sw a0, 84(sp)
+; RV64-i32-NEXT: fcvt.l.s a2, fa5
; RV64-i32-NEXT: vslidedown.vi v8, v8, 4
; RV64-i32-NEXT: vfmv.f.s fa5, v8
-; RV64-i32-NEXT: fcvt.l.s a0, fa5
-; RV64-i32-NEXT: sw a0, 80(sp)
+; RV64-i32-NEXT: fcvt.l.s a3, fa5
+; RV64-i32-NEXT: sw a3, 80(sp)
+; RV64-i32-NEXT: sw a2, 84(sp)
+; RV64-i32-NEXT: sw a1, 88(sp)
+; RV64-i32-NEXT: sw a0, 92(sp)
; RV64-i32-NEXT: addi a0, sp, 64
; RV64-i32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV64-i32-NEXT: vle32.v v8, (a0)
@@ -559,37 +559,37 @@ define <16 x iXLen> @lrint_v16f32(<16 x float> %x) {
; RV64-i64-NEXT: sd a0, 192(sp)
; RV64-i64-NEXT: vfmv.f.s fa5, v8
; RV64-i64-NEXT: fcvt.l.s a0, fa5
-; RV64-i64-NEXT: sd a0, 128(sp)
; RV64-i64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-i64-NEXT: vslidedown.vi v10, v8, 3
; RV64-i64-NEXT: vfmv.f.s fa5, v10
-; RV64-i64-NEXT: fcvt.l.s a0, fa5
-; RV64-i64-NEXT: sd a0, 152(sp)
+; RV64-i64-NEXT: fcvt.l.s a1, fa5
; RV64-i64-NEXT: vslidedown.vi v10, v8, 2
; RV64-i64-NEXT: vfmv.f.s fa5, v10
-; RV64-i64-NEXT: fcvt.l.s a0, fa5
-; RV64-i64-NEXT: sd a0, 144(sp)
+; RV64-i64-NEXT: fcvt.l.s a2, fa5
; RV64-i64-NEXT: vslidedown.vi v10, v8, 1
; RV64-i64-NEXT: vfmv.f.s fa5, v10
-; RV64-i64-NEXT: fcvt.l.s a0, fa5
-; RV64-i64-NEXT: sd a0, 136(sp)
+; RV64-i64-NEXT: fcvt.l.s a3, fa5
+; RV64-i64-NEXT: sd a0, 128(sp)
+; RV64-i64-NEXT: sd a3, 136(sp)
+; RV64-i64-NEXT: sd a2, 144(sp)
+; RV64-i64-NEXT: sd a1, 152(sp)
; RV64-i64-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64-i64-NEXT: vslidedown.vi v10, v8, 7
; RV64-i64-NEXT: vfmv.f.s fa5, v10
; RV64-i64-NEXT: fcvt.l.s a0, fa5
-; RV64-i64-NEXT: sd a0, 184(sp)
; RV64-i64-NEXT: vslidedown.vi v10, v8, 6
; RV64-i64-NEXT: vfmv.f.s fa5, v10
-; RV64-i64-NEXT: fcvt.l.s a0, fa5
-; RV64-i64-NEXT: sd a0, 176(sp)
+; RV64-i64-NEXT: fcvt.l.s a1, fa5
; RV64-i64-NEXT: vslidedown.vi v10, v8, 5
; RV64-i64-NEXT: vfmv.f.s fa5, v10
-; RV64-i64-NEXT: fcvt.l.s a0, fa5
-; RV64-i64-NEXT: sd a0, 168(sp)
+; RV64-i64-NEXT: fcvt.l.s a2, fa5
; RV64-i64-NEXT: vslidedown.vi v8, v8, 4
; RV64-i64-NEXT: vfmv.f.s fa5, v8
-; RV64-i64-NEXT: fcvt.l.s a0, fa5
-; RV64-i64-NEXT: sd a0, 160(sp)
+; RV64-i64-NEXT: fcvt.l.s a3, fa5
+; RV64-i64-NEXT: sd a3, 160(sp)
+; RV64-i64-NEXT: sd a2, 168(sp)
+; RV64-i64-NEXT: sd a1, 176(sp)
+; RV64-i64-NEXT: sd a0, 184(sp)
; RV64-i64-NEXT: addi a0, sp, 128
; RV64-i64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-i64-NEXT: vle64.v v8, (a0)
@@ -880,21 +880,21 @@ define <8 x iXLen> @lrint_v8f64(<8 x double> %x) {
; RV64-i64-NEXT: sd a0, 96(sp)
; RV64-i64-NEXT: vfmv.f.s fa5, v8
; RV64-i64-NEXT: fcvt.l.d a0, fa5
-; RV64-i64-NEXT: sd a0, 64(sp)
; RV64-i64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-i64-NEXT: vslidedown.vi v10, v8, 1
; RV64-i64-NEXT: vfmv.f.s fa5, v10
-; RV64-i64-NEXT: fcvt.l.d a0, fa5
-; RV64-i64-NEXT: sd a0, 72(sp)
+; RV64-i64-NEXT: fcvt.l.d a1, fa5
; RV64-i64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
; RV64-i64-NEXT: vslidedown.vi v10, v8, 3
; RV64-i64-NEXT: vfmv.f.s fa5, v10
-; RV64-i64-NEXT: fcvt.l.d a0, fa5
-; RV64-i64-NEXT: sd a0, 88(sp)
+; RV64-i64-NEXT: fcvt.l.d a2, fa5
; RV64-i64-NEXT: vslidedown.vi v8, v8, 2
; RV64-i64-NEXT: vfmv.f.s fa5, v8
-; RV64-i64-NEXT: fcvt.l.d a0, fa5
-; RV64-i64-NEXT: sd a0, 80(sp)
+; RV64-i64-NEXT: fcvt.l.d a3, fa5
+; RV64-i64-NEXT: sd a0, 64(sp)
+; RV64-i64-NEXT: sd a1, 72(sp)
+; RV64-i64-NEXT: sd a3, 80(sp)
+; RV64-i64-NEXT: sd a2, 88(sp)
; RV64-i64-NEXT: addi a0, sp, 64
; RV64-i64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-i64-NEXT: vle64.v v8, (a0)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
index 9cd38056364494..9c6ec6aef60347 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
@@ -341,8 +341,8 @@ define <2 x i64> @mgather_v2i8_sextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x
; RV32ZVE32F-NEXT: vmv.x.s a3, v9
; RV32ZVE32F-NEXT: srai a4, a3, 31
; RV32ZVE32F-NEXT: sw a3, 0(a0)
-; RV32ZVE32F-NEXT: sw a1, 8(a0)
; RV32ZVE32F-NEXT: sw a4, 4(a0)
+; RV32ZVE32F-NEXT: sw a1, 8(a0)
; RV32ZVE32F-NEXT: sw a2, 12(a0)
; RV32ZVE32F-NEXT: ret
;
@@ -402,10 +402,10 @@ define <2 x i64> @mgather_v2i8_zextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x
; RV32ZVE32F-NEXT: andi a1, a1, 255
; RV32ZVE32F-NEXT: vmv.x.s a2, v9
; RV32ZVE32F-NEXT: andi a2, a2, 255
-; RV32ZVE32F-NEXT: sw zero, 12(a0)
-; RV32ZVE32F-NEXT: sw zero, 4(a0)
; RV32ZVE32F-NEXT: sw a2, 0(a0)
+; RV32ZVE32F-NEXT: sw zero, 4(a0)
; RV32ZVE32F-NEXT: sw a1, 8(a0)
+; RV32ZVE32F-NEXT: sw zero, 12(a0)
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v2i8_zextload_v2i64:
@@ -1034,8 +1034,8 @@ define <2 x i64> @mgather_v2i16_sextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2
; RV32ZVE32F-NEXT: vmv.x.s a3, v9
; RV32ZVE32F-NEXT: srai a4, a3, 31
; RV32ZVE32F-NEXT: sw a3, 0(a0)
-; RV32ZVE32F-NEXT: sw a1, 8(a0)
; RV32ZVE32F-NEXT: sw a4, 4(a0)
+; RV32ZVE32F-NEXT: sw a1, 8(a0)
; RV32ZVE32F-NEXT: sw a2, 12(a0)
; RV32ZVE32F-NEXT: ret
;
@@ -1097,10 +1097,10 @@ define <2 x i64> @mgather_v2i16_zextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2
; RV32ZVE32F-NEXT: and a1, a1, a2
; RV32ZVE32F-NEXT: vmv.x.s a3, v9
; RV32ZVE32F-NEXT: and a2, a3, a2
-; RV32ZVE32F-NEXT: sw zero, 12(a0)
-; RV32ZVE32F-NEXT: sw zero, 4(a0)
; RV32ZVE32F-NEXT: sw a2, 0(a0)
+; RV32ZVE32F-NEXT: sw zero, 4(a0)
; RV32ZVE32F-NEXT: sw a1, 8(a0)
+; RV32ZVE32F-NEXT: sw zero, 12(a0)
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v2i16_zextload_v2i64:
@@ -2154,8 +2154,8 @@ define <2 x i64> @mgather_v2i32_zextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2
; RV32ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vslidedown.vi v8, v9, 1
-; RV32ZVE32F-NEXT: sw zero, 12(a0)
; RV32ZVE32F-NEXT: sw zero, 4(a0)
+; RV32ZVE32F-NEXT: sw zero, 12(a0)
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vse32.v v9, (a0)
; RV32ZVE32F-NEXT: addi a0, a0, 8
@@ -3775,36 +3775,36 @@ define <4 x i64> @mgather_truemask_v4i64(<4 x ptr> %ptrs, <4 x i64> %passthru) {
; RV32ZVE32F-NEXT: lw a3, 4(a3)
; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a5, v9
-; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3
-; RV32ZVE32F-NEXT: vmv.x.s a6, v8
-; RV32ZVE32F-NEXT: lw a7, 0(a6)
-; RV32ZVE32F-NEXT: lw a6, 4(a6)
-; RV32ZVE32F-NEXT: lw t0, 0(a5)
+; RV32ZVE32F-NEXT: lw a6, 0(a5)
; RV32ZVE32F-NEXT: lw a5, 4(a5)
-; RV32ZVE32F-NEXT: sw a1, 4(a0)
-; RV32ZVE32F-NEXT: sw a2, 0(a0)
-; RV32ZVE32F-NEXT: sw a6, 28(a0)
-; RV32ZVE32F-NEXT: sw a7, 24(a0)
+; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3
+; RV32ZVE32F-NEXT: vmv.x.s a7, v8
+; RV32ZVE32F-NEXT: lw t0, 0(a7)
+; RV32ZVE32F-NEXT: lw a7, 4(a7)
+; RV32ZVE32F-NEXT: sw a6, 16(a0)
; RV32ZVE32F-NEXT: sw a5, 20(a0)
-; RV32ZVE32F-NEXT: sw t0, 16(a0)
-; RV32ZVE32F-NEXT: sw a3, 12(a0)
+; RV32ZVE32F-NEXT: sw t0, 24(a0)
+; RV32ZVE32F-NEXT: sw a7, 28(a0)
+; RV32ZVE32F-NEXT: sw a2, 0(a0)
+; RV32ZVE32F-NEXT: sw a1, 4(a0)
; RV32ZVE32F-NEXT: sw a4, 8(a0)
+; RV32ZVE32F-NEXT: sw a3, 12(a0)
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_truemask_v4i64:
; RV64ZVE32F: # %bb.0:
-; RV64ZVE32F-NEXT: ld a2, 24(a1)
-; RV64ZVE32F-NEXT: ld a3, 16(a1)
-; RV64ZVE32F-NEXT: ld a4, 8(a1)
-; RV64ZVE32F-NEXT: ld a1, 0(a1)
+; RV64ZVE32F-NEXT: ld a2, 0(a1)
+; RV64ZVE32F-NEXT: ld a3, 8(a1)
+; RV64ZVE32F-NEXT: ld a4, 16(a1)
+; RV64ZVE32F-NEXT: ld a1, 24(a1)
; RV64ZVE32F-NEXT: ld a2, 0(a2)
; RV64ZVE32F-NEXT: ld a3, 0(a3)
; RV64ZVE32F-NEXT: ld a4, 0(a4)
; RV64ZVE32F-NEXT: ld a1, 0(a1)
-; RV64ZVE32F-NEXT: sd a2, 24(a0)
-; RV64ZVE32F-NEXT: sd a3, 16(a0)
-; RV64ZVE32F-NEXT: sd a4, 8(a0)
-; RV64ZVE32F-NEXT: sd a1, 0(a0)
+; RV64ZVE32F-NEXT: sd a2, 0(a0)
+; RV64ZVE32F-NEXT: sd a3, 8(a0)
+; RV64ZVE32F-NEXT: sd a4, 16(a0)
+; RV64ZVE32F-NEXT: sd a1, 24(a0)
; RV64ZVE32F-NEXT: ret
%v = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> %ptrs, i32 8, <4 x i1> splat (i1 1), <4 x i64> %passthru)
ret <4 x i64> %v
@@ -3823,34 +3823,34 @@ define <4 x i64> @mgather_falsemask_v4i64(<4 x ptr> %ptrs, <4 x i64> %passthru)
;
; RV32ZVE32F-LABEL: mgather_falsemask_v4i64:
; RV32ZVE32F: # %bb.0:
-; RV32ZVE32F-NEXT: lw a2, 16(a1)
-; RV32ZVE32F-NEXT: lw a3, 20(a1)
-; RV32ZVE32F-NEXT: lw a4, 24(a1)
-; RV32ZVE32F-NEXT: lw a5, 28(a1)
-; RV32ZVE32F-NEXT: lw a6, 0(a1)
-; RV32ZVE32F-NEXT: lw a7, 4(a1)
-; RV32ZVE32F-NEXT: lw t0, 8(a1)
-; RV32ZVE32F-NEXT: lw a1, 12(a1)
-; RV32ZVE32F-NEXT: sw a5, 28(a0)
-; RV32ZVE32F-NEXT: sw a4, 24(a0)
-; RV32ZVE32F-NEXT: sw a3, 20(a0)
-; RV32ZVE32F-NEXT: sw a2, 16(a0)
-; RV32ZVE32F-NEXT: sw a1, 12(a0)
-; RV32ZVE32F-NEXT: sw t0, 8(a0)
-; RV32ZVE32F-NEXT: sw a7, 4(a0)
-; RV32ZVE32F-NEXT: sw a6, 0(a0)
+; RV32ZVE32F-NEXT: lw a2, 0(a1)
+; RV32ZVE32F-NEXT: lw a3, 4(a1)
+; RV32ZVE32F-NEXT: lw a4, 8(a1)
+; RV32ZVE32F-NEXT: lw a5, 12(a1)
+; RV32ZVE32F-NEXT: lw a6, 16(a1)
+; RV32ZVE32F-NEXT: lw a7, 20(a1)
+; RV32ZVE32F-NEXT: lw t0, 24(a1)
+; RV32ZVE32F-NEXT: lw a1, 28(a1)
+; RV32ZVE32F-NEXT: sw a6, 16(a0)
+; RV32ZVE32F-NEXT: sw a7, 20(a0)
+; RV32ZVE32F-NEXT: sw t0, 24(a0)
+; RV32ZVE32F-NEXT: sw a1, 28(a0)
+; RV32ZVE32F-NEXT: sw a2, 0(a0)
+; RV32ZVE32F-NEXT: sw a3, 4(a0)
+; RV32ZVE32F-NEXT: sw a4, 8(a0)
+; RV32ZVE32F-NEXT: sw a5, 12(a0)
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_falsemask_v4i64:
; RV64ZVE32F: # %bb.0:
-; RV64ZVE32F-NEXT: ld a1, 24(a2)
-; RV64ZVE32F-NEXT: ld a3, 16(a2)
-; RV64ZVE32F-NEXT: ld a4, 8(a2)
-; RV64ZVE32F-NEXT: ld a2, 0(a2)
-; RV64ZVE32F-NEXT: sd a1, 24(a0)
-; RV64ZVE32F-NEXT: sd a3, 16(a0)
-; RV64ZVE32F-NEXT: sd a4, 8(a0)
-; RV64ZVE32F-NEXT: sd a2, 0(a0)
+; RV64ZVE32F-NEXT: ld a1, 0(a2)
+; RV64ZVE32F-NEXT: ld a3, 8(a2)
+; RV64ZVE32F-NEXT: ld a4, 16(a2)
+; RV64ZVE32F-NEXT: ld a2, 24(a2)
+; RV64ZVE32F-NEXT: sd a1, 0(a0)
+; RV64ZVE32F-NEXT: sd a3, 8(a0)
+; RV64ZVE32F-NEXT: sd a4, 16(a0)
+; RV64ZVE32F-NEXT: sd a2, 24(a0)
; RV64ZVE32F-NEXT: ret
%v = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> %ptrs, i32 8, <4 x i1> zeroinitializer, <4 x i64> %passthru)
ret <4 x i64> %v
@@ -9433,35 +9433,35 @@ define <4 x double> @mgather_truemask_v4f64(<4 x ptr> %ptrs, <4 x double> %passt
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa5, 0(a1)
-; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2
-; RV32ZVE32F-NEXT: vmv.x.s a1, v9
; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1
+; RV32ZVE32F-NEXT: vmv.x.s a1, v9
; RV32ZVE32F-NEXT: fld fa4, 0(a1)
+; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2
+; RV32ZVE32F-NEXT: vmv.x.s a1, v9
+; RV32ZVE32F-NEXT: fld fa3, 0(a1)
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
-; RV32ZVE32F-NEXT: fld fa3, 0(a1)
-; RV32ZVE32F-NEXT: vmv.x.s a1, v9
; RV32ZVE32F-NEXT: fld fa2, 0(a1)
; RV32ZVE32F-NEXT: fsd fa5, 0(a0)
-; RV32ZVE32F-NEXT: fsd fa3, 24(a0)
-; RV32ZVE32F-NEXT: fsd fa4, 16(a0)
-; RV32ZVE32F-NEXT: fsd fa2, 8(a0)
+; RV32ZVE32F-NEXT: fsd fa4, 8(a0)
+; RV32ZVE32F-NEXT: fsd fa3, 16(a0)
+; RV32ZVE32F-NEXT: fsd fa2, 24(a0)
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_truemask_v4f64:
; RV64ZVE32F: # %bb.0:
-; RV64ZVE32F-NEXT: ld a2, 24(a1)
-; RV64ZVE32F-NEXT: ld a3, 16(a1)
-; RV64ZVE32F-NEXT: ld a4, 8(a1)
-; RV64ZVE32F-NEXT: ld a1, 0(a1)
+; RV64ZVE32F-NEXT: ld a2, 0(a1)
+; RV64ZVE32F-NEXT: ld a3, 8(a1)
+; RV64ZVE32F-NEXT: ld a4, 16(a1)
+; RV64ZVE32F-NEXT: ld a1, 24(a1)
; RV64ZVE32F-NEXT: fld fa5, 0(a2)
; RV64ZVE32F-NEXT: fld fa4, 0(a3)
; RV64ZVE32F-NEXT: fld fa3, 0(a4)
; RV64ZVE32F-NEXT: fld fa2, 0(a1)
-; RV64ZVE32F-NEXT: fsd fa5, 24(a0)
-; RV64ZVE32F-NEXT: fsd fa4, 16(a0)
-; RV64ZVE32F-NEXT: fsd fa3, 8(a0)
-; RV64ZVE32F-NEXT: fsd fa2, 0(a0)
+; RV64ZVE32F-NEXT: fsd fa5, 0(a0)
+; RV64ZVE32F-NEXT: fsd fa4, 8(a0)
+; RV64ZVE32F-NEXT: fsd fa3, 16(a0)
+; RV64ZVE32F-NEXT: fsd fa2, 24(a0)
; RV64ZVE32F-NEXT: ret
%v = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %ptrs, i32 8, <4 x i1> splat (i1 1), <4 x double> %passthru)
ret <4 x double> %v
@@ -9480,18 +9480,18 @@ define <4 x double> @mgather_falsemask_v4f64(<4 x ptr> %ptrs, <4 x double> %pass
;
; RV32ZVE32F-LABEL: mgather_falsemask_v4f64:
; RV32ZVE32F: # %bb.0:
-; RV32ZVE32F-NEXT: fsd fa3, 24(a0)
-; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
-; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
+; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
+; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
+; RV32ZVE32F-NEXT: fsd fa3, 24(a0)
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_falsemask_v4f64:
; RV64ZVE32F: # %bb.0:
-; RV64ZVE32F-NEXT: fsd fa3, 24(a0)
-; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
-; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
+; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
+; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
+; RV64ZVE32F-NEXT: fsd fa3, 24(a0)
; RV64ZVE32F-NEXT: ret
%v = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %ptrs, i32 8, <4 x i1> zeroinitializer, <4 x double> %passthru)
ret <4 x double> %v
@@ -13489,41 +13489,41 @@ define <32 x i64> @mgather_strided_split(ptr %base) {
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: lw a3, 0(a1)
-; RV32ZVE32F-NEXT: sw a3, 216(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT: sw a3, 252(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: lw a1, 4(a1)
-; RV32ZVE32F-NEXT: sw a1, 208(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT: sw a1, 248(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v16, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v16
; RV32ZVE32F-NEXT: lw a3, 0(a1)
-; RV32ZVE32F-NEXT: sw a3, 252(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT: sw a3, 244(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: lw a1, 4(a1)
-; RV32ZVE32F-NEXT: sw a1, 248(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT: sw a1, 240(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: vslidedown.vi v16, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a1, v16
; RV32ZVE32F-NEXT: lw a3, 0(a1)
-; RV32ZVE32F-NEXT: sw a3, 244(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT: sw a3, 220(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: lw a1, 4(a1)
-; RV32ZVE32F-NEXT: sw a1, 236(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT: sw a1, 216(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: vslidedown.vi v16, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a1, v16
; RV32ZVE32F-NEXT: lw a3, 0(a1)
-; RV32ZVE32F-NEXT: sw a3, 228(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT: sw a3, 212(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: lw a1, 4(a1)
-; RV32ZVE32F-NEXT: sw a1, 220(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT: sw a1, 208(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v16, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a1, v16
; RV32ZVE32F-NEXT: lw a3, 0(a1)
-; RV32ZVE32F-NEXT: sw a3, 240(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT: sw a3, 236(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: lw a1, 4(a1)
; RV32ZVE32F-NEXT: sw a1, 232(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: vslidedown.vi v16, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a1, v16
; RV32ZVE32F-NEXT: lw a3, 0(a1)
-; RV32ZVE32F-NEXT: sw a3, 224(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT: sw a3, 228(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: lw a1, 4(a1)
-; RV32ZVE32F-NEXT: sw a1, 212(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT: sw a1, 224(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: vslidedown.vi v16, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a1, v16
; RV32ZVE32F-NEXT: lw a3, 0(a1)
@@ -13583,160 +13583,160 @@ define <32 x i64> @mgather_strided_split(ptr %base) {
; RV32ZVE32F-NEXT: lw a2, 324(sp)
; RV32ZVE32F-NEXT: lw a3, 328(sp)
; RV32ZVE32F-NEXT: lw a4, 332(sp)
-; RV32ZVE32F-NEXT: lw a5, 0(a1)
-; RV32ZVE32F-NEXT: sw a5, 124(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT: lw a1, 4(a1)
-; RV32ZVE32F-NEXT: sw a1, 120(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT: lw a1, 0(a2)
-; RV32ZVE32F-NEXT: sw a1, 116(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT: lw a1, 4(a2)
-; RV32ZVE32F-NEXT: sw a1, 112(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT: lw a1, 0(a3)
-; RV32ZVE32F-NEXT: sw a1, 104(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT: lw ra, 4(a3)
-; RV32ZVE32F-NEXT: lw s10, 0(a4)
-; RV32ZVE32F-NEXT: lw s8, 4(a4)
+; RV32ZVE32F-NEXT: lw s8, 0(a1)
+; RV32ZVE32F-NEXT: lw s9, 4(a1)
+; RV32ZVE32F-NEXT: lw s10, 0(a2)
+; RV32ZVE32F-NEXT: lw s11, 4(a2)
+; RV32ZVE32F-NEXT: lw t5, 0(a3)
+; RV32ZVE32F-NEXT: lw t6, 4(a3)
+; RV32ZVE32F-NEXT: lw s2, 0(a4)
+; RV32ZVE32F-NEXT: lw s3, 4(a4)
; RV32ZVE32F-NEXT: lw a2, 336(sp)
; RV32ZVE32F-NEXT: lw a4, 340(sp)
-; RV32ZVE32F-NEXT: lw a6, 344(sp)
-; RV32ZVE32F-NEXT: lw t0, 348(sp)
-; RV32ZVE32F-NEXT: lw s5, 0(a2)
-; RV32ZVE32F-NEXT: lw s4, 4(a2)
-; RV32ZVE32F-NEXT: lw t6, 0(a4)
-; RV32ZVE32F-NEXT: lw t5, 4(a4)
-; RV32ZVE32F-NEXT: lw t3, 0(a6)
-; RV32ZVE32F-NEXT: lw t2, 4(a6)
-; RV32ZVE32F-NEXT: lw t1, 0(t0)
-; RV32ZVE32F-NEXT: lw a7, 4(t0)
-; RV32ZVE32F-NEXT: lw a6, 352(sp)
-; RV32ZVE32F-NEXT: lw t0, 356(sp)
-; RV32ZVE32F-NEXT: lw t4, 360(sp)
-; RV32ZVE32F-NEXT: lw a1, 364(sp)
+; RV32ZVE32F-NEXT: lw a5, 344(sp)
+; RV32ZVE32F-NEXT: lw a6, 348(sp)
+; RV32ZVE32F-NEXT: lw a7, 0(a2)
+; RV32ZVE32F-NEXT: lw t0, 4(a2)
+; RV32ZVE32F-NEXT: lw t1, 0(a4)
+; RV32ZVE32F-NEXT: lw t2, 4(a4)
+; RV32ZVE32F-NEXT: lw a1, 0(a5)
+; RV32ZVE32F-NEXT: lw a2, 4(a5)
+; RV32ZVE32F-NEXT: lw a3, 0(a6)
+; RV32ZVE32F-NEXT: lw a4, 4(a6)
+; RV32ZVE32F-NEXT: lw a5, 352(sp)
+; RV32ZVE32F-NEXT: lw a6, 356(sp)
+; RV32ZVE32F-NEXT: lw t3, 360(sp)
+; RV32ZVE32F-NEXT: lw t4, 364(sp)
+; RV32ZVE32F-NEXT: lw s4, 0(a5)
+; RV32ZVE32F-NEXT: sw s4, 116(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT: lw a5, 4(a5)
+; RV32ZVE32F-NEXT: sw a5, 112(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: lw a5, 0(a6)
-; RV32ZVE32F-NEXT: lw a6, 4(a6)
-; RV32ZVE32F-NEXT: lw a2, 0(t0)
-; RV32ZVE32F-NEXT: sw a2, 108(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT: lw a2, 4(t0)
-; RV32ZVE32F-NEXT: sw a2, 100(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT: lw s11, 0(t4)
-; RV32ZVE32F-NEXT: lw s9, 4(t4)
-; RV32ZVE32F-NEXT: lw s7, 0(a1)
-; RV32ZVE32F-NEXT: lw s6, 4(a1)
-; RV32ZVE32F-NEXT: lw a4, 368(sp)
-; RV32ZVE32F-NEXT: lw a3, 372(sp)
-; RV32ZVE32F-NEXT: lw a2, 376(sp)
-; RV32ZVE32F-NEXT: lw a1, 380(sp)
-; RV32ZVE32F-NEXT: lw s3, 0(a4)
-; RV32ZVE32F-NEXT: lw s2, 4(a4)
-; RV32ZVE32F-NEXT: lw t4, 0(a3)
-; RV32ZVE32F-NEXT: lw t0, 4(a3)
-; RV32ZVE32F-NEXT: lw a4, 0(a2)
-; RV32ZVE32F-NEXT: lw a3, 4(a2)
-; RV32ZVE32F-NEXT: lw a2, 0(a1)
-; RV32ZVE32F-NEXT: lw a1, 4(a1)
-; RV32ZVE32F-NEXT: sw a6, 196(a0)
-; RV32ZVE32F-NEXT: sw a5, 192(a0)
-; RV32ZVE32F-NEXT: sw a7, 188(a0)
-; RV32ZVE32F-NEXT: sw t1, 184(a0)
-; RV32ZVE32F-NEXT: sw t2, 180(a0)
-; RV32ZVE32F-NEXT: sw t3, 176(a0)
-; RV32ZVE32F-NEXT: sw t5, 172(a0)
-; RV32ZVE32F-NEXT: sw t6, 168(a0)
-; RV32ZVE32F-NEXT: sw s4, 164(a0)
-; RV32ZVE32F-NEXT: sw s5, 160(a0)
-; RV32ZVE32F-NEXT: sw s8, 156(a0)
-; RV32ZVE32F-NEXT: sw s10, 152(a0)
-; RV32ZVE32F-NEXT: sw ra, 148(a0)
-; RV32ZVE32F-NEXT: lw a5, 104(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: sw a5, 144(a0)
-; RV32ZVE32F-NEXT: lw a5, 112(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: sw a5, 140(a0)
-; RV32ZVE32F-NEXT: lw a5, 116(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: sw a5, 136(a0)
-; RV32ZVE32F-NEXT: lw a5, 120(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: sw a5, 132(a0)
-; RV32ZVE32F-NEXT: lw a5, 124(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: sw a5, 128(a0)
-; RV32ZVE32F-NEXT: lw a5, 128(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: sw a5, 124(a0)
-; RV32ZVE32F-NEXT: lw a5, 132(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: sw a5, 120(a0)
-; RV32ZVE32F-NEXT: lw a5, 136(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: sw a5, 116(a0)
-; RV32ZVE32F-NEXT: lw a5, 140(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: sw a5, 112(a0)
-; RV32ZVE32F-NEXT: lw a5, 144(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: sw a5, 108(a0)
-; RV32ZVE32F-NEXT: lw a5, 148(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: sw a5, 104(a0)
-; RV32ZVE32F-NEXT: lw a5, 152(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: sw a5, 100(a0)
-; RV32ZVE32F-NEXT: lw a5, 156(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: sw a5, 96(a0)
-; RV32ZVE32F-NEXT: lw a5, 160(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: sw a5, 92(a0)
-; RV32ZVE32F-NEXT: lw a5, 164(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: sw a5, 88(a0)
-; RV32ZVE32F-NEXT: lw a5, 168(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: sw a5, 84(a0)
-; RV32ZVE32F-NEXT: lw a5, 172(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: sw a5, 80(a0)
-; RV32ZVE32F-NEXT: lw a5, 176(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: sw a5, 76(a0)
-; RV32ZVE32F-NEXT: lw a5, 180(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: sw a5, 72(a0)
-; RV32ZVE32F-NEXT: lw a5, 184(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: sw a5, 68(a0)
-; RV32ZVE32F-NEXT: lw a5, 188(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: sw a5, 64(a0)
-; RV32ZVE32F-NEXT: lw a5, 208(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: sw a5, 4(a0)
-; RV32ZVE32F-NEXT: lw a5, 216(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: sw a5, 0(a0)
-; RV32ZVE32F-NEXT: sw a1, 252(a0)
-; RV32ZVE32F-NEXT: sw a2, 248(a0)
-; RV32ZVE32F-NEXT: sw a3, 244(a0)
-; RV32ZVE32F-NEXT: sw a4, 240(a0)
-; RV32ZVE32F-NEXT: sw t0, 236(a0)
-; RV32ZVE32F-NEXT: sw t4, 232(a0)
-; RV32ZVE32F-NEXT: sw s2, 228(a0)
-; RV32ZVE32F-NEXT: sw s3, 224(a0)
-; RV32ZVE32F-NEXT: sw s6, 220(a0)
-; RV32ZVE32F-NEXT: sw s7, 216(a0)
-; RV32ZVE32F-NEXT: sw s9, 212(a0)
-; RV32ZVE32F-NEXT: sw s11, 208(a0)
-; RV32ZVE32F-NEXT: lw a1, 100(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: sw a1, 204(a0)
+; RV32ZVE32F-NEXT: sw a5, 124(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT: lw a5, 4(a6)
+; RV32ZVE32F-NEXT: sw a5, 120(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT: lw ra, 0(t3)
+; RV32ZVE32F-NEXT: lw a5, 4(t3)
+; RV32ZVE32F-NEXT: sw a5, 108(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT: lw a5, 0(t4)
+; RV32ZVE32F-NEXT: sw a5, 104(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT: lw a5, 4(t4)
+; RV32ZVE32F-NEXT: sw a5, 100(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT: lw a5, 368(sp)
+; RV32ZVE32F-NEXT: lw a6, 372(sp)
+; RV32ZVE32F-NEXT: lw t3, 376(sp)
+; RV32ZVE32F-NEXT: lw t4, 380(sp)
+; RV32ZVE32F-NEXT: lw s4, 0(a5)
+; RV32ZVE32F-NEXT: lw s5, 4(a5)
+; RV32ZVE32F-NEXT: lw s6, 0(a6)
+; RV32ZVE32F-NEXT: lw s7, 4(a6)
+; RV32ZVE32F-NEXT: lw a5, 0(t3)
+; RV32ZVE32F-NEXT: lw a6, 4(t3)
+; RV32ZVE32F-NEXT: lw t3, 0(t4)
+; RV32ZVE32F-NEXT: lw t4, 4(t4)
+; RV32ZVE32F-NEXT: sw a1, 176(a0)
+; RV32ZVE32F-NEXT: sw a2, 180(a0)
+; RV32ZVE32F-NEXT: sw a3, 184(a0)
+; RV32ZVE32F-NEXT: sw a4, 188(a0)
+; RV32ZVE32F-NEXT: sw a7, 160(a0)
+; RV32ZVE32F-NEXT: sw t0, 164(a0)
+; RV32ZVE32F-NEXT: sw t1, 168(a0)
+; RV32ZVE32F-NEXT: sw t2, 172(a0)
+; RV32ZVE32F-NEXT: sw t5, 144(a0)
+; RV32ZVE32F-NEXT: sw t6, 148(a0)
+; RV32ZVE32F-NEXT: sw s2, 152(a0)
+; RV32ZVE32F-NEXT: sw s3, 156(a0)
+; RV32ZVE32F-NEXT: sw s8, 128(a0)
+; RV32ZVE32F-NEXT: sw s9, 132(a0)
+; RV32ZVE32F-NEXT: sw s10, 136(a0)
+; RV32ZVE32F-NEXT: sw s11, 140(a0)
+; RV32ZVE32F-NEXT: lw a1, 140(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT: sw a1, 112(a0)
+; RV32ZVE32F-NEXT: lw a1, 136(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT: sw a1, 116(a0)
+; RV32ZVE32F-NEXT: lw a1, 132(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT: sw a1, 120(a0)
+; RV32ZVE32F-NEXT: lw a1, 128(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT: sw a1, 124(a0)
+; RV32ZVE32F-NEXT: lw a1, 156(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT: sw a1, 96(a0)
+; RV32ZVE32F-NEXT: lw a1, 152(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT: sw a1, 100(a0)
+; RV32ZVE32F-NEXT: lw a1, 148(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT: sw a1, 104(a0)
+; RV32ZVE32F-NEXT: lw a1, 144(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT: sw a1, 108(a0)
+; RV32ZVE32F-NEXT: lw a1, 172(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT: sw a1, 80(a0)
+; RV32ZVE32F-NEXT: lw a1, 168(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT: sw a1, 84(a0)
+; RV32ZVE32F-NEXT: lw a1, 164(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT: sw a1, 88(a0)
+; RV32ZVE32F-NEXT: lw a1, 160(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT: sw a1, 92(a0)
+; RV32ZVE32F-NEXT: lw a1, 188(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT: sw a1, 64(a0)
+; RV32ZVE32F-NEXT: lw a1, 184(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT: sw a1, 68(a0)
+; RV32ZVE32F-NEXT: lw a1, 180(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT: sw a1, 72(a0)
+; RV32ZVE32F-NEXT: lw a1, 176(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT: sw a1, 76(a0)
+; RV32ZVE32F-NEXT: sw a5, 240(a0)
+; RV32ZVE32F-NEXT: sw a6, 244(a0)
+; RV32ZVE32F-NEXT: sw t3, 248(a0)
+; RV32ZVE32F-NEXT: sw t4, 252(a0)
+; RV32ZVE32F-NEXT: sw s4, 224(a0)
+; RV32ZVE32F-NEXT: sw s5, 228(a0)
+; RV32ZVE32F-NEXT: sw s6, 232(a0)
+; RV32ZVE32F-NEXT: sw s7, 236(a0)
+; RV32ZVE32F-NEXT: sw ra, 208(a0)
; RV32ZVE32F-NEXT: lw a1, 108(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT: sw a1, 212(a0)
+; RV32ZVE32F-NEXT: lw a1, 104(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT: sw a1, 216(a0)
+; RV32ZVE32F-NEXT: lw a1, 100(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT: sw a1, 220(a0)
+; RV32ZVE32F-NEXT: lw a1, 116(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT: sw a1, 192(a0)
+; RV32ZVE32F-NEXT: lw a1, 112(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT: sw a1, 196(a0)
+; RV32ZVE32F-NEXT: lw a1, 124(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: sw a1, 200(a0)
+; RV32ZVE32F-NEXT: lw a1, 120(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT: sw a1, 204(a0)
; RV32ZVE32F-NEXT: lw a1, 220(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: sw a1, 28(a0)
-; RV32ZVE32F-NEXT: lw a1, 228(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: sw a1, 24(a0)
-; RV32ZVE32F-NEXT: lw a1, 236(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: sw a1, 20(a0)
-; RV32ZVE32F-NEXT: lw a1, 244(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: sw a1, 16(a0)
-; RV32ZVE32F-NEXT: lw a1, 248(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: sw a1, 12(a0)
+; RV32ZVE32F-NEXT: lw a1, 216(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT: sw a1, 20(a0)
+; RV32ZVE32F-NEXT: lw a1, 212(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT: sw a1, 24(a0)
+; RV32ZVE32F-NEXT: lw a1, 208(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT: sw a1, 28(a0)
; RV32ZVE32F-NEXT: lw a1, 252(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT: sw a1, 0(a0)
+; RV32ZVE32F-NEXT: lw a1, 248(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT: sw a1, 4(a0)
+; RV32ZVE32F-NEXT: lw a1, 244(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: sw a1, 8(a0)
-; RV32ZVE32F-NEXT: lw a1, 192(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: sw a1, 60(a0)
-; RV32ZVE32F-NEXT: lw a1, 196(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: sw a1, 56(a0)
-; RV32ZVE32F-NEXT: lw a1, 200(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: sw a1, 52(a0)
+; RV32ZVE32F-NEXT: lw a1, 240(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT: sw a1, 12(a0)
; RV32ZVE32F-NEXT: lw a1, 204(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: sw a1, 48(a0)
-; RV32ZVE32F-NEXT: lw a1, 212(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: sw a1, 44(a0)
-; RV32ZVE32F-NEXT: lw a1, 224(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: sw a1, 40(a0)
+; RV32ZVE32F-NEXT: lw a1, 200(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT: sw a1, 52(a0)
+; RV32ZVE32F-NEXT: lw a1, 196(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT: sw a1, 56(a0)
+; RV32ZVE32F-NEXT: lw a1, 192(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT: sw a1, 60(a0)
+; RV32ZVE32F-NEXT: lw a1, 236(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT: sw a1, 32(a0)
; RV32ZVE32F-NEXT: lw a1, 232(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: sw a1, 36(a0)
-; RV32ZVE32F-NEXT: lw a1, 240(sp) # 4-byte Folded Reload
-; RV32ZVE32F-NEXT: sw a1, 32(a0)
+; RV32ZVE32F-NEXT: lw a1, 228(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT: sw a1, 40(a0)
+; RV32ZVE32F-NEXT: lw a1, 224(sp) # 4-byte Folded Reload
+; RV32ZVE32F-NEXT: sw a1, 44(a0)
; RV32ZVE32F-NEXT: addi sp, s0, -512
; RV32ZVE32F-NEXT: lw ra, 508(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s0, 504(sp) # 4-byte Folded Reload
@@ -13812,51 +13812,51 @@ define <32 x i64> @mgather_strided_split(ptr %base) {
; RV64ZVE32F-NEXT: ld s8, 336(a1)
; RV64ZVE32F-NEXT: ld s9, 352(a1)
; RV64ZVE32F-NEXT: ld s10, 368(a1)
-; RV64ZVE32F-NEXT: ld s11, 448(a1)
-; RV64ZVE32F-NEXT: ld ra, 464(a1)
-; RV64ZVE32F-NEXT: ld a3, 480(a1)
-; RV64ZVE32F-NEXT: ld a2, 496(a1)
-; RV64ZVE32F-NEXT: ld a6, 384(a1)
-; RV64ZVE32F-NEXT: ld a5, 400(a1)
-; RV64ZVE32F-NEXT: ld a4, 416(a1)
-; RV64ZVE32F-NEXT: ld a1, 432(a1)
-; RV64ZVE32F-NEXT: sd a2, 248(a0)
-; RV64ZVE32F-NEXT: sd a3, 240(a0)
-; RV64ZVE32F-NEXT: sd ra, 232(a0)
-; RV64ZVE32F-NEXT: sd s11, 224(a0)
-; RV64ZVE32F-NEXT: sd a1, 216(a0)
-; RV64ZVE32F-NEXT: sd a4, 208(a0)
-; RV64ZVE32F-NEXT: sd a5, 200(a0)
-; RV64ZVE32F-NEXT: sd a6, 192(a0)
-; RV64ZVE32F-NEXT: sd s10, 184(a0)
-; RV64ZVE32F-NEXT: sd s9, 176(a0)
-; RV64ZVE32F-NEXT: sd s8, 168(a0)
+; RV64ZVE32F-NEXT: ld s11, 384(a1)
+; RV64ZVE32F-NEXT: ld ra, 400(a1)
+; RV64ZVE32F-NEXT: ld a6, 416(a1)
+; RV64ZVE32F-NEXT: ld a5, 432(a1)
+; RV64ZVE32F-NEXT: ld a2, 448(a1)
+; RV64ZVE32F-NEXT: ld a3, 464(a1)
+; RV64ZVE32F-NEXT: ld a4, 480(a1)
+; RV64ZVE32F-NEXT: ld a1, 496(a1)
+; RV64ZVE32F-NEXT: sd a2, 224(a0)
+; RV64ZVE32F-NEXT: sd a3, 232(a0)
+; RV64ZVE32F-NEXT: sd a4, 240(a0)
+; RV64ZVE32F-NEXT: sd a1, 248(a0)
+; RV64ZVE32F-NEXT: sd s11, 192(a0)
+; RV64ZVE32F-NEXT: sd ra, 200(a0)
+; RV64ZVE32F-NEXT: sd a6, 208(a0)
+; RV64ZVE32F-NEXT: sd a5, 216(a0)
; RV64ZVE32F-NEXT: sd s7, 160(a0)
-; RV64ZVE32F-NEXT: sd s6, 152(a0)
-; RV64ZVE32F-NEXT: sd s5, 144(a0)
-; RV64ZVE32F-NEXT: sd s4, 136(a0)
+; RV64ZVE32F-NEXT: sd s8, 168(a0)
+; RV64ZVE32F-NEXT: sd s9, 176(a0)
+; RV64ZVE32F-NEXT: sd s10, 184(a0)
; RV64ZVE32F-NEXT: sd s3, 128(a0)
-; RV64ZVE32F-NEXT: sd s2, 120(a0)
-; RV64ZVE32F-NEXT: sd s1, 112(a0)
-; RV64ZVE32F-NEXT: sd s0, 104(a0)
+; RV64ZVE32F-NEXT: sd s4, 136(a0)
+; RV64ZVE32F-NEXT: sd s5, 144(a0)
+; RV64ZVE32F-NEXT: sd s6, 152(a0)
; RV64ZVE32F-NEXT: sd t6, 96(a0)
-; RV64ZVE32F-NEXT: sd t5, 88(a0)
-; RV64ZVE32F-NEXT: sd t4, 80(a0)
-; RV64ZVE32F-NEXT: sd t3, 72(a0)
+; RV64ZVE32F-NEXT: sd s0, 104(a0)
+; RV64ZVE32F-NEXT: sd s1, 112(a0)
+; RV64ZVE32F-NEXT: sd s2, 120(a0)
; RV64ZVE32F-NEXT: sd t2, 64(a0)
-; RV64ZVE32F-NEXT: sd t1, 56(a0)
-; RV64ZVE32F-NEXT: sd t0, 48(a0)
-; RV64ZVE32F-NEXT: sd a7, 40(a0)
+; RV64ZVE32F-NEXT: sd t3, 72(a0)
+; RV64ZVE32F-NEXT: sd t4, 80(a0)
+; RV64ZVE32F-NEXT: sd t5, 88(a0)
; RV64ZVE32F-NEXT: ld a1, 0(sp) # 8-byte Folded Reload
; RV64ZVE32F-NEXT: sd a1, 32(a0)
-; RV64ZVE32F-NEXT: ld a1, 8(sp) # 8-byte Folded Reload
-; RV64ZVE32F-NEXT: sd a1, 24(a0)
-; RV64ZVE32F-NEXT: ld a1, 16(sp) # 8-byte Folded Reload
-; RV64ZVE32F-NEXT: sd a1, 16(a0)
-; RV64ZVE32F-NEXT: ld a1, 24(sp) # 8-byte Folded Reload
-; RV64ZVE32F-NEXT: sd a1, 8(a0)
+; RV64ZVE32F-NEXT: sd a7, 40(a0)
+; RV64ZVE32F-NEXT: sd t0, 48(a0)
+; RV64ZVE32F-NEXT: sd t1, 56(a0)
; RV64ZVE32F-NEXT: ld a1, 32(sp) # 8-byte Folded Reload
; RV64ZVE32F-NEXT: sd a1, 0(a0)
+; RV64ZVE32F-NEXT: ld a1, 24(sp) # 8-byte Folded Reload
+; RV64ZVE32F-NEXT: sd a1, 8(a0)
+; RV64ZVE32F-NEXT: ld a1, 16(sp) # 8-byte Folded Reload
+; RV64ZVE32F-NEXT: sd a1, 16(a0)
+; RV64ZVE32F-NEXT: ld a1, 8(sp) # 8-byte Folded Reload
+; RV64ZVE32F-NEXT: sd a1, 24(a0)
; RV64ZVE32F-NEXT: ld ra, 136(sp) # 8-byte Folded Reload
; RV64ZVE32F-NEXT: ld s0, 128(sp) # 8-byte Folded Reload
; RV64ZVE32F-NEXT: ld s1, 120(sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
index bc7758717c1c15..323f08acac28e3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
@@ -2863,8 +2863,8 @@ define void @mscatter_v1i64(<1 x i64> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
; RV32ZVE32F-NEXT: # %bb.1: # %cond.store
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
-; RV32ZVE32F-NEXT: sw a1, 4(a2)
; RV32ZVE32F-NEXT: sw a0, 0(a2)
+; RV32ZVE32F-NEXT: sw a1, 4(a2)
; RV32ZVE32F-NEXT: .LBB36_2: # %else
; RV32ZVE32F-NEXT: ret
;
@@ -2910,20 +2910,20 @@ define void @mscatter_v2i64(<2 x i64> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV32ZVE32F-NEXT: .LBB37_2: # %else2
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB37_3: # %cond.store
-; RV32ZVE32F-NEXT: lw a4, 4(a0)
-; RV32ZVE32F-NEXT: lw a0, 0(a0)
+; RV32ZVE32F-NEXT: lw a4, 0(a0)
+; RV32ZVE32F-NEXT: lw a0, 4(a0)
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a5, v8
-; RV32ZVE32F-NEXT: sw a4, 4(a5)
-; RV32ZVE32F-NEXT: sw a0, 0(a5)
+; RV32ZVE32F-NEXT: sw a4, 0(a5)
+; RV32ZVE32F-NEXT: sw a0, 4(a5)
; RV32ZVE32F-NEXT: andi a3, a3, 2
; RV32ZVE32F-NEXT: beqz a3, .LBB37_2
; RV32ZVE32F-NEXT: .LBB37_4: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
-; RV32ZVE32F-NEXT: sw a2, 4(a0)
; RV32ZVE32F-NEXT: sw a1, 0(a0)
+; RV32ZVE32F-NEXT: sw a2, 4(a0)
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_v2i64:
@@ -2987,20 +2987,20 @@ define void @mscatter_v4i64(<4 x i64> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV32ZVE32F-NEXT: .LBB38_4: # %else6
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB38_5: # %cond.store
-; RV32ZVE32F-NEXT: lw t0, 4(a0)
-; RV32ZVE32F-NEXT: lw a0, 0(a0)
+; RV32ZVE32F-NEXT: lw t0, 0(a0)
+; RV32ZVE32F-NEXT: lw a0, 4(a0)
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s t1, v8
-; RV32ZVE32F-NEXT: sw t0, 4(t1)
-; RV32ZVE32F-NEXT: sw a0, 0(t1)
+; RV32ZVE32F-NEXT: sw t0, 0(t1)
+; RV32ZVE32F-NEXT: sw a0, 4(t1)
; RV32ZVE32F-NEXT: andi a0, a5, 2
; RV32ZVE32F-NEXT: beqz a0, .LBB38_2
; RV32ZVE32F-NEXT: .LBB38_6: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v9
-; RV32ZVE32F-NEXT: sw a7, 4(a0)
; RV32ZVE32F-NEXT: sw a6, 0(a0)
+; RV32ZVE32F-NEXT: sw a7, 4(a0)
; RV32ZVE32F-NEXT: andi a0, a5, 4
; RV32ZVE32F-NEXT: beqz a0, .LBB38_3
; RV32ZVE32F-NEXT: .LBB38_7: # %cond.store3
@@ -3208,20 +3208,20 @@ define void @mscatter_v8i64(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV32ZVE32F-NEXT: addi sp, sp, 16
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB41_10: # %cond.store
-; RV32ZVE32F-NEXT: lw s1, 4(a0)
-; RV32ZVE32F-NEXT: lw a0, 0(a0)
+; RV32ZVE32F-NEXT: lw s1, 0(a0)
+; RV32ZVE32F-NEXT: lw a0, 4(a0)
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s s2, v8
-; RV32ZVE32F-NEXT: sw s1, 4(s2)
-; RV32ZVE32F-NEXT: sw a0, 0(s2)
+; RV32ZVE32F-NEXT: sw s1, 0(s2)
+; RV32ZVE32F-NEXT: sw a0, 4(s2)
; RV32ZVE32F-NEXT: andi a0, a7, 2
; RV32ZVE32F-NEXT: beqz a0, .LBB41_2
; RV32ZVE32F-NEXT: .LBB41_11: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw s0, 4(a0)
; RV32ZVE32F-NEXT: sw t6, 0(a0)
+; RV32ZVE32F-NEXT: sw s0, 4(a0)
; RV32ZVE32F-NEXT: andi a0, a7, 4
; RV32ZVE32F-NEXT: beqz a0, .LBB41_3
; RV32ZVE32F-NEXT: .LBB41_12: # %cond.store3
@@ -3443,19 +3443,19 @@ define void @mscatter_baseidx_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %id
; RV32ZVE32F-NEXT: addi sp, sp, 16
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB42_10: # %cond.store
-; RV32ZVE32F-NEXT: lw a1, 4(a0)
-; RV32ZVE32F-NEXT: lw a0, 0(a0)
+; RV32ZVE32F-NEXT: lw a1, 0(a0)
+; RV32ZVE32F-NEXT: lw a0, 4(a0)
; RV32ZVE32F-NEXT: vmv.x.s s2, v8
-; RV32ZVE32F-NEXT: sw a1, 4(s2)
-; RV32ZVE32F-NEXT: sw a0, 0(s2)
+; RV32ZVE32F-NEXT: sw a1, 0(s2)
+; RV32ZVE32F-NEXT: sw a0, 4(s2)
; RV32ZVE32F-NEXT: andi a0, t0, 2
; RV32ZVE32F-NEXT: beqz a0, .LBB42_2
; RV32ZVE32F-NEXT: .LBB42_11: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw s1, 4(a0)
; RV32ZVE32F-NEXT: sw s0, 0(a0)
+; RV32ZVE32F-NEXT: sw s1, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 4
; RV32ZVE32F-NEXT: beqz a0, .LBB42_3
; RV32ZVE32F-NEXT: .LBB42_12: # %cond.store3
@@ -3687,19 +3687,19 @@ define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8
; RV32ZVE32F-NEXT: addi sp, sp, 16
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB43_10: # %cond.store
-; RV32ZVE32F-NEXT: lw a1, 4(a0)
-; RV32ZVE32F-NEXT: lw a0, 0(a0)
+; RV32ZVE32F-NEXT: lw a1, 0(a0)
+; RV32ZVE32F-NEXT: lw a0, 4(a0)
; RV32ZVE32F-NEXT: vmv.x.s s2, v8
-; RV32ZVE32F-NEXT: sw a1, 4(s2)
-; RV32ZVE32F-NEXT: sw a0, 0(s2)
+; RV32ZVE32F-NEXT: sw a1, 0(s2)
+; RV32ZVE32F-NEXT: sw a0, 4(s2)
; RV32ZVE32F-NEXT: andi a0, t0, 2
; RV32ZVE32F-NEXT: beqz a0, .LBB43_2
; RV32ZVE32F-NEXT: .LBB43_11: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw s1, 4(a0)
; RV32ZVE32F-NEXT: sw s0, 0(a0)
+; RV32ZVE32F-NEXT: sw s1, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 4
; RV32ZVE32F-NEXT: beqz a0, .LBB43_3
; RV32ZVE32F-NEXT: .LBB43_12: # %cond.store3
@@ -3933,19 +3933,19 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8
; RV32ZVE32F-NEXT: addi sp, sp, 16
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB44_10: # %cond.store
-; RV32ZVE32F-NEXT: lw a1, 4(a0)
-; RV32ZVE32F-NEXT: lw a0, 0(a0)
+; RV32ZVE32F-NEXT: lw a1, 0(a0)
+; RV32ZVE32F-NEXT: lw a0, 4(a0)
; RV32ZVE32F-NEXT: vmv.x.s s2, v8
-; RV32ZVE32F-NEXT: sw a1, 4(s2)
-; RV32ZVE32F-NEXT: sw a0, 0(s2)
+; RV32ZVE32F-NEXT: sw a1, 0(s2)
+; RV32ZVE32F-NEXT: sw a0, 4(s2)
; RV32ZVE32F-NEXT: andi a0, t0, 2
; RV32ZVE32F-NEXT: beqz a0, .LBB44_2
; RV32ZVE32F-NEXT: .LBB44_11: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw s1, 4(a0)
; RV32ZVE32F-NEXT: sw s0, 0(a0)
+; RV32ZVE32F-NEXT: sw s1, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 4
; RV32ZVE32F-NEXT: beqz a0, .LBB44_3
; RV32ZVE32F-NEXT: .LBB44_12: # %cond.store3
@@ -4186,19 +4186,19 @@ define void @mscatter_baseidx_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> %
; RV32ZVE32F-NEXT: addi sp, sp, 16
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB45_10: # %cond.store
-; RV32ZVE32F-NEXT: lw a1, 4(a0)
-; RV32ZVE32F-NEXT: lw a0, 0(a0)
+; RV32ZVE32F-NEXT: lw a1, 0(a0)
+; RV32ZVE32F-NEXT: lw a0, 4(a0)
; RV32ZVE32F-NEXT: vmv.x.s s2, v8
-; RV32ZVE32F-NEXT: sw a1, 4(s2)
-; RV32ZVE32F-NEXT: sw a0, 0(s2)
+; RV32ZVE32F-NEXT: sw a1, 0(s2)
+; RV32ZVE32F-NEXT: sw a0, 4(s2)
; RV32ZVE32F-NEXT: andi a0, t0, 2
; RV32ZVE32F-NEXT: beqz a0, .LBB45_2
; RV32ZVE32F-NEXT: .LBB45_11: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw s1, 4(a0)
; RV32ZVE32F-NEXT: sw s0, 0(a0)
+; RV32ZVE32F-NEXT: sw s1, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 4
; RV32ZVE32F-NEXT: beqz a0, .LBB45_3
; RV32ZVE32F-NEXT: .LBB45_12: # %cond.store3
@@ -4431,19 +4431,19 @@ define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV32ZVE32F-NEXT: addi sp, sp, 16
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB46_10: # %cond.store
-; RV32ZVE32F-NEXT: lw a1, 4(a0)
-; RV32ZVE32F-NEXT: lw a0, 0(a0)
+; RV32ZVE32F-NEXT: lw a1, 0(a0)
+; RV32ZVE32F-NEXT: lw a0, 4(a0)
; RV32ZVE32F-NEXT: vmv.x.s s2, v8
-; RV32ZVE32F-NEXT: sw a1, 4(s2)
-; RV32ZVE32F-NEXT: sw a0, 0(s2)
+; RV32ZVE32F-NEXT: sw a1, 0(s2)
+; RV32ZVE32F-NEXT: sw a0, 4(s2)
; RV32ZVE32F-NEXT: andi a0, t0, 2
; RV32ZVE32F-NEXT: beqz a0, .LBB46_2
; RV32ZVE32F-NEXT: .LBB46_11: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw s1, 4(a0)
; RV32ZVE32F-NEXT: sw s0, 0(a0)
+; RV32ZVE32F-NEXT: sw s1, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 4
; RV32ZVE32F-NEXT: beqz a0, .LBB46_3
; RV32ZVE32F-NEXT: .LBB46_12: # %cond.store3
@@ -4678,19 +4678,19 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV32ZVE32F-NEXT: addi sp, sp, 16
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB47_10: # %cond.store
-; RV32ZVE32F-NEXT: lw a1, 4(a0)
-; RV32ZVE32F-NEXT: lw a0, 0(a0)
+; RV32ZVE32F-NEXT: lw a1, 0(a0)
+; RV32ZVE32F-NEXT: lw a0, 4(a0)
; RV32ZVE32F-NEXT: vmv.x.s s2, v8
-; RV32ZVE32F-NEXT: sw a1, 4(s2)
-; RV32ZVE32F-NEXT: sw a0, 0(s2)
+; RV32ZVE32F-NEXT: sw a1, 0(s2)
+; RV32ZVE32F-NEXT: sw a0, 4(s2)
; RV32ZVE32F-NEXT: andi a0, t0, 2
; RV32ZVE32F-NEXT: beqz a0, .LBB47_2
; RV32ZVE32F-NEXT: .LBB47_11: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw s1, 4(a0)
; RV32ZVE32F-NEXT: sw s0, 0(a0)
+; RV32ZVE32F-NEXT: sw s1, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 4
; RV32ZVE32F-NEXT: beqz a0, .LBB47_3
; RV32ZVE32F-NEXT: .LBB47_12: # %cond.store3
@@ -4932,19 +4932,19 @@ define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> %
; RV32ZVE32F-NEXT: addi sp, sp, 16
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB48_10: # %cond.store
-; RV32ZVE32F-NEXT: lw a1, 4(a0)
-; RV32ZVE32F-NEXT: lw a0, 0(a0)
+; RV32ZVE32F-NEXT: lw a1, 0(a0)
+; RV32ZVE32F-NEXT: lw a0, 4(a0)
; RV32ZVE32F-NEXT: vmv.x.s s2, v8
-; RV32ZVE32F-NEXT: sw a1, 4(s2)
-; RV32ZVE32F-NEXT: sw a0, 0(s2)
+; RV32ZVE32F-NEXT: sw a1, 0(s2)
+; RV32ZVE32F-NEXT: sw a0, 4(s2)
; RV32ZVE32F-NEXT: andi a0, t0, 2
; RV32ZVE32F-NEXT: beqz a0, .LBB48_2
; RV32ZVE32F-NEXT: .LBB48_11: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw s1, 4(a0)
; RV32ZVE32F-NEXT: sw s0, 0(a0)
+; RV32ZVE32F-NEXT: sw s1, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 4
; RV32ZVE32F-NEXT: beqz a0, .LBB48_3
; RV32ZVE32F-NEXT: .LBB48_12: # %cond.store3
@@ -5175,19 +5175,19 @@ define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV32ZVE32F-NEXT: addi sp, sp, 16
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB49_10: # %cond.store
-; RV32ZVE32F-NEXT: lw a1, 4(a0)
-; RV32ZVE32F-NEXT: lw a0, 0(a0)
+; RV32ZVE32F-NEXT: lw a1, 0(a0)
+; RV32ZVE32F-NEXT: lw a0, 4(a0)
; RV32ZVE32F-NEXT: vmv.x.s s2, v8
-; RV32ZVE32F-NEXT: sw a1, 4(s2)
-; RV32ZVE32F-NEXT: sw a0, 0(s2)
+; RV32ZVE32F-NEXT: sw a1, 0(s2)
+; RV32ZVE32F-NEXT: sw a0, 4(s2)
; RV32ZVE32F-NEXT: andi a0, t0, 2
; RV32ZVE32F-NEXT: beqz a0, .LBB49_2
; RV32ZVE32F-NEXT: .LBB49_11: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw s1, 4(a0)
; RV32ZVE32F-NEXT: sw s0, 0(a0)
+; RV32ZVE32F-NEXT: sw s1, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 4
; RV32ZVE32F-NEXT: beqz a0, .LBB49_3
; RV32ZVE32F-NEXT: .LBB49_12: # %cond.store3
@@ -5419,19 +5419,19 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV32ZVE32F-NEXT: addi sp, sp, 16
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB50_10: # %cond.store
-; RV32ZVE32F-NEXT: lw a1, 4(a0)
-; RV32ZVE32F-NEXT: lw a0, 0(a0)
+; RV32ZVE32F-NEXT: lw a1, 0(a0)
+; RV32ZVE32F-NEXT: lw a0, 4(a0)
; RV32ZVE32F-NEXT: vmv.x.s s2, v8
-; RV32ZVE32F-NEXT: sw a1, 4(s2)
-; RV32ZVE32F-NEXT: sw a0, 0(s2)
+; RV32ZVE32F-NEXT: sw a1, 0(s2)
+; RV32ZVE32F-NEXT: sw a0, 4(s2)
; RV32ZVE32F-NEXT: andi a0, t0, 2
; RV32ZVE32F-NEXT: beqz a0, .LBB50_2
; RV32ZVE32F-NEXT: .LBB50_11: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw s1, 4(a0)
; RV32ZVE32F-NEXT: sw s0, 0(a0)
+; RV32ZVE32F-NEXT: sw s1, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 4
; RV32ZVE32F-NEXT: beqz a0, .LBB50_3
; RV32ZVE32F-NEXT: .LBB50_12: # %cond.store3
@@ -5705,19 +5705,19 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs,
; RV32ZVE32F-NEXT: addi sp, sp, 48
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB51_10: # %cond.store
-; RV32ZVE32F-NEXT: lw a1, 4(a0)
-; RV32ZVE32F-NEXT: lw a0, 0(a0)
+; RV32ZVE32F-NEXT: lw a1, 0(a0)
+; RV32ZVE32F-NEXT: lw a0, 4(a0)
; RV32ZVE32F-NEXT: vmv.x.s s2, v8
-; RV32ZVE32F-NEXT: sw a1, 4(s2)
-; RV32ZVE32F-NEXT: sw a0, 0(s2)
+; RV32ZVE32F-NEXT: sw a1, 0(s2)
+; RV32ZVE32F-NEXT: sw a0, 4(s2)
; RV32ZVE32F-NEXT: andi a0, a2, 2
; RV32ZVE32F-NEXT: beqz a0, .LBB51_2
; RV32ZVE32F-NEXT: .LBB51_11: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw s1, 4(a0)
; RV32ZVE32F-NEXT: sw s0, 0(a0)
+; RV32ZVE32F-NEXT: sw s1, 4(a0)
; RV32ZVE32F-NEXT: andi a0, a2, 4
; RV32ZVE32F-NEXT: beqz a0, .LBB51_3
; RV32ZVE32F-NEXT: .LBB51_12: # %cond.store3
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll
index 016f95bfef7e71..080ffe1a78b511 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll
@@ -833,8 +833,8 @@ define signext i64 @vpreduce_add_v2i64(i64 signext %s, <2 x i64> %v, <2 x i1> %m
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -867,8 +867,8 @@ define signext i64 @vpreduce_umax_v2i64(i64 signext %s, <2 x i64> %v, <2 x i1> %
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -901,8 +901,8 @@ define signext i64 @vpreduce_smax_v2i64(i64 signext %s, <2 x i64> %v, <2 x i1> %
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -935,8 +935,8 @@ define signext i64 @vpreduce_umin_v2i64(i64 signext %s, <2 x i64> %v, <2 x i1> %
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -969,8 +969,8 @@ define signext i64 @vpreduce_smin_v2i64(i64 signext %s, <2 x i64> %v, <2 x i1> %
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1003,8 +1003,8 @@ define signext i64 @vpreduce_and_v2i64(i64 signext %s, <2 x i64> %v, <2 x i1> %m
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1037,8 +1037,8 @@ define signext i64 @vpreduce_or_v2i64(i64 signext %s, <2 x i64> %v, <2 x i1> %m,
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1071,8 +1071,8 @@ define signext i64 @vpreduce_xor_v2i64(i64 signext %s, <2 x i64> %v, <2 x i1> %m
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1105,8 +1105,8 @@ define signext i64 @vpreduce_add_v4i64(i64 signext %s, <4 x i64> %v, <4 x i1> %m
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1139,8 +1139,8 @@ define signext i64 @vpreduce_umax_v4i64(i64 signext %s, <4 x i64> %v, <4 x i1> %
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1173,8 +1173,8 @@ define signext i64 @vpreduce_smax_v4i64(i64 signext %s, <4 x i64> %v, <4 x i1> %
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1207,8 +1207,8 @@ define signext i64 @vpreduce_umin_v4i64(i64 signext %s, <4 x i64> %v, <4 x i1> %
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1241,8 +1241,8 @@ define signext i64 @vpreduce_smin_v4i64(i64 signext %s, <4 x i64> %v, <4 x i1> %
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1275,8 +1275,8 @@ define signext i64 @vpreduce_and_v4i64(i64 signext %s, <4 x i64> %v, <4 x i1> %m
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1309,8 +1309,8 @@ define signext i64 @vpreduce_or_v4i64(i64 signext %s, <4 x i64> %v, <4 x i1> %m,
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1343,8 +1343,8 @@ define signext i64 @vpreduce_xor_v4i64(i64 signext %s, <4 x i64> %v, <4 x i1> %m
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll
index 5f3847e085055b..0cecec31e2bda3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll
@@ -1376,8 +1376,8 @@ define <8 x i1> @icmp_eq_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroex
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1404,8 +1404,8 @@ define <8 x i1> @icmp_eq_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 z
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1465,8 +1465,8 @@ define <8 x i1> @icmp_ne_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroex
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1493,8 +1493,8 @@ define <8 x i1> @icmp_ne_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 z
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1554,8 +1554,8 @@ define <8 x i1> @icmp_ugt_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroe
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1582,8 +1582,8 @@ define <8 x i1> @icmp_ugt_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1643,8 +1643,8 @@ define <8 x i1> @icmp_uge_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroe
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1673,8 +1673,8 @@ define <8 x i1> @icmp_uge_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1734,8 +1734,8 @@ define <8 x i1> @icmp_ult_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroe
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1762,8 +1762,8 @@ define <8 x i1> @icmp_ult_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1823,8 +1823,8 @@ define <8 x i1> @icmp_sgt_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroe
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1851,8 +1851,8 @@ define <8 x i1> @icmp_sgt_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1912,8 +1912,8 @@ define <8 x i1> @icmp_sge_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroe
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1942,8 +1942,8 @@ define <8 x i1> @icmp_sge_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2003,8 +2003,8 @@ define <8 x i1> @icmp_slt_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroe
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2031,8 +2031,8 @@ define <8 x i1> @icmp_slt_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2092,8 +2092,8 @@ define <8 x i1> @icmp_sle_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroe
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2120,8 +2120,8 @@ define <8 x i1> @icmp_sle_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1down.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1down.ll
index 0d8a9692442353..ff5f6960ed4e4d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1down.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1down.ll
@@ -86,8 +86,8 @@ define <2 x i64> @vslide1down_2xi64(<2 x i64> %v, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -111,8 +111,8 @@ define <4 x i64> @vslide1down_4xi64(<4 x i64> %v, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1up.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1up.ll
index d1fb30c7daa3ea..e0b2dd1af91835 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1up.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1up.ll
@@ -93,8 +93,8 @@ define <2 x i64> @vslide1up_2xi64(<2 x i64> %v, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -119,8 +119,8 @@ define <4 x i64> @vslide1up_4xi64(<4 x i64> %v, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll
index 072c88f6353eaf..4ef65032469e41 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll
@@ -259,10 +259,10 @@ define void @vnsrl_0_i64(ptr %in, ptr %out) {
;
; ZVE32F-LABEL: vnsrl_0_i64:
; ZVE32F: # %bb.0: # %entry
-; ZVE32F-NEXT: ld a2, 16(a0)
-; ZVE32F-NEXT: ld a0, 0(a0)
-; ZVE32F-NEXT: sd a2, 8(a1)
-; ZVE32F-NEXT: sd a0, 0(a1)
+; ZVE32F-NEXT: ld a2, 0(a0)
+; ZVE32F-NEXT: ld a0, 16(a0)
+; ZVE32F-NEXT: sd a2, 0(a1)
+; ZVE32F-NEXT: sd a0, 8(a1)
; ZVE32F-NEXT: ret
entry:
%0 = load <4 x i64>, ptr %in, align 8
@@ -285,10 +285,10 @@ define void @vnsrl_64_i64(ptr %in, ptr %out) {
;
; ZVE32F-LABEL: vnsrl_64_i64:
; ZVE32F: # %bb.0: # %entry
-; ZVE32F-NEXT: ld a2, 24(a0)
-; ZVE32F-NEXT: ld a0, 8(a0)
-; ZVE32F-NEXT: sd a2, 8(a1)
-; ZVE32F-NEXT: sd a0, 0(a1)
+; ZVE32F-NEXT: ld a2, 8(a0)
+; ZVE32F-NEXT: ld a0, 24(a0)
+; ZVE32F-NEXT: sd a2, 0(a1)
+; ZVE32F-NEXT: sd a0, 8(a1)
; ZVE32F-NEXT: ret
entry:
%0 = load <4 x i64>, ptr %in, align 8
@@ -310,10 +310,10 @@ define void @vnsrl_0_double(ptr %in, ptr %out) {
;
; ZVE32F-LABEL: vnsrl_0_double:
; ZVE32F: # %bb.0: # %entry
-; ZVE32F-NEXT: ld a2, 16(a0)
-; ZVE32F-NEXT: ld a0, 0(a0)
-; ZVE32F-NEXT: sd a2, 8(a1)
-; ZVE32F-NEXT: sd a0, 0(a1)
+; ZVE32F-NEXT: ld a2, 0(a0)
+; ZVE32F-NEXT: ld a0, 16(a0)
+; ZVE32F-NEXT: sd a2, 0(a1)
+; ZVE32F-NEXT: sd a0, 8(a1)
; ZVE32F-NEXT: ret
entry:
%0 = load <4 x double>, ptr %in, align 8
@@ -336,10 +336,10 @@ define void @vnsrl_64_double(ptr %in, ptr %out) {
;
; ZVE32F-LABEL: vnsrl_64_double:
; ZVE32F: # %bb.0: # %entry
-; ZVE32F-NEXT: ld a2, 24(a0)
-; ZVE32F-NEXT: ld a0, 8(a0)
-; ZVE32F-NEXT: sd a2, 8(a1)
-; ZVE32F-NEXT: sd a0, 0(a1)
+; ZVE32F-NEXT: ld a2, 8(a0)
+; ZVE32F-NEXT: ld a0, 24(a0)
+; ZVE32F-NEXT: sd a2, 0(a1)
+; ZVE32F-NEXT: sd a0, 8(a1)
; ZVE32F-NEXT: ret
entry:
%0 = load <4 x double>, ptr %in, align 8
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll
index 9385fa69b2f049..28202dc07f9564 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll
@@ -663,14 +663,14 @@ define void @gather_of_pointers(ptr noalias nocapture %arg, ptr noalias nocaptur
; ZVE32F-NEXT: add a6, a1, a6
; ZVE32F-NEXT: mul a7, a2, a5
; ZVE32F-NEXT: add a7, a1, a7
-; ZVE32F-NEXT: ld t0, 0(a6)
-; ZVE32F-NEXT: ld t1, 0(a7)
-; ZVE32F-NEXT: ld a6, 80(a6)
+; ZVE32F-NEXT: ld t0, 0(a7)
+; ZVE32F-NEXT: ld t1, 0(a6)
; ZVE32F-NEXT: ld a7, 80(a7)
-; ZVE32F-NEXT: sd t0, 8(a0)
-; ZVE32F-NEXT: sd t1, 0(a0)
-; ZVE32F-NEXT: sd a6, 24(a0)
+; ZVE32F-NEXT: ld a6, 80(a6)
+; ZVE32F-NEXT: sd t0, 0(a0)
+; ZVE32F-NEXT: sd t1, 8(a0)
; ZVE32F-NEXT: sd a7, 16(a0)
+; ZVE32F-NEXT: sd a6, 24(a0)
; ZVE32F-NEXT: addi a2, a2, 4
; ZVE32F-NEXT: addi a0, a0, 32
; ZVE32F-NEXT: addi a4, a4, 4
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
index eba3bd9d86dbb3..293b75dc207c86 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
@@ -281,9 +281,9 @@ define void @mscatter_v4i16_align1(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> %m)
; RV32-SLOW-NEXT: vmv.x.s a1, v8
; RV32-SLOW-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV32-SLOW-NEXT: vmv.x.s a2, v9
+; RV32-SLOW-NEXT: srli a3, a1, 8
; RV32-SLOW-NEXT: sb a1, 0(a2)
-; RV32-SLOW-NEXT: srli a1, a1, 8
-; RV32-SLOW-NEXT: sb a1, 1(a2)
+; RV32-SLOW-NEXT: sb a3, 1(a2)
; RV32-SLOW-NEXT: andi a1, a0, 2
; RV32-SLOW-NEXT: beqz a1, .LBB6_2
; RV32-SLOW-NEXT: .LBB6_6: # %cond.store1
@@ -293,9 +293,9 @@ define void @mscatter_v4i16_align1(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> %m)
; RV32-SLOW-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32-SLOW-NEXT: vslidedown.vi v10, v9, 1
; RV32-SLOW-NEXT: vmv.x.s a2, v10
+; RV32-SLOW-NEXT: srli a3, a1, 8
; RV32-SLOW-NEXT: sb a1, 0(a2)
-; RV32-SLOW-NEXT: srli a1, a1, 8
-; RV32-SLOW-NEXT: sb a1, 1(a2)
+; RV32-SLOW-NEXT: sb a3, 1(a2)
; RV32-SLOW-NEXT: andi a1, a0, 4
; RV32-SLOW-NEXT: beqz a1, .LBB6_3
; RV32-SLOW-NEXT: .LBB6_7: # %cond.store3
@@ -305,9 +305,9 @@ define void @mscatter_v4i16_align1(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> %m)
; RV32-SLOW-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32-SLOW-NEXT: vslidedown.vi v10, v9, 2
; RV32-SLOW-NEXT: vmv.x.s a2, v10
+; RV32-SLOW-NEXT: srli a3, a1, 8
; RV32-SLOW-NEXT: sb a1, 0(a2)
-; RV32-SLOW-NEXT: srli a1, a1, 8
-; RV32-SLOW-NEXT: sb a1, 1(a2)
+; RV32-SLOW-NEXT: sb a3, 1(a2)
; RV32-SLOW-NEXT: andi a0, a0, 8
; RV32-SLOW-NEXT: beqz a0, .LBB6_4
; RV32-SLOW-NEXT: .LBB6_8: # %cond.store5
@@ -317,9 +317,9 @@ define void @mscatter_v4i16_align1(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> %m)
; RV32-SLOW-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32-SLOW-NEXT: vslidedown.vi v8, v9, 3
; RV32-SLOW-NEXT: vmv.x.s a1, v8
+; RV32-SLOW-NEXT: srli a2, a0, 8
; RV32-SLOW-NEXT: sb a0, 0(a1)
-; RV32-SLOW-NEXT: srli a0, a0, 8
-; RV32-SLOW-NEXT: sb a0, 1(a1)
+; RV32-SLOW-NEXT: sb a2, 1(a1)
; RV32-SLOW-NEXT: ret
;
; RV64-SLOW-LABEL: mscatter_v4i16_align1:
@@ -345,8 +345,8 @@ define void @mscatter_v4i16_align1(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> %m)
; RV64-SLOW-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV64-SLOW-NEXT: vmv.x.s a2, v10
; RV64-SLOW-NEXT: srli a3, a1, 8
-; RV64-SLOW-NEXT: sb a3, 1(a2)
; RV64-SLOW-NEXT: sb a1, 0(a2)
+; RV64-SLOW-NEXT: sb a3, 1(a2)
; RV64-SLOW-NEXT: andi a1, a0, 2
; RV64-SLOW-NEXT: beqz a1, .LBB6_2
; RV64-SLOW-NEXT: .LBB6_6: # %cond.store1
@@ -357,8 +357,8 @@ define void @mscatter_v4i16_align1(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> %m)
; RV64-SLOW-NEXT: vslidedown.vi v9, v10, 1
; RV64-SLOW-NEXT: vmv.x.s a2, v9
; RV64-SLOW-NEXT: srli a3, a1, 8
-; RV64-SLOW-NEXT: sb a3, 1(a2)
; RV64-SLOW-NEXT: sb a1, 0(a2)
+; RV64-SLOW-NEXT: sb a3, 1(a2)
; RV64-SLOW-NEXT: andi a1, a0, 4
; RV64-SLOW-NEXT: beqz a1, .LBB6_3
; RV64-SLOW-NEXT: .LBB6_7: # %cond.store3
@@ -369,8 +369,8 @@ define void @mscatter_v4i16_align1(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> %m)
; RV64-SLOW-NEXT: vslidedown.vi v12, v10, 2
; RV64-SLOW-NEXT: vmv.x.s a2, v12
; RV64-SLOW-NEXT: srli a3, a1, 8
-; RV64-SLOW-NEXT: sb a3, 1(a2)
; RV64-SLOW-NEXT: sb a1, 0(a2)
+; RV64-SLOW-NEXT: sb a3, 1(a2)
; RV64-SLOW-NEXT: andi a0, a0, 8
; RV64-SLOW-NEXT: beqz a0, .LBB6_4
; RV64-SLOW-NEXT: .LBB6_8: # %cond.store5
@@ -381,8 +381,8 @@ define void @mscatter_v4i16_align1(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> %m)
; RV64-SLOW-NEXT: vslidedown.vi v8, v10, 3
; RV64-SLOW-NEXT: vmv.x.s a1, v8
; RV64-SLOW-NEXT: srli a2, a0, 8
-; RV64-SLOW-NEXT: sb a2, 1(a1)
; RV64-SLOW-NEXT: sb a0, 0(a1)
+; RV64-SLOW-NEXT: sb a2, 1(a1)
; RV64-SLOW-NEXT: ret
;
; RV32-FAST-LABEL: mscatter_v4i16_align1:
@@ -418,9 +418,9 @@ define void @mscatter_v2i32_align2(<2 x i32> %val, <2 x ptr> %ptrs, <2 x i1> %m)
; RV32-SLOW-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV32-SLOW-NEXT: vmv.x.s a1, v8
; RV32-SLOW-NEXT: vmv.x.s a2, v9
+; RV32-SLOW-NEXT: srli a3, a1, 16
; RV32-SLOW-NEXT: sh a1, 0(a2)
-; RV32-SLOW-NEXT: srli a1, a1, 16
-; RV32-SLOW-NEXT: sh a1, 2(a2)
+; RV32-SLOW-NEXT: sh a3, 2(a2)
; RV32-SLOW-NEXT: andi a0, a0, 2
; RV32-SLOW-NEXT: beqz a0, .LBB7_2
; RV32-SLOW-NEXT: .LBB7_4: # %cond.store1
@@ -429,9 +429,9 @@ define void @mscatter_v2i32_align2(<2 x i32> %val, <2 x ptr> %ptrs, <2 x i1> %m)
; RV32-SLOW-NEXT: vmv.x.s a0, v8
; RV32-SLOW-NEXT: vslidedown.vi v8, v9, 1
; RV32-SLOW-NEXT: vmv.x.s a1, v8
+; RV32-SLOW-NEXT: srli a2, a0, 16
; RV32-SLOW-NEXT: sh a0, 0(a1)
-; RV32-SLOW-NEXT: srli a0, a0, 16
-; RV32-SLOW-NEXT: sh a0, 2(a1)
+; RV32-SLOW-NEXT: sh a2, 2(a1)
; RV32-SLOW-NEXT: ret
;
; RV64-SLOW-LABEL: mscatter_v2i32_align2:
@@ -450,9 +450,9 @@ define void @mscatter_v2i32_align2(<2 x i32> %val, <2 x ptr> %ptrs, <2 x i1> %m)
; RV64-SLOW-NEXT: vmv.x.s a1, v8
; RV64-SLOW-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV64-SLOW-NEXT: vmv.x.s a2, v9
+; RV64-SLOW-NEXT: srli a3, a1, 16
; RV64-SLOW-NEXT: sh a1, 0(a2)
-; RV64-SLOW-NEXT: srli a1, a1, 16
-; RV64-SLOW-NEXT: sh a1, 2(a2)
+; RV64-SLOW-NEXT: sh a3, 2(a2)
; RV64-SLOW-NEXT: andi a0, a0, 2
; RV64-SLOW-NEXT: beqz a0, .LBB7_2
; RV64-SLOW-NEXT: .LBB7_4: # %cond.store1
@@ -462,9 +462,9 @@ define void @mscatter_v2i32_align2(<2 x i32> %val, <2 x ptr> %ptrs, <2 x i1> %m)
; RV64-SLOW-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; RV64-SLOW-NEXT: vslidedown.vi v8, v9, 1
; RV64-SLOW-NEXT: vmv.x.s a1, v8
+; RV64-SLOW-NEXT: srli a2, a0, 16
; RV64-SLOW-NEXT: sh a0, 0(a1)
-; RV64-SLOW-NEXT: srli a0, a0, 16
-; RV64-SLOW-NEXT: sh a0, 2(a1)
+; RV64-SLOW-NEXT: sh a2, 2(a1)
; RV64-SLOW-NEXT: ret
;
; RV32-FAST-LABEL: mscatter_v2i32_align2:
@@ -605,18 +605,18 @@ define void @masked_store_v2i32_align2(<2 x i32> %val, ptr %a, <2 x i32> %m) nou
; SLOW-NEXT: .LBB9_3: # %cond.store
; SLOW-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; SLOW-NEXT: vmv.x.s a2, v8
+; SLOW-NEXT: srli a3, a2, 16
; SLOW-NEXT: sh a2, 0(a0)
-; SLOW-NEXT: srli a2, a2, 16
-; SLOW-NEXT: sh a2, 2(a0)
+; SLOW-NEXT: sh a3, 2(a0)
; SLOW-NEXT: andi a1, a1, 2
; SLOW-NEXT: beqz a1, .LBB9_2
; SLOW-NEXT: .LBB9_4: # %cond.store1
; SLOW-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; SLOW-NEXT: vslidedown.vi v8, v8, 1
; SLOW-NEXT: vmv.x.s a1, v8
+; SLOW-NEXT: srli a2, a1, 16
; SLOW-NEXT: sh a1, 4(a0)
-; SLOW-NEXT: srli a1, a1, 16
-; SLOW-NEXT: sh a1, 6(a0)
+; SLOW-NEXT: sh a2, 6(a0)
; SLOW-NEXT: ret
;
; FAST-LABEL: masked_store_v2i32_align2:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vaaddu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vaaddu.ll
index ea7f6beb22a7cb..da34396d72a794 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vaaddu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vaaddu.ll
@@ -185,8 +185,8 @@ define <8 x i64> @vaaddu_vx_v8i64_floor(<8 x i64> %x, i64 %y) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -428,8 +428,8 @@ define <8 x i64> @vaaddu_vx_v8i64_ceil(<8 x i64> %x, i64 %y) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll
index 6246ef7db0cb33..2f7b7b6807ce21 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll
@@ -992,8 +992,8 @@ define <2 x i64> @vadd_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroext
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1018,8 +1018,8 @@ define <2 x i64> @vadd_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %evl
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1086,8 +1086,8 @@ define <4 x i64> @vadd_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroext
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1112,8 +1112,8 @@ define <4 x i64> @vadd_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %evl
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1180,8 +1180,8 @@ define <8 x i64> @vadd_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroext
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1206,8 +1206,8 @@ define <8 x i64> @vadd_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %evl
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1274,8 +1274,8 @@ define <16 x i64> @vadd_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 zero
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1300,8 +1300,8 @@ define <16 x i64> @vadd_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext %
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vand-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vand-vp.ll
index c413dd86f37128..fb7816ce254583 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vand-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vand-vp.ll
@@ -859,8 +859,8 @@ define <2 x i64> @vand_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroext
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -885,8 +885,8 @@ define <2 x i64> @vand_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %evl
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -953,8 +953,8 @@ define <4 x i64> @vand_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroext
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -979,8 +979,8 @@ define <4 x i64> @vand_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %evl
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1047,8 +1047,8 @@ define <8 x i64> @vand_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroext
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1073,8 +1073,8 @@ define <8 x i64> @vand_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %evl
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1141,8 +1141,8 @@ define <11 x i64> @vand_vx_v11i64(<11 x i64> %va, i64 %b, <11 x i1> %m, i32 zero
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1167,8 +1167,8 @@ define <11 x i64> @vand_vx_v11i64_unmasked(<11 x i64> %va, i64 %b, i32 zeroext %
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1235,8 +1235,8 @@ define <16 x i64> @vand_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 zero
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1261,8 +1261,8 @@ define <16 x i64> @vand_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext %
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdiv-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdiv-vp.ll
index e626727ffb8b4c..d1f77bf275ab5c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdiv-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdiv-vp.ll
@@ -611,8 +611,8 @@ define <2 x i64> @vdiv_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroext
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -637,8 +637,8 @@ define <2 x i64> @vdiv_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %evl
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -685,8 +685,8 @@ define <4 x i64> @vdiv_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroext
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -711,8 +711,8 @@ define <4 x i64> @vdiv_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %evl
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -759,8 +759,8 @@ define <8 x i64> @vdiv_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroext
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -785,8 +785,8 @@ define <8 x i64> @vdiv_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %evl
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -833,8 +833,8 @@ define <16 x i64> @vdiv_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 zero
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -859,8 +859,8 @@ define <16 x i64> @vdiv_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext %
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdivu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdivu-vp.ll
index 3715449ef27f06..0219ebf503aa59 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdivu-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdivu-vp.ll
@@ -610,8 +610,8 @@ define <2 x i64> @vdivu_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroext
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -636,8 +636,8 @@ define <2 x i64> @vdivu_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %ev
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -684,8 +684,8 @@ define <4 x i64> @vdivu_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroext
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -710,8 +710,8 @@ define <4 x i64> @vdivu_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %ev
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -758,8 +758,8 @@ define <8 x i64> @vdivu_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroext
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -784,8 +784,8 @@ define <8 x i64> @vdivu_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %ev
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -832,8 +832,8 @@ define <16 x i64> @vdivu_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 zer
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -858,8 +858,8 @@ define <16 x i64> @vdivu_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmacc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmacc-vp.ll
index 4805d6782a3b93..382101cbdffb33 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmacc-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmacc-vp.ll
@@ -1375,8 +1375,8 @@ define <2 x i64> @vmacc_vx_nxv2i64(<2 x i64> %a, i64 %b, <2 x i64> %c, <2 x i1>
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1405,8 +1405,8 @@ define <2 x i64> @vmacc_vx_nxv2i64_unmasked(<2 x i64> %a, i64 %b, <2 x i64> %c,
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1448,8 +1448,8 @@ define <2 x i64> @vmacc_vx_nxv2i64_ta(<2 x i64> %a, i64 %b, <2 x i64> %c, <2 x
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1509,8 +1509,8 @@ define <4 x i64> @vmacc_vx_nxv4i64(<4 x i64> %a, i64 %b, <4 x i64> %c, <4 x i1>
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1539,8 +1539,8 @@ define <4 x i64> @vmacc_vx_nxv4i64_unmasked(<4 x i64> %a, i64 %b, <4 x i64> %c,
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1582,8 +1582,8 @@ define <4 x i64> @vmacc_vx_nxv4i64_ta(<4 x i64> %a, i64 %b, <4 x i64> %c, <4 x
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1643,8 +1643,8 @@ define <8 x i64> @vmacc_vx_nxv8i64(<8 x i64> %a, i64 %b, <8 x i64> %c, <8 x i1>
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1673,8 +1673,8 @@ define <8 x i64> @vmacc_vx_nxv8i64_unmasked(<8 x i64> %a, i64 %b, <8 x i64> %c,
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1716,8 +1716,8 @@ define <8 x i64> @vmacc_vx_nxv8i64_ta(<8 x i64> %a, i64 %b, <8 x i64> %c, <8 x
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll
index 6adc6ba9621a8f..9a3a513167d879 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll
@@ -744,8 +744,8 @@ define <2 x i64> @vmax_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroext
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -770,8 +770,8 @@ define <2 x i64> @vmax_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %evl
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -818,8 +818,8 @@ define <4 x i64> @vmax_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroext
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -844,8 +844,8 @@ define <4 x i64> @vmax_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %evl
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -892,8 +892,8 @@ define <8 x i64> @vmax_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroext
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -918,8 +918,8 @@ define <8 x i64> @vmax_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %evl
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -966,8 +966,8 @@ define <16 x i64> @vmax_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 zero
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -992,8 +992,8 @@ define <16 x i64> @vmax_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext %
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll
index baeb372c017e2e..5090cb2899eb70 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll
@@ -743,8 +743,8 @@ define <2 x i64> @vmaxu_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroext
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -769,8 +769,8 @@ define <2 x i64> @vmaxu_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %ev
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -817,8 +817,8 @@ define <4 x i64> @vmaxu_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroext
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -843,8 +843,8 @@ define <4 x i64> @vmaxu_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %ev
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -891,8 +891,8 @@ define <8 x i64> @vmaxu_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroext
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -917,8 +917,8 @@ define <8 x i64> @vmaxu_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %ev
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -965,8 +965,8 @@ define <16 x i64> @vmaxu_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 zer
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -991,8 +991,8 @@ define <16 x i64> @vmaxu_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll
index d0c21ce05c0254..a25eddc8d973fc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll
@@ -744,8 +744,8 @@ define <2 x i64> @vmin_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroext
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -770,8 +770,8 @@ define <2 x i64> @vmin_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %evl
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -818,8 +818,8 @@ define <4 x i64> @vmin_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroext
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -844,8 +844,8 @@ define <4 x i64> @vmin_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %evl
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -892,8 +892,8 @@ define <8 x i64> @vmin_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroext
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -918,8 +918,8 @@ define <8 x i64> @vmin_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %evl
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -966,8 +966,8 @@ define <16 x i64> @vmin_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 zero
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -992,8 +992,8 @@ define <16 x i64> @vmin_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext %
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll
index a730ba4729d252..70df4d000ba925 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll
@@ -743,8 +743,8 @@ define <2 x i64> @vminu_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroext
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -769,8 +769,8 @@ define <2 x i64> @vminu_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %ev
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -817,8 +817,8 @@ define <4 x i64> @vminu_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroext
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -843,8 +843,8 @@ define <4 x i64> @vminu_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %ev
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -891,8 +891,8 @@ define <8 x i64> @vminu_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroext
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -917,8 +917,8 @@ define <8 x i64> @vminu_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %ev
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -965,8 +965,8 @@ define <16 x i64> @vminu_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 zer
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -991,8 +991,8 @@ define <16 x i64> @vminu_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmul-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmul-vp.ll
index 8970fbf740d235..143ba263267fa2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmul-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmul-vp.ll
@@ -653,8 +653,8 @@ define <2 x i64> @vmul_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroext
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -679,8 +679,8 @@ define <2 x i64> @vmul_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %evl
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -727,8 +727,8 @@ define <4 x i64> @vmul_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroext
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -753,8 +753,8 @@ define <4 x i64> @vmul_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %evl
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -801,8 +801,8 @@ define <8 x i64> @vmul_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroext
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -827,8 +827,8 @@ define <8 x i64> @vmul_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %evl
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -875,8 +875,8 @@ define <16 x i64> @vmul_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 zero
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -901,8 +901,8 @@ define <16 x i64> @vmul_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext %
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vnmsac-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vnmsac-vp.ll
index 805e2e2e6bd35d..7540b493aeded0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vnmsac-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vnmsac-vp.ll
@@ -1375,8 +1375,8 @@ define <2 x i64> @vnmsac_vx_nxv2i64(<2 x i64> %a, i64 %b, <2 x i64> %c, <2 x i1
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1405,8 +1405,8 @@ define <2 x i64> @vnmsac_vx_nxv2i64_unmasked(<2 x i64> %a, i64 %b, <2 x i64> %c,
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1448,8 +1448,8 @@ define <2 x i64> @vnmsac_vx_nxv2i64_ta(<2 x i64> %a, i64 %b, <2 x i64> %c, <2 x
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1509,8 +1509,8 @@ define <4 x i64> @vnmsac_vx_nxv4i64(<4 x i64> %a, i64 %b, <4 x i64> %c, <4 x i1
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1539,8 +1539,8 @@ define <4 x i64> @vnmsac_vx_nxv4i64_unmasked(<4 x i64> %a, i64 %b, <4 x i64> %c,
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1582,8 +1582,8 @@ define <4 x i64> @vnmsac_vx_nxv4i64_ta(<4 x i64> %a, i64 %b, <4 x i64> %c, <4 x
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1643,8 +1643,8 @@ define <8 x i64> @vnmsac_vx_nxv8i64(<8 x i64> %a, i64 %b, <8 x i64> %c, <8 x i1
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1673,8 +1673,8 @@ define <8 x i64> @vnmsac_vx_nxv8i64_unmasked(<8 x i64> %a, i64 %b, <8 x i64> %c,
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1716,8 +1716,8 @@ define <8 x i64> @vnmsac_vx_nxv8i64_ta(<8 x i64> %a, i64 %b, <8 x i64> %c, <8 x
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vor-vp.ll
index 09c281b525a643..e8a10fb8864249 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vor-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vor-vp.ll
@@ -913,8 +913,8 @@ define <2 x i64> @vor_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroext %
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -939,8 +939,8 @@ define <2 x i64> @vor_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %evl)
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1007,8 +1007,8 @@ define <4 x i64> @vor_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroext %
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1033,8 +1033,8 @@ define <4 x i64> @vor_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %evl)
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1101,8 +1101,8 @@ define <8 x i64> @vor_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroext %
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1127,8 +1127,8 @@ define <8 x i64> @vor_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %evl)
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1195,8 +1195,8 @@ define <16 x i64> @vor_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 zeroe
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1221,8 +1221,8 @@ define <16 x i64> @vor_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext %e
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-splat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-splat.ll
index 2913cbdf0fffd2..a31405f75e8a4e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-splat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-splat.ll
@@ -187,8 +187,8 @@ define <1 x i64> @vp_splat_v1i64(i64 %val, <1 x i1> %m, i32 zeroext %evl) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v8, (a0), zero
@@ -209,8 +209,8 @@ define <2 x i64> @vp_splat_v2i64(i64 %val, <2 x i1> %m, i32 zeroext %evl) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v8, (a0), zero
@@ -231,8 +231,8 @@ define <4 x i64> @vp_splat_v4i64(i64 %val, <4 x i1> %m, i32 zeroext %evl) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v8, (a0), zero
@@ -253,8 +253,8 @@ define <8 x i64> @vp_splat_v8i64(i64 %val, <8 x i1> %m, i32 zeroext %evl) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v8, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
index bdf76dc63ddd85..df1c84a9e05d81 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
@@ -566,8 +566,8 @@ define <2 x i64> @vpmerge_vx_v2i64(i64 %a, <2 x i64> %vb, <2 x i1> %m, i32 zeroe
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -586,8 +586,8 @@ define <2 x i64> @vpmerge_vx_v2i64(i64 %a, <2 x i64> %vb, <2 x i1> %m, i32 zeroe
; RV32ZVFHMIN: # %bb.0:
; RV32ZVFHMIN-NEXT: addi sp, sp, -16
; RV32ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
-; RV32ZVFHMIN-NEXT: sw a1, 12(sp)
; RV32ZVFHMIN-NEXT: sw a0, 8(sp)
+; RV32ZVFHMIN-NEXT: sw a1, 12(sp)
; RV32ZVFHMIN-NEXT: addi a0, sp, 8
; RV32ZVFHMIN-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32ZVFHMIN-NEXT: vlse64.v v9, (a0), zero
@@ -635,8 +635,8 @@ define <4 x i64> @vpmerge_vx_v4i64(i64 %a, <4 x i64> %vb, <4 x i1> %m, i32 zeroe
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -655,8 +655,8 @@ define <4 x i64> @vpmerge_vx_v4i64(i64 %a, <4 x i64> %vb, <4 x i1> %m, i32 zeroe
; RV32ZVFHMIN: # %bb.0:
; RV32ZVFHMIN-NEXT: addi sp, sp, -16
; RV32ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
-; RV32ZVFHMIN-NEXT: sw a1, 12(sp)
; RV32ZVFHMIN-NEXT: sw a0, 8(sp)
+; RV32ZVFHMIN-NEXT: sw a1, 12(sp)
; RV32ZVFHMIN-NEXT: addi a0, sp, 8
; RV32ZVFHMIN-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32ZVFHMIN-NEXT: vlse64.v v10, (a0), zero
@@ -704,8 +704,8 @@ define <8 x i64> @vpmerge_vx_v8i64(i64 %a, <8 x i64> %vb, <8 x i1> %m, i32 zeroe
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -724,8 +724,8 @@ define <8 x i64> @vpmerge_vx_v8i64(i64 %a, <8 x i64> %vb, <8 x i1> %m, i32 zeroe
; RV32ZVFHMIN: # %bb.0:
; RV32ZVFHMIN-NEXT: addi sp, sp, -16
; RV32ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
-; RV32ZVFHMIN-NEXT: sw a1, 12(sp)
; RV32ZVFHMIN-NEXT: sw a0, 8(sp)
+; RV32ZVFHMIN-NEXT: sw a1, 12(sp)
; RV32ZVFHMIN-NEXT: addi a0, sp, 8
; RV32ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32ZVFHMIN-NEXT: vlse64.v v12, (a0), zero
@@ -773,8 +773,8 @@ define <16 x i64> @vpmerge_vx_v16i64(i64 %a, <16 x i64> %vb, <16 x i1> %m, i32 z
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -793,8 +793,8 @@ define <16 x i64> @vpmerge_vx_v16i64(i64 %a, <16 x i64> %vb, <16 x i1> %m, i32 z
; RV32ZVFHMIN: # %bb.0:
; RV32ZVFHMIN-NEXT: addi sp, sp, -16
; RV32ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
-; RV32ZVFHMIN-NEXT: sw a1, 12(sp)
; RV32ZVFHMIN-NEXT: sw a0, 8(sp)
+; RV32ZVFHMIN-NEXT: sw a1, 12(sp)
; RV32ZVFHMIN-NEXT: addi a0, sp, 8
; RV32ZVFHMIN-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32ZVFHMIN-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrem-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrem-vp.ll
index aa76324f3804f1..2ce6df5ce197ea 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrem-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrem-vp.ll
@@ -611,8 +611,8 @@ define <2 x i64> @vrem_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroext
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -637,8 +637,8 @@ define <2 x i64> @vrem_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %evl
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -685,8 +685,8 @@ define <4 x i64> @vrem_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroext
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -711,8 +711,8 @@ define <4 x i64> @vrem_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %evl
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -759,8 +759,8 @@ define <8 x i64> @vrem_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroext
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -785,8 +785,8 @@ define <8 x i64> @vrem_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %evl
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -833,8 +833,8 @@ define <16 x i64> @vrem_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 zero
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -859,8 +859,8 @@ define <16 x i64> @vrem_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext %
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vremu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vremu-vp.ll
index 24fa9357f91660..08ae37e13caaea 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vremu-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vremu-vp.ll
@@ -610,8 +610,8 @@ define <2 x i64> @vremu_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroext
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -636,8 +636,8 @@ define <2 x i64> @vremu_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %ev
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -684,8 +684,8 @@ define <4 x i64> @vremu_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroext
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -710,8 +710,8 @@ define <4 x i64> @vremu_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %ev
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -758,8 +758,8 @@ define <8 x i64> @vremu_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroext
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -784,8 +784,8 @@ define <8 x i64> @vremu_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %ev
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -832,8 +832,8 @@ define <16 x i64> @vremu_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 zer
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -858,8 +858,8 @@ define <16 x i64> @vremu_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrsub-vp.ll
index 563482b88e8bdb..bc2e0e5833f5cb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrsub-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrsub-vp.ll
@@ -563,8 +563,8 @@ define <2 x i64> @vrsub_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroext
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -589,8 +589,8 @@ define <2 x i64> @vrsub_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %ev
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -637,8 +637,8 @@ define <4 x i64> @vrsub_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroext
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -663,8 +663,8 @@ define <4 x i64> @vrsub_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %ev
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -711,8 +711,8 @@ define <8 x i64> @vrsub_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroext
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -737,8 +737,8 @@ define <8 x i64> @vrsub_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %ev
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -785,8 +785,8 @@ define <16 x i64> @vrsub_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 zer
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -811,8 +811,8 @@ define <16 x i64> @vrsub_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd-vp.ll
index 5030fda9dea331..056a7f44c9da2d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd-vp.ll
@@ -1005,8 +1005,8 @@ define <2 x i64> @vsadd_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroext
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1031,8 +1031,8 @@ define <2 x i64> @vsadd_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %ev
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1099,8 +1099,8 @@ define <4 x i64> @vsadd_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroext
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1125,8 +1125,8 @@ define <4 x i64> @vsadd_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %ev
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1193,8 +1193,8 @@ define <8 x i64> @vsadd_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroext
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1219,8 +1219,8 @@ define <8 x i64> @vsadd_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %ev
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1287,8 +1287,8 @@ define <16 x i64> @vsadd_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 zer
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1313,8 +1313,8 @@ define <16 x i64> @vsadd_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd.ll
index 741699289e0271..f69675b4a24b01 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd.ll
@@ -441,8 +441,8 @@ define <2 x i64> @sadd_v2i64_vx(<2 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -488,8 +488,8 @@ define <4 x i64> @sadd_v4i64_vx(<4 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -535,8 +535,8 @@ define <8 x i64> @sadd_v8i64_vx(<8 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -582,8 +582,8 @@ define <16 x i64> @sadd_v16i64_vx(<16 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu-vp.ll
index 562399ea33e7a8..b79ec290e3d3e1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu-vp.ll
@@ -1001,8 +1001,8 @@ define <2 x i64> @vsaddu_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroex
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1027,8 +1027,8 @@ define <2 x i64> @vsaddu_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %e
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1095,8 +1095,8 @@ define <4 x i64> @vsaddu_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroex
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1121,8 +1121,8 @@ define <4 x i64> @vsaddu_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %e
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1189,8 +1189,8 @@ define <8 x i64> @vsaddu_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroex
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1215,8 +1215,8 @@ define <8 x i64> @vsaddu_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %e
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1283,8 +1283,8 @@ define <16 x i64> @vsaddu_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 ze
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1309,8 +1309,8 @@ define <16 x i64> @vsaddu_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu.ll
index 7b2cab294aa496..7397147a7580c1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu.ll
@@ -441,8 +441,8 @@ define <2 x i64> @uadd_v2i64_vx(<2 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -488,8 +488,8 @@ define <4 x i64> @uadd_v4i64_vx(<4 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -535,8 +535,8 @@ define <8 x i64> @uadd_v8i64_vx(<8 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -582,8 +582,8 @@ define <16 x i64> @uadd_v16i64_vx(<16 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll
index 549c6ca11e320e..c989736378479e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll
@@ -1037,8 +1037,8 @@ define <2 x i64> @vssub_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroext
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1063,8 +1063,8 @@ define <2 x i64> @vssub_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %ev
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1133,8 +1133,8 @@ define <4 x i64> @vssub_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroext
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1159,8 +1159,8 @@ define <4 x i64> @vssub_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %ev
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1229,8 +1229,8 @@ define <8 x i64> @vssub_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroext
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1255,8 +1255,8 @@ define <8 x i64> @vssub_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %ev
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1325,8 +1325,8 @@ define <16 x i64> @vssub_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 zer
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1351,8 +1351,8 @@ define <16 x i64> @vssub_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub.ll
index efe28eb9021ce2..e1746a31847da5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub.ll
@@ -441,8 +441,8 @@ define <2 x i64> @ssub_v2i64_vx(<2 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -489,8 +489,8 @@ define <4 x i64> @ssub_v4i64_vx(<4 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -537,8 +537,8 @@ define <8 x i64> @ssub_v8i64_vx(<8 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -585,8 +585,8 @@ define <16 x i64> @ssub_v16i64_vx(<16 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu-vp.ll
index 683f1150310b39..1fc2a18631001b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu-vp.ll
@@ -1032,8 +1032,8 @@ define <2 x i64> @vssubu_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroex
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1058,8 +1058,8 @@ define <2 x i64> @vssubu_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %e
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1128,8 +1128,8 @@ define <4 x i64> @vssubu_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroex
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1154,8 +1154,8 @@ define <4 x i64> @vssubu_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %e
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1224,8 +1224,8 @@ define <8 x i64> @vssubu_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroex
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1250,8 +1250,8 @@ define <8 x i64> @vssubu_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %e
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1320,8 +1320,8 @@ define <16 x i64> @vssubu_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 ze
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1346,8 +1346,8 @@ define <16 x i64> @vssubu_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu.ll
index dc9279f6e7fa09..28c29d2960b268 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu.ll
@@ -441,8 +441,8 @@ define <2 x i64> @usub_v2i64_vx(<2 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -489,8 +489,8 @@ define <4 x i64> @usub_v4i64_vx(<4 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -537,8 +537,8 @@ define <8 x i64> @usub_v8i64_vx(<8 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -585,8 +585,8 @@ define <16 x i64> @usub_v16i64_vx(<16 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsub-vp.ll
index 6052c9ee20fe10..7f1ad26bcda7a3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsub-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsub-vp.ll
@@ -641,8 +641,8 @@ define <2 x i64> @vsub_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroext
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -667,8 +667,8 @@ define <2 x i64> @vsub_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %evl
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -715,8 +715,8 @@ define <4 x i64> @vsub_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroext
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -741,8 +741,8 @@ define <4 x i64> @vsub_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %evl
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -789,8 +789,8 @@ define <8 x i64> @vsub_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroext
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -815,8 +815,8 @@ define <8 x i64> @vsub_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %evl
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -863,8 +863,8 @@ define <16 x i64> @vsub_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 zero
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -889,8 +889,8 @@ define <16 x i64> @vsub_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext %
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd.ll
index 3a222e95566a48..50184796b38f53 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd.ll
@@ -824,12 +824,12 @@ define <2 x i64> @vwadd_vx_v2i64_i64(ptr %x, ptr %y) nounwind {
; RV32-LABEL: vwadd_vx_v2i64_i64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: lw a2, 4(a1)
-; RV32-NEXT: lw a1, 0(a1)
+; RV32-NEXT: lw a2, 0(a1)
+; RV32-NEXT: lw a1, 4(a1)
; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT: vle32.v v9, (a0)
-; RV32-NEXT: sw a2, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw a2, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vlse64.v v8, (a0), zero
; RV32-NEXT: vwadd.wv v8, v8, v9
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll
index 1fc6af2d4cc1c6..98f246b8741dcc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll
@@ -773,8 +773,8 @@ define <2 x i64> @vwaddu_vx_v2i64_i8(ptr %x, ptr %y) nounwind {
; RV32-NEXT: lbu a1, 0(a1)
; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT: vle32.v v9, (a0)
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vlse64.v v8, (a0), zero
; RV32-NEXT: vwaddu.wv v8, v8, v9
@@ -805,8 +805,8 @@ define <2 x i64> @vwaddu_vx_v2i64_i16(ptr %x, ptr %y) nounwind {
; RV32-NEXT: lhu a1, 0(a1)
; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT: vle32.v v9, (a0)
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vlse64.v v8, (a0), zero
; RV32-NEXT: vwaddu.wv v8, v8, v9
@@ -837,8 +837,8 @@ define <2 x i64> @vwaddu_vx_v2i64_i32(ptr %x, ptr %y) nounwind {
; RV32-NEXT: lw a1, 0(a1)
; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT: vle32.v v9, (a0)
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vlse64.v v8, (a0), zero
; RV32-NEXT: vwaddu.wv v8, v8, v9
@@ -866,12 +866,12 @@ define <2 x i64> @vwaddu_vx_v2i64_i64(ptr %x, ptr %y) nounwind {
; RV32-LABEL: vwaddu_vx_v2i64_i64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: lw a2, 4(a1)
-; RV32-NEXT: lw a1, 0(a1)
+; RV32-NEXT: lw a2, 0(a1)
+; RV32-NEXT: lw a1, 4(a1)
; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT: vle32.v v9, (a0)
-; RV32-NEXT: sw a2, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw a2, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vlse64.v v8, (a0), zero
; RV32-NEXT: vwaddu.wv v8, v8, v9
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll
index 97c7f101c25827..01f2fe506e85f7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll
@@ -853,12 +853,12 @@ define <2 x i64> @vwmul_vx_v2i64_i64(ptr %x, ptr %y) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: lw a2, 4(a1)
-; RV32-NEXT: lw a1, 0(a1)
+; RV32-NEXT: lw a2, 0(a1)
+; RV32-NEXT: lw a1, 4(a1)
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vle32.v v8, (a0)
-; RV32-NEXT: sw a2, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw a2, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vlse64.v v9, (a0), zero
; RV32-NEXT: vsext.vf2 v10, v8
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulsu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulsu.ll
index c73b3a0dce6be4..db2f544ab30669 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulsu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulsu.ll
@@ -794,8 +794,8 @@ define <2 x i64> @vwmulsu_vx_v2i64_i8(ptr %x, ptr %y) {
; RV32-NEXT: lbu a1, 0(a1)
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vle32.v v8, (a0)
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vlse64.v v9, (a0), zero
; RV32-NEXT: vsext.vf2 v10, v8
@@ -828,8 +828,8 @@ define <2 x i64> @vwmulsu_vx_v2i64_i16(ptr %x, ptr %y) {
; RV32-NEXT: lhu a1, 0(a1)
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vle32.v v8, (a0)
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vlse64.v v9, (a0), zero
; RV32-NEXT: vsext.vf2 v10, v8
@@ -862,8 +862,8 @@ define <2 x i64> @vwmulsu_vx_v2i64_i32(ptr %x, ptr %y) {
; RV32-NEXT: lw a1, 0(a1)
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vle32.v v8, (a0)
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vlse64.v v9, (a0), zero
; RV32-NEXT: vsext.vf2 v10, v8
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub.ll
index d632dc4c2a30d2..7a925165d98163 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub.ll
@@ -830,12 +830,12 @@ define <2 x i64> @vwsub_vx_v2i64_i64(ptr %x, ptr %y) nounwind {
; RV32-LABEL: vwsub_vx_v2i64_i64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: lw a2, 4(a1)
-; RV32-NEXT: lw a1, 0(a1)
+; RV32-NEXT: lw a2, 0(a1)
+; RV32-NEXT: lw a1, 4(a1)
; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT: vle32.v v9, (a0)
-; RV32-NEXT: sw a2, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw a2, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vlse64.v v8, (a0), zero
; RV32-NEXT: vwsub.wv v8, v8, v9
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll
index 9b5f4a5012f4ed..4c08a8c15a388e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll
@@ -776,8 +776,8 @@ define <2 x i64> @vwsubu_vx_v2i64_i8(ptr %x, ptr %y) nounwind {
; RV32-NEXT: lbu a1, 0(a1)
; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT: vle32.v v9, (a0)
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vlse64.v v8, (a0), zero
; RV32-NEXT: vwsubu.wv v8, v8, v9
@@ -809,8 +809,8 @@ define <2 x i64> @vwsubu_vx_v2i64_i16(ptr %x, ptr %y) nounwind {
; RV32-NEXT: lhu a1, 0(a1)
; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT: vle32.v v9, (a0)
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vlse64.v v8, (a0), zero
; RV32-NEXT: vwsubu.wv v8, v8, v9
@@ -842,8 +842,8 @@ define <2 x i64> @vwsubu_vx_v2i64_i32(ptr %x, ptr %y) nounwind {
; RV32-NEXT: lw a1, 0(a1)
; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT: vle32.v v9, (a0)
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vlse64.v v8, (a0), zero
; RV32-NEXT: vwsubu.wv v8, v8, v9
@@ -872,12 +872,12 @@ define <2 x i64> @vwsubu_vx_v2i64_i64(ptr %x, ptr %y) nounwind {
; RV32-LABEL: vwsubu_vx_v2i64_i64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: lw a2, 4(a1)
-; RV32-NEXT: lw a1, 0(a1)
+; RV32-NEXT: lw a2, 0(a1)
+; RV32-NEXT: lw a1, 4(a1)
; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT: vle32.v v9, (a0)
-; RV32-NEXT: sw a2, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw a2, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vlse64.v v8, (a0), zero
; RV32-NEXT: vwsubu.wv v8, v8, v9
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vxor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vxor-vp.ll
index 16487a0784125e..677b4ea6e91eca 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vxor-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vxor-vp.ll
@@ -1173,8 +1173,8 @@ define <2 x i64> @vxor_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroext
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1199,8 +1199,8 @@ define <2 x i64> @vxor_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %evl
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1287,8 +1287,8 @@ define <4 x i64> @vxor_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroext
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1313,8 +1313,8 @@ define <4 x i64> @vxor_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %evl
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1401,8 +1401,8 @@ define <8 x i64> @vxor_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroext
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1427,8 +1427,8 @@ define <8 x i64> @vxor_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %evl
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1515,8 +1515,8 @@ define <16 x i64> @vxor_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 zero
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1541,8 +1541,8 @@ define <16 x i64> @vxor_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext %
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
index 5a880105f68379..5a1f7f54305846 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
@@ -154,10 +154,10 @@ define <4 x i32> @stest_f32i32(<4 x float> %x) {
; CHECK-NOV-NEXT: .LBB3_8: # %entry
; CHECK-NOV-NEXT: lui a1, 524288
; CHECK-NOV-NEXT: .LBB3_9: # %entry
-; CHECK-NOV-NEXT: sw a1, 12(a0)
-; CHECK-NOV-NEXT: sw a2, 8(a0)
-; CHECK-NOV-NEXT: sw a4, 4(a0)
; CHECK-NOV-NEXT: sw a5, 0(a0)
+; CHECK-NOV-NEXT: sw a4, 4(a0)
+; CHECK-NOV-NEXT: sw a2, 8(a0)
+; CHECK-NOV-NEXT: sw a1, 12(a0)
; CHECK-NOV-NEXT: ret
; CHECK-NOV-NEXT: .LBB3_10: # %entry
; CHECK-NOV-NEXT: mv a1, a6
@@ -219,10 +219,10 @@ define <4 x i32> @utest_f32i32(<4 x float> %x) {
; CHECK-NOV-NEXT: .LBB4_4: # %entry
; CHECK-NOV-NEXT: mv a5, a3
; CHECK-NOV-NEXT: .LBB4_5: # %entry
-; CHECK-NOV-NEXT: sw a5, 12(a0)
-; CHECK-NOV-NEXT: sw a4, 8(a0)
-; CHECK-NOV-NEXT: sw a2, 4(a0)
; CHECK-NOV-NEXT: sw a1, 0(a0)
+; CHECK-NOV-NEXT: sw a2, 4(a0)
+; CHECK-NOV-NEXT: sw a4, 8(a0)
+; CHECK-NOV-NEXT: sw a5, 12(a0)
; CHECK-NOV-NEXT: ret
; CHECK-NOV-NEXT: .LBB4_6: # %entry
; CHECK-NOV-NEXT: mv a1, a3
@@ -282,10 +282,10 @@ define <4 x i32> @ustest_f32i32(<4 x float> %x) {
; CHECK-NOV-NEXT: and a2, a6, a2
; CHECK-NOV-NEXT: negw a4, a4
; CHECK-NOV-NEXT: and a1, a4, a1
-; CHECK-NOV-NEXT: sw a1, 12(a0)
-; CHECK-NOV-NEXT: sw a2, 8(a0)
-; CHECK-NOV-NEXT: sw a3, 4(a0)
; CHECK-NOV-NEXT: sw a5, 0(a0)
+; CHECK-NOV-NEXT: sw a3, 4(a0)
+; CHECK-NOV-NEXT: sw a2, 8(a0)
+; CHECK-NOV-NEXT: sw a1, 12(a0)
; CHECK-NOV-NEXT: ret
; CHECK-NOV-NEXT: .LBB5_6: # %entry
; CHECK-NOV-NEXT: mv a1, a4
@@ -380,10 +380,10 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) {
; CHECK-NOV-NEXT: .LBB6_8: # %entry
; CHECK-NOV-NEXT: lui a0, 524288
; CHECK-NOV-NEXT: .LBB6_9: # %entry
-; CHECK-NOV-NEXT: sw a0, 12(s0)
-; CHECK-NOV-NEXT: sw s1, 8(s0)
-; CHECK-NOV-NEXT: sw a2, 4(s0)
; CHECK-NOV-NEXT: sw a3, 0(s0)
+; CHECK-NOV-NEXT: sw a2, 4(s0)
+; CHECK-NOV-NEXT: sw s1, 8(s0)
+; CHECK-NOV-NEXT: sw a0, 12(s0)
; CHECK-NOV-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
@@ -555,10 +555,10 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) {
; CHECK-NOV-NEXT: .LBB7_4: # %entry
; CHECK-NOV-NEXT: mv a3, a1
; CHECK-NOV-NEXT: .LBB7_5: # %entry
-; CHECK-NOV-NEXT: sw a3, 12(s0)
-; CHECK-NOV-NEXT: sw a2, 8(s0)
-; CHECK-NOV-NEXT: sw s1, 4(s0)
; CHECK-NOV-NEXT: sw a0, 0(s0)
+; CHECK-NOV-NEXT: sw s1, 4(s0)
+; CHECK-NOV-NEXT: sw a2, 8(s0)
+; CHECK-NOV-NEXT: sw a3, 12(s0)
; CHECK-NOV-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
@@ -728,10 +728,10 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) {
; CHECK-NOV-NEXT: and a4, a4, s1
; CHECK-NOV-NEXT: negw a2, a2
; CHECK-NOV-NEXT: and a0, a2, a0
-; CHECK-NOV-NEXT: sw a0, 12(s0)
-; CHECK-NOV-NEXT: sw a4, 8(s0)
-; CHECK-NOV-NEXT: sw a1, 4(s0)
; CHECK-NOV-NEXT: sw a3, 0(s0)
+; CHECK-NOV-NEXT: sw a1, 4(s0)
+; CHECK-NOV-NEXT: sw a4, 8(s0)
+; CHECK-NOV-NEXT: sw a0, 12(s0)
; CHECK-NOV-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
@@ -992,10 +992,10 @@ define <4 x i16> @stest_f32i16(<4 x float> %x) {
; CHECK-NOV-NEXT: .LBB12_8: # %entry
; CHECK-NOV-NEXT: lui a1, 1048568
; CHECK-NOV-NEXT: .LBB12_9: # %entry
-; CHECK-NOV-NEXT: sh a1, 6(a0)
-; CHECK-NOV-NEXT: sh a2, 4(a0)
-; CHECK-NOV-NEXT: sh a3, 2(a0)
; CHECK-NOV-NEXT: sh a4, 0(a0)
+; CHECK-NOV-NEXT: sh a3, 2(a0)
+; CHECK-NOV-NEXT: sh a2, 4(a0)
+; CHECK-NOV-NEXT: sh a1, 6(a0)
; CHECK-NOV-NEXT: ret
; CHECK-NOV-NEXT: .LBB12_10: # %entry
; CHECK-NOV-NEXT: mv a1, a5
@@ -1059,10 +1059,10 @@ define <4 x i16> @utest_f32i16(<4 x float> %x) {
; CHECK-NOV-NEXT: .LBB13_4: # %entry
; CHECK-NOV-NEXT: mv a5, a3
; CHECK-NOV-NEXT: .LBB13_5: # %entry
-; CHECK-NOV-NEXT: sh a5, 6(a0)
-; CHECK-NOV-NEXT: sh a4, 4(a0)
-; CHECK-NOV-NEXT: sh a2, 2(a0)
; CHECK-NOV-NEXT: sh a1, 0(a0)
+; CHECK-NOV-NEXT: sh a2, 2(a0)
+; CHECK-NOV-NEXT: sh a4, 4(a0)
+; CHECK-NOV-NEXT: sh a5, 6(a0)
; CHECK-NOV-NEXT: ret
; CHECK-NOV-NEXT: .LBB13_6: # %entry
; CHECK-NOV-NEXT: mv a1, a3
@@ -1123,10 +1123,10 @@ define <4 x i16> @ustest_f32i16(<4 x float> %x) {
; CHECK-NOV-NEXT: and a2, a6, a2
; CHECK-NOV-NEXT: negw a4, a4
; CHECK-NOV-NEXT: and a1, a4, a1
-; CHECK-NOV-NEXT: sh a1, 6(a0)
-; CHECK-NOV-NEXT: sh a2, 4(a0)
-; CHECK-NOV-NEXT: sh a3, 2(a0)
; CHECK-NOV-NEXT: sh a5, 0(a0)
+; CHECK-NOV-NEXT: sh a3, 2(a0)
+; CHECK-NOV-NEXT: sh a2, 4(a0)
+; CHECK-NOV-NEXT: sh a1, 6(a0)
; CHECK-NOV-NEXT: ret
; CHECK-NOV-NEXT: .LBB14_6: # %entry
; CHECK-NOV-NEXT: mv a1, a4
@@ -1237,14 +1237,14 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) {
; CHECK-NOV-NEXT: fcvt.l.s a1, fs5, rtz
; CHECK-NOV-NEXT: bge s1, a7, .LBB15_19
; CHECK-NOV-NEXT: .LBB15_2: # %entry
-; CHECK-NOV-NEXT: fcvt.l.s a2, fs4, rtz
+; CHECK-NOV-NEXT: fcvt.l.s a3, fs4, rtz
; CHECK-NOV-NEXT: bge a1, a7, .LBB15_20
; CHECK-NOV-NEXT: .LBB15_3: # %entry
-; CHECK-NOV-NEXT: fcvt.l.s a3, fs3, rtz
-; CHECK-NOV-NEXT: bge a2, a7, .LBB15_21
+; CHECK-NOV-NEXT: fcvt.l.s a2, fs3, rtz
+; CHECK-NOV-NEXT: bge a3, a7, .LBB15_21
; CHECK-NOV-NEXT: .LBB15_4: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a4, fs2, rtz
-; CHECK-NOV-NEXT: bge a3, a7, .LBB15_22
+; CHECK-NOV-NEXT: bge a2, a7, .LBB15_22
; CHECK-NOV-NEXT: .LBB15_5: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a5, fs1, rtz
; CHECK-NOV-NEXT: bge a4, a7, .LBB15_23
@@ -1261,9 +1261,9 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) {
; CHECK-NOV-NEXT: .LBB15_10: # %entry
; CHECK-NOV-NEXT: bge a7, a4, .LBB15_28
; CHECK-NOV-NEXT: .LBB15_11: # %entry
-; CHECK-NOV-NEXT: bge a7, a3, .LBB15_29
+; CHECK-NOV-NEXT: bge a7, a2, .LBB15_29
; CHECK-NOV-NEXT: .LBB15_12: # %entry
-; CHECK-NOV-NEXT: bge a7, a2, .LBB15_30
+; CHECK-NOV-NEXT: bge a7, a3, .LBB15_30
; CHECK-NOV-NEXT: .LBB15_13: # %entry
; CHECK-NOV-NEXT: bge a7, a1, .LBB15_31
; CHECK-NOV-NEXT: .LBB15_14: # %entry
@@ -1273,14 +1273,14 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) {
; CHECK-NOV-NEXT: .LBB15_16: # %entry
; CHECK-NOV-NEXT: lui a0, 1048568
; CHECK-NOV-NEXT: .LBB15_17: # %entry
-; CHECK-NOV-NEXT: sh a0, 14(s0)
-; CHECK-NOV-NEXT: sh s1, 12(s0)
+; CHECK-NOV-NEXT: sh a3, 8(s0)
; CHECK-NOV-NEXT: sh a1, 10(s0)
-; CHECK-NOV-NEXT: sh a2, 8(s0)
-; CHECK-NOV-NEXT: sh a3, 6(s0)
-; CHECK-NOV-NEXT: sh a4, 4(s0)
-; CHECK-NOV-NEXT: sh a5, 2(s0)
+; CHECK-NOV-NEXT: sh s1, 12(s0)
+; CHECK-NOV-NEXT: sh a0, 14(s0)
; CHECK-NOV-NEXT: sh a6, 0(s0)
+; CHECK-NOV-NEXT: sh a5, 2(s0)
+; CHECK-NOV-NEXT: sh a4, 4(s0)
+; CHECK-NOV-NEXT: sh a2, 6(s0)
; CHECK-NOV-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s0, 112(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s1, 104(sp) # 8-byte Folded Reload
@@ -1305,18 +1305,18 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) {
; CHECK-NOV-NEXT: blt s1, a7, .LBB15_2
; CHECK-NOV-NEXT: .LBB15_19: # %entry
; CHECK-NOV-NEXT: mv s1, a7
-; CHECK-NOV-NEXT: fcvt.l.s a2, fs4, rtz
+; CHECK-NOV-NEXT: fcvt.l.s a3, fs4, rtz
; CHECK-NOV-NEXT: blt a1, a7, .LBB15_3
; CHECK-NOV-NEXT: .LBB15_20: # %entry
; CHECK-NOV-NEXT: mv a1, a7
-; CHECK-NOV-NEXT: fcvt.l.s a3, fs3, rtz
-; CHECK-NOV-NEXT: blt a2, a7, .LBB15_4
+; CHECK-NOV-NEXT: fcvt.l.s a2, fs3, rtz
+; CHECK-NOV-NEXT: blt a3, a7, .LBB15_4
; CHECK-NOV-NEXT: .LBB15_21: # %entry
-; CHECK-NOV-NEXT: mv a2, a7
+; CHECK-NOV-NEXT: mv a3, a7
; CHECK-NOV-NEXT: fcvt.l.s a4, fs2, rtz
-; CHECK-NOV-NEXT: blt a3, a7, .LBB15_5
+; CHECK-NOV-NEXT: blt a2, a7, .LBB15_5
; CHECK-NOV-NEXT: .LBB15_22: # %entry
-; CHECK-NOV-NEXT: mv a3, a7
+; CHECK-NOV-NEXT: mv a2, a7
; CHECK-NOV-NEXT: fcvt.l.s a5, fs1, rtz
; CHECK-NOV-NEXT: blt a4, a7, .LBB15_6
; CHECK-NOV-NEXT: .LBB15_23: # %entry
@@ -1338,12 +1338,12 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) {
; CHECK-NOV-NEXT: blt a7, a4, .LBB15_11
; CHECK-NOV-NEXT: .LBB15_28: # %entry
; CHECK-NOV-NEXT: lui a4, 1048568
-; CHECK-NOV-NEXT: blt a7, a3, .LBB15_12
+; CHECK-NOV-NEXT: blt a7, a2, .LBB15_12
; CHECK-NOV-NEXT: .LBB15_29: # %entry
-; CHECK-NOV-NEXT: lui a3, 1048568
-; CHECK-NOV-NEXT: blt a7, a2, .LBB15_13
-; CHECK-NOV-NEXT: .LBB15_30: # %entry
; CHECK-NOV-NEXT: lui a2, 1048568
+; CHECK-NOV-NEXT: blt a7, a3, .LBB15_13
+; CHECK-NOV-NEXT: .LBB15_30: # %entry
+; CHECK-NOV-NEXT: lui a3, 1048568
; CHECK-NOV-NEXT: blt a7, a1, .LBB15_14
; CHECK-NOV-NEXT: .LBB15_31: # %entry
; CHECK-NOV-NEXT: lui a1, 1048568
@@ -1585,40 +1585,40 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) {
; CHECK-NOV-NEXT: fcvt.lu.s s1, fs6, rtz
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fcvt.lu.s a0, fa0, rtz
-; CHECK-NOV-NEXT: lui a1, 16
-; CHECK-NOV-NEXT: addiw a1, a1, -1
-; CHECK-NOV-NEXT: bgeu a0, a1, .LBB16_10
+; CHECK-NOV-NEXT: lui a3, 16
+; CHECK-NOV-NEXT: addiw a3, a3, -1
+; CHECK-NOV-NEXT: bgeu a0, a3, .LBB16_10
; CHECK-NOV-NEXT: # %bb.1: # %entry
-; CHECK-NOV-NEXT: fcvt.lu.s a2, fs5, rtz
-; CHECK-NOV-NEXT: bgeu s1, a1, .LBB16_11
+; CHECK-NOV-NEXT: fcvt.lu.s a1, fs5, rtz
+; CHECK-NOV-NEXT: bgeu s1, a3, .LBB16_11
; CHECK-NOV-NEXT: .LBB16_2: # %entry
-; CHECK-NOV-NEXT: fcvt.lu.s a3, fs4, rtz
-; CHECK-NOV-NEXT: bgeu a2, a1, .LBB16_12
+; CHECK-NOV-NEXT: fcvt.lu.s a2, fs4, rtz
+; CHECK-NOV-NEXT: bgeu a1, a3, .LBB16_12
; CHECK-NOV-NEXT: .LBB16_3: # %entry
; CHECK-NOV-NEXT: fcvt.lu.s a4, fs3, rtz
-; CHECK-NOV-NEXT: bgeu a3, a1, .LBB16_13
+; CHECK-NOV-NEXT: bgeu a2, a3, .LBB16_13
; CHECK-NOV-NEXT: .LBB16_4: # %entry
; CHECK-NOV-NEXT: fcvt.lu.s a5, fs2, rtz
-; CHECK-NOV-NEXT: bgeu a4, a1, .LBB16_14
+; CHECK-NOV-NEXT: bgeu a4, a3, .LBB16_14
; CHECK-NOV-NEXT: .LBB16_5: # %entry
; CHECK-NOV-NEXT: fcvt.lu.s a6, fs1, rtz
-; CHECK-NOV-NEXT: bgeu a5, a1, .LBB16_15
+; CHECK-NOV-NEXT: bgeu a5, a3, .LBB16_15
; CHECK-NOV-NEXT: .LBB16_6: # %entry
; CHECK-NOV-NEXT: fcvt.lu.s a7, fs0, rtz
-; CHECK-NOV-NEXT: bgeu a6, a1, .LBB16_16
+; CHECK-NOV-NEXT: bgeu a6, a3, .LBB16_16
; CHECK-NOV-NEXT: .LBB16_7: # %entry
-; CHECK-NOV-NEXT: bltu a7, a1, .LBB16_9
+; CHECK-NOV-NEXT: bltu a7, a3, .LBB16_9
; CHECK-NOV-NEXT: .LBB16_8: # %entry
-; CHECK-NOV-NEXT: mv a7, a1
+; CHECK-NOV-NEXT: mv a7, a3
; CHECK-NOV-NEXT: .LBB16_9: # %entry
-; CHECK-NOV-NEXT: sh a7, 14(s0)
-; CHECK-NOV-NEXT: sh a6, 12(s0)
-; CHECK-NOV-NEXT: sh a5, 10(s0)
; CHECK-NOV-NEXT: sh a4, 8(s0)
-; CHECK-NOV-NEXT: sh a3, 6(s0)
-; CHECK-NOV-NEXT: sh a2, 4(s0)
-; CHECK-NOV-NEXT: sh s1, 2(s0)
+; CHECK-NOV-NEXT: sh a5, 10(s0)
+; CHECK-NOV-NEXT: sh a6, 12(s0)
+; CHECK-NOV-NEXT: sh a7, 14(s0)
; CHECK-NOV-NEXT: sh a0, 0(s0)
+; CHECK-NOV-NEXT: sh s1, 2(s0)
+; CHECK-NOV-NEXT: sh a1, 4(s0)
+; CHECK-NOV-NEXT: sh a2, 6(s0)
; CHECK-NOV-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s0, 112(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s1, 104(sp) # 8-byte Folded Reload
@@ -1638,32 +1638,32 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) {
; CHECK-NOV-NEXT: addi sp, sp, 128
; CHECK-NOV-NEXT: ret
; CHECK-NOV-NEXT: .LBB16_10: # %entry
-; CHECK-NOV-NEXT: mv a0, a1
-; CHECK-NOV-NEXT: fcvt.lu.s a2, fs5, rtz
-; CHECK-NOV-NEXT: bltu s1, a1, .LBB16_2
+; CHECK-NOV-NEXT: mv a0, a3
+; CHECK-NOV-NEXT: fcvt.lu.s a1, fs5, rtz
+; CHECK-NOV-NEXT: bltu s1, a3, .LBB16_2
; CHECK-NOV-NEXT: .LBB16_11: # %entry
-; CHECK-NOV-NEXT: mv s1, a1
-; CHECK-NOV-NEXT: fcvt.lu.s a3, fs4, rtz
-; CHECK-NOV-NEXT: bltu a2, a1, .LBB16_3
+; CHECK-NOV-NEXT: mv s1, a3
+; CHECK-NOV-NEXT: fcvt.lu.s a2, fs4, rtz
+; CHECK-NOV-NEXT: bltu a1, a3, .LBB16_3
; CHECK-NOV-NEXT: .LBB16_12: # %entry
-; CHECK-NOV-NEXT: mv a2, a1
+; CHECK-NOV-NEXT: mv a1, a3
; CHECK-NOV-NEXT: fcvt.lu.s a4, fs3, rtz
-; CHECK-NOV-NEXT: bltu a3, a1, .LBB16_4
+; CHECK-NOV-NEXT: bltu a2, a3, .LBB16_4
; CHECK-NOV-NEXT: .LBB16_13: # %entry
-; CHECK-NOV-NEXT: mv a3, a1
+; CHECK-NOV-NEXT: mv a2, a3
; CHECK-NOV-NEXT: fcvt.lu.s a5, fs2, rtz
-; CHECK-NOV-NEXT: bltu a4, a1, .LBB16_5
+; CHECK-NOV-NEXT: bltu a4, a3, .LBB16_5
; CHECK-NOV-NEXT: .LBB16_14: # %entry
-; CHECK-NOV-NEXT: mv a4, a1
+; CHECK-NOV-NEXT: mv a4, a3
; CHECK-NOV-NEXT: fcvt.lu.s a6, fs1, rtz
-; CHECK-NOV-NEXT: bltu a5, a1, .LBB16_6
+; CHECK-NOV-NEXT: bltu a5, a3, .LBB16_6
; CHECK-NOV-NEXT: .LBB16_15: # %entry
-; CHECK-NOV-NEXT: mv a5, a1
+; CHECK-NOV-NEXT: mv a5, a3
; CHECK-NOV-NEXT: fcvt.lu.s a7, fs0, rtz
-; CHECK-NOV-NEXT: bltu a6, a1, .LBB16_7
+; CHECK-NOV-NEXT: bltu a6, a3, .LBB16_7
; CHECK-NOV-NEXT: .LBB16_16: # %entry
-; CHECK-NOV-NEXT: mv a6, a1
-; CHECK-NOV-NEXT: bgeu a7, a1, .LBB16_8
+; CHECK-NOV-NEXT: mv a6, a3
+; CHECK-NOV-NEXT: bgeu a7, a3, .LBB16_8
; CHECK-NOV-NEXT: j .LBB16_9
;
; CHECK-V-LABEL: utesth_f16i16:
@@ -1946,14 +1946,14 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) {
; CHECK-NOV-NEXT: and t0, t0, s1
; CHECK-NOV-NEXT: negw a3, a3
; CHECK-NOV-NEXT: and a0, a3, a0
-; CHECK-NOV-NEXT: sh a0, 14(s0)
-; CHECK-NOV-NEXT: sh t0, 12(s0)
-; CHECK-NOV-NEXT: sh a1, 10(s0)
; CHECK-NOV-NEXT: sh a2, 8(s0)
-; CHECK-NOV-NEXT: sh a4, 6(s0)
-; CHECK-NOV-NEXT: sh a5, 4(s0)
-; CHECK-NOV-NEXT: sh a6, 2(s0)
+; CHECK-NOV-NEXT: sh a1, 10(s0)
+; CHECK-NOV-NEXT: sh t0, 12(s0)
+; CHECK-NOV-NEXT: sh a0, 14(s0)
; CHECK-NOV-NEXT: sh a7, 0(s0)
+; CHECK-NOV-NEXT: sh a6, 2(s0)
+; CHECK-NOV-NEXT: sh a5, 4(s0)
+; CHECK-NOV-NEXT: sh a4, 6(s0)
; CHECK-NOV-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s0, 112(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s1, 104(sp) # 8-byte Folded Reload
@@ -3488,10 +3488,10 @@ define <4 x i32> @stest_f32i32_mm(<4 x float> %x) {
; CHECK-NOV-NEXT: .LBB30_8: # %entry
; CHECK-NOV-NEXT: lui a1, 524288
; CHECK-NOV-NEXT: .LBB30_9: # %entry
-; CHECK-NOV-NEXT: sw a1, 12(a0)
-; CHECK-NOV-NEXT: sw a2, 8(a0)
-; CHECK-NOV-NEXT: sw a4, 4(a0)
; CHECK-NOV-NEXT: sw a5, 0(a0)
+; CHECK-NOV-NEXT: sw a4, 4(a0)
+; CHECK-NOV-NEXT: sw a2, 8(a0)
+; CHECK-NOV-NEXT: sw a1, 12(a0)
; CHECK-NOV-NEXT: ret
; CHECK-NOV-NEXT: .LBB30_10: # %entry
; CHECK-NOV-NEXT: mv a1, a6
@@ -3551,10 +3551,10 @@ define <4 x i32> @utest_f32i32_mm(<4 x float> %x) {
; CHECK-NOV-NEXT: .LBB31_4: # %entry
; CHECK-NOV-NEXT: mv a5, a3
; CHECK-NOV-NEXT: .LBB31_5: # %entry
-; CHECK-NOV-NEXT: sw a5, 12(a0)
-; CHECK-NOV-NEXT: sw a4, 8(a0)
-; CHECK-NOV-NEXT: sw a2, 4(a0)
; CHECK-NOV-NEXT: sw a1, 0(a0)
+; CHECK-NOV-NEXT: sw a2, 4(a0)
+; CHECK-NOV-NEXT: sw a4, 8(a0)
+; CHECK-NOV-NEXT: sw a5, 12(a0)
; CHECK-NOV-NEXT: ret
; CHECK-NOV-NEXT: .LBB31_6: # %entry
; CHECK-NOV-NEXT: mv a1, a3
@@ -3613,10 +3613,10 @@ define <4 x i32> @ustest_f32i32_mm(<4 x float> %x) {
; CHECK-NOV-NEXT: sgtz a5, a1
; CHECK-NOV-NEXT: negw a5, a5
; CHECK-NOV-NEXT: and a1, a5, a1
-; CHECK-NOV-NEXT: sw a1, 12(a0)
-; CHECK-NOV-NEXT: sw a2, 8(a0)
-; CHECK-NOV-NEXT: sw a4, 4(a0)
; CHECK-NOV-NEXT: sw a3, 0(a0)
+; CHECK-NOV-NEXT: sw a4, 4(a0)
+; CHECK-NOV-NEXT: sw a2, 8(a0)
+; CHECK-NOV-NEXT: sw a1, 12(a0)
; CHECK-NOV-NEXT: ret
; CHECK-NOV-NEXT: .LBB32_6: # %entry
; CHECK-NOV-NEXT: mv a1, a3
@@ -3709,10 +3709,10 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) {
; CHECK-NOV-NEXT: .LBB33_8: # %entry
; CHECK-NOV-NEXT: lui a0, 524288
; CHECK-NOV-NEXT: .LBB33_9: # %entry
-; CHECK-NOV-NEXT: sw a0, 12(s0)
-; CHECK-NOV-NEXT: sw s1, 8(s0)
-; CHECK-NOV-NEXT: sw a2, 4(s0)
; CHECK-NOV-NEXT: sw a3, 0(s0)
+; CHECK-NOV-NEXT: sw a2, 4(s0)
+; CHECK-NOV-NEXT: sw s1, 8(s0)
+; CHECK-NOV-NEXT: sw a0, 12(s0)
; CHECK-NOV-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
@@ -3882,10 +3882,10 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
; CHECK-NOV-NEXT: .LBB34_4: # %entry
; CHECK-NOV-NEXT: mv a3, a1
; CHECK-NOV-NEXT: .LBB34_5: # %entry
-; CHECK-NOV-NEXT: sw a3, 12(s0)
-; CHECK-NOV-NEXT: sw a2, 8(s0)
-; CHECK-NOV-NEXT: sw s1, 4(s0)
; CHECK-NOV-NEXT: sw a0, 0(s0)
+; CHECK-NOV-NEXT: sw s1, 4(s0)
+; CHECK-NOV-NEXT: sw a2, 8(s0)
+; CHECK-NOV-NEXT: sw a3, 12(s0)
; CHECK-NOV-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
@@ -4054,10 +4054,10 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) {
; CHECK-NOV-NEXT: sgtz a4, a0
; CHECK-NOV-NEXT: negw a4, a4
; CHECK-NOV-NEXT: and a0, a4, a0
-; CHECK-NOV-NEXT: sw a0, 12(s0)
-; CHECK-NOV-NEXT: sw a3, 8(s0)
-; CHECK-NOV-NEXT: sw a1, 4(s0)
; CHECK-NOV-NEXT: sw a2, 0(s0)
+; CHECK-NOV-NEXT: sw a1, 4(s0)
+; CHECK-NOV-NEXT: sw a3, 8(s0)
+; CHECK-NOV-NEXT: sw a0, 12(s0)
; CHECK-NOV-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
@@ -4311,10 +4311,10 @@ define <4 x i16> @stest_f32i16_mm(<4 x float> %x) {
; CHECK-NOV-NEXT: .LBB39_8: # %entry
; CHECK-NOV-NEXT: lui a1, 1048568
; CHECK-NOV-NEXT: .LBB39_9: # %entry
-; CHECK-NOV-NEXT: sh a1, 6(a0)
-; CHECK-NOV-NEXT: sh a2, 4(a0)
-; CHECK-NOV-NEXT: sh a3, 2(a0)
; CHECK-NOV-NEXT: sh a4, 0(a0)
+; CHECK-NOV-NEXT: sh a3, 2(a0)
+; CHECK-NOV-NEXT: sh a2, 4(a0)
+; CHECK-NOV-NEXT: sh a1, 6(a0)
; CHECK-NOV-NEXT: ret
; CHECK-NOV-NEXT: .LBB39_10: # %entry
; CHECK-NOV-NEXT: mv a1, a5
@@ -4376,10 +4376,10 @@ define <4 x i16> @utest_f32i16_mm(<4 x float> %x) {
; CHECK-NOV-NEXT: .LBB40_4: # %entry
; CHECK-NOV-NEXT: mv a5, a3
; CHECK-NOV-NEXT: .LBB40_5: # %entry
-; CHECK-NOV-NEXT: sh a5, 6(a0)
-; CHECK-NOV-NEXT: sh a4, 4(a0)
-; CHECK-NOV-NEXT: sh a2, 2(a0)
; CHECK-NOV-NEXT: sh a1, 0(a0)
+; CHECK-NOV-NEXT: sh a2, 2(a0)
+; CHECK-NOV-NEXT: sh a4, 4(a0)
+; CHECK-NOV-NEXT: sh a5, 6(a0)
; CHECK-NOV-NEXT: ret
; CHECK-NOV-NEXT: .LBB40_6: # %entry
; CHECK-NOV-NEXT: mv a1, a3
@@ -4439,10 +4439,10 @@ define <4 x i16> @ustest_f32i16_mm(<4 x float> %x) {
; CHECK-NOV-NEXT: sgtz a5, a1
; CHECK-NOV-NEXT: negw a5, a5
; CHECK-NOV-NEXT: and a1, a5, a1
-; CHECK-NOV-NEXT: sh a1, 6(a0)
-; CHECK-NOV-NEXT: sh a2, 4(a0)
-; CHECK-NOV-NEXT: sh a4, 2(a0)
; CHECK-NOV-NEXT: sh a3, 0(a0)
+; CHECK-NOV-NEXT: sh a4, 2(a0)
+; CHECK-NOV-NEXT: sh a2, 4(a0)
+; CHECK-NOV-NEXT: sh a1, 6(a0)
; CHECK-NOV-NEXT: ret
; CHECK-NOV-NEXT: .LBB41_6: # %entry
; CHECK-NOV-NEXT: mv a1, a3
@@ -4551,14 +4551,14 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
; CHECK-NOV-NEXT: fcvt.l.s a1, fs5, rtz
; CHECK-NOV-NEXT: bge s1, a7, .LBB42_19
; CHECK-NOV-NEXT: .LBB42_2: # %entry
-; CHECK-NOV-NEXT: fcvt.l.s a2, fs4, rtz
+; CHECK-NOV-NEXT: fcvt.l.s a3, fs4, rtz
; CHECK-NOV-NEXT: bge a1, a7, .LBB42_20
; CHECK-NOV-NEXT: .LBB42_3: # %entry
-; CHECK-NOV-NEXT: fcvt.l.s a3, fs3, rtz
-; CHECK-NOV-NEXT: bge a2, a7, .LBB42_21
+; CHECK-NOV-NEXT: fcvt.l.s a2, fs3, rtz
+; CHECK-NOV-NEXT: bge a3, a7, .LBB42_21
; CHECK-NOV-NEXT: .LBB42_4: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a4, fs2, rtz
-; CHECK-NOV-NEXT: bge a3, a7, .LBB42_22
+; CHECK-NOV-NEXT: bge a2, a7, .LBB42_22
; CHECK-NOV-NEXT: .LBB42_5: # %entry
; CHECK-NOV-NEXT: fcvt.l.s a5, fs1, rtz
; CHECK-NOV-NEXT: bge a4, a7, .LBB42_23
@@ -4575,9 +4575,9 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
; CHECK-NOV-NEXT: .LBB42_10: # %entry
; CHECK-NOV-NEXT: bge a7, a4, .LBB42_28
; CHECK-NOV-NEXT: .LBB42_11: # %entry
-; CHECK-NOV-NEXT: bge a7, a3, .LBB42_29
+; CHECK-NOV-NEXT: bge a7, a2, .LBB42_29
; CHECK-NOV-NEXT: .LBB42_12: # %entry
-; CHECK-NOV-NEXT: bge a7, a2, .LBB42_30
+; CHECK-NOV-NEXT: bge a7, a3, .LBB42_30
; CHECK-NOV-NEXT: .LBB42_13: # %entry
; CHECK-NOV-NEXT: bge a7, a1, .LBB42_31
; CHECK-NOV-NEXT: .LBB42_14: # %entry
@@ -4587,14 +4587,14 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
; CHECK-NOV-NEXT: .LBB42_16: # %entry
; CHECK-NOV-NEXT: lui a0, 1048568
; CHECK-NOV-NEXT: .LBB42_17: # %entry
-; CHECK-NOV-NEXT: sh a0, 14(s0)
-; CHECK-NOV-NEXT: sh s1, 12(s0)
+; CHECK-NOV-NEXT: sh a3, 8(s0)
; CHECK-NOV-NEXT: sh a1, 10(s0)
-; CHECK-NOV-NEXT: sh a2, 8(s0)
-; CHECK-NOV-NEXT: sh a3, 6(s0)
-; CHECK-NOV-NEXT: sh a4, 4(s0)
-; CHECK-NOV-NEXT: sh a5, 2(s0)
+; CHECK-NOV-NEXT: sh s1, 12(s0)
+; CHECK-NOV-NEXT: sh a0, 14(s0)
; CHECK-NOV-NEXT: sh a6, 0(s0)
+; CHECK-NOV-NEXT: sh a5, 2(s0)
+; CHECK-NOV-NEXT: sh a4, 4(s0)
+; CHECK-NOV-NEXT: sh a2, 6(s0)
; CHECK-NOV-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s0, 112(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s1, 104(sp) # 8-byte Folded Reload
@@ -4619,18 +4619,18 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
; CHECK-NOV-NEXT: blt s1, a7, .LBB42_2
; CHECK-NOV-NEXT: .LBB42_19: # %entry
; CHECK-NOV-NEXT: mv s1, a7
-; CHECK-NOV-NEXT: fcvt.l.s a2, fs4, rtz
+; CHECK-NOV-NEXT: fcvt.l.s a3, fs4, rtz
; CHECK-NOV-NEXT: blt a1, a7, .LBB42_3
; CHECK-NOV-NEXT: .LBB42_20: # %entry
; CHECK-NOV-NEXT: mv a1, a7
-; CHECK-NOV-NEXT: fcvt.l.s a3, fs3, rtz
-; CHECK-NOV-NEXT: blt a2, a7, .LBB42_4
+; CHECK-NOV-NEXT: fcvt.l.s a2, fs3, rtz
+; CHECK-NOV-NEXT: blt a3, a7, .LBB42_4
; CHECK-NOV-NEXT: .LBB42_21: # %entry
-; CHECK-NOV-NEXT: mv a2, a7
+; CHECK-NOV-NEXT: mv a3, a7
; CHECK-NOV-NEXT: fcvt.l.s a4, fs2, rtz
-; CHECK-NOV-NEXT: blt a3, a7, .LBB42_5
+; CHECK-NOV-NEXT: blt a2, a7, .LBB42_5
; CHECK-NOV-NEXT: .LBB42_22: # %entry
-; CHECK-NOV-NEXT: mv a3, a7
+; CHECK-NOV-NEXT: mv a2, a7
; CHECK-NOV-NEXT: fcvt.l.s a5, fs1, rtz
; CHECK-NOV-NEXT: blt a4, a7, .LBB42_6
; CHECK-NOV-NEXT: .LBB42_23: # %entry
@@ -4652,12 +4652,12 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
; CHECK-NOV-NEXT: blt a7, a4, .LBB42_11
; CHECK-NOV-NEXT: .LBB42_28: # %entry
; CHECK-NOV-NEXT: lui a4, 1048568
-; CHECK-NOV-NEXT: blt a7, a3, .LBB42_12
+; CHECK-NOV-NEXT: blt a7, a2, .LBB42_12
; CHECK-NOV-NEXT: .LBB42_29: # %entry
-; CHECK-NOV-NEXT: lui a3, 1048568
-; CHECK-NOV-NEXT: blt a7, a2, .LBB42_13
-; CHECK-NOV-NEXT: .LBB42_30: # %entry
; CHECK-NOV-NEXT: lui a2, 1048568
+; CHECK-NOV-NEXT: blt a7, a3, .LBB42_13
+; CHECK-NOV-NEXT: .LBB42_30: # %entry
+; CHECK-NOV-NEXT: lui a3, 1048568
; CHECK-NOV-NEXT: blt a7, a1, .LBB42_14
; CHECK-NOV-NEXT: .LBB42_31: # %entry
; CHECK-NOV-NEXT: lui a1, 1048568
@@ -4897,40 +4897,40 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
; CHECK-NOV-NEXT: fcvt.lu.s s1, fs6, rtz
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fcvt.lu.s a0, fa0, rtz
-; CHECK-NOV-NEXT: lui a1, 16
-; CHECK-NOV-NEXT: addiw a1, a1, -1
-; CHECK-NOV-NEXT: bgeu a0, a1, .LBB43_10
+; CHECK-NOV-NEXT: lui a3, 16
+; CHECK-NOV-NEXT: addiw a3, a3, -1
+; CHECK-NOV-NEXT: bgeu a0, a3, .LBB43_10
; CHECK-NOV-NEXT: # %bb.1: # %entry
-; CHECK-NOV-NEXT: fcvt.lu.s a2, fs5, rtz
-; CHECK-NOV-NEXT: bgeu s1, a1, .LBB43_11
+; CHECK-NOV-NEXT: fcvt.lu.s a1, fs5, rtz
+; CHECK-NOV-NEXT: bgeu s1, a3, .LBB43_11
; CHECK-NOV-NEXT: .LBB43_2: # %entry
-; CHECK-NOV-NEXT: fcvt.lu.s a3, fs4, rtz
-; CHECK-NOV-NEXT: bgeu a2, a1, .LBB43_12
+; CHECK-NOV-NEXT: fcvt.lu.s a2, fs4, rtz
+; CHECK-NOV-NEXT: bgeu a1, a3, .LBB43_12
; CHECK-NOV-NEXT: .LBB43_3: # %entry
; CHECK-NOV-NEXT: fcvt.lu.s a4, fs3, rtz
-; CHECK-NOV-NEXT: bgeu a3, a1, .LBB43_13
+; CHECK-NOV-NEXT: bgeu a2, a3, .LBB43_13
; CHECK-NOV-NEXT: .LBB43_4: # %entry
; CHECK-NOV-NEXT: fcvt.lu.s a5, fs2, rtz
-; CHECK-NOV-NEXT: bgeu a4, a1, .LBB43_14
+; CHECK-NOV-NEXT: bgeu a4, a3, .LBB43_14
; CHECK-NOV-NEXT: .LBB43_5: # %entry
; CHECK-NOV-NEXT: fcvt.lu.s a6, fs1, rtz
-; CHECK-NOV-NEXT: bgeu a5, a1, .LBB43_15
+; CHECK-NOV-NEXT: bgeu a5, a3, .LBB43_15
; CHECK-NOV-NEXT: .LBB43_6: # %entry
; CHECK-NOV-NEXT: fcvt.lu.s a7, fs0, rtz
-; CHECK-NOV-NEXT: bgeu a6, a1, .LBB43_16
+; CHECK-NOV-NEXT: bgeu a6, a3, .LBB43_16
; CHECK-NOV-NEXT: .LBB43_7: # %entry
-; CHECK-NOV-NEXT: bltu a7, a1, .LBB43_9
+; CHECK-NOV-NEXT: bltu a7, a3, .LBB43_9
; CHECK-NOV-NEXT: .LBB43_8: # %entry
-; CHECK-NOV-NEXT: mv a7, a1
+; CHECK-NOV-NEXT: mv a7, a3
; CHECK-NOV-NEXT: .LBB43_9: # %entry
-; CHECK-NOV-NEXT: sh a7, 14(s0)
-; CHECK-NOV-NEXT: sh a6, 12(s0)
-; CHECK-NOV-NEXT: sh a5, 10(s0)
; CHECK-NOV-NEXT: sh a4, 8(s0)
-; CHECK-NOV-NEXT: sh a3, 6(s0)
-; CHECK-NOV-NEXT: sh a2, 4(s0)
-; CHECK-NOV-NEXT: sh s1, 2(s0)
+; CHECK-NOV-NEXT: sh a5, 10(s0)
+; CHECK-NOV-NEXT: sh a6, 12(s0)
+; CHECK-NOV-NEXT: sh a7, 14(s0)
; CHECK-NOV-NEXT: sh a0, 0(s0)
+; CHECK-NOV-NEXT: sh s1, 2(s0)
+; CHECK-NOV-NEXT: sh a1, 4(s0)
+; CHECK-NOV-NEXT: sh a2, 6(s0)
; CHECK-NOV-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s0, 112(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s1, 104(sp) # 8-byte Folded Reload
@@ -4950,32 +4950,32 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
; CHECK-NOV-NEXT: addi sp, sp, 128
; CHECK-NOV-NEXT: ret
; CHECK-NOV-NEXT: .LBB43_10: # %entry
-; CHECK-NOV-NEXT: mv a0, a1
-; CHECK-NOV-NEXT: fcvt.lu.s a2, fs5, rtz
-; CHECK-NOV-NEXT: bltu s1, a1, .LBB43_2
+; CHECK-NOV-NEXT: mv a0, a3
+; CHECK-NOV-NEXT: fcvt.lu.s a1, fs5, rtz
+; CHECK-NOV-NEXT: bltu s1, a3, .LBB43_2
; CHECK-NOV-NEXT: .LBB43_11: # %entry
-; CHECK-NOV-NEXT: mv s1, a1
-; CHECK-NOV-NEXT: fcvt.lu.s a3, fs4, rtz
-; CHECK-NOV-NEXT: bltu a2, a1, .LBB43_3
+; CHECK-NOV-NEXT: mv s1, a3
+; CHECK-NOV-NEXT: fcvt.lu.s a2, fs4, rtz
+; CHECK-NOV-NEXT: bltu a1, a3, .LBB43_3
; CHECK-NOV-NEXT: .LBB43_12: # %entry
-; CHECK-NOV-NEXT: mv a2, a1
+; CHECK-NOV-NEXT: mv a1, a3
; CHECK-NOV-NEXT: fcvt.lu.s a4, fs3, rtz
-; CHECK-NOV-NEXT: bltu a3, a1, .LBB43_4
+; CHECK-NOV-NEXT: bltu a2, a3, .LBB43_4
; CHECK-NOV-NEXT: .LBB43_13: # %entry
-; CHECK-NOV-NEXT: mv a3, a1
+; CHECK-NOV-NEXT: mv a2, a3
; CHECK-NOV-NEXT: fcvt.lu.s a5, fs2, rtz
-; CHECK-NOV-NEXT: bltu a4, a1, .LBB43_5
+; CHECK-NOV-NEXT: bltu a4, a3, .LBB43_5
; CHECK-NOV-NEXT: .LBB43_14: # %entry
-; CHECK-NOV-NEXT: mv a4, a1
+; CHECK-NOV-NEXT: mv a4, a3
; CHECK-NOV-NEXT: fcvt.lu.s a6, fs1, rtz
-; CHECK-NOV-NEXT: bltu a5, a1, .LBB43_6
+; CHECK-NOV-NEXT: bltu a5, a3, .LBB43_6
; CHECK-NOV-NEXT: .LBB43_15: # %entry
-; CHECK-NOV-NEXT: mv a5, a1
+; CHECK-NOV-NEXT: mv a5, a3
; CHECK-NOV-NEXT: fcvt.lu.s a7, fs0, rtz
-; CHECK-NOV-NEXT: bltu a6, a1, .LBB43_7
+; CHECK-NOV-NEXT: bltu a6, a3, .LBB43_7
; CHECK-NOV-NEXT: .LBB43_16: # %entry
-; CHECK-NOV-NEXT: mv a6, a1
-; CHECK-NOV-NEXT: bgeu a7, a1, .LBB43_8
+; CHECK-NOV-NEXT: mv a6, a3
+; CHECK-NOV-NEXT: bgeu a7, a3, .LBB43_8
; CHECK-NOV-NEXT: j .LBB43_9
;
; CHECK-V-LABEL: utesth_f16i16_mm:
@@ -5257,14 +5257,14 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) {
; CHECK-NOV-NEXT: sgtz t0, a0
; CHECK-NOV-NEXT: negw t0, t0
; CHECK-NOV-NEXT: and a0, t0, a0
-; CHECK-NOV-NEXT: sh a0, 14(s0)
-; CHECK-NOV-NEXT: sh a7, 12(s0)
-; CHECK-NOV-NEXT: sh a1, 10(s0)
; CHECK-NOV-NEXT: sh a2, 8(s0)
-; CHECK-NOV-NEXT: sh a4, 6(s0)
-; CHECK-NOV-NEXT: sh a5, 4(s0)
-; CHECK-NOV-NEXT: sh a6, 2(s0)
+; CHECK-NOV-NEXT: sh a1, 10(s0)
+; CHECK-NOV-NEXT: sh a7, 12(s0)
+; CHECK-NOV-NEXT: sh a0, 14(s0)
; CHECK-NOV-NEXT: sh a3, 0(s0)
+; CHECK-NOV-NEXT: sh a6, 2(s0)
+; CHECK-NOV-NEXT: sh a5, 4(s0)
+; CHECK-NOV-NEXT: sh a4, 6(s0)
; CHECK-NOV-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s0, 112(sp) # 8-byte Folded Reload
; CHECK-NOV-NEXT: ld s1, 104(sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/rvv/masked-tama.ll b/llvm/test/CodeGen/RISCV/rvv/masked-tama.ll
index 3aca3130cc5459..5c14ed1e813c0e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/masked-tama.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/masked-tama.ll
@@ -1372,8 +1372,8 @@ define <vscale x 1 x i1> @intrinsic_vmseq_mask_vx_nxv1i64_i64(<vscale x 1 x i64>
; RV32-LABEL: intrinsic_vmseq_mask_vx_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1408,8 +1408,8 @@ define <vscale x 1 x i1> @intrinsic_vmsge_mask_vx_nxv1i64_i64(<vscale x 1 x i64>
; RV32-LABEL: intrinsic_vmsge_mask_vx_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/memset-inline.ll b/llvm/test/CodeGen/RISCV/rvv/memset-inline.ll
index accc18519d6260..5349878b5d1117 100644
--- a/llvm/test/CodeGen/RISCV/rvv/memset-inline.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/memset-inline.ll
@@ -31,14 +31,14 @@ define void @memset_1(ptr %a, i8 %value) nounwind {
define void @memset_2(ptr %a, i8 %value) nounwind {
; RV32-LABEL: memset_2:
; RV32: # %bb.0:
-; RV32-NEXT: sb a1, 1(a0)
; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: sb a1, 1(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: memset_2:
; RV64: # %bb.0:
-; RV64-NEXT: sb a1, 1(a0)
; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: sb a1, 1(a0)
; RV64-NEXT: ret
;
; RV32-FAST-LABEL: memset_2:
@@ -63,18 +63,18 @@ define void @memset_2(ptr %a, i8 %value) nounwind {
define void @memset_4(ptr %a, i8 %value) nounwind {
; RV32-LABEL: memset_4:
; RV32: # %bb.0:
-; RV32-NEXT: sb a1, 3(a0)
-; RV32-NEXT: sb a1, 2(a0)
-; RV32-NEXT: sb a1, 1(a0)
; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: sb a1, 2(a0)
+; RV32-NEXT: sb a1, 3(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: memset_4:
; RV64: # %bb.0:
-; RV64-NEXT: sb a1, 3(a0)
-; RV64-NEXT: sb a1, 2(a0)
-; RV64-NEXT: sb a1, 1(a0)
; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: sb a1, 2(a0)
+; RV64-NEXT: sb a1, 3(a0)
; RV64-NEXT: ret
;
; RV32-FAST-LABEL: memset_4:
@@ -102,26 +102,26 @@ define void @memset_4(ptr %a, i8 %value) nounwind {
define void @memset_8(ptr %a, i8 %value) nounwind {
; RV32-LABEL: memset_8:
; RV32: # %bb.0:
-; RV32-NEXT: sb a1, 7(a0)
-; RV32-NEXT: sb a1, 6(a0)
-; RV32-NEXT: sb a1, 5(a0)
; RV32-NEXT: sb a1, 4(a0)
-; RV32-NEXT: sb a1, 3(a0)
-; RV32-NEXT: sb a1, 2(a0)
-; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: sb a1, 5(a0)
+; RV32-NEXT: sb a1, 6(a0)
+; RV32-NEXT: sb a1, 7(a0)
; RV32-NEXT: sb a1, 0(a0)
+; RV32-NEXT: sb a1, 1(a0)
+; RV32-NEXT: sb a1, 2(a0)
+; RV32-NEXT: sb a1, 3(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: memset_8:
; RV64: # %bb.0:
-; RV64-NEXT: sb a1, 7(a0)
-; RV64-NEXT: sb a1, 6(a0)
-; RV64-NEXT: sb a1, 5(a0)
; RV64-NEXT: sb a1, 4(a0)
-; RV64-NEXT: sb a1, 3(a0)
-; RV64-NEXT: sb a1, 2(a0)
-; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: sb a1, 5(a0)
+; RV64-NEXT: sb a1, 6(a0)
+; RV64-NEXT: sb a1, 7(a0)
; RV64-NEXT: sb a1, 0(a0)
+; RV64-NEXT: sb a1, 1(a0)
+; RV64-NEXT: sb a1, 2(a0)
+; RV64-NEXT: sb a1, 3(a0)
; RV64-NEXT: ret
;
; RV32-FAST-LABEL: memset_8:
@@ -130,8 +130,8 @@ define void @memset_8(ptr %a, i8 %value) nounwind {
; RV32-FAST-NEXT: lui a2, 4112
; RV32-FAST-NEXT: addi a2, a2, 257
; RV32-FAST-NEXT: mul a1, a1, a2
-; RV32-FAST-NEXT: sw a1, 4(a0)
; RV32-FAST-NEXT: sw a1, 0(a0)
+; RV32-FAST-NEXT: sw a1, 4(a0)
; RV32-FAST-NEXT: ret
;
; RV64-FAST-LABEL: memset_8:
@@ -270,8 +270,8 @@ define void @aligned_memset_8(ptr align 8 %a, i8 %value) nounwind {
; RV32-BOTH-NEXT: lui a2, 4112
; RV32-BOTH-NEXT: addi a2, a2, 257
; RV32-BOTH-NEXT: mul a1, a1, a2
-; RV32-BOTH-NEXT: sw a1, 4(a0)
; RV32-BOTH-NEXT: sw a1, 0(a0)
+; RV32-BOTH-NEXT: sw a1, 4(a0)
; RV32-BOTH-NEXT: ret
;
; RV64-BOTH-LABEL: aligned_memset_8:
@@ -377,14 +377,14 @@ define void @bzero_1(ptr %a) nounwind {
define void @bzero_2(ptr %a) nounwind {
; RV32-LABEL: bzero_2:
; RV32: # %bb.0:
-; RV32-NEXT: sb zero, 1(a0)
; RV32-NEXT: sb zero, 0(a0)
+; RV32-NEXT: sb zero, 1(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: bzero_2:
; RV64: # %bb.0:
-; RV64-NEXT: sb zero, 1(a0)
; RV64-NEXT: sb zero, 0(a0)
+; RV64-NEXT: sb zero, 1(a0)
; RV64-NEXT: ret
;
; RV32-FAST-LABEL: bzero_2:
@@ -403,18 +403,18 @@ define void @bzero_2(ptr %a) nounwind {
define void @bzero_4(ptr %a) nounwind {
; RV32-LABEL: bzero_4:
; RV32: # %bb.0:
-; RV32-NEXT: sb zero, 3(a0)
-; RV32-NEXT: sb zero, 2(a0)
-; RV32-NEXT: sb zero, 1(a0)
; RV32-NEXT: sb zero, 0(a0)
+; RV32-NEXT: sb zero, 1(a0)
+; RV32-NEXT: sb zero, 2(a0)
+; RV32-NEXT: sb zero, 3(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: bzero_4:
; RV64: # %bb.0:
-; RV64-NEXT: sb zero, 3(a0)
-; RV64-NEXT: sb zero, 2(a0)
-; RV64-NEXT: sb zero, 1(a0)
; RV64-NEXT: sb zero, 0(a0)
+; RV64-NEXT: sb zero, 1(a0)
+; RV64-NEXT: sb zero, 2(a0)
+; RV64-NEXT: sb zero, 3(a0)
; RV64-NEXT: ret
;
; RV32-FAST-LABEL: bzero_4:
@@ -433,32 +433,32 @@ define void @bzero_4(ptr %a) nounwind {
define void @bzero_8(ptr %a) nounwind {
; RV32-LABEL: bzero_8:
; RV32: # %bb.0:
-; RV32-NEXT: sb zero, 7(a0)
-; RV32-NEXT: sb zero, 6(a0)
-; RV32-NEXT: sb zero, 5(a0)
; RV32-NEXT: sb zero, 4(a0)
-; RV32-NEXT: sb zero, 3(a0)
-; RV32-NEXT: sb zero, 2(a0)
-; RV32-NEXT: sb zero, 1(a0)
+; RV32-NEXT: sb zero, 5(a0)
+; RV32-NEXT: sb zero, 6(a0)
+; RV32-NEXT: sb zero, 7(a0)
; RV32-NEXT: sb zero, 0(a0)
+; RV32-NEXT: sb zero, 1(a0)
+; RV32-NEXT: sb zero, 2(a0)
+; RV32-NEXT: sb zero, 3(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: bzero_8:
; RV64: # %bb.0:
-; RV64-NEXT: sb zero, 7(a0)
-; RV64-NEXT: sb zero, 6(a0)
-; RV64-NEXT: sb zero, 5(a0)
; RV64-NEXT: sb zero, 4(a0)
-; RV64-NEXT: sb zero, 3(a0)
-; RV64-NEXT: sb zero, 2(a0)
-; RV64-NEXT: sb zero, 1(a0)
+; RV64-NEXT: sb zero, 5(a0)
+; RV64-NEXT: sb zero, 6(a0)
+; RV64-NEXT: sb zero, 7(a0)
; RV64-NEXT: sb zero, 0(a0)
+; RV64-NEXT: sb zero, 1(a0)
+; RV64-NEXT: sb zero, 2(a0)
+; RV64-NEXT: sb zero, 3(a0)
; RV64-NEXT: ret
;
; RV32-FAST-LABEL: bzero_8:
; RV32-FAST: # %bb.0:
-; RV32-FAST-NEXT: sw zero, 4(a0)
; RV32-FAST-NEXT: sw zero, 0(a0)
+; RV32-FAST-NEXT: sw zero, 4(a0)
; RV32-FAST-NEXT: ret
;
; RV64-FAST-LABEL: bzero_8:
@@ -608,8 +608,8 @@ define void @aligned_bzero_4(ptr %a) nounwind {
define void @aligned_bzero_8(ptr %a) nounwind {
; RV32-BOTH-LABEL: aligned_bzero_8:
; RV32-BOTH: # %bb.0:
-; RV32-BOTH-NEXT: sw zero, 4(a0)
; RV32-BOTH-NEXT: sw zero, 0(a0)
+; RV32-BOTH-NEXT: sw zero, 4(a0)
; RV32-BOTH-NEXT: ret
;
; RV64-BOTH-LABEL: aligned_bzero_8:
diff --git a/llvm/test/CodeGen/RISCV/rvv/no-reserved-frame.ll b/llvm/test/CodeGen/RISCV/rvv/no-reserved-frame.ll
index 9cdced88c7bdc2..59ba857dca8a5f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/no-reserved-frame.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/no-reserved-frame.ll
@@ -27,15 +27,15 @@ define signext i32 @foo(i32 signext %aa) #0 {
; CHECK-NEXT: lw a6, 24(s1)
; CHECK-NEXT: lw a7, 20(s1)
; CHECK-NEXT: lw t1, 16(s1)
-; CHECK-NEXT: lw a1, 12(s1)
-; CHECK-NEXT: lw t2, 8(s1)
+; CHECK-NEXT: lw t2, 12(s1)
+; CHECK-NEXT: lw t3, 8(s1)
; CHECK-NEXT: sw a0, 52(s1)
; CHECK-NEXT: sw a0, 48(s1)
; CHECK-NEXT: addi sp, sp, -32
-; CHECK-NEXT: sd t2, 16(sp)
-; CHECK-NEXT: sd a1, 8(sp)
; CHECK-NEXT: addi a1, s1, 48
; CHECK-NEXT: sd t1, 0(sp)
+; CHECK-NEXT: sd t2, 8(sp)
+; CHECK-NEXT: sd t3, 16(sp)
; CHECK-NEXT: mv a0, t0
; CHECK-NEXT: call gfunc
; CHECK-NEXT: addi sp, sp, 32
diff --git a/llvm/test/CodeGen/RISCV/rvv/pr104480.ll b/llvm/test/CodeGen/RISCV/rvv/pr104480.ll
index 93cf4d3766089a..1e34d9aa6d0568 100644
--- a/llvm/test/CodeGen/RISCV/rvv/pr104480.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/pr104480.ll
@@ -45,9 +45,9 @@ define <vscale x 4 x i64> @test_mulhs_expand(<vscale x 4 x i64> %broadcast.splat
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: lui a0, 349525
; CHECK-NEXT: addi a1, a0, 1365
-; CHECK-NEXT: sw a1, 12(sp)
; CHECK-NEXT: addi a0, a0, 1366
; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: sw a1, 12(sp)
; CHECK-NEXT: addi a0, sp, 8
; CHECK-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; CHECK-NEXT: vlse64.v v12, (a0), zero
@@ -73,9 +73,9 @@ define <vscale x 4 x i64> @test_mulhu_expand(<vscale x 4 x i64> %broadcast.splat
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: lui a0, 699051
; CHECK-NEXT: addi a1, a0, -1366
-; CHECK-NEXT: sw a1, 12(sp)
; CHECK-NEXT: addi a0, a0, -1365
; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: sw a1, 12(sp)
; CHECK-NEXT: addi a0, sp, 8
; CHECK-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; CHECK-NEXT: vlse64.v v12, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-args-by-mem.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-args-by-mem.ll
index c164b62a679be0..7a2e40e86f0027 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-args-by-mem.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-args-by-mem.ll
@@ -45,8 +45,8 @@ define <vscale x 16 x i32> @foo(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5,
; CHECK-NEXT: add t1, s1, t1
; CHECK-NEXT: addi t1, t1, 64
; CHECK-NEXT: vs8r.v v8, (t1)
-; CHECK-NEXT: sd t0, 8(sp)
; CHECK-NEXT: sd t1, 0(sp)
+; CHECK-NEXT: sd t0, 8(sp)
; CHECK-NEXT: vmv8r.v v16, v8
; CHECK-NEXT: call bar
; CHECK-NEXT: addi sp, sp, 16
diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll
index eb8c58d2d37790..7315fd6cfbbecb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll
@@ -2375,8 +2375,8 @@ define <vscale x 1 x i1> @icmp_eq_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b, <vs
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -2401,8 +2401,8 @@ define <vscale x 1 x i1> @icmp_eq_vx_swap_nxv1i64(<vscale x 1 x i64> %va, i64 %b
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -2457,8 +2457,8 @@ define <vscale x 1 x i1> @icmp_ne_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b, <vs
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -2483,8 +2483,8 @@ define <vscale x 1 x i1> @icmp_ne_vx_swap_nxv1i64(<vscale x 1 x i64> %va, i64 %b
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -2539,8 +2539,8 @@ define <vscale x 1 x i1> @icmp_ugt_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b, <v
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -2565,8 +2565,8 @@ define <vscale x 1 x i1> @icmp_ugt_vx_swap_nxv1i64(<vscale x 1 x i64> %va, i64 %
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -2621,8 +2621,8 @@ define <vscale x 1 x i1> @icmp_uge_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b, <v
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -2649,8 +2649,8 @@ define <vscale x 1 x i1> @icmp_uge_vx_swap_nxv1i64(<vscale x 1 x i64> %va, i64 %
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -2705,8 +2705,8 @@ define <vscale x 1 x i1> @icmp_ult_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b, <v
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -2731,8 +2731,8 @@ define <vscale x 1 x i1> @icmp_ult_vx_swap_nxv1i64(<vscale x 1 x i64> %va, i64 %
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -2787,8 +2787,8 @@ define <vscale x 1 x i1> @icmp_sgt_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b, <v
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -2813,8 +2813,8 @@ define <vscale x 1 x i1> @icmp_sgt_vx_swap_nxv1i64(<vscale x 1 x i64> %va, i64 %
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -2869,8 +2869,8 @@ define <vscale x 1 x i1> @icmp_sge_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b, <v
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -2897,8 +2897,8 @@ define <vscale x 1 x i1> @icmp_sge_vx_swap_nxv1i64(<vscale x 1 x i64> %va, i64 %
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -2953,8 +2953,8 @@ define <vscale x 1 x i1> @icmp_slt_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b, <v
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -2979,8 +2979,8 @@ define <vscale x 1 x i1> @icmp_slt_vx_swap_nxv1i64(<vscale x 1 x i64> %va, i64 %
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -3035,8 +3035,8 @@ define <vscale x 1 x i1> @icmp_sle_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b, <v
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -3061,8 +3061,8 @@ define <vscale x 1 x i1> @icmp_sle_vx_swap_nxv1i64(<vscale x 1 x i64> %va, i64 %
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -3122,8 +3122,8 @@ define <vscale x 8 x i1> @icmp_eq_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <vs
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v24, (a0), zero
@@ -3150,8 +3150,8 @@ define <vscale x 8 x i1> @icmp_eq_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 %b
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v24, (a0), zero
@@ -3211,8 +3211,8 @@ define <vscale x 8 x i1> @icmp_ne_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <vs
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v24, (a0), zero
@@ -3239,8 +3239,8 @@ define <vscale x 8 x i1> @icmp_ne_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 %b
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v24, (a0), zero
@@ -3300,8 +3300,8 @@ define <vscale x 8 x i1> @icmp_ugt_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <v
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v24, (a0), zero
@@ -3328,8 +3328,8 @@ define <vscale x 8 x i1> @icmp_ugt_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 %
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v24, (a0), zero
@@ -3389,8 +3389,8 @@ define <vscale x 8 x i1> @icmp_uge_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <v
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v24, (a0), zero
@@ -3419,8 +3419,8 @@ define <vscale x 8 x i1> @icmp_uge_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 %
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v24, (a0), zero
@@ -3480,8 +3480,8 @@ define <vscale x 8 x i1> @icmp_ult_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <v
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v24, (a0), zero
@@ -3508,8 +3508,8 @@ define <vscale x 8 x i1> @icmp_ult_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 %
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v24, (a0), zero
@@ -3569,8 +3569,8 @@ define <vscale x 8 x i1> @icmp_sgt_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <v
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v24, (a0), zero
@@ -3597,8 +3597,8 @@ define <vscale x 8 x i1> @icmp_sgt_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 %
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v24, (a0), zero
@@ -3658,8 +3658,8 @@ define <vscale x 8 x i1> @icmp_sge_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <v
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v24, (a0), zero
@@ -3688,8 +3688,8 @@ define <vscale x 8 x i1> @icmp_sge_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 %
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v24, (a0), zero
@@ -3749,8 +3749,8 @@ define <vscale x 8 x i1> @icmp_slt_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <v
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v24, (a0), zero
@@ -3777,8 +3777,8 @@ define <vscale x 8 x i1> @icmp_slt_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 %
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v24, (a0), zero
@@ -3838,8 +3838,8 @@ define <vscale x 8 x i1> @icmp_sle_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <v
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v24, (a0), zero
@@ -3866,8 +3866,8 @@ define <vscale x 8 x i1> @icmp_sle_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 %
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v24, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll
index 90ffeff9689e07..cd3f02f0400f0c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll
@@ -2055,8 +2055,8 @@ define <vscale x 8 x i1> @icmp_eq_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2080,8 +2080,8 @@ define <vscale x 8 x i1> @icmp_eq_xv_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2145,8 +2145,8 @@ define <vscale x 8 x i1> @icmp_ne_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2170,8 +2170,8 @@ define <vscale x 8 x i1> @icmp_ne_xv_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2215,8 +2215,8 @@ define <vscale x 8 x i1> @icmp_ugt_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2240,8 +2240,8 @@ define <vscale x 8 x i1> @icmp_ugt_xv_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2285,8 +2285,8 @@ define <vscale x 8 x i1> @icmp_uge_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2311,8 +2311,8 @@ define <vscale x 8 x i1> @icmp_uge_xv_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2417,8 +2417,8 @@ define <vscale x 8 x i1> @icmp_ult_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2442,8 +2442,8 @@ define <vscale x 8 x i1> @icmp_ult_xv_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2538,8 +2538,8 @@ define <vscale x 8 x i1> @icmp_ule_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2563,8 +2563,8 @@ define <vscale x 8 x i1> @icmp_ule_xv_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2609,8 +2609,8 @@ define <vscale x 8 x i1> @icmp_sgt_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2634,8 +2634,8 @@ define <vscale x 8 x i1> @icmp_sgt_xv_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2679,8 +2679,8 @@ define <vscale x 8 x i1> @icmp_sge_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2705,8 +2705,8 @@ define <vscale x 8 x i1> @icmp_sge_xv_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2791,8 +2791,8 @@ define <vscale x 8 x i1> @icmp_slt_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2816,8 +2816,8 @@ define <vscale x 8 x i1> @icmp_slt_xv_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2902,8 +2902,8 @@ define <vscale x 8 x i1> @icmp_sle_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2927,8 +2927,8 @@ define <vscale x 8 x i1> @icmp_sle_xv_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/splat-vector-split-i64-vl-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/splat-vector-split-i64-vl-sdnode.ll
index 28583efccdbca4..3912bc48c79519 100644
--- a/llvm/test/CodeGen/RISCV/rvv/splat-vector-split-i64-vl-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/splat-vector-split-i64-vl-sdnode.ll
@@ -14,9 +14,9 @@ define i32 @splat_vector_split_i64() {
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vsetivli zero, 4, e64, m2, tu, ma
; CHECK-NEXT: vslideup.vi v8, v10, 3
-; CHECK-NEXT: sw zero, 12(sp)
; CHECK-NEXT: lui a0, 1044480
; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: sw zero, 12(sp)
; CHECK-NEXT: li a0, 56
; CHECK-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; CHECK-NEXT: vsrl.vx v10, v8, a0
diff --git a/llvm/test/CodeGen/RISCV/rvv/stepvector.ll b/llvm/test/CodeGen/RISCV/rvv/stepvector.ll
index 721f03120bd499..27a4d37a83da83 100644
--- a/llvm/test/CodeGen/RISCV/rvv/stepvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/stepvector.ll
@@ -483,10 +483,10 @@ define <vscale x 8 x i64> @mul_bigimm_stepvector_nxv8i64() {
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: li a0, 7
+; RV32-NEXT: lui a1, 797989
+; RV32-NEXT: addi a1, a1, -683
+; RV32-NEXT: sw a1, 8(sp)
; RV32-NEXT: sw a0, 12(sp)
-; RV32-NEXT: lui a0, 797989
-; RV32-NEXT: addi a0, a0, -683
-; RV32-NEXT: sw a0, 8(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v8, (a0), zero
@@ -531,9 +531,9 @@ define <vscale x 16 x i64> @stepvector_nxv16i64() {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -558,10 +558,10 @@ define <vscale x 16 x i64> @add_stepvector_nxv16i64() {
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 1
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -592,11 +592,11 @@ define <vscale x 16 x i64> @mul_stepvector_nxv16i64() {
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a1, a0, 1
; RV32-NEXT: add a0, a1, a0
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -630,23 +630,23 @@ define <vscale x 16 x i64> @mul_bigimm_stepvector_nxv16i64() {
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: li a0, 7
+; RV32-NEXT: lui a1, 797989
+; RV32-NEXT: addi a1, a1, -683
+; RV32-NEXT: sw a1, 8(sp)
; RV32-NEXT: sw a0, 12(sp)
-; RV32-NEXT: lui a0, 797989
-; RV32-NEXT: addi a0, a0, -683
-; RV32-NEXT: sw a0, 8(sp)
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: lui a1, 11557
; RV32-NEXT: addi a1, a1, -683
; RV32-NEXT: mul a1, a0, a1
-; RV32-NEXT: sw a1, 0(sp)
; RV32-NEXT: srli a0, a0, 3
-; RV32-NEXT: lui a1, 92455
-; RV32-NEXT: addi a1, a1, -1368
-; RV32-NEXT: mulhu a1, a0, a1
-; RV32-NEXT: slli a2, a0, 1
+; RV32-NEXT: lui a2, 92455
+; RV32-NEXT: addi a2, a2, -1368
+; RV32-NEXT: mulhu a2, a0, a2
+; RV32-NEXT: slli a3, a0, 1
; RV32-NEXT: slli a0, a0, 6
-; RV32-NEXT: sub a0, a0, a2
-; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: sub a0, a0, a3
+; RV32-NEXT: add a0, a2, a0
+; RV32-NEXT: sw a1, 0(sp)
; RV32-NEXT: sw a0, 4(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
@@ -683,10 +683,10 @@ define <vscale x 16 x i64> @shl_stepvector_nxv16i64() {
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 2
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/unmasked-ta.ll b/llvm/test/CodeGen/RISCV/rvv/unmasked-ta.ll
index ae8c36a7cb5e3d..bcc7bb9f072f6d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/unmasked-ta.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/unmasked-ta.ll
@@ -279,8 +279,8 @@ define <vscale x 1 x i64> @intrinsic_vmacc_vx_nxv1i64_i64_nxv1i64(<vscale x 1 x
; RV32-LABEL: intrinsic_vmacc_vx_nxv1i64_i64_nxv1i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -314,8 +314,8 @@ define <vscale x 1 x i64> @intrinsic_vmadd_vx_nxv1i64_i64_nxv1i64(<vscale x 1 x
; RV32-LABEL: intrinsic_vmadd_vx_nxv1i64_i64_nxv1i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -349,8 +349,8 @@ define <vscale x 1 x i64> @intrinsic_vnmsac_vx_nxv1i64_i64_nxv1i64(<vscale x 1
; RV32-LABEL: intrinsic_vnmsac_vx_nxv1i64_i64_nxv1i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -384,8 +384,8 @@ define <vscale x 1 x i64> @intrinsic_vnmsub_vx_nxv1i64_i64_nxv1i64(<vscale x 1
; RV32-LABEL: intrinsic_vnmsub_vx_nxv1i64_i64_nxv1i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -905,8 +905,8 @@ define <vscale x 1 x i64> @intrinsic_vmv.s.x_x_nxv1i64(i64 %0, iXLen %1) nounwin
; RV32-LABEL: intrinsic_vmv.s.x_x_nxv1i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v8, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/unmasked-tu.ll b/llvm/test/CodeGen/RISCV/rvv/unmasked-tu.ll
index 1f027aef3103d4..aeb3f6c174859c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/unmasked-tu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/unmasked-tu.ll
@@ -1157,8 +1157,8 @@ define <vscale x 1 x i64> @intrinsic_vrsub_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x
; RV32-LABEL: intrinsic_vrsub_vx_nxv1i64_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1192,8 +1192,8 @@ define <vscale x 1 x i64> @intrinsic_vsadd_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x
; RV32-LABEL: intrinsic_vsadd_vx_nxv1i64_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1296,8 +1296,8 @@ define <vscale x 1 x i64> @intrinsic_vsmul_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x
; RV32-LABEL: intrinsic_vsmul_vx_nxv1i64_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1468,8 +1468,8 @@ define <vscale x 1 x i64> @intrinsic_vssub_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x
; RV32-LABEL: intrinsic_vssub_vx_nxv1i64_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1503,8 +1503,8 @@ define <vscale x 1 x i64> @intrinsic_vssubu_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x
; RV32-LABEL: intrinsic_vssubu_vx_nxv1i64_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -2492,8 +2492,8 @@ define <vscale x 8 x i64> @intrinsic_vmerge_vxm_nxv8i64_nxv8i64_i64(<vscale x 8
; RV32-LABEL: intrinsic_vmerge_vxm_nxv8i64_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v24, (a0), zero
@@ -2523,9 +2523,9 @@ define <vscale x 8 x i64> @intrinsic_vmerge_vim_nxv8i64_nxv8i64_i64(<vscale x 8
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: li a1, 15
+; RV32-NEXT: li a2, -1
+; RV32-NEXT: sw a2, 8(sp)
; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: li a1, -1
-; RV32-NEXT: sw a1, 8(sp)
; RV32-NEXT: addi a1, sp, 8
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v24, (a1), zero
@@ -2673,8 +2673,8 @@ define <vscale x 1 x i64> @intrinsic_vmv.v.x_x_nxv1i64(<vscale x 1 x i64> %0, i6
; RV32-LABEL: intrinsic_vmv.v.x_x_nxv1i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, ma
; RV32-NEXT: vlse64.v v8, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vaadd.ll b/llvm/test/CodeGen/RISCV/rvv/vaadd.ll
index 096e60b6285ffd..28b8b180b76772 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vaadd.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vaadd.ll
@@ -1938,8 +1938,8 @@ define <vscale x 1 x i64> @intrinsic_vaadd_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x
; RV32-LABEL: intrinsic_vaadd_vx_nxv1i64_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1975,8 +1975,8 @@ define <vscale x 1 x i64> @intrinsic_vaadd_mask_vx_nxv1i64_nxv1i64_i64(<vscale x
; RV32-LABEL: intrinsic_vaadd_mask_vx_nxv1i64_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -2012,8 +2012,8 @@ define <vscale x 2 x i64> @intrinsic_vaadd_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x
; RV32-LABEL: intrinsic_vaadd_vx_nxv2i64_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -2049,8 +2049,8 @@ define <vscale x 2 x i64> @intrinsic_vaadd_mask_vx_nxv2i64_nxv2i64_i64(<vscale x
; RV32-LABEL: intrinsic_vaadd_mask_vx_nxv2i64_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -2086,8 +2086,8 @@ define <vscale x 4 x i64> @intrinsic_vaadd_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x
; RV32-LABEL: intrinsic_vaadd_vx_nxv4i64_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -2123,8 +2123,8 @@ define <vscale x 4 x i64> @intrinsic_vaadd_mask_vx_nxv4i64_nxv4i64_i64(<vscale x
; RV32-LABEL: intrinsic_vaadd_mask_vx_nxv4i64_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2160,8 +2160,8 @@ define <vscale x 8 x i64> @intrinsic_vaadd_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x
; RV32-LABEL: intrinsic_vaadd_vx_nxv8i64_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2197,8 +2197,8 @@ define <vscale x 8 x i64> @intrinsic_vaadd_mask_vx_nxv8i64_nxv8i64_i64(<vscale x
; RV32-LABEL: intrinsic_vaadd_mask_vx_nxv8i64_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; RV32-NEXT: vlse64.v v24, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vaaddu-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vaaddu-sdnode.ll
index cd9edca1d4c44f..b03948c15d0780 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vaaddu-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vaaddu-sdnode.ll
@@ -163,8 +163,8 @@ define <vscale x 8 x i64> @vaaddu_vx_nxv8i64_floor(<vscale x 8 x i64> %x, i64 %y
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -382,8 +382,8 @@ define <vscale x 8 x i64> @vaaddu_vx_nxv8i64_ceil(<vscale x 8 x i64> %x, i64 %y)
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vaaddu.ll b/llvm/test/CodeGen/RISCV/rvv/vaaddu.ll
index a15a1932360a5c..7dd556dd16cf13 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vaaddu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vaaddu.ll
@@ -1938,8 +1938,8 @@ define <vscale x 1 x i64> @intrinsic_vaaddu_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x
; RV32-LABEL: intrinsic_vaaddu_vx_nxv1i64_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1975,8 +1975,8 @@ define <vscale x 1 x i64> @intrinsic_vaaddu_mask_vx_nxv1i64_nxv1i64_i64(<vscale
; RV32-LABEL: intrinsic_vaaddu_mask_vx_nxv1i64_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -2012,8 +2012,8 @@ define <vscale x 2 x i64> @intrinsic_vaaddu_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x
; RV32-LABEL: intrinsic_vaaddu_vx_nxv2i64_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -2049,8 +2049,8 @@ define <vscale x 2 x i64> @intrinsic_vaaddu_mask_vx_nxv2i64_nxv2i64_i64(<vscale
; RV32-LABEL: intrinsic_vaaddu_mask_vx_nxv2i64_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -2086,8 +2086,8 @@ define <vscale x 4 x i64> @intrinsic_vaaddu_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x
; RV32-LABEL: intrinsic_vaaddu_vx_nxv4i64_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -2123,8 +2123,8 @@ define <vscale x 4 x i64> @intrinsic_vaaddu_mask_vx_nxv4i64_nxv4i64_i64(<vscale
; RV32-LABEL: intrinsic_vaaddu_mask_vx_nxv4i64_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2160,8 +2160,8 @@ define <vscale x 8 x i64> @intrinsic_vaaddu_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x
; RV32-LABEL: intrinsic_vaaddu_vx_nxv8i64_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2197,8 +2197,8 @@ define <vscale x 8 x i64> @intrinsic_vaaddu_mask_vx_nxv8i64_nxv8i64_i64(<vscale
; RV32-LABEL: intrinsic_vaaddu_mask_vx_nxv8i64_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; RV32-NEXT: vlse64.v v24, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vadc.ll b/llvm/test/CodeGen/RISCV/rvv/vadc.ll
index c33fb3a0c9a891..d2bd58bf3dc67e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vadc.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vadc.ll
@@ -975,8 +975,8 @@ define <vscale x 1 x i64> @intrinsic_vadc_vxm_nxv1i64_nxv1i64_i64(<vscale x 1 x
; RV32-LABEL: intrinsic_vadc_vxm_nxv1i64_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1011,8 +1011,8 @@ define <vscale x 2 x i64> @intrinsic_vadc_vxm_nxv2i64_nxv2i64_i64(<vscale x 2 x
; RV32-LABEL: intrinsic_vadc_vxm_nxv2i64_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1047,8 +1047,8 @@ define <vscale x 4 x i64> @intrinsic_vadc_vxm_nxv4i64_nxv4i64_i64(<vscale x 4 x
; RV32-LABEL: intrinsic_vadc_vxm_nxv4i64_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1083,8 +1083,8 @@ define <vscale x 8 x i64> @intrinsic_vadc_vxm_nxv8i64_nxv8i64_i64(<vscale x 8 x
; RV32-LABEL: intrinsic_vadc_vxm_nxv8i64_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode.ll
index 2b141097366cfb..e6dd81f42d838e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode.ll
@@ -594,8 +594,8 @@ define <vscale x 1 x i64> @vadd_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -639,8 +639,8 @@ define <vscale x 2 x i64> @vadd_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -684,8 +684,8 @@ define <vscale x 4 x i64> @vadd_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -729,8 +729,8 @@ define <vscale x 8 x i64> @vadd_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -774,10 +774,10 @@ define <vscale x 8 x i64> @vadd_xx_nxv8i64(i64 %a, i64 %b) nounwind {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: add a2, a0, a2
-; RV32-NEXT: sw a2, 8(sp)
; RV32-NEXT: add a1, a1, a3
; RV32-NEXT: sltu a0, a2, a0
; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: sw a2, 8(sp)
; RV32-NEXT: sw a0, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll
index feeef73e538ae0..58dee75dd55a15 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll
@@ -1484,8 +1484,8 @@ define <vscale x 1 x i64> @vadd_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b, <vsca
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1510,8 +1510,8 @@ define <vscale x 1 x i64> @vadd_vx_nxv1i64_unmasked(<vscale x 1 x i64> %va, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1578,8 +1578,8 @@ define <vscale x 2 x i64> @vadd_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b, <vsca
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1604,8 +1604,8 @@ define <vscale x 2 x i64> @vadd_vx_nxv2i64_unmasked(<vscale x 2 x i64> %va, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1672,8 +1672,8 @@ define <vscale x 4 x i64> @vadd_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b, <vsca
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1698,8 +1698,8 @@ define <vscale x 4 x i64> @vadd_vx_nxv4i64_unmasked(<vscale x 4 x i64> %va, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1766,8 +1766,8 @@ define <vscale x 8 x i64> @vadd_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <vsca
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1792,8 +1792,8 @@ define <vscale x 8 x i64> @vadd_vx_nxv8i64_unmasked(<vscale x 8 x i64> %va, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vadd.ll b/llvm/test/CodeGen/RISCV/rvv/vadd.ll
index 2654e7daafb0c2..6c8cab84840258 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vadd.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vadd.ll
@@ -1858,8 +1858,8 @@ define <vscale x 1 x i64> @intrinsic_vadd_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i
; RV32-LABEL: intrinsic_vadd_vx_nxv1i64_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1936,8 +1936,8 @@ define <vscale x 1 x i64> @intrinsic_vadd_mask_vx_nxv1i64_nxv1i64_i64(<vscale x
; RV32-LABEL: intrinsic_vadd_mask_vx_nxv1i64_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1971,8 +1971,8 @@ define <vscale x 2 x i64> @intrinsic_vadd_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i
; RV32-LABEL: intrinsic_vadd_vx_nxv2i64_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -2006,8 +2006,8 @@ define <vscale x 2 x i64> @intrinsic_vadd_mask_vx_nxv2i64_nxv2i64_i64(<vscale x
; RV32-LABEL: intrinsic_vadd_mask_vx_nxv2i64_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -2041,8 +2041,8 @@ define <vscale x 4 x i64> @intrinsic_vadd_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i
; RV32-LABEL: intrinsic_vadd_vx_nxv4i64_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -2076,8 +2076,8 @@ define <vscale x 4 x i64> @intrinsic_vadd_mask_vx_nxv4i64_nxv4i64_i64(<vscale x
; RV32-LABEL: intrinsic_vadd_mask_vx_nxv4i64_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2111,8 +2111,8 @@ define <vscale x 8 x i64> @intrinsic_vadd_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x i
; RV32-LABEL: intrinsic_vadd_vx_nxv8i64_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2146,8 +2146,8 @@ define <vscale x 8 x i64> @intrinsic_vadd_mask_vx_nxv8i64_nxv8i64_i64(<vscale x
; RV32-LABEL: intrinsic_vadd_mask_vx_nxv8i64_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; RV32-NEXT: vlse64.v v24, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vand-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vand-sdnode.ll
index a84e2c984f669c..de1e220e9f5666 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vand-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vand-sdnode.ll
@@ -971,8 +971,8 @@ define <vscale x 1 x i64> @vand_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1037,8 +1037,8 @@ define <vscale x 2 x i64> @vand_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1103,8 +1103,8 @@ define <vscale x 4 x i64> @vand_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1169,8 +1169,8 @@ define <vscale x 8 x i64> @vand_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1225,9 +1225,9 @@ define <vscale x 8 x i64> @vand_xx_nxv8i64(i64 %a, i64 %b) nounwind {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: and a1, a1, a3
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: and a0, a0, a2
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v8, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vand-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vand-vp.ll
index 7b4a68d5867f99..03ea4646fcf8ac 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vand-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vand-vp.ll
@@ -1311,8 +1311,8 @@ define <vscale x 1 x i64> @vand_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b, <vsca
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1337,8 +1337,8 @@ define <vscale x 1 x i64> @vand_vx_nxv1i64_unmasked(<vscale x 1 x i64> %va, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1405,8 +1405,8 @@ define <vscale x 2 x i64> @vand_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b, <vsca
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1431,8 +1431,8 @@ define <vscale x 2 x i64> @vand_vx_nxv2i64_unmasked(<vscale x 2 x i64> %va, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1499,8 +1499,8 @@ define <vscale x 4 x i64> @vand_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b, <vsca
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1525,8 +1525,8 @@ define <vscale x 4 x i64> @vand_vx_nxv4i64_unmasked(<vscale x 4 x i64> %va, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1593,8 +1593,8 @@ define <vscale x 8 x i64> @vand_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <vsca
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1619,8 +1619,8 @@ define <vscale x 8 x i64> @vand_vx_nxv8i64_unmasked(<vscale x 8 x i64> %va, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vand.ll b/llvm/test/CodeGen/RISCV/rvv/vand.ll
index 2cf5eab4da6a61..94b4b5748b220d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vand.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vand.ll
@@ -1898,8 +1898,8 @@ define <vscale x 1 x i64> @intrinsic_vand_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i
; RV32-LABEL: intrinsic_vand_vx_nxv1i64_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1934,8 +1934,8 @@ define <vscale x 1 x i64> @intrinsic_vand_mask_vx_nxv1i64_nxv1i64_i64(<vscale x
; RV32-LABEL: intrinsic_vand_mask_vx_nxv1i64_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1969,8 +1969,8 @@ define <vscale x 2 x i64> @intrinsic_vand_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i
; RV32-LABEL: intrinsic_vand_vx_nxv2i64_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -2005,8 +2005,8 @@ define <vscale x 2 x i64> @intrinsic_vand_mask_vx_nxv2i64_nxv2i64_i64(<vscale x
; RV32-LABEL: intrinsic_vand_mask_vx_nxv2i64_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -2040,8 +2040,8 @@ define <vscale x 4 x i64> @intrinsic_vand_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i
; RV32-LABEL: intrinsic_vand_vx_nxv4i64_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -2076,8 +2076,8 @@ define <vscale x 4 x i64> @intrinsic_vand_mask_vx_nxv4i64_nxv4i64_i64(<vscale x
; RV32-LABEL: intrinsic_vand_mask_vx_nxv4i64_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2111,8 +2111,8 @@ define <vscale x 8 x i64> @intrinsic_vand_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x i
; RV32-LABEL: intrinsic_vand_vx_nxv8i64_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2147,8 +2147,8 @@ define <vscale x 8 x i64> @intrinsic_vand_mask_vx_nxv8i64_nxv8i64_i64(<vscale x
; RV32-LABEL: intrinsic_vand_mask_vx_nxv8i64_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; RV32-NEXT: vlse64.v v24, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vandn-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vandn-sdnode.ll
index f25a3f937f1b4f..fb912c8a28432e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vandn-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vandn-sdnode.ll
@@ -1415,8 +1415,8 @@ define <vscale x 1 x i64> @vandn_vx_nxv1i64(i64 %x, <vscale x 1 x i64> %y) {
; CHECK-RV32-NEXT: .cfi_def_cfa_offset 16
; CHECK-RV32-NEXT: not a0, a0
; CHECK-RV32-NEXT: not a1, a1
-; CHECK-RV32-NEXT: sw a1, 12(sp)
; CHECK-RV32-NEXT: sw a0, 8(sp)
+; CHECK-RV32-NEXT: sw a1, 12(sp)
; CHECK-RV32-NEXT: addi a0, sp, 8
; CHECK-RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; CHECK-RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1437,8 +1437,8 @@ define <vscale x 1 x i64> @vandn_vx_nxv1i64(i64 %x, <vscale x 1 x i64> %y) {
; CHECK-ZVKB32-NEXT: .cfi_def_cfa_offset 16
; CHECK-ZVKB32-NEXT: not a0, a0
; CHECK-ZVKB32-NEXT: not a1, a1
-; CHECK-ZVKB32-NEXT: sw a1, 12(sp)
; CHECK-ZVKB32-NEXT: sw a0, 8(sp)
+; CHECK-ZVKB32-NEXT: sw a1, 12(sp)
; CHECK-ZVKB32-NEXT: addi a0, sp, 8
; CHECK-ZVKB32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; CHECK-ZVKB32-NEXT: vlse64.v v9, (a0), zero
@@ -1465,8 +1465,8 @@ define <vscale x 1 x i64> @vandn_vx_swapped_nxv1i64(i64 %x, <vscale x 1 x i64> %
; CHECK-RV32-NEXT: .cfi_def_cfa_offset 16
; CHECK-RV32-NEXT: not a0, a0
; CHECK-RV32-NEXT: not a1, a1
-; CHECK-RV32-NEXT: sw a1, 12(sp)
; CHECK-RV32-NEXT: sw a0, 8(sp)
+; CHECK-RV32-NEXT: sw a1, 12(sp)
; CHECK-RV32-NEXT: addi a0, sp, 8
; CHECK-RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; CHECK-RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1487,8 +1487,8 @@ define <vscale x 1 x i64> @vandn_vx_swapped_nxv1i64(i64 %x, <vscale x 1 x i64> %
; CHECK-ZVKB32-NEXT: .cfi_def_cfa_offset 16
; CHECK-ZVKB32-NEXT: not a0, a0
; CHECK-ZVKB32-NEXT: not a1, a1
-; CHECK-ZVKB32-NEXT: sw a1, 12(sp)
; CHECK-ZVKB32-NEXT: sw a0, 8(sp)
+; CHECK-ZVKB32-NEXT: sw a1, 12(sp)
; CHECK-ZVKB32-NEXT: addi a0, sp, 8
; CHECK-ZVKB32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; CHECK-ZVKB32-NEXT: vlse64.v v9, (a0), zero
@@ -1551,8 +1551,8 @@ define <vscale x 2 x i64> @vandn_vx_nxv2i64(i64 %x, <vscale x 2 x i64> %y) {
; CHECK-RV32-NEXT: .cfi_def_cfa_offset 16
; CHECK-RV32-NEXT: not a0, a0
; CHECK-RV32-NEXT: not a1, a1
-; CHECK-RV32-NEXT: sw a1, 12(sp)
; CHECK-RV32-NEXT: sw a0, 8(sp)
+; CHECK-RV32-NEXT: sw a1, 12(sp)
; CHECK-RV32-NEXT: addi a0, sp, 8
; CHECK-RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; CHECK-RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1573,8 +1573,8 @@ define <vscale x 2 x i64> @vandn_vx_nxv2i64(i64 %x, <vscale x 2 x i64> %y) {
; CHECK-ZVKB32-NEXT: .cfi_def_cfa_offset 16
; CHECK-ZVKB32-NEXT: not a0, a0
; CHECK-ZVKB32-NEXT: not a1, a1
-; CHECK-ZVKB32-NEXT: sw a1, 12(sp)
; CHECK-ZVKB32-NEXT: sw a0, 8(sp)
+; CHECK-ZVKB32-NEXT: sw a1, 12(sp)
; CHECK-ZVKB32-NEXT: addi a0, sp, 8
; CHECK-ZVKB32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; CHECK-ZVKB32-NEXT: vlse64.v v10, (a0), zero
@@ -1601,8 +1601,8 @@ define <vscale x 2 x i64> @vandn_vx_swapped_nxv2i64(i64 %x, <vscale x 2 x i64> %
; CHECK-RV32-NEXT: .cfi_def_cfa_offset 16
; CHECK-RV32-NEXT: not a0, a0
; CHECK-RV32-NEXT: not a1, a1
-; CHECK-RV32-NEXT: sw a1, 12(sp)
; CHECK-RV32-NEXT: sw a0, 8(sp)
+; CHECK-RV32-NEXT: sw a1, 12(sp)
; CHECK-RV32-NEXT: addi a0, sp, 8
; CHECK-RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; CHECK-RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1623,8 +1623,8 @@ define <vscale x 2 x i64> @vandn_vx_swapped_nxv2i64(i64 %x, <vscale x 2 x i64> %
; CHECK-ZVKB32-NEXT: .cfi_def_cfa_offset 16
; CHECK-ZVKB32-NEXT: not a0, a0
; CHECK-ZVKB32-NEXT: not a1, a1
-; CHECK-ZVKB32-NEXT: sw a1, 12(sp)
; CHECK-ZVKB32-NEXT: sw a0, 8(sp)
+; CHECK-ZVKB32-NEXT: sw a1, 12(sp)
; CHECK-ZVKB32-NEXT: addi a0, sp, 8
; CHECK-ZVKB32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; CHECK-ZVKB32-NEXT: vlse64.v v10, (a0), zero
@@ -1687,8 +1687,8 @@ define <vscale x 4 x i64> @vandn_vx_nxv4i64(i64 %x, <vscale x 4 x i64> %y) {
; CHECK-RV32-NEXT: .cfi_def_cfa_offset 16
; CHECK-RV32-NEXT: not a0, a0
; CHECK-RV32-NEXT: not a1, a1
-; CHECK-RV32-NEXT: sw a1, 12(sp)
; CHECK-RV32-NEXT: sw a0, 8(sp)
+; CHECK-RV32-NEXT: sw a1, 12(sp)
; CHECK-RV32-NEXT: addi a0, sp, 8
; CHECK-RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; CHECK-RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1709,8 +1709,8 @@ define <vscale x 4 x i64> @vandn_vx_nxv4i64(i64 %x, <vscale x 4 x i64> %y) {
; CHECK-ZVKB32-NEXT: .cfi_def_cfa_offset 16
; CHECK-ZVKB32-NEXT: not a0, a0
; CHECK-ZVKB32-NEXT: not a1, a1
-; CHECK-ZVKB32-NEXT: sw a1, 12(sp)
; CHECK-ZVKB32-NEXT: sw a0, 8(sp)
+; CHECK-ZVKB32-NEXT: sw a1, 12(sp)
; CHECK-ZVKB32-NEXT: addi a0, sp, 8
; CHECK-ZVKB32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; CHECK-ZVKB32-NEXT: vlse64.v v12, (a0), zero
@@ -1737,8 +1737,8 @@ define <vscale x 4 x i64> @vandn_vx_swapped_nxv4i64(i64 %x, <vscale x 4 x i64> %
; CHECK-RV32-NEXT: .cfi_def_cfa_offset 16
; CHECK-RV32-NEXT: not a0, a0
; CHECK-RV32-NEXT: not a1, a1
-; CHECK-RV32-NEXT: sw a1, 12(sp)
; CHECK-RV32-NEXT: sw a0, 8(sp)
+; CHECK-RV32-NEXT: sw a1, 12(sp)
; CHECK-RV32-NEXT: addi a0, sp, 8
; CHECK-RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; CHECK-RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1759,8 +1759,8 @@ define <vscale x 4 x i64> @vandn_vx_swapped_nxv4i64(i64 %x, <vscale x 4 x i64> %
; CHECK-ZVKB32-NEXT: .cfi_def_cfa_offset 16
; CHECK-ZVKB32-NEXT: not a0, a0
; CHECK-ZVKB32-NEXT: not a1, a1
-; CHECK-ZVKB32-NEXT: sw a1, 12(sp)
; CHECK-ZVKB32-NEXT: sw a0, 8(sp)
+; CHECK-ZVKB32-NEXT: sw a1, 12(sp)
; CHECK-ZVKB32-NEXT: addi a0, sp, 8
; CHECK-ZVKB32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; CHECK-ZVKB32-NEXT: vlse64.v v12, (a0), zero
@@ -1823,8 +1823,8 @@ define <vscale x 8 x i64> @vandn_vx_nxv8i64(i64 %x, <vscale x 8 x i64> %y) {
; CHECK-RV32-NEXT: .cfi_def_cfa_offset 16
; CHECK-RV32-NEXT: not a0, a0
; CHECK-RV32-NEXT: not a1, a1
-; CHECK-RV32-NEXT: sw a1, 12(sp)
; CHECK-RV32-NEXT: sw a0, 8(sp)
+; CHECK-RV32-NEXT: sw a1, 12(sp)
; CHECK-RV32-NEXT: addi a0, sp, 8
; CHECK-RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; CHECK-RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1845,8 +1845,8 @@ define <vscale x 8 x i64> @vandn_vx_nxv8i64(i64 %x, <vscale x 8 x i64> %y) {
; CHECK-ZVKB32-NEXT: .cfi_def_cfa_offset 16
; CHECK-ZVKB32-NEXT: not a0, a0
; CHECK-ZVKB32-NEXT: not a1, a1
-; CHECK-ZVKB32-NEXT: sw a1, 12(sp)
; CHECK-ZVKB32-NEXT: sw a0, 8(sp)
+; CHECK-ZVKB32-NEXT: sw a1, 12(sp)
; CHECK-ZVKB32-NEXT: addi a0, sp, 8
; CHECK-ZVKB32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; CHECK-ZVKB32-NEXT: vlse64.v v16, (a0), zero
@@ -1873,8 +1873,8 @@ define <vscale x 8 x i64> @vandn_vx_swapped_nxv8i64(i64 %x, <vscale x 8 x i64> %
; CHECK-RV32-NEXT: .cfi_def_cfa_offset 16
; CHECK-RV32-NEXT: not a0, a0
; CHECK-RV32-NEXT: not a1, a1
-; CHECK-RV32-NEXT: sw a1, 12(sp)
; CHECK-RV32-NEXT: sw a0, 8(sp)
+; CHECK-RV32-NEXT: sw a1, 12(sp)
; CHECK-RV32-NEXT: addi a0, sp, 8
; CHECK-RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; CHECK-RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1895,8 +1895,8 @@ define <vscale x 8 x i64> @vandn_vx_swapped_nxv8i64(i64 %x, <vscale x 8 x i64> %
; CHECK-ZVKB32-NEXT: .cfi_def_cfa_offset 16
; CHECK-ZVKB32-NEXT: not a0, a0
; CHECK-ZVKB32-NEXT: not a1, a1
-; CHECK-ZVKB32-NEXT: sw a1, 12(sp)
; CHECK-ZVKB32-NEXT: sw a0, 8(sp)
+; CHECK-ZVKB32-NEXT: sw a1, 12(sp)
; CHECK-ZVKB32-NEXT: addi a0, sp, 8
; CHECK-ZVKB32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; CHECK-ZVKB32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vandn-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vandn-vp.ll
index 95866543828fc7..5986033638853b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vandn-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vandn-vp.ll
@@ -1112,8 +1112,8 @@ define <vscale x 1 x i64> @vandn_vx_vp_nxv1i64(i64 %a, <vscale x 1 x i64> %b, <v
; CHECK-RV32-NEXT: .cfi_def_cfa_offset 16
; CHECK-RV32-NEXT: not a0, a0
; CHECK-RV32-NEXT: not a1, a1
-; CHECK-RV32-NEXT: sw a1, 12(sp)
; CHECK-RV32-NEXT: sw a0, 8(sp)
+; CHECK-RV32-NEXT: sw a1, 12(sp)
; CHECK-RV32-NEXT: addi a0, sp, 8
; CHECK-RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; CHECK-RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1135,8 +1135,8 @@ define <vscale x 1 x i64> @vandn_vx_vp_nxv1i64(i64 %a, <vscale x 1 x i64> %b, <v
; CHECK-ZVKB32-NEXT: .cfi_def_cfa_offset 16
; CHECK-ZVKB32-NEXT: not a0, a0
; CHECK-ZVKB32-NEXT: not a1, a1
-; CHECK-ZVKB32-NEXT: sw a1, 12(sp)
; CHECK-ZVKB32-NEXT: sw a0, 8(sp)
+; CHECK-ZVKB32-NEXT: sw a1, 12(sp)
; CHECK-ZVKB32-NEXT: addi a0, sp, 8
; CHECK-ZVKB32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; CHECK-ZVKB32-NEXT: vlse64.v v9, (a0), zero
@@ -1203,8 +1203,8 @@ define <vscale x 2 x i64> @vandn_vx_vp_nxv2i64(i64 %a, <vscale x 2 x i64> %b, <v
; CHECK-RV32-NEXT: .cfi_def_cfa_offset 16
; CHECK-RV32-NEXT: not a0, a0
; CHECK-RV32-NEXT: not a1, a1
-; CHECK-RV32-NEXT: sw a1, 12(sp)
; CHECK-RV32-NEXT: sw a0, 8(sp)
+; CHECK-RV32-NEXT: sw a1, 12(sp)
; CHECK-RV32-NEXT: addi a0, sp, 8
; CHECK-RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; CHECK-RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1226,8 +1226,8 @@ define <vscale x 2 x i64> @vandn_vx_vp_nxv2i64(i64 %a, <vscale x 2 x i64> %b, <v
; CHECK-ZVKB32-NEXT: .cfi_def_cfa_offset 16
; CHECK-ZVKB32-NEXT: not a0, a0
; CHECK-ZVKB32-NEXT: not a1, a1
-; CHECK-ZVKB32-NEXT: sw a1, 12(sp)
; CHECK-ZVKB32-NEXT: sw a0, 8(sp)
+; CHECK-ZVKB32-NEXT: sw a1, 12(sp)
; CHECK-ZVKB32-NEXT: addi a0, sp, 8
; CHECK-ZVKB32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; CHECK-ZVKB32-NEXT: vlse64.v v10, (a0), zero
@@ -1294,8 +1294,8 @@ define <vscale x 4 x i64> @vandn_vx_vp_nxv4i64(i64 %a, <vscale x 4 x i64> %b, <v
; CHECK-RV32-NEXT: .cfi_def_cfa_offset 16
; CHECK-RV32-NEXT: not a0, a0
; CHECK-RV32-NEXT: not a1, a1
-; CHECK-RV32-NEXT: sw a1, 12(sp)
; CHECK-RV32-NEXT: sw a0, 8(sp)
+; CHECK-RV32-NEXT: sw a1, 12(sp)
; CHECK-RV32-NEXT: addi a0, sp, 8
; CHECK-RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; CHECK-RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1317,8 +1317,8 @@ define <vscale x 4 x i64> @vandn_vx_vp_nxv4i64(i64 %a, <vscale x 4 x i64> %b, <v
; CHECK-ZVKB32-NEXT: .cfi_def_cfa_offset 16
; CHECK-ZVKB32-NEXT: not a0, a0
; CHECK-ZVKB32-NEXT: not a1, a1
-; CHECK-ZVKB32-NEXT: sw a1, 12(sp)
; CHECK-ZVKB32-NEXT: sw a0, 8(sp)
+; CHECK-ZVKB32-NEXT: sw a1, 12(sp)
; CHECK-ZVKB32-NEXT: addi a0, sp, 8
; CHECK-ZVKB32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; CHECK-ZVKB32-NEXT: vlse64.v v12, (a0), zero
@@ -1385,8 +1385,8 @@ define <vscale x 8 x i64> @vandn_vx_vp_nxv8i64(i64 %a, <vscale x 8 x i64> %b, <v
; CHECK-RV32-NEXT: .cfi_def_cfa_offset 16
; CHECK-RV32-NEXT: not a0, a0
; CHECK-RV32-NEXT: not a1, a1
-; CHECK-RV32-NEXT: sw a1, 12(sp)
; CHECK-RV32-NEXT: sw a0, 8(sp)
+; CHECK-RV32-NEXT: sw a1, 12(sp)
; CHECK-RV32-NEXT: addi a0, sp, 8
; CHECK-RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; CHECK-RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1408,8 +1408,8 @@ define <vscale x 8 x i64> @vandn_vx_vp_nxv8i64(i64 %a, <vscale x 8 x i64> %b, <v
; CHECK-ZVKB32-NEXT: .cfi_def_cfa_offset 16
; CHECK-ZVKB32-NEXT: not a0, a0
; CHECK-ZVKB32-NEXT: not a1, a1
-; CHECK-ZVKB32-NEXT: sw a1, 12(sp)
; CHECK-ZVKB32-NEXT: sw a0, 8(sp)
+; CHECK-ZVKB32-NEXT: sw a1, 12(sp)
; CHECK-ZVKB32-NEXT: addi a0, sp, 8
; CHECK-ZVKB32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; CHECK-ZVKB32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vandn.ll b/llvm/test/CodeGen/RISCV/rvv/vandn.ll
index a5e29d488da522..de4ad5088d374b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vandn.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vandn.ll
@@ -1898,8 +1898,8 @@ define <vscale x 1 x i64> @intrinsic_vandn_vx_nxv1i64_i64(<vscale x 1 x i64> %0,
; RV32-LABEL: intrinsic_vandn_vx_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1934,8 +1934,8 @@ define <vscale x 1 x i64> @intrinsic_vandn_mask_vx_nxv1i64_i64(<vscale x 1 x i64
; RV32-LABEL: intrinsic_vandn_mask_vx_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1969,8 +1969,8 @@ define <vscale x 2 x i64> @intrinsic_vandn_vx_nxv2i64_i64(<vscale x 2 x i64> %0,
; RV32-LABEL: intrinsic_vandn_vx_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -2005,8 +2005,8 @@ define <vscale x 2 x i64> @intrinsic_vandn_mask_vx_nxv2i64_i64(<vscale x 2 x i64
; RV32-LABEL: intrinsic_vandn_mask_vx_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -2040,8 +2040,8 @@ define <vscale x 4 x i64> @intrinsic_vandn_vx_nxv4i64_i64(<vscale x 4 x i64> %0,
; RV32-LABEL: intrinsic_vandn_vx_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -2076,8 +2076,8 @@ define <vscale x 4 x i64> @intrinsic_vandn_mask_vx_nxv4i64_i64(<vscale x 4 x i64
; RV32-LABEL: intrinsic_vandn_mask_vx_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2111,8 +2111,8 @@ define <vscale x 8 x i64> @intrinsic_vandn_vx_nxv8i64_i64(<vscale x 8 x i64> %0,
; RV32-LABEL: intrinsic_vandn_vx_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2147,8 +2147,8 @@ define <vscale x 8 x i64> @intrinsic_vandn_mask_vx_nxv8i64_i64(<vscale x 8 x i64
; RV32-LABEL: intrinsic_vandn_mask_vx_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; RV32-NEXT: vlse64.v v24, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vasub.ll b/llvm/test/CodeGen/RISCV/rvv/vasub.ll
index c3cb66f7f230a6..43903ba5833bef 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vasub.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vasub.ll
@@ -1938,8 +1938,8 @@ define <vscale x 1 x i64> @intrinsic_vasub_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x
; RV32-LABEL: intrinsic_vasub_vx_nxv1i64_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1975,8 +1975,8 @@ define <vscale x 1 x i64> @intrinsic_vasub_mask_vx_nxv1i64_nxv1i64_i64(<vscale x
; RV32-LABEL: intrinsic_vasub_mask_vx_nxv1i64_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -2012,8 +2012,8 @@ define <vscale x 2 x i64> @intrinsic_vasub_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x
; RV32-LABEL: intrinsic_vasub_vx_nxv2i64_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -2049,8 +2049,8 @@ define <vscale x 2 x i64> @intrinsic_vasub_mask_vx_nxv2i64_nxv2i64_i64(<vscale x
; RV32-LABEL: intrinsic_vasub_mask_vx_nxv2i64_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -2086,8 +2086,8 @@ define <vscale x 4 x i64> @intrinsic_vasub_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x
; RV32-LABEL: intrinsic_vasub_vx_nxv4i64_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -2123,8 +2123,8 @@ define <vscale x 4 x i64> @intrinsic_vasub_mask_vx_nxv4i64_nxv4i64_i64(<vscale x
; RV32-LABEL: intrinsic_vasub_mask_vx_nxv4i64_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2160,8 +2160,8 @@ define <vscale x 8 x i64> @intrinsic_vasub_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x
; RV32-LABEL: intrinsic_vasub_vx_nxv8i64_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2197,8 +2197,8 @@ define <vscale x 8 x i64> @intrinsic_vasub_mask_vx_nxv8i64_nxv8i64_i64(<vscale x
; RV32-LABEL: intrinsic_vasub_mask_vx_nxv8i64_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; RV32-NEXT: vlse64.v v24, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vasubu.ll b/llvm/test/CodeGen/RISCV/rvv/vasubu.ll
index 6d790a9ce0277d..cfa0f31857e150 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vasubu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vasubu.ll
@@ -1938,8 +1938,8 @@ define <vscale x 1 x i64> @intrinsic_vasubu_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x
; RV32-LABEL: intrinsic_vasubu_vx_nxv1i64_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1975,8 +1975,8 @@ define <vscale x 1 x i64> @intrinsic_vasubu_mask_vx_nxv1i64_nxv1i64_i64(<vscale
; RV32-LABEL: intrinsic_vasubu_mask_vx_nxv1i64_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -2012,8 +2012,8 @@ define <vscale x 2 x i64> @intrinsic_vasubu_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x
; RV32-LABEL: intrinsic_vasubu_vx_nxv2i64_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -2049,8 +2049,8 @@ define <vscale x 2 x i64> @intrinsic_vasubu_mask_vx_nxv2i64_nxv2i64_i64(<vscale
; RV32-LABEL: intrinsic_vasubu_mask_vx_nxv2i64_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -2086,8 +2086,8 @@ define <vscale x 4 x i64> @intrinsic_vasubu_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x
; RV32-LABEL: intrinsic_vasubu_vx_nxv4i64_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -2123,8 +2123,8 @@ define <vscale x 4 x i64> @intrinsic_vasubu_mask_vx_nxv4i64_nxv4i64_i64(<vscale
; RV32-LABEL: intrinsic_vasubu_mask_vx_nxv4i64_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2160,8 +2160,8 @@ define <vscale x 8 x i64> @intrinsic_vasubu_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x
; RV32-LABEL: intrinsic_vasubu_vx_nxv8i64_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2197,8 +2197,8 @@ define <vscale x 8 x i64> @intrinsic_vasubu_mask_vx_nxv8i64_nxv8i64_i64(<vscale
; RV32-LABEL: intrinsic_vasubu_mask_vx_nxv8i64_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; RV32-NEXT: vlse64.v v24, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vclmul.ll b/llvm/test/CodeGen/RISCV/rvv/vclmul.ll
index 1184d4b96cc5e3..c132145f71af07 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vclmul.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vclmul.ll
@@ -203,8 +203,8 @@ define <vscale x 1 x i64> @intrinsic_vclmul_vx_nxv1i64_i64(<vscale x 1 x i64> %0
; RV32-LABEL: intrinsic_vclmul_vx_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -239,8 +239,8 @@ define <vscale x 1 x i64> @intrinsic_vclmul_mask_vx_nxv1i64_i64(<vscale x 1 x i6
; RV32-LABEL: intrinsic_vclmul_mask_vx_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -274,8 +274,8 @@ define <vscale x 2 x i64> @intrinsic_vclmul_vx_nxv2i64_i64(<vscale x 2 x i64> %0
; RV32-LABEL: intrinsic_vclmul_vx_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -310,8 +310,8 @@ define <vscale x 2 x i64> @intrinsic_vclmul_mask_vx_nxv2i64_i64(<vscale x 2 x i6
; RV32-LABEL: intrinsic_vclmul_mask_vx_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -345,8 +345,8 @@ define <vscale x 4 x i64> @intrinsic_vclmul_vx_nxv4i64_i64(<vscale x 4 x i64> %0
; RV32-LABEL: intrinsic_vclmul_vx_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -381,8 +381,8 @@ define <vscale x 4 x i64> @intrinsic_vclmul_mask_vx_nxv4i64_i64(<vscale x 4 x i6
; RV32-LABEL: intrinsic_vclmul_mask_vx_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -416,8 +416,8 @@ define <vscale x 8 x i64> @intrinsic_vclmul_vx_nxv8i64_i64(<vscale x 8 x i64> %0
; RV32-LABEL: intrinsic_vclmul_vx_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -452,8 +452,8 @@ define <vscale x 8 x i64> @intrinsic_vclmul_mask_vx_nxv8i64_i64(<vscale x 8 x i6
; RV32-LABEL: intrinsic_vclmul_mask_vx_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; RV32-NEXT: vlse64.v v24, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vclmulh.ll b/llvm/test/CodeGen/RISCV/rvv/vclmulh.ll
index 98befd81ace0c6..287c8e95d1f401 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vclmulh.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vclmulh.ll
@@ -203,8 +203,8 @@ define <vscale x 1 x i64> @intrinsic_vclmulh_vx_nxv1i64_i64(<vscale x 1 x i64> %
; RV32-LABEL: intrinsic_vclmulh_vx_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -239,8 +239,8 @@ define <vscale x 1 x i64> @intrinsic_vclmulh_mask_vx_nxv1i64_i64(<vscale x 1 x i
; RV32-LABEL: intrinsic_vclmulh_mask_vx_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -274,8 +274,8 @@ define <vscale x 2 x i64> @intrinsic_vclmulh_vx_nxv2i64_i64(<vscale x 2 x i64> %
; RV32-LABEL: intrinsic_vclmulh_vx_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -310,8 +310,8 @@ define <vscale x 2 x i64> @intrinsic_vclmulh_mask_vx_nxv2i64_i64(<vscale x 2 x i
; RV32-LABEL: intrinsic_vclmulh_mask_vx_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -345,8 +345,8 @@ define <vscale x 4 x i64> @intrinsic_vclmulh_vx_nxv4i64_i64(<vscale x 4 x i64> %
; RV32-LABEL: intrinsic_vclmulh_vx_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -381,8 +381,8 @@ define <vscale x 4 x i64> @intrinsic_vclmulh_mask_vx_nxv4i64_i64(<vscale x 4 x i
; RV32-LABEL: intrinsic_vclmulh_mask_vx_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -416,8 +416,8 @@ define <vscale x 8 x i64> @intrinsic_vclmulh_vx_nxv8i64_i64(<vscale x 8 x i64> %
; RV32-LABEL: intrinsic_vclmulh_vx_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -452,8 +452,8 @@ define <vscale x 8 x i64> @intrinsic_vclmulh_mask_vx_nxv8i64_i64(<vscale x 8 x i
; RV32-LABEL: intrinsic_vclmulh_mask_vx_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; RV32-NEXT: vlse64.v v24, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll
index ef9b2104b2d2d2..24ce211f0a984f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll
@@ -770,8 +770,8 @@ define <vscale x 1 x i64> @vdiv_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -797,10 +797,10 @@ define <vscale x 1 x i64> @vdiv_vi_nxv1i64_0(<vscale x 1 x i64> %va) {
; RV32-V-NEXT: .cfi_def_cfa_offset 16
; RV32-V-NEXT: lui a0, 748983
; RV32-V-NEXT: addi a0, a0, -586
+; RV32-V-NEXT: lui a1, 898779
+; RV32-V-NEXT: addi a1, a1, 1755
+; RV32-V-NEXT: sw a1, 8(sp)
; RV32-V-NEXT: sw a0, 12(sp)
-; RV32-V-NEXT: lui a0, 898779
-; RV32-V-NEXT: addi a0, a0, 1755
-; RV32-V-NEXT: sw a0, 8(sp)
; RV32-V-NEXT: addi a0, sp, 8
; RV32-V-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-V-NEXT: vlse64.v v9, (a0), zero
@@ -849,8 +849,8 @@ define <vscale x 2 x i64> @vdiv_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -876,10 +876,10 @@ define <vscale x 2 x i64> @vdiv_vi_nxv2i64_0(<vscale x 2 x i64> %va) {
; RV32-V-NEXT: .cfi_def_cfa_offset 16
; RV32-V-NEXT: lui a0, 748983
; RV32-V-NEXT: addi a0, a0, -586
+; RV32-V-NEXT: lui a1, 898779
+; RV32-V-NEXT: addi a1, a1, 1755
+; RV32-V-NEXT: sw a1, 8(sp)
; RV32-V-NEXT: sw a0, 12(sp)
-; RV32-V-NEXT: lui a0, 898779
-; RV32-V-NEXT: addi a0, a0, 1755
-; RV32-V-NEXT: sw a0, 8(sp)
; RV32-V-NEXT: addi a0, sp, 8
; RV32-V-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-V-NEXT: vlse64.v v10, (a0), zero
@@ -928,8 +928,8 @@ define <vscale x 4 x i64> @vdiv_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -955,10 +955,10 @@ define <vscale x 4 x i64> @vdiv_vi_nxv4i64_0(<vscale x 4 x i64> %va) {
; RV32-V-NEXT: .cfi_def_cfa_offset 16
; RV32-V-NEXT: lui a0, 748983
; RV32-V-NEXT: addi a0, a0, -586
+; RV32-V-NEXT: lui a1, 898779
+; RV32-V-NEXT: addi a1, a1, 1755
+; RV32-V-NEXT: sw a1, 8(sp)
; RV32-V-NEXT: sw a0, 12(sp)
-; RV32-V-NEXT: lui a0, 898779
-; RV32-V-NEXT: addi a0, a0, 1755
-; RV32-V-NEXT: sw a0, 8(sp)
; RV32-V-NEXT: addi a0, sp, 8
; RV32-V-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-V-NEXT: vlse64.v v12, (a0), zero
@@ -1007,8 +1007,8 @@ define <vscale x 8 x i64> @vdiv_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1034,10 +1034,10 @@ define <vscale x 8 x i64> @vdiv_vi_nxv8i64_0(<vscale x 8 x i64> %va) {
; RV32-V-NEXT: .cfi_def_cfa_offset 16
; RV32-V-NEXT: lui a0, 748983
; RV32-V-NEXT: addi a0, a0, -586
+; RV32-V-NEXT: lui a1, 898779
+; RV32-V-NEXT: addi a1, a1, 1755
+; RV32-V-NEXT: sw a1, 8(sp)
; RV32-V-NEXT: sw a0, 12(sp)
-; RV32-V-NEXT: lui a0, 898779
-; RV32-V-NEXT: addi a0, a0, 1755
-; RV32-V-NEXT: sw a0, 8(sp)
; RV32-V-NEXT: addi a0, sp, 8
; RV32-V-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-V-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vdiv-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vdiv-vp.ll
index a4b7ca7f39768f..061c2d1066a141 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vdiv-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vdiv-vp.ll
@@ -892,8 +892,8 @@ define <vscale x 1 x i64> @vdiv_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b, <vsca
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -918,8 +918,8 @@ define <vscale x 1 x i64> @vdiv_vx_nxv1i64_unmasked(<vscale x 1 x i64> %va, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -966,8 +966,8 @@ define <vscale x 2 x i64> @vdiv_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b, <vsca
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -992,8 +992,8 @@ define <vscale x 2 x i64> @vdiv_vx_nxv2i64_unmasked(<vscale x 2 x i64> %va, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1040,8 +1040,8 @@ define <vscale x 4 x i64> @vdiv_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b, <vsca
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1066,8 +1066,8 @@ define <vscale x 4 x i64> @vdiv_vx_nxv4i64_unmasked(<vscale x 4 x i64> %va, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1114,8 +1114,8 @@ define <vscale x 8 x i64> @vdiv_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <vsca
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1140,8 +1140,8 @@ define <vscale x 8 x i64> @vdiv_vx_nxv8i64_unmasked(<vscale x 8 x i64> %va, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vdiv.ll b/llvm/test/CodeGen/RISCV/rvv/vdiv.ll
index fd32afcc7ce6e2..a2d3a39d57d6b3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vdiv.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vdiv.ll
@@ -1898,8 +1898,8 @@ define <vscale x 1 x i64> @intrinsic_vdiv_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i
; RV32-LABEL: intrinsic_vdiv_vx_nxv1i64_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1934,8 +1934,8 @@ define <vscale x 1 x i64> @intrinsic_vdiv_mask_vx_nxv1i64_nxv1i64_i64(<vscale x
; RV32-LABEL: intrinsic_vdiv_mask_vx_nxv1i64_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1969,8 +1969,8 @@ define <vscale x 2 x i64> @intrinsic_vdiv_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i
; RV32-LABEL: intrinsic_vdiv_vx_nxv2i64_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -2005,8 +2005,8 @@ define <vscale x 2 x i64> @intrinsic_vdiv_mask_vx_nxv2i64_nxv2i64_i64(<vscale x
; RV32-LABEL: intrinsic_vdiv_mask_vx_nxv2i64_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -2040,8 +2040,8 @@ define <vscale x 4 x i64> @intrinsic_vdiv_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i
; RV32-LABEL: intrinsic_vdiv_vx_nxv4i64_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -2076,8 +2076,8 @@ define <vscale x 4 x i64> @intrinsic_vdiv_mask_vx_nxv4i64_nxv4i64_i64(<vscale x
; RV32-LABEL: intrinsic_vdiv_mask_vx_nxv4i64_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2111,8 +2111,8 @@ define <vscale x 8 x i64> @intrinsic_vdiv_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x i
; RV32-LABEL: intrinsic_vdiv_vx_nxv8i64_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2147,8 +2147,8 @@ define <vscale x 8 x i64> @intrinsic_vdiv_mask_vx_nxv8i64_nxv8i64_i64(<vscale x
; RV32-LABEL: intrinsic_vdiv_mask_vx_nxv8i64_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; RV32-NEXT: vlse64.v v24, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode.ll
index 4f2fb937ca73f4..c8ec465e3cd3e8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode.ll
@@ -662,8 +662,8 @@ define <vscale x 1 x i64> @vdivu_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -688,9 +688,9 @@ define <vscale x 1 x i64> @vdivu_vi_nxv1i64_0(<vscale x 1 x i64> %va) {
; RV32-V-NEXT: addi sp, sp, -16
; RV32-V-NEXT: .cfi_def_cfa_offset 16
; RV32-V-NEXT: lui a0, 131072
+; RV32-V-NEXT: li a1, 1
+; RV32-V-NEXT: sw a1, 8(sp)
; RV32-V-NEXT: sw a0, 12(sp)
-; RV32-V-NEXT: li a0, 1
-; RV32-V-NEXT: sw a0, 8(sp)
; RV32-V-NEXT: addi a0, sp, 8
; RV32-V-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-V-NEXT: vlse64.v v9, (a0), zero
@@ -759,8 +759,8 @@ define <vscale x 2 x i64> @vdivu_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -785,9 +785,9 @@ define <vscale x 2 x i64> @vdivu_vi_nxv2i64_0(<vscale x 2 x i64> %va) {
; RV32-V-NEXT: addi sp, sp, -16
; RV32-V-NEXT: .cfi_def_cfa_offset 16
; RV32-V-NEXT: lui a0, 131072
+; RV32-V-NEXT: li a1, 1
+; RV32-V-NEXT: sw a1, 8(sp)
; RV32-V-NEXT: sw a0, 12(sp)
-; RV32-V-NEXT: li a0, 1
-; RV32-V-NEXT: sw a0, 8(sp)
; RV32-V-NEXT: addi a0, sp, 8
; RV32-V-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-V-NEXT: vlse64.v v10, (a0), zero
@@ -856,8 +856,8 @@ define <vscale x 4 x i64> @vdivu_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -882,9 +882,9 @@ define <vscale x 4 x i64> @vdivu_vi_nxv4i64_0(<vscale x 4 x i64> %va) {
; RV32-V-NEXT: addi sp, sp, -16
; RV32-V-NEXT: .cfi_def_cfa_offset 16
; RV32-V-NEXT: lui a0, 131072
+; RV32-V-NEXT: li a1, 1
+; RV32-V-NEXT: sw a1, 8(sp)
; RV32-V-NEXT: sw a0, 12(sp)
-; RV32-V-NEXT: li a0, 1
-; RV32-V-NEXT: sw a0, 8(sp)
; RV32-V-NEXT: addi a0, sp, 8
; RV32-V-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-V-NEXT: vlse64.v v12, (a0), zero
@@ -953,8 +953,8 @@ define <vscale x 8 x i64> @vdivu_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -979,9 +979,9 @@ define <vscale x 8 x i64> @vdivu_vi_nxv8i64_0(<vscale x 8 x i64> %va) {
; RV32-V-NEXT: addi sp, sp, -16
; RV32-V-NEXT: .cfi_def_cfa_offset 16
; RV32-V-NEXT: lui a0, 131072
+; RV32-V-NEXT: li a1, 1
+; RV32-V-NEXT: sw a1, 8(sp)
; RV32-V-NEXT: sw a0, 12(sp)
-; RV32-V-NEXT: li a0, 1
-; RV32-V-NEXT: sw a0, 8(sp)
; RV32-V-NEXT: addi a0, sp, 8
; RV32-V-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-V-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vdivu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vdivu-vp.ll
index 67c3f9dbf2869a..25f6a2300f6739 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vdivu-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vdivu-vp.ll
@@ -891,8 +891,8 @@ define <vscale x 1 x i64> @vdivu_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b, <vsc
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -917,8 +917,8 @@ define <vscale x 1 x i64> @vdivu_vx_nxv1i64_unmasked(<vscale x 1 x i64> %va, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -965,8 +965,8 @@ define <vscale x 2 x i64> @vdivu_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b, <vsc
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -991,8 +991,8 @@ define <vscale x 2 x i64> @vdivu_vx_nxv2i64_unmasked(<vscale x 2 x i64> %va, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1039,8 +1039,8 @@ define <vscale x 4 x i64> @vdivu_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b, <vsc
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1065,8 +1065,8 @@ define <vscale x 4 x i64> @vdivu_vx_nxv4i64_unmasked(<vscale x 4 x i64> %va, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1113,8 +1113,8 @@ define <vscale x 8 x i64> @vdivu_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <vsc
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1139,8 +1139,8 @@ define <vscale x 8 x i64> @vdivu_vx_nxv8i64_unmasked(<vscale x 8 x i64> %va, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vdivu.ll b/llvm/test/CodeGen/RISCV/rvv/vdivu.ll
index 99d63c218d8e60..85beeb1d8a37ba 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vdivu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vdivu.ll
@@ -1898,8 +1898,8 @@ define <vscale x 1 x i64> @intrinsic_vdivu_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x
; RV32-LABEL: intrinsic_vdivu_vx_nxv1i64_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1934,8 +1934,8 @@ define <vscale x 1 x i64> @intrinsic_vdivu_mask_vx_nxv1i64_nxv1i64_i64(<vscale x
; RV32-LABEL: intrinsic_vdivu_mask_vx_nxv1i64_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1969,8 +1969,8 @@ define <vscale x 2 x i64> @intrinsic_vdivu_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x
; RV32-LABEL: intrinsic_vdivu_vx_nxv2i64_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -2005,8 +2005,8 @@ define <vscale x 2 x i64> @intrinsic_vdivu_mask_vx_nxv2i64_nxv2i64_i64(<vscale x
; RV32-LABEL: intrinsic_vdivu_mask_vx_nxv2i64_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -2040,8 +2040,8 @@ define <vscale x 4 x i64> @intrinsic_vdivu_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x
; RV32-LABEL: intrinsic_vdivu_vx_nxv4i64_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -2076,8 +2076,8 @@ define <vscale x 4 x i64> @intrinsic_vdivu_mask_vx_nxv4i64_nxv4i64_i64(<vscale x
; RV32-LABEL: intrinsic_vdivu_mask_vx_nxv4i64_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2111,8 +2111,8 @@ define <vscale x 8 x i64> @intrinsic_vdivu_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x
; RV32-LABEL: intrinsic_vdivu_vx_nxv8i64_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2147,8 +2147,8 @@ define <vscale x 8 x i64> @intrinsic_vdivu_mask_vx_nxv8i64_nxv8i64_i64(<vscale x
; RV32-LABEL: intrinsic_vdivu_mask_vx_nxv8i64_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; RV32-NEXT: vlse64.v v24, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vec3-setcc-crash.ll b/llvm/test/CodeGen/RISCV/rvv/vec3-setcc-crash.ll
index 122ac13cb25731..b19995a5aba9a4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vec3-setcc-crash.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vec3-setcc-crash.ll
@@ -32,8 +32,8 @@ define void @vec3_setcc_crash(ptr %in, ptr %out) {
; RV32-NEXT: or a0, a0, a3
; RV32-NEXT: neg a3, a6
; RV32-NEXT: and a2, a3, a2
-; RV32-NEXT: sb a2, 2(a1)
; RV32-NEXT: sh a0, 0(a1)
+; RV32-NEXT: sb a2, 2(a1)
; RV32-NEXT: ret
;
; RV64-LABEL: vec3_setcc_crash:
@@ -59,8 +59,8 @@ define void @vec3_setcc_crash(ptr %in, ptr %out) {
; RV64-NEXT: or a0, a0, a3
; RV64-NEXT: negw a3, a6
; RV64-NEXT: and a2, a3, a2
-; RV64-NEXT: sb a2, 2(a1)
; RV64-NEXT: sh a0, 0(a1)
+; RV64-NEXT: sb a2, 2(a1)
; RV64-NEXT: ret
%a = load <3 x i8>, ptr %in
%cmp = icmp sgt <3 x i8> %a, zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmacc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmacc-vp.ll
index e232ac255c56f0..9fe9af478f0b2c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmacc-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmacc-vp.ll
@@ -1651,8 +1651,8 @@ define <vscale x 1 x i64> @vmacc_vx_nxv1i64(<vscale x 1 x i64> %a, i64 %b, <vsca
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1681,8 +1681,8 @@ define <vscale x 1 x i64> @vmacc_vx_nxv1i64_unmasked(<vscale x 1 x i64> %a, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1724,8 +1724,8 @@ define <vscale x 1 x i64> @vmacc_vx_nxv1i64_ta(<vscale x 1 x i64> %a, i64 %b, <v
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1785,8 +1785,8 @@ define <vscale x 2 x i64> @vmacc_vx_nxv2i64(<vscale x 2 x i64> %a, i64 %b, <vsca
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1815,8 +1815,8 @@ define <vscale x 2 x i64> @vmacc_vx_nxv2i64_unmasked(<vscale x 2 x i64> %a, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1858,8 +1858,8 @@ define <vscale x 2 x i64> @vmacc_vx_nxv2i64_ta(<vscale x 2 x i64> %a, i64 %b, <v
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1919,8 +1919,8 @@ define <vscale x 4 x i64> @vmacc_vx_nxv4i64(<vscale x 4 x i64> %a, i64 %b, <vsca
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1949,8 +1949,8 @@ define <vscale x 4 x i64> @vmacc_vx_nxv4i64_unmasked(<vscale x 4 x i64> %a, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1992,8 +1992,8 @@ define <vscale x 4 x i64> @vmacc_vx_nxv4i64_ta(<vscale x 4 x i64> %a, i64 %b, <v
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2055,8 +2055,8 @@ define <vscale x 8 x i64> @vmacc_vx_nxv8i64(<vscale x 8 x i64> %a, i64 %b, <vsca
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v24, (a0), zero
@@ -2085,8 +2085,8 @@ define <vscale x 8 x i64> @vmacc_vx_nxv8i64_unmasked(<vscale x 8 x i64> %a, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v24, (a0), zero
@@ -2129,8 +2129,8 @@ define <vscale x 8 x i64> @vmacc_vx_nxv8i64_ta(<vscale x 8 x i64> %a, i64 %b, <v
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v24, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmacc.ll b/llvm/test/CodeGen/RISCV/rvv/vmacc.ll
index 622fcb5e9f7156..b8b4baf53b677d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmacc.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmacc.ll
@@ -1566,8 +1566,8 @@ define <vscale x 1 x i64> @intrinsic_vmacc_vx_nxv1i64_i64_nxv1i64(<vscale x 1 x
; RV32-LABEL: intrinsic_vmacc_vx_nxv1i64_i64_nxv1i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1602,8 +1602,8 @@ define <vscale x 1 x i64> @intrinsic_vmacc_mask_vx_nxv1i64_i64_nxv1i64(<vscale x
; RV32-LABEL: intrinsic_vmacc_mask_vx_nxv1i64_i64_nxv1i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1639,8 +1639,8 @@ define <vscale x 2 x i64> @intrinsic_vmacc_vx_nxv2i64_i64_nxv2i64(<vscale x 2 x
; RV32-LABEL: intrinsic_vmacc_vx_nxv2i64_i64_nxv2i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1675,8 +1675,8 @@ define <vscale x 2 x i64> @intrinsic_vmacc_mask_vx_nxv2i64_i64_nxv2i64(<vscale x
; RV32-LABEL: intrinsic_vmacc_mask_vx_nxv2i64_i64_nxv2i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1712,8 +1712,8 @@ define <vscale x 4 x i64> @intrinsic_vmacc_vx_nxv4i64_i64_nxv4i64(<vscale x 4 x
; RV32-LABEL: intrinsic_vmacc_vx_nxv4i64_i64_nxv4i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1748,8 +1748,8 @@ define <vscale x 4 x i64> @intrinsic_vmacc_mask_vx_nxv4i64_i64_nxv4i64(<vscale x
; RV32-LABEL: intrinsic_vmacc_mask_vx_nxv4i64_i64_nxv4i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmadc.carry.in.ll b/llvm/test/CodeGen/RISCV/rvv/vmadc.carry.in.ll
index eb09de906fac53..31c12db79a946f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmadc.carry.in.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmadc.carry.in.ll
@@ -934,8 +934,8 @@ define <vscale x 1 x i1> @intrinsic_vmadc.carry.in_vxm_nxv1i1_nxv1i64_i64(<vscal
; RV32-LABEL: intrinsic_vmadc.carry.in_vxm_nxv1i1_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -970,8 +970,8 @@ define <vscale x 2 x i1> @intrinsic_vmadc.carry.in_vxm_nxv2i1_nxv2i64_i64(<vscal
; RV32-LABEL: intrinsic_vmadc.carry.in_vxm_nxv2i1_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1006,8 +1006,8 @@ define <vscale x 4 x i1> @intrinsic_vmadc.carry.in_vxm_nxv4i1_nxv4i64_i64(<vscal
; RV32-LABEL: intrinsic_vmadc.carry.in_vxm_nxv4i1_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1042,8 +1042,8 @@ define <vscale x 8 x i1> @intrinsic_vmadc.carry.in_vxm_nxv8i1_nxv8i64_i64(<vscal
; RV32-LABEL: intrinsic_vmadc.carry.in_vxm_nxv8i1_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v24, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmadc.ll b/llvm/test/CodeGen/RISCV/rvv/vmadc.ll
index 5ec84f89f67c7c..4777903558e4cb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmadc.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmadc.ll
@@ -813,8 +813,8 @@ define <vscale x 1 x i1> @intrinsic_vmadc_vx_nxv1i1_nxv1i64_i64(<vscale x 1 x i6
; RV32-LABEL: intrinsic_vmadc_vx_nxv1i1_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -845,8 +845,8 @@ define <vscale x 2 x i1> @intrinsic_vmadc_vx_nxv2i1_nxv2i64_i64(<vscale x 2 x i6
; RV32-LABEL: intrinsic_vmadc_vx_nxv2i1_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -877,8 +877,8 @@ define <vscale x 4 x i1> @intrinsic_vmadc_vx_nxv4i1_nxv4i64_i64(<vscale x 4 x i6
; RV32-LABEL: intrinsic_vmadc_vx_nxv4i1_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -909,8 +909,8 @@ define <vscale x 8 x i1> @intrinsic_vmadc_vx_nxv8i1_nxv8i64_i64(<vscale x 8 x i6
; RV32-LABEL: intrinsic_vmadc_vx_nxv8i1_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vmadd-sdnode.ll
index e1988c058fac34..87877efeb77113 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmadd-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmadd-sdnode.ll
@@ -458,8 +458,8 @@ define <vscale x 1 x i64> @vmadd_vx_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -495,8 +495,8 @@ define <vscale x 2 x i64> @vmadd_vx_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -532,8 +532,8 @@ define <vscale x 4 x i64> @vmadd_vx_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -570,8 +570,8 @@ define <vscale x 8 x i64> @vmadd_vx_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v24, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmadd-vp.ll
index 5401bf7db49e38..ef74b26ba2646d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmadd-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmadd-vp.ll
@@ -1635,8 +1635,8 @@ define <vscale x 1 x i64> @vmadd_vx_nxv1i64(<vscale x 1 x i64> %a, i64 %b, <vsca
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1665,8 +1665,8 @@ define <vscale x 1 x i64> @vmadd_vx_nxv1i64_unmasked(<vscale x 1 x i64> %a, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1708,8 +1708,8 @@ define <vscale x 1 x i64> @vmadd_vx_nxv1i64_ta(<vscale x 1 x i64> %a, i64 %b, <v
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1770,8 +1770,8 @@ define <vscale x 2 x i64> @vmadd_vx_nxv2i64(<vscale x 2 x i64> %a, i64 %b, <vsca
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1800,8 +1800,8 @@ define <vscale x 2 x i64> @vmadd_vx_nxv2i64_unmasked(<vscale x 2 x i64> %a, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1843,8 +1843,8 @@ define <vscale x 2 x i64> @vmadd_vx_nxv2i64_ta(<vscale x 2 x i64> %a, i64 %b, <v
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1905,8 +1905,8 @@ define <vscale x 4 x i64> @vmadd_vx_nxv4i64(<vscale x 4 x i64> %a, i64 %b, <vsca
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1935,8 +1935,8 @@ define <vscale x 4 x i64> @vmadd_vx_nxv4i64_unmasked(<vscale x 4 x i64> %a, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1978,8 +1978,8 @@ define <vscale x 4 x i64> @vmadd_vx_nxv4i64_ta(<vscale x 4 x i64> %a, i64 %b, <v
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2042,8 +2042,8 @@ define <vscale x 8 x i64> @vmadd_vx_nxv8i64(<vscale x 8 x i64> %a, i64 %b, <vsca
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v24, (a0), zero
@@ -2072,8 +2072,8 @@ define <vscale x 8 x i64> @vmadd_vx_nxv8i64_unmasked(<vscale x 8 x i64> %a, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v24, (a0), zero
@@ -2116,8 +2116,8 @@ define <vscale x 8 x i64> @vmadd_vx_nxv8i64_ta(<vscale x 8 x i64> %a, i64 %b, <v
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v24, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmadd.ll b/llvm/test/CodeGen/RISCV/rvv/vmadd.ll
index 184ce741aba9a6..829d082ab7a4fd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmadd.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmadd.ll
@@ -1566,8 +1566,8 @@ define <vscale x 1 x i64> @intrinsic_vmadd_vx_nxv1i64_i64_nxv1i64(<vscale x 1 x
; RV32-LABEL: intrinsic_vmadd_vx_nxv1i64_i64_nxv1i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1602,8 +1602,8 @@ define <vscale x 1 x i64> @intrinsic_vmadd_mask_vx_nxv1i64_i64_nxv1i64(<vscale x
; RV32-LABEL: intrinsic_vmadd_mask_vx_nxv1i64_i64_nxv1i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1639,8 +1639,8 @@ define <vscale x 2 x i64> @intrinsic_vmadd_vx_nxv2i64_i64_nxv2i64(<vscale x 2 x
; RV32-LABEL: intrinsic_vmadd_vx_nxv2i64_i64_nxv2i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1675,8 +1675,8 @@ define <vscale x 2 x i64> @intrinsic_vmadd_mask_vx_nxv2i64_i64_nxv2i64(<vscale x
; RV32-LABEL: intrinsic_vmadd_mask_vx_nxv2i64_i64_nxv2i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1712,8 +1712,8 @@ define <vscale x 4 x i64> @intrinsic_vmadd_vx_nxv4i64_i64_nxv4i64(<vscale x 4 x
; RV32-LABEL: intrinsic_vmadd_vx_nxv4i64_i64_nxv4i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1748,8 +1748,8 @@ define <vscale x 4 x i64> @intrinsic_vmadd_mask_vx_nxv4i64_i64_nxv4i64(<vscale x
; RV32-LABEL: intrinsic_vmadd_mask_vx_nxv4i64_i64_nxv4i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmax-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vmax-sdnode.ll
index 52720755dd5b5b..edf4211101e882 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmax-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmax-sdnode.ll
@@ -666,8 +666,8 @@ define <vscale x 1 x i64> @vmax_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -715,8 +715,8 @@ define <vscale x 2 x i64> @vmax_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -764,8 +764,8 @@ define <vscale x 4 x i64> @vmax_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -813,8 +813,8 @@ define <vscale x 8 x i64> @vmax_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll
index f65e708f5303cc..85cd97019fdfb1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll
@@ -1123,8 +1123,8 @@ define <vscale x 1 x i64> @vmax_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b, <vsca
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1149,8 +1149,8 @@ define <vscale x 1 x i64> @vmax_vx_nxv1i64_unmasked(<vscale x 1 x i64> %va, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1197,8 +1197,8 @@ define <vscale x 2 x i64> @vmax_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b, <vsca
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1223,8 +1223,8 @@ define <vscale x 2 x i64> @vmax_vx_nxv2i64_unmasked(<vscale x 2 x i64> %va, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1271,8 +1271,8 @@ define <vscale x 4 x i64> @vmax_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b, <vsca
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1297,8 +1297,8 @@ define <vscale x 4 x i64> @vmax_vx_nxv4i64_unmasked(<vscale x 4 x i64> %va, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1345,8 +1345,8 @@ define <vscale x 8 x i64> @vmax_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <vsca
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1371,8 +1371,8 @@ define <vscale x 8 x i64> @vmax_vx_nxv8i64_unmasked(<vscale x 8 x i64> %va, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmax.ll b/llvm/test/CodeGen/RISCV/rvv/vmax.ll
index 86f17dc20f23e7..5acca2b80919e7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmax.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmax.ll
@@ -1898,8 +1898,8 @@ define <vscale x 1 x i64> @intrinsic_vmax_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i
; RV32-LABEL: intrinsic_vmax_vx_nxv1i64_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1934,8 +1934,8 @@ define <vscale x 1 x i64> @intrinsic_vmax_mask_vx_nxv1i64_nxv1i64_i64(<vscale x
; RV32-LABEL: intrinsic_vmax_mask_vx_nxv1i64_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1969,8 +1969,8 @@ define <vscale x 2 x i64> @intrinsic_vmax_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i
; RV32-LABEL: intrinsic_vmax_vx_nxv2i64_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -2005,8 +2005,8 @@ define <vscale x 2 x i64> @intrinsic_vmax_mask_vx_nxv2i64_nxv2i64_i64(<vscale x
; RV32-LABEL: intrinsic_vmax_mask_vx_nxv2i64_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -2040,8 +2040,8 @@ define <vscale x 4 x i64> @intrinsic_vmax_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i
; RV32-LABEL: intrinsic_vmax_vx_nxv4i64_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -2076,8 +2076,8 @@ define <vscale x 4 x i64> @intrinsic_vmax_mask_vx_nxv4i64_nxv4i64_i64(<vscale x
; RV32-LABEL: intrinsic_vmax_mask_vx_nxv4i64_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2111,8 +2111,8 @@ define <vscale x 8 x i64> @intrinsic_vmax_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x i
; RV32-LABEL: intrinsic_vmax_vx_nxv8i64_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2147,8 +2147,8 @@ define <vscale x 8 x i64> @intrinsic_vmax_mask_vx_nxv8i64_nxv8i64_i64(<vscale x
; RV32-LABEL: intrinsic_vmax_mask_vx_nxv8i64_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; RV32-NEXT: vlse64.v v24, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmaxu-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vmaxu-sdnode.ll
index 8eb70fbc91fa54..e7fd3699f0fed9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmaxu-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmaxu-sdnode.ll
@@ -666,8 +666,8 @@ define <vscale x 1 x i64> @vmax_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -715,8 +715,8 @@ define <vscale x 2 x i64> @vmax_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -764,8 +764,8 @@ define <vscale x 4 x i64> @vmax_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -813,8 +813,8 @@ define <vscale x 8 x i64> @vmax_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll
index df1ad58e5ecbde..40e11e62110935 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll
@@ -1122,8 +1122,8 @@ define <vscale x 1 x i64> @vmaxu_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b, <vsc
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1148,8 +1148,8 @@ define <vscale x 1 x i64> @vmaxu_vx_nxv1i64_unmasked(<vscale x 1 x i64> %va, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1196,8 +1196,8 @@ define <vscale x 2 x i64> @vmaxu_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b, <vsc
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1222,8 +1222,8 @@ define <vscale x 2 x i64> @vmaxu_vx_nxv2i64_unmasked(<vscale x 2 x i64> %va, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1270,8 +1270,8 @@ define <vscale x 4 x i64> @vmaxu_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b, <vsc
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1296,8 +1296,8 @@ define <vscale x 4 x i64> @vmaxu_vx_nxv4i64_unmasked(<vscale x 4 x i64> %va, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1344,8 +1344,8 @@ define <vscale x 8 x i64> @vmaxu_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <vsc
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1370,8 +1370,8 @@ define <vscale x 8 x i64> @vmaxu_vx_nxv8i64_unmasked(<vscale x 8 x i64> %va, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmaxu.ll b/llvm/test/CodeGen/RISCV/rvv/vmaxu.ll
index e2a5b95b2b4ad1..f1a7cedbb4417d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmaxu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmaxu.ll
@@ -1898,8 +1898,8 @@ define <vscale x 1 x i64> @intrinsic_vmaxu_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x
; RV32-LABEL: intrinsic_vmaxu_vx_nxv1i64_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1934,8 +1934,8 @@ define <vscale x 1 x i64> @intrinsic_vmaxu_mask_vx_nxv1i64_nxv1i64_i64(<vscale x
; RV32-LABEL: intrinsic_vmaxu_mask_vx_nxv1i64_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1969,8 +1969,8 @@ define <vscale x 2 x i64> @intrinsic_vmaxu_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x
; RV32-LABEL: intrinsic_vmaxu_vx_nxv2i64_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -2005,8 +2005,8 @@ define <vscale x 2 x i64> @intrinsic_vmaxu_mask_vx_nxv2i64_nxv2i64_i64(<vscale x
; RV32-LABEL: intrinsic_vmaxu_mask_vx_nxv2i64_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -2040,8 +2040,8 @@ define <vscale x 4 x i64> @intrinsic_vmaxu_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x
; RV32-LABEL: intrinsic_vmaxu_vx_nxv4i64_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -2076,8 +2076,8 @@ define <vscale x 4 x i64> @intrinsic_vmaxu_mask_vx_nxv4i64_nxv4i64_i64(<vscale x
; RV32-LABEL: intrinsic_vmaxu_mask_vx_nxv4i64_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2111,8 +2111,8 @@ define <vscale x 8 x i64> @intrinsic_vmaxu_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x
; RV32-LABEL: intrinsic_vmaxu_vx_nxv8i64_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2147,8 +2147,8 @@ define <vscale x 8 x i64> @intrinsic_vmaxu_mask_vx_nxv8i64_nxv8i64_i64(<vscale x
; RV32-LABEL: intrinsic_vmaxu_mask_vx_nxv8i64_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; RV32-NEXT: vlse64.v v24, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmin-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vmin-sdnode.ll
index 7f526a21deac1c..fc7ed2979a06dc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmin-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmin-sdnode.ll
@@ -666,8 +666,8 @@ define <vscale x 1 x i64> @vmin_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -715,8 +715,8 @@ define <vscale x 2 x i64> @vmin_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -764,8 +764,8 @@ define <vscale x 4 x i64> @vmin_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -813,8 +813,8 @@ define <vscale x 8 x i64> @vmin_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll
index 0bf0638633aa45..2e4ace7212777c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll
@@ -1123,8 +1123,8 @@ define <vscale x 1 x i64> @vmin_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b, <vsca
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1149,8 +1149,8 @@ define <vscale x 1 x i64> @vmin_vx_nxv1i64_unmasked(<vscale x 1 x i64> %va, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1197,8 +1197,8 @@ define <vscale x 2 x i64> @vmin_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b, <vsca
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1223,8 +1223,8 @@ define <vscale x 2 x i64> @vmin_vx_nxv2i64_unmasked(<vscale x 2 x i64> %va, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1271,8 +1271,8 @@ define <vscale x 4 x i64> @vmin_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b, <vsca
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1297,8 +1297,8 @@ define <vscale x 4 x i64> @vmin_vx_nxv4i64_unmasked(<vscale x 4 x i64> %va, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1345,8 +1345,8 @@ define <vscale x 8 x i64> @vmin_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <vsca
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1371,8 +1371,8 @@ define <vscale x 8 x i64> @vmin_vx_nxv8i64_unmasked(<vscale x 8 x i64> %va, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmin.ll b/llvm/test/CodeGen/RISCV/rvv/vmin.ll
index 311c9f9f1526bd..002c0081c8e32a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmin.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmin.ll
@@ -1898,8 +1898,8 @@ define <vscale x 1 x i64> @intrinsic_vmin_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i
; RV32-LABEL: intrinsic_vmin_vx_nxv1i64_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1934,8 +1934,8 @@ define <vscale x 1 x i64> @intrinsic_vmin_mask_vx_nxv1i64_nxv1i64_i64(<vscale x
; RV32-LABEL: intrinsic_vmin_mask_vx_nxv1i64_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1969,8 +1969,8 @@ define <vscale x 2 x i64> @intrinsic_vmin_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i
; RV32-LABEL: intrinsic_vmin_vx_nxv2i64_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -2005,8 +2005,8 @@ define <vscale x 2 x i64> @intrinsic_vmin_mask_vx_nxv2i64_nxv2i64_i64(<vscale x
; RV32-LABEL: intrinsic_vmin_mask_vx_nxv2i64_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -2040,8 +2040,8 @@ define <vscale x 4 x i64> @intrinsic_vmin_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i
; RV32-LABEL: intrinsic_vmin_vx_nxv4i64_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -2076,8 +2076,8 @@ define <vscale x 4 x i64> @intrinsic_vmin_mask_vx_nxv4i64_nxv4i64_i64(<vscale x
; RV32-LABEL: intrinsic_vmin_mask_vx_nxv4i64_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2111,8 +2111,8 @@ define <vscale x 8 x i64> @intrinsic_vmin_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x i
; RV32-LABEL: intrinsic_vmin_vx_nxv8i64_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2147,8 +2147,8 @@ define <vscale x 8 x i64> @intrinsic_vmin_mask_vx_nxv8i64_nxv8i64_i64(<vscale x
; RV32-LABEL: intrinsic_vmin_mask_vx_nxv8i64_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; RV32-NEXT: vlse64.v v24, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vminu-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vminu-sdnode.ll
index d22a7dcccf0adc..032faed878a03d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vminu-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vminu-sdnode.ll
@@ -666,8 +666,8 @@ define <vscale x 1 x i64> @vmin_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -715,8 +715,8 @@ define <vscale x 2 x i64> @vmin_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -764,8 +764,8 @@ define <vscale x 4 x i64> @vmin_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -813,8 +813,8 @@ define <vscale x 8 x i64> @vmin_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll
index 2acebdf2e646d4..17a10868ff05b5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll
@@ -1122,8 +1122,8 @@ define <vscale x 1 x i64> @vminu_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b, <vsc
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1148,8 +1148,8 @@ define <vscale x 1 x i64> @vminu_vx_nxv1i64_unmasked(<vscale x 1 x i64> %va, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1196,8 +1196,8 @@ define <vscale x 2 x i64> @vminu_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b, <vsc
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1222,8 +1222,8 @@ define <vscale x 2 x i64> @vminu_vx_nxv2i64_unmasked(<vscale x 2 x i64> %va, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1270,8 +1270,8 @@ define <vscale x 4 x i64> @vminu_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b, <vsc
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1296,8 +1296,8 @@ define <vscale x 4 x i64> @vminu_vx_nxv4i64_unmasked(<vscale x 4 x i64> %va, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1344,8 +1344,8 @@ define <vscale x 8 x i64> @vminu_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <vsc
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1370,8 +1370,8 @@ define <vscale x 8 x i64> @vminu_vx_nxv8i64_unmasked(<vscale x 8 x i64> %va, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vminu.ll b/llvm/test/CodeGen/RISCV/rvv/vminu.ll
index b513331bc0b8fc..3d2f6d00e68132 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vminu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vminu.ll
@@ -1898,8 +1898,8 @@ define <vscale x 1 x i64> @intrinsic_vminu_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x
; RV32-LABEL: intrinsic_vminu_vx_nxv1i64_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1934,8 +1934,8 @@ define <vscale x 1 x i64> @intrinsic_vminu_mask_vx_nxv1i64_nxv1i64_i64(<vscale x
; RV32-LABEL: intrinsic_vminu_mask_vx_nxv1i64_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1969,8 +1969,8 @@ define <vscale x 2 x i64> @intrinsic_vminu_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x
; RV32-LABEL: intrinsic_vminu_vx_nxv2i64_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -2005,8 +2005,8 @@ define <vscale x 2 x i64> @intrinsic_vminu_mask_vx_nxv2i64_nxv2i64_i64(<vscale x
; RV32-LABEL: intrinsic_vminu_mask_vx_nxv2i64_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -2040,8 +2040,8 @@ define <vscale x 4 x i64> @intrinsic_vminu_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x
; RV32-LABEL: intrinsic_vminu_vx_nxv4i64_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -2076,8 +2076,8 @@ define <vscale x 4 x i64> @intrinsic_vminu_mask_vx_nxv4i64_nxv4i64_i64(<vscale x
; RV32-LABEL: intrinsic_vminu_mask_vx_nxv4i64_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2111,8 +2111,8 @@ define <vscale x 8 x i64> @intrinsic_vminu_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x
; RV32-LABEL: intrinsic_vminu_vx_nxv8i64_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2147,8 +2147,8 @@ define <vscale x 8 x i64> @intrinsic_vminu_mask_vx_nxv8i64_nxv8i64_i64(<vscale x
; RV32-LABEL: intrinsic_vminu_mask_vx_nxv8i64_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; RV32-NEXT: vlse64.v v24, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsbc.borrow.in.ll b/llvm/test/CodeGen/RISCV/rvv/vmsbc.borrow.in.ll
index b3af0d3a6f3054..9ce7d68ba40125 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsbc.borrow.in.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsbc.borrow.in.ll
@@ -934,8 +934,8 @@ define <vscale x 1 x i1> @intrinsic_vmsbc.borrow.in_vxm_nxv1i1_nxv1i64_i64(<vsca
; RV32-LABEL: intrinsic_vmsbc.borrow.in_vxm_nxv1i1_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -970,8 +970,8 @@ define <vscale x 2 x i1> @intrinsic_vmsbc.borrow.in_vxm_nxv2i1_nxv2i64_i64(<vsca
; RV32-LABEL: intrinsic_vmsbc.borrow.in_vxm_nxv2i1_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1006,8 +1006,8 @@ define <vscale x 4 x i1> @intrinsic_vmsbc.borrow.in_vxm_nxv4i1_nxv4i64_i64(<vsca
; RV32-LABEL: intrinsic_vmsbc.borrow.in_vxm_nxv4i1_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1042,8 +1042,8 @@ define <vscale x 8 x i1> @intrinsic_vmsbc.borrow.in_vxm_nxv8i1_nxv8i64_i64(<vsca
; RV32-LABEL: intrinsic_vmsbc.borrow.in_vxm_nxv8i1_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v24, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsbc.ll b/llvm/test/CodeGen/RISCV/rvv/vmsbc.ll
index a4785a05de7821..8c870a9332646e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsbc.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsbc.ll
@@ -813,8 +813,8 @@ define <vscale x 1 x i1> @intrinsic_vmsbc_vx_nxv1i1_nxv1i64_i64(<vscale x 1 x i6
; RV32-LABEL: intrinsic_vmsbc_vx_nxv1i1_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -845,8 +845,8 @@ define <vscale x 2 x i1> @intrinsic_vmsbc_vx_nxv2i1_nxv2i64_i64(<vscale x 2 x i6
; RV32-LABEL: intrinsic_vmsbc_vx_nxv2i1_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -877,8 +877,8 @@ define <vscale x 4 x i1> @intrinsic_vmsbc_vx_nxv4i1_nxv4i64_i64(<vscale x 4 x i6
; RV32-LABEL: intrinsic_vmsbc_vx_nxv4i1_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -909,8 +909,8 @@ define <vscale x 8 x i1> @intrinsic_vmsbc_vx_nxv8i1_nxv8i64_i64(<vscale x 8 x i6
; RV32-LABEL: intrinsic_vmsbc_vx_nxv8i1_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmseq.ll b/llvm/test/CodeGen/RISCV/rvv/vmseq.ll
index 9f181f7a30ebed..c5769e0d1e5192 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmseq.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmseq.ll
@@ -1654,8 +1654,8 @@ define <vscale x 1 x i1> @intrinsic_vmseq_vx_nxv1i64_i64(<vscale x 1 x i64> %0,
; RV32-LABEL: intrinsic_vmseq_vx_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1688,8 +1688,8 @@ define <vscale x 1 x i1> @intrinsic_vmseq_mask_vx_nxv1i64_i64(<vscale x 1 x i1>
; RV32-LABEL: intrinsic_vmseq_mask_vx_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu
; RV32-NEXT: vlse64.v v11, (a0), zero
@@ -1728,8 +1728,8 @@ define <vscale x 2 x i1> @intrinsic_vmseq_vx_nxv2i64_i64(<vscale x 2 x i64> %0,
; RV32-LABEL: intrinsic_vmseq_vx_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1762,8 +1762,8 @@ define <vscale x 2 x i1> @intrinsic_vmseq_mask_vx_nxv2i64_i64(<vscale x 2 x i1>
; RV32-LABEL: intrinsic_vmseq_mask_vx_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1802,8 +1802,8 @@ define <vscale x 4 x i1> @intrinsic_vmseq_vx_nxv4i64_i64(<vscale x 4 x i64> %0,
; RV32-LABEL: intrinsic_vmseq_vx_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1836,8 +1836,8 @@ define <vscale x 4 x i1> @intrinsic_vmseq_mask_vx_nxv4i64_i64(<vscale x 4 x i1>
; RV32-LABEL: intrinsic_vmseq_mask_vx_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsge.ll b/llvm/test/CodeGen/RISCV/rvv/vmsge.ll
index 75fc407abbc2f3..1ec304609699a6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsge.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsge.ll
@@ -1669,8 +1669,8 @@ define <vscale x 1 x i1> @intrinsic_vmsge_vx_nxv1i64_i64(<vscale x 1 x i64> %0,
; RV32-LABEL: intrinsic_vmsge_vx_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1704,8 +1704,8 @@ define <vscale x 1 x i1> @intrinsic_vmsge_mask_vx_nxv1i64_i64(<vscale x 1 x i1>
; RV32-LABEL: intrinsic_vmsge_mask_vx_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu
; RV32-NEXT: vlse64.v v11, (a0), zero
@@ -1744,8 +1744,8 @@ define <vscale x 2 x i1> @intrinsic_vmsge_vx_nxv2i64_i64(<vscale x 2 x i64> %0,
; RV32-LABEL: intrinsic_vmsge_vx_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1779,8 +1779,8 @@ define <vscale x 2 x i1> @intrinsic_vmsge_mask_vx_nxv2i64_i64(<vscale x 2 x i1>
; RV32-LABEL: intrinsic_vmsge_mask_vx_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1819,8 +1819,8 @@ define <vscale x 4 x i1> @intrinsic_vmsge_vx_nxv4i64_i64(<vscale x 4 x i64> %0,
; RV32-LABEL: intrinsic_vmsge_vx_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1854,8 +1854,8 @@ define <vscale x 4 x i1> @intrinsic_vmsge_mask_vx_nxv4i64_i64(<vscale x 4 x i1>
; RV32-LABEL: intrinsic_vmsge_mask_vx_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2790,8 +2790,8 @@ define <vscale x 1 x i1> @intrinsic_vmsge_maskedoff_mask_vx_nxv1i64_i64(<vscale
; RV32-LABEL: intrinsic_vmsge_maskedoff_mask_vx_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -2820,8 +2820,8 @@ define <vscale x 2 x i1> @intrinsic_vmsge_maskedoff_mask_vx_nxv2i64_i64(<vscale
; RV32-LABEL: intrinsic_vmsge_maskedoff_mask_vx_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -2852,8 +2852,8 @@ define <vscale x 4 x i1> @intrinsic_vmsge_maskedoff_mask_vx_nxv4i64_i64(<vscale
; RV32-LABEL: intrinsic_vmsge_maskedoff_mask_vx_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll
index 5568c1e9b1cfb9..05cc7a9d8f7b42 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll
@@ -1669,8 +1669,8 @@ define <vscale x 1 x i1> @intrinsic_vmsgeu_vx_nxv1i64_i64(<vscale x 1 x i64> %0,
; RV32-LABEL: intrinsic_vmsgeu_vx_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1704,8 +1704,8 @@ define <vscale x 1 x i1> @intrinsic_vmsgeu_mask_vx_nxv1i64_i64(<vscale x 1 x i1>
; RV32-LABEL: intrinsic_vmsgeu_mask_vx_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu
; RV32-NEXT: vlse64.v v11, (a0), zero
@@ -1744,8 +1744,8 @@ define <vscale x 2 x i1> @intrinsic_vmsgeu_vx_nxv2i64_i64(<vscale x 2 x i64> %0,
; RV32-LABEL: intrinsic_vmsgeu_vx_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1779,8 +1779,8 @@ define <vscale x 2 x i1> @intrinsic_vmsgeu_mask_vx_nxv2i64_i64(<vscale x 2 x i1>
; RV32-LABEL: intrinsic_vmsgeu_mask_vx_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1819,8 +1819,8 @@ define <vscale x 4 x i1> @intrinsic_vmsgeu_vx_nxv4i64_i64(<vscale x 4 x i64> %0,
; RV32-LABEL: intrinsic_vmsgeu_vx_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1854,8 +1854,8 @@ define <vscale x 4 x i1> @intrinsic_vmsgeu_mask_vx_nxv4i64_i64(<vscale x 4 x i1>
; RV32-LABEL: intrinsic_vmsgeu_mask_vx_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2802,8 +2802,8 @@ define <vscale x 1 x i1> @intrinsic_vmsgeu_maskedoff_mask_vx_nxv1i64_i64(<vscale
; RV32-LABEL: intrinsic_vmsgeu_maskedoff_mask_vx_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -2832,8 +2832,8 @@ define <vscale x 2 x i1> @intrinsic_vmsgeu_maskedoff_mask_vx_nxv2i64_i64(<vscale
; RV32-LABEL: intrinsic_vmsgeu_maskedoff_mask_vx_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -2864,8 +2864,8 @@ define <vscale x 4 x i1> @intrinsic_vmsgeu_maskedoff_mask_vx_nxv4i64_i64(<vscale
; RV32-LABEL: intrinsic_vmsgeu_maskedoff_mask_vx_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgt.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgt.ll
index f1fa6484d976b4..98a03a2c562806 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsgt.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsgt.ll
@@ -1654,8 +1654,8 @@ define <vscale x 1 x i1> @intrinsic_vmsgt_vx_nxv1i64_i64(<vscale x 1 x i64> %0,
; RV32-LABEL: intrinsic_vmsgt_vx_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1688,8 +1688,8 @@ define <vscale x 1 x i1> @intrinsic_vmsgt_mask_vx_nxv1i64_i64(<vscale x 1 x i1>
; RV32-LABEL: intrinsic_vmsgt_mask_vx_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu
; RV32-NEXT: vlse64.v v11, (a0), zero
@@ -1728,8 +1728,8 @@ define <vscale x 2 x i1> @intrinsic_vmsgt_vx_nxv2i64_i64(<vscale x 2 x i64> %0,
; RV32-LABEL: intrinsic_vmsgt_vx_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1762,8 +1762,8 @@ define <vscale x 2 x i1> @intrinsic_vmsgt_mask_vx_nxv2i64_i64(<vscale x 2 x i1>
; RV32-LABEL: intrinsic_vmsgt_mask_vx_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1802,8 +1802,8 @@ define <vscale x 4 x i1> @intrinsic_vmsgt_vx_nxv4i64_i64(<vscale x 4 x i64> %0,
; RV32-LABEL: intrinsic_vmsgt_vx_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1836,8 +1836,8 @@ define <vscale x 4 x i1> @intrinsic_vmsgt_mask_vx_nxv4i64_i64(<vscale x 4 x i1>
; RV32-LABEL: intrinsic_vmsgt_mask_vx_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll
index de7a0ad87be27c..69b22573c289e5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll
@@ -1654,8 +1654,8 @@ define <vscale x 1 x i1> @intrinsic_vmsgtu_vx_nxv1i64_i64(<vscale x 1 x i64> %0,
; RV32-LABEL: intrinsic_vmsgtu_vx_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1688,8 +1688,8 @@ define <vscale x 1 x i1> @intrinsic_vmsgtu_mask_vx_nxv1i64_i64(<vscale x 1 x i1>
; RV32-LABEL: intrinsic_vmsgtu_mask_vx_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu
; RV32-NEXT: vlse64.v v11, (a0), zero
@@ -1728,8 +1728,8 @@ define <vscale x 2 x i1> @intrinsic_vmsgtu_vx_nxv2i64_i64(<vscale x 2 x i64> %0,
; RV32-LABEL: intrinsic_vmsgtu_vx_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1762,8 +1762,8 @@ define <vscale x 2 x i1> @intrinsic_vmsgtu_mask_vx_nxv2i64_i64(<vscale x 2 x i1>
; RV32-LABEL: intrinsic_vmsgtu_mask_vx_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1802,8 +1802,8 @@ define <vscale x 4 x i1> @intrinsic_vmsgtu_vx_nxv4i64_i64(<vscale x 4 x i64> %0,
; RV32-LABEL: intrinsic_vmsgtu_vx_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1836,8 +1836,8 @@ define <vscale x 4 x i1> @intrinsic_vmsgtu_mask_vx_nxv4i64_i64(<vscale x 4 x i1>
; RV32-LABEL: intrinsic_vmsgtu_mask_vx_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsle.ll b/llvm/test/CodeGen/RISCV/rvv/vmsle.ll
index f54aef3ed4052c..c8794e1b63900f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsle.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsle.ll
@@ -1654,8 +1654,8 @@ define <vscale x 1 x i1> @intrinsic_vmsle_vx_nxv1i64_i64(<vscale x 1 x i64> %0,
; RV32-LABEL: intrinsic_vmsle_vx_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1688,8 +1688,8 @@ define <vscale x 1 x i1> @intrinsic_vmsle_mask_vx_nxv1i64_i64(<vscale x 1 x i1>
; RV32-LABEL: intrinsic_vmsle_mask_vx_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu
; RV32-NEXT: vlse64.v v11, (a0), zero
@@ -1728,8 +1728,8 @@ define <vscale x 2 x i1> @intrinsic_vmsle_vx_nxv2i64_i64(<vscale x 2 x i64> %0,
; RV32-LABEL: intrinsic_vmsle_vx_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1762,8 +1762,8 @@ define <vscale x 2 x i1> @intrinsic_vmsle_mask_vx_nxv2i64_i64(<vscale x 2 x i1>
; RV32-LABEL: intrinsic_vmsle_mask_vx_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1802,8 +1802,8 @@ define <vscale x 4 x i1> @intrinsic_vmsle_vx_nxv4i64_i64(<vscale x 4 x i64> %0,
; RV32-LABEL: intrinsic_vmsle_vx_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1836,8 +1836,8 @@ define <vscale x 4 x i1> @intrinsic_vmsle_mask_vx_nxv4i64_i64(<vscale x 4 x i1>
; RV32-LABEL: intrinsic_vmsle_mask_vx_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsleu.ll b/llvm/test/CodeGen/RISCV/rvv/vmsleu.ll
index 540577247484e3..86dc48d51cc2bc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsleu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsleu.ll
@@ -1654,8 +1654,8 @@ define <vscale x 1 x i1> @intrinsic_vmsleu_vx_nxv1i64_i64(<vscale x 1 x i64> %0,
; RV32-LABEL: intrinsic_vmsleu_vx_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1688,8 +1688,8 @@ define <vscale x 1 x i1> @intrinsic_vmsleu_mask_vx_nxv1i64_i64(<vscale x 1 x i1>
; RV32-LABEL: intrinsic_vmsleu_mask_vx_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu
; RV32-NEXT: vlse64.v v11, (a0), zero
@@ -1728,8 +1728,8 @@ define <vscale x 2 x i1> @intrinsic_vmsleu_vx_nxv2i64_i64(<vscale x 2 x i64> %0,
; RV32-LABEL: intrinsic_vmsleu_vx_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1762,8 +1762,8 @@ define <vscale x 2 x i1> @intrinsic_vmsleu_mask_vx_nxv2i64_i64(<vscale x 2 x i1>
; RV32-LABEL: intrinsic_vmsleu_mask_vx_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1802,8 +1802,8 @@ define <vscale x 4 x i1> @intrinsic_vmsleu_vx_nxv4i64_i64(<vscale x 4 x i64> %0,
; RV32-LABEL: intrinsic_vmsleu_vx_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1836,8 +1836,8 @@ define <vscale x 4 x i1> @intrinsic_vmsleu_mask_vx_nxv4i64_i64(<vscale x 4 x i1>
; RV32-LABEL: intrinsic_vmsleu_mask_vx_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmslt.ll b/llvm/test/CodeGen/RISCV/rvv/vmslt.ll
index 554d25172d4fde..8d57f2adc53868 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmslt.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmslt.ll
@@ -1654,8 +1654,8 @@ define <vscale x 1 x i1> @intrinsic_vmslt_vx_nxv1i64_i64(<vscale x 1 x i64> %0,
; RV32-LABEL: intrinsic_vmslt_vx_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1688,8 +1688,8 @@ define <vscale x 1 x i1> @intrinsic_vmslt_mask_vx_nxv1i64_i64(<vscale x 1 x i1>
; RV32-LABEL: intrinsic_vmslt_mask_vx_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu
; RV32-NEXT: vlse64.v v11, (a0), zero
@@ -1728,8 +1728,8 @@ define <vscale x 2 x i1> @intrinsic_vmslt_vx_nxv2i64_i64(<vscale x 2 x i64> %0,
; RV32-LABEL: intrinsic_vmslt_vx_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1762,8 +1762,8 @@ define <vscale x 2 x i1> @intrinsic_vmslt_mask_vx_nxv2i64_i64(<vscale x 2 x i1>
; RV32-LABEL: intrinsic_vmslt_mask_vx_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1802,8 +1802,8 @@ define <vscale x 4 x i1> @intrinsic_vmslt_vx_nxv4i64_i64(<vscale x 4 x i64> %0,
; RV32-LABEL: intrinsic_vmslt_vx_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1836,8 +1836,8 @@ define <vscale x 4 x i1> @intrinsic_vmslt_mask_vx_nxv4i64_i64(<vscale x 4 x i1>
; RV32-LABEL: intrinsic_vmslt_mask_vx_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsltu.ll b/llvm/test/CodeGen/RISCV/rvv/vmsltu.ll
index 7a8efa6c80fb6b..627b5943087137 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsltu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsltu.ll
@@ -1654,8 +1654,8 @@ define <vscale x 1 x i1> @intrinsic_vmsltu_vx_nxv1i64_i64(<vscale x 1 x i64> %0,
; RV32-LABEL: intrinsic_vmsltu_vx_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1688,8 +1688,8 @@ define <vscale x 1 x i1> @intrinsic_vmsltu_mask_vx_nxv1i64_i64(<vscale x 1 x i1>
; RV32-LABEL: intrinsic_vmsltu_mask_vx_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu
; RV32-NEXT: vlse64.v v11, (a0), zero
@@ -1728,8 +1728,8 @@ define <vscale x 2 x i1> @intrinsic_vmsltu_vx_nxv2i64_i64(<vscale x 2 x i64> %0,
; RV32-LABEL: intrinsic_vmsltu_vx_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1762,8 +1762,8 @@ define <vscale x 2 x i1> @intrinsic_vmsltu_mask_vx_nxv2i64_i64(<vscale x 2 x i1>
; RV32-LABEL: intrinsic_vmsltu_mask_vx_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1802,8 +1802,8 @@ define <vscale x 4 x i1> @intrinsic_vmsltu_vx_nxv4i64_i64(<vscale x 4 x i64> %0,
; RV32-LABEL: intrinsic_vmsltu_vx_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1836,8 +1836,8 @@ define <vscale x 4 x i1> @intrinsic_vmsltu_mask_vx_nxv4i64_i64(<vscale x 4 x i1>
; RV32-LABEL: intrinsic_vmsltu_mask_vx_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsne.ll b/llvm/test/CodeGen/RISCV/rvv/vmsne.ll
index bd6bd8a804bcc2..47d1048f46cab4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsne.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsne.ll
@@ -1654,8 +1654,8 @@ define <vscale x 1 x i1> @intrinsic_vmsne_vx_nxv1i64_i64(<vscale x 1 x i64> %0,
; RV32-LABEL: intrinsic_vmsne_vx_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1688,8 +1688,8 @@ define <vscale x 1 x i1> @intrinsic_vmsne_mask_vx_nxv1i64_i64(<vscale x 1 x i1>
; RV32-LABEL: intrinsic_vmsne_mask_vx_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu
; RV32-NEXT: vlse64.v v11, (a0), zero
@@ -1728,8 +1728,8 @@ define <vscale x 2 x i1> @intrinsic_vmsne_vx_nxv2i64_i64(<vscale x 2 x i64> %0,
; RV32-LABEL: intrinsic_vmsne_vx_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1762,8 +1762,8 @@ define <vscale x 2 x i1> @intrinsic_vmsne_mask_vx_nxv2i64_i64(<vscale x 2 x i1>
; RV32-LABEL: intrinsic_vmsne_mask_vx_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1802,8 +1802,8 @@ define <vscale x 4 x i1> @intrinsic_vmsne_vx_nxv4i64_i64(<vscale x 4 x i64> %0,
; RV32-LABEL: intrinsic_vmsne_vx_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1836,8 +1836,8 @@ define <vscale x 4 x i1> @intrinsic_vmsne_mask_vx_nxv4i64_i64(<vscale x 4 x i1>
; RV32-LABEL: intrinsic_vmsne_mask_vx_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmul-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vmul-sdnode.ll
index 0b8620c90c62e0..c05ab8fe7db2fc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmul-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmul-sdnode.ll
@@ -614,8 +614,8 @@ define <vscale x 1 x i64> @vmul_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -680,8 +680,8 @@ define <vscale x 2 x i64> @vmul_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -746,8 +746,8 @@ define <vscale x 4 x i64> @vmul_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -812,8 +812,8 @@ define <vscale x 8 x i64> @vmul_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -867,13 +867,13 @@ define <vscale x 8 x i64> @vmul_xx_nxv8i64(i64 %a, i64 %b) nounwind {
; RV32NOM-LABEL: vmul_xx_nxv8i64:
; RV32NOM: # %bb.0:
; RV32NOM-NEXT: addi sp, sp, -16
-; RV32NOM-NEXT: sw a1, 12(sp)
; RV32NOM-NEXT: sw a0, 8(sp)
+; RV32NOM-NEXT: sw a1, 12(sp)
; RV32NOM-NEXT: addi a0, sp, 8
; RV32NOM-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32NOM-NEXT: vlse64.v v8, (a0), zero
-; RV32NOM-NEXT: sw a3, 4(sp)
; RV32NOM-NEXT: sw a2, 0(sp)
+; RV32NOM-NEXT: sw a3, 4(sp)
; RV32NOM-NEXT: mv a0, sp
; RV32NOM-NEXT: vlse64.v v16, (a0), zero
; RV32NOM-NEXT: vmul.vv v8, v8, v16
@@ -891,12 +891,12 @@ define <vscale x 8 x i64> @vmul_xx_nxv8i64(i64 %a, i64 %b) nounwind {
; RV32M: # %bb.0:
; RV32M-NEXT: addi sp, sp, -16
; RV32M-NEXT: mul a4, a0, a2
-; RV32M-NEXT: sw a4, 8(sp)
; RV32M-NEXT: mul a3, a0, a3
; RV32M-NEXT: mulhu a0, a0, a2
; RV32M-NEXT: add a0, a0, a3
; RV32M-NEXT: mul a1, a1, a2
; RV32M-NEXT: add a0, a0, a1
+; RV32M-NEXT: sw a4, 8(sp)
; RV32M-NEXT: sw a0, 12(sp)
; RV32M-NEXT: addi a0, sp, 8
; RV32M-NEXT: vsetvli a1, zero, e64, m8, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll
index 51026cbcb8c4bf..80ff2c746f8db3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll
@@ -931,8 +931,8 @@ define <vscale x 1 x i64> @vmul_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b, <vsca
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -957,8 +957,8 @@ define <vscale x 1 x i64> @vmul_vx_nxv1i64_unmasked(<vscale x 1 x i64> %va, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1005,8 +1005,8 @@ define <vscale x 2 x i64> @vmul_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b, <vsca
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1031,8 +1031,8 @@ define <vscale x 2 x i64> @vmul_vx_nxv2i64_unmasked(<vscale x 2 x i64> %va, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1079,8 +1079,8 @@ define <vscale x 4 x i64> @vmul_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b, <vsca
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1105,8 +1105,8 @@ define <vscale x 4 x i64> @vmul_vx_nxv4i64_unmasked(<vscale x 4 x i64> %va, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1153,8 +1153,8 @@ define <vscale x 8 x i64> @vmul_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <vsca
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1179,8 +1179,8 @@ define <vscale x 8 x i64> @vmul_vx_nxv8i64_unmasked(<vscale x 8 x i64> %va, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmul.ll b/llvm/test/CodeGen/RISCV/rvv/vmul.ll
index 91d255ffeeecfd..81b9b60dc70c2b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmul.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmul.ll
@@ -1862,8 +1862,8 @@ define <vscale x 1 x i64> @intrinsic_vmul_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i
; RV32-LABEL: intrinsic_vmul_vx_nxv1i64_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1897,8 +1897,8 @@ define <vscale x 1 x i64> @intrinsic_vmul_mask_vx_nxv1i64_nxv1i64_i64(<vscale x
; RV32-LABEL: intrinsic_vmul_mask_vx_nxv1i64_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1932,8 +1932,8 @@ define <vscale x 2 x i64> @intrinsic_vmul_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i
; RV32-LABEL: intrinsic_vmul_vx_nxv2i64_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1967,8 +1967,8 @@ define <vscale x 2 x i64> @intrinsic_vmul_mask_vx_nxv2i64_nxv2i64_i64(<vscale x
; RV32-LABEL: intrinsic_vmul_mask_vx_nxv2i64_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -2002,8 +2002,8 @@ define <vscale x 4 x i64> @intrinsic_vmul_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i
; RV32-LABEL: intrinsic_vmul_vx_nxv4i64_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -2037,8 +2037,8 @@ define <vscale x 4 x i64> @intrinsic_vmul_mask_vx_nxv4i64_nxv4i64_i64(<vscale x
; RV32-LABEL: intrinsic_vmul_mask_vx_nxv4i64_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2072,8 +2072,8 @@ define <vscale x 8 x i64> @intrinsic_vmul_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x i
; RV32-LABEL: intrinsic_vmul_vx_nxv8i64_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2107,8 +2107,8 @@ define <vscale x 8 x i64> @intrinsic_vmul_mask_vx_nxv8i64_nxv8i64_i64(<vscale x
; RV32-LABEL: intrinsic_vmul_mask_vx_nxv8i64_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; RV32-NEXT: vlse64.v v24, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmulh.ll b/llvm/test/CodeGen/RISCV/rvv/vmulh.ll
index e6e60853d490b6..d0d86af7ea622a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmulh.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmulh.ll
@@ -1864,8 +1864,8 @@ define <vscale x 1 x i64> @intrinsic_vmulh_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x
; RV32-LABEL: intrinsic_vmulh_vx_nxv1i64_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1899,8 +1899,8 @@ define <vscale x 1 x i64> @intrinsic_vmulh_mask_vx_nxv1i64_nxv1i64_i64(<vscale x
; RV32-LABEL: intrinsic_vmulh_mask_vx_nxv1i64_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1934,8 +1934,8 @@ define <vscale x 2 x i64> @intrinsic_vmulh_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x
; RV32-LABEL: intrinsic_vmulh_vx_nxv2i64_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1969,8 +1969,8 @@ define <vscale x 2 x i64> @intrinsic_vmulh_mask_vx_nxv2i64_nxv2i64_i64(<vscale x
; RV32-LABEL: intrinsic_vmulh_mask_vx_nxv2i64_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -2004,8 +2004,8 @@ define <vscale x 4 x i64> @intrinsic_vmulh_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x
; RV32-LABEL: intrinsic_vmulh_vx_nxv4i64_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -2039,8 +2039,8 @@ define <vscale x 4 x i64> @intrinsic_vmulh_mask_vx_nxv4i64_nxv4i64_i64(<vscale x
; RV32-LABEL: intrinsic_vmulh_mask_vx_nxv4i64_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2074,8 +2074,8 @@ define <vscale x 8 x i64> @intrinsic_vmulh_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x
; RV32-LABEL: intrinsic_vmulh_vx_nxv8i64_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2109,8 +2109,8 @@ define <vscale x 8 x i64> @intrinsic_vmulh_mask_vx_nxv8i64_nxv8i64_i64(<vscale x
; RV32-LABEL: intrinsic_vmulh_mask_vx_nxv8i64_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; RV32-NEXT: vlse64.v v24, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmulhsu.ll b/llvm/test/CodeGen/RISCV/rvv/vmulhsu.ll
index 42752473a4b350..03c4f3fa1de8ef 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmulhsu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmulhsu.ll
@@ -1864,8 +1864,8 @@ define <vscale x 1 x i64> @intrinsic_vmulhsu_vx_nxv1i64_nxv1i64_i64(<vscale x 1
; RV32-LABEL: intrinsic_vmulhsu_vx_nxv1i64_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1899,8 +1899,8 @@ define <vscale x 1 x i64> @intrinsic_vmulhsu_mask_vx_nxv1i64_nxv1i64_i64(<vscale
; RV32-LABEL: intrinsic_vmulhsu_mask_vx_nxv1i64_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1934,8 +1934,8 @@ define <vscale x 2 x i64> @intrinsic_vmulhsu_vx_nxv2i64_nxv2i64_i64(<vscale x 2
; RV32-LABEL: intrinsic_vmulhsu_vx_nxv2i64_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1969,8 +1969,8 @@ define <vscale x 2 x i64> @intrinsic_vmulhsu_mask_vx_nxv2i64_nxv2i64_i64(<vscale
; RV32-LABEL: intrinsic_vmulhsu_mask_vx_nxv2i64_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -2004,8 +2004,8 @@ define <vscale x 4 x i64> @intrinsic_vmulhsu_vx_nxv4i64_nxv4i64_i64(<vscale x 4
; RV32-LABEL: intrinsic_vmulhsu_vx_nxv4i64_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -2039,8 +2039,8 @@ define <vscale x 4 x i64> @intrinsic_vmulhsu_mask_vx_nxv4i64_nxv4i64_i64(<vscale
; RV32-LABEL: intrinsic_vmulhsu_mask_vx_nxv4i64_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2074,8 +2074,8 @@ define <vscale x 8 x i64> @intrinsic_vmulhsu_vx_nxv8i64_nxv8i64_i64(<vscale x 8
; RV32-LABEL: intrinsic_vmulhsu_vx_nxv8i64_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2109,8 +2109,8 @@ define <vscale x 8 x i64> @intrinsic_vmulhsu_mask_vx_nxv8i64_nxv8i64_i64(<vscale
; RV32-LABEL: intrinsic_vmulhsu_mask_vx_nxv8i64_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; RV32-NEXT: vlse64.v v24, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmulhu.ll b/llvm/test/CodeGen/RISCV/rvv/vmulhu.ll
index 6d50959cb0e4ef..8f5ef1664ad94c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmulhu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmulhu.ll
@@ -1864,8 +1864,8 @@ define <vscale x 1 x i64> @intrinsic_vmulhu_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x
; RV32-LABEL: intrinsic_vmulhu_vx_nxv1i64_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1899,8 +1899,8 @@ define <vscale x 1 x i64> @intrinsic_vmulhu_mask_vx_nxv1i64_nxv1i64_i64(<vscale
; RV32-LABEL: intrinsic_vmulhu_mask_vx_nxv1i64_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1934,8 +1934,8 @@ define <vscale x 2 x i64> @intrinsic_vmulhu_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x
; RV32-LABEL: intrinsic_vmulhu_vx_nxv2i64_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1969,8 +1969,8 @@ define <vscale x 2 x i64> @intrinsic_vmulhu_mask_vx_nxv2i64_nxv2i64_i64(<vscale
; RV32-LABEL: intrinsic_vmulhu_mask_vx_nxv2i64_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -2004,8 +2004,8 @@ define <vscale x 4 x i64> @intrinsic_vmulhu_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x
; RV32-LABEL: intrinsic_vmulhu_vx_nxv4i64_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -2039,8 +2039,8 @@ define <vscale x 4 x i64> @intrinsic_vmulhu_mask_vx_nxv4i64_nxv4i64_i64(<vscale
; RV32-LABEL: intrinsic_vmulhu_mask_vx_nxv4i64_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2074,8 +2074,8 @@ define <vscale x 8 x i64> @intrinsic_vmulhu_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x
; RV32-LABEL: intrinsic_vmulhu_vx_nxv8i64_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2109,8 +2109,8 @@ define <vscale x 8 x i64> @intrinsic_vmulhu_mask_vx_nxv8i64_nxv8i64_i64(<vscale
; RV32-LABEL: intrinsic_vmulhu_mask_vx_nxv8i64_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; RV32-NEXT: vlse64.v v24, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv.s.x.ll b/llvm/test/CodeGen/RISCV/rvv/vmv.s.x.ll
index afb9cba328e7e6..07661c57640457 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmv.s.x.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmv.s.x.ll
@@ -244,8 +244,8 @@ define <vscale x 1 x i64> @intrinsic_vmv.s.x_x_nxv1i64(<vscale x 1 x i64> %0, i6
; RV32-LABEL: intrinsic_vmv.s.x_x_nxv1i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu
; RV32-NEXT: vid.v v9
; RV32-NEXT: vmseq.vi v0, v9, 0
@@ -270,8 +270,8 @@ define <vscale x 2 x i64> @intrinsic_vmv.s.x_x_nxv2i64(<vscale x 2 x i64> %0, i6
; RV32-LABEL: intrinsic_vmv.s.x_x_nxv2i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu
; RV32-NEXT: vid.v v10
; RV32-NEXT: vmseq.vi v0, v10, 0
@@ -296,8 +296,8 @@ define <vscale x 4 x i64> @intrinsic_vmv.s.x_x_nxv4i64(<vscale x 4 x i64> %0, i6
; RV32-LABEL: intrinsic_vmv.s.x_x_nxv4i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
; RV32-NEXT: vid.v v12
; RV32-NEXT: vmseq.vi v0, v12, 0
@@ -322,8 +322,8 @@ define <vscale x 8 x i64> @intrinsic_vmv.s.x_x_nxv8i64(<vscale x 8 x i64> %0, i6
; RV32-LABEL: intrinsic_vmv.s.x_x_nxv8i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; RV32-NEXT: vid.v v16
; RV32-NEXT: vmseq.vi v0, v16, 0
@@ -347,10 +347,10 @@ define <vscale x 1 x i64> @intrinsic_vmv.s.x_x_nxv1i64_bug(<vscale x 1 x i64> %0
; RV32-LABEL: intrinsic_vmv.s.x_x_nxv1i64_bug:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: lw a1, 4(a0)
-; RV32-NEXT: lw a0, 0(a0)
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: lw a1, 0(a0)
+; RV32-NEXT: lw a0, 4(a0)
+; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw a0, 12(sp)
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, mu
; RV32-NEXT: vid.v v9
; RV32-NEXT: vmseq.vi v0, v9, 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv.v.x.ll b/llvm/test/CodeGen/RISCV/rvv/vmv.v.x.ll
index 4fa95fb2d945db..79bdd30291c08c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmv.v.x.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmv.v.x.ll
@@ -373,8 +373,8 @@ define <vscale x 1 x i64> @intrinsic_vmv.v.x_x_nxv1i64(i64 %0, iXLen %1) nounwin
; RV32-LABEL: intrinsic_vmv.v.x_x_nxv1i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v8, (a0), zero
@@ -404,8 +404,8 @@ define <vscale x 2 x i64> @intrinsic_vmv.v.x_x_nxv2i64(i64 %0, iXLen %1) nounwin
; RV32-LABEL: intrinsic_vmv.v.x_x_nxv2i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v8, (a0), zero
@@ -435,8 +435,8 @@ define <vscale x 4 x i64> @intrinsic_vmv.v.x_x_nxv4i64(i64 %0, iXLen %1) nounwin
; RV32-LABEL: intrinsic_vmv.v.x_x_nxv4i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v8, (a0), zero
@@ -466,8 +466,8 @@ define <vscale x 8 x i64> @intrinsic_vmv.v.x_x_nxv8i64(i64 %0, iXLen %1) nounwin
; RV32-LABEL: intrinsic_vmv.v.x_x_nxv8i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v8, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vnmsac-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vnmsac-vp.ll
index 30edcaf9b15b53..4eadd11d421048 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vnmsac-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vnmsac-vp.ll
@@ -1651,8 +1651,8 @@ define <vscale x 1 x i64> @vnmsac_vx_nxv1i64(<vscale x 1 x i64> %a, i64 %b, <vsc
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1681,8 +1681,8 @@ define <vscale x 1 x i64> @vnmsac_vx_nxv1i64_unmasked(<vscale x 1 x i64> %a, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1724,8 +1724,8 @@ define <vscale x 1 x i64> @vnmsac_vx_nxv1i64_ta(<vscale x 1 x i64> %a, i64 %b, <
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1785,8 +1785,8 @@ define <vscale x 2 x i64> @vnmsac_vx_nxv2i64(<vscale x 2 x i64> %a, i64 %b, <vsc
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1815,8 +1815,8 @@ define <vscale x 2 x i64> @vnmsac_vx_nxv2i64_unmasked(<vscale x 2 x i64> %a, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1858,8 +1858,8 @@ define <vscale x 2 x i64> @vnmsac_vx_nxv2i64_ta(<vscale x 2 x i64> %a, i64 %b, <
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1919,8 +1919,8 @@ define <vscale x 4 x i64> @vnmsac_vx_nxv4i64(<vscale x 4 x i64> %a, i64 %b, <vsc
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1949,8 +1949,8 @@ define <vscale x 4 x i64> @vnmsac_vx_nxv4i64_unmasked(<vscale x 4 x i64> %a, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1992,8 +1992,8 @@ define <vscale x 4 x i64> @vnmsac_vx_nxv4i64_ta(<vscale x 4 x i64> %a, i64 %b, <
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2055,8 +2055,8 @@ define <vscale x 8 x i64> @vnmsac_vx_nxv8i64(<vscale x 8 x i64> %a, i64 %b, <vsc
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v24, (a0), zero
@@ -2085,8 +2085,8 @@ define <vscale x 8 x i64> @vnmsac_vx_nxv8i64_unmasked(<vscale x 8 x i64> %a, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v24, (a0), zero
@@ -2129,8 +2129,8 @@ define <vscale x 8 x i64> @vnmsac_vx_nxv8i64_ta(<vscale x 8 x i64> %a, i64 %b, <
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v24, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vnmsac.ll b/llvm/test/CodeGen/RISCV/rvv/vnmsac.ll
index 760f4d47ce2346..505443d93720bb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vnmsac.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vnmsac.ll
@@ -1566,8 +1566,8 @@ define <vscale x 1 x i64> @intrinsic_vnmsac_vx_nxv1i64_i64_nxv1i64(<vscale x 1
; RV32-LABEL: intrinsic_vnmsac_vx_nxv1i64_i64_nxv1i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1602,8 +1602,8 @@ define <vscale x 1 x i64> @intrinsic_vnmsac_mask_vx_nxv1i64_i64_nxv1i64(<vscale
; RV32-LABEL: intrinsic_vnmsac_mask_vx_nxv1i64_i64_nxv1i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1639,8 +1639,8 @@ define <vscale x 2 x i64> @intrinsic_vnmsac_vx_nxv2i64_i64_nxv2i64(<vscale x 2
; RV32-LABEL: intrinsic_vnmsac_vx_nxv2i64_i64_nxv2i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1675,8 +1675,8 @@ define <vscale x 2 x i64> @intrinsic_vnmsac_mask_vx_nxv2i64_i64_nxv2i64(<vscale
; RV32-LABEL: intrinsic_vnmsac_mask_vx_nxv2i64_i64_nxv2i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1712,8 +1712,8 @@ define <vscale x 4 x i64> @intrinsic_vnmsac_vx_nxv4i64_i64_nxv4i64(<vscale x 4
; RV32-LABEL: intrinsic_vnmsac_vx_nxv4i64_i64_nxv4i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1748,8 +1748,8 @@ define <vscale x 4 x i64> @intrinsic_vnmsac_mask_vx_nxv4i64_i64_nxv4i64(<vscale
; RV32-LABEL: intrinsic_vnmsac_mask_vx_nxv4i64_i64_nxv4i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vnmsub-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vnmsub-sdnode.ll
index 186ffb64e59025..c465785797844c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vnmsub-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vnmsub-sdnode.ll
@@ -458,8 +458,8 @@ define <vscale x 1 x i64> @vnmsub_vx_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -495,8 +495,8 @@ define <vscale x 2 x i64> @vnmsub_vx_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -532,8 +532,8 @@ define <vscale x 4 x i64> @vnmsub_vx_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -570,8 +570,8 @@ define <vscale x 8 x i64> @vnmsub_vx_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v24, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vnmsub.ll b/llvm/test/CodeGen/RISCV/rvv/vnmsub.ll
index 52036875c7d033..d9c7560830fecb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vnmsub.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vnmsub.ll
@@ -1566,8 +1566,8 @@ define <vscale x 1 x i64> @intrinsic_vnmsub_vx_nxv1i64_i64_nxv1i64(<vscale x 1
; RV32-LABEL: intrinsic_vnmsub_vx_nxv1i64_i64_nxv1i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1602,8 +1602,8 @@ define <vscale x 1 x i64> @intrinsic_vnmsub_mask_vx_nxv1i64_i64_nxv1i64(<vscale
; RV32-LABEL: intrinsic_vnmsub_mask_vx_nxv1i64_i64_nxv1i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1639,8 +1639,8 @@ define <vscale x 2 x i64> @intrinsic_vnmsub_vx_nxv2i64_i64_nxv2i64(<vscale x 2
; RV32-LABEL: intrinsic_vnmsub_vx_nxv2i64_i64_nxv2i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1675,8 +1675,8 @@ define <vscale x 2 x i64> @intrinsic_vnmsub_mask_vx_nxv2i64_i64_nxv2i64(<vscale
; RV32-LABEL: intrinsic_vnmsub_mask_vx_nxv2i64_i64_nxv2i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1712,8 +1712,8 @@ define <vscale x 4 x i64> @intrinsic_vnmsub_vx_nxv4i64_i64_nxv4i64(<vscale x 4
; RV32-LABEL: intrinsic_vnmsub_vx_nxv4i64_i64_nxv4i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1748,8 +1748,8 @@ define <vscale x 4 x i64> @intrinsic_vnmsub_mask_vx_nxv4i64_i64_nxv4i64(<vscale
; RV32-LABEL: intrinsic_vnmsub_mask_vx_nxv4i64_i64_nxv4i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vor-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vor-sdnode.ll
index dcfe07c1fba658..33a7476ac852a1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vor-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vor-sdnode.ll
@@ -781,8 +781,8 @@ define <vscale x 1 x i64> @vor_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -837,8 +837,8 @@ define <vscale x 2 x i64> @vor_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -893,8 +893,8 @@ define <vscale x 4 x i64> @vor_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -949,8 +949,8 @@ define <vscale x 8 x i64> @vor_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1016,9 +1016,9 @@ define <vscale x 8 x i64> @vor_xx_nxv8i64(i64 %a, i64 %b) nounwind {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: or a1, a1, a3
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: or a0, a0, a2
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v8, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vor-vp.ll
index b9388e58797048..a23044967db44a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vor-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vor-vp.ll
@@ -1323,8 +1323,8 @@ define <vscale x 1 x i64> @vor_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b, <vscal
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1349,8 +1349,8 @@ define <vscale x 1 x i64> @vor_vx_nxv1i64_unmasked(<vscale x 1 x i64> %va, i64 %
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1417,8 +1417,8 @@ define <vscale x 2 x i64> @vor_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b, <vscal
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1443,8 +1443,8 @@ define <vscale x 2 x i64> @vor_vx_nxv2i64_unmasked(<vscale x 2 x i64> %va, i64 %
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1511,8 +1511,8 @@ define <vscale x 4 x i64> @vor_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b, <vscal
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1537,8 +1537,8 @@ define <vscale x 4 x i64> @vor_vx_nxv4i64_unmasked(<vscale x 4 x i64> %va, i64 %
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1605,8 +1605,8 @@ define <vscale x 8 x i64> @vor_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <vscal
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1631,8 +1631,8 @@ define <vscale x 8 x i64> @vor_vx_nxv8i64_unmasked(<vscale x 8 x i64> %va, i64 %
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vor.ll b/llvm/test/CodeGen/RISCV/rvv/vor.ll
index 3badc121f0322b..8280f4c64684ee 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vor.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vor.ll
@@ -1898,8 +1898,8 @@ define <vscale x 1 x i64> @intrinsic_vor_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i6
; RV32-LABEL: intrinsic_vor_vx_nxv1i64_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1934,8 +1934,8 @@ define <vscale x 1 x i64> @intrinsic_vor_mask_vx_nxv1i64_nxv1i64_i64(<vscale x 1
; RV32-LABEL: intrinsic_vor_mask_vx_nxv1i64_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1969,8 +1969,8 @@ define <vscale x 2 x i64> @intrinsic_vor_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i6
; RV32-LABEL: intrinsic_vor_vx_nxv2i64_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -2005,8 +2005,8 @@ define <vscale x 2 x i64> @intrinsic_vor_mask_vx_nxv2i64_nxv2i64_i64(<vscale x 2
; RV32-LABEL: intrinsic_vor_mask_vx_nxv2i64_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -2040,8 +2040,8 @@ define <vscale x 4 x i64> @intrinsic_vor_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i6
; RV32-LABEL: intrinsic_vor_vx_nxv4i64_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -2076,8 +2076,8 @@ define <vscale x 4 x i64> @intrinsic_vor_mask_vx_nxv4i64_nxv4i64_i64(<vscale x 4
; RV32-LABEL: intrinsic_vor_mask_vx_nxv4i64_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2111,8 +2111,8 @@ define <vscale x 8 x i64> @intrinsic_vor_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x i6
; RV32-LABEL: intrinsic_vor_vx_nxv8i64_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2147,8 +2147,8 @@ define <vscale x 8 x i64> @intrinsic_vor_mask_vx_nxv8i64_nxv8i64_i64(<vscale x 8
; RV32-LABEL: intrinsic_vor_mask_vx_nxv8i64_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; RV32-NEXT: vlse64.v v24, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-splat.ll b/llvm/test/CodeGen/RISCV/rvv/vp-splat.ll
index 0da05c1bd4364d..7fc9388ff10a40 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vp-splat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vp-splat.ll
@@ -191,8 +191,8 @@ define <vscale x 1 x i64> @vp_splat_nxv1i64(i64 %val, <vscale x 1 x i1> %m, i32
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v8, (a0), zero
@@ -213,8 +213,8 @@ define <vscale x 2 x i64> @vp_splat_nxv2i64(i64 %val, <vscale x 2 x i1> %m, i32
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v8, (a0), zero
@@ -235,8 +235,8 @@ define <vscale x 4 x i64> @vp_splat_nxv4i64(i64 %val, <vscale x 4 x i1> %m, i32
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v8, (a0), zero
@@ -257,8 +257,8 @@ define <vscale x 8 x i64> @vp_splat_nxv8i64(i64 %val, <vscale x 8 x i1> %m, i32
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v8, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
index 86dfc74fdee37b..ea7bf65fc5644d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
@@ -853,8 +853,8 @@ define <vscale x 1 x i64> @vpmerge_vx_nxv1i64(i64 %a, <vscale x 1 x i64> %vb, <v
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, mu
; RV32-NEXT: vlse64.v v8, (a0), zero, v0.t
@@ -900,8 +900,8 @@ define <vscale x 2 x i64> @vpmerge_vx_nxv2i64(i64 %a, <vscale x 2 x i64> %vb, <v
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, tu, mu
; RV32-NEXT: vlse64.v v8, (a0), zero, v0.t
@@ -947,8 +947,8 @@ define <vscale x 4 x i64> @vpmerge_vx_nxv4i64(i64 %a, <vscale x 4 x i64> %vb, <v
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, tu, mu
; RV32-NEXT: vlse64.v v8, (a0), zero, v0.t
@@ -994,8 +994,8 @@ define <vscale x 8 x i64> @vpmerge_vx_nxv8i64(i64 %a, <vscale x 8 x i64> %vb, <v
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, tu, mu
; RV32-NEXT: vlse64.v v8, (a0), zero, v0.t
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll
index c41ddaee75a86c..85ad876004526a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll
@@ -1221,8 +1221,8 @@ define signext i64 @vpreduce_add_nxv1i64(i64 signext %s, <vscale x 1 x i64> %v,
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1253,8 +1253,8 @@ define signext i64 @vpwreduce_add_nxv1i32(i64 signext %s, <vscale x 1 x i32> %v,
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1287,8 +1287,8 @@ define signext i64 @vpwreduce_uadd_nxv1i32(i64 signext %s, <vscale x 1 x i32> %v
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1323,8 +1323,8 @@ define signext i64 @vpreduce_umax_nxv1i64(i64 signext %s, <vscale x 1 x i64> %v,
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1357,8 +1357,8 @@ define signext i64 @vpreduce_smax_nxv1i64(i64 signext %s, <vscale x 1 x i64> %v,
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1391,8 +1391,8 @@ define signext i64 @vpreduce_umin_nxv1i64(i64 signext %s, <vscale x 1 x i64> %v,
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1425,8 +1425,8 @@ define signext i64 @vpreduce_smin_nxv1i64(i64 signext %s, <vscale x 1 x i64> %v,
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1459,8 +1459,8 @@ define signext i64 @vpreduce_and_nxv1i64(i64 signext %s, <vscale x 1 x i64> %v,
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1493,8 +1493,8 @@ define signext i64 @vpreduce_or_nxv1i64(i64 signext %s, <vscale x 1 x i64> %v, <
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1527,8 +1527,8 @@ define signext i64 @vpreduce_xor_nxv1i64(i64 signext %s, <vscale x 1 x i64> %v,
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1561,8 +1561,8 @@ define signext i64 @vpreduce_add_nxv2i64(i64 signext %s, <vscale x 2 x i64> %v,
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1593,8 +1593,8 @@ define signext i64 @vwpreduce_add_nxv2i32(i64 signext %s, <vscale x 2 x i32> %v,
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1627,8 +1627,8 @@ define signext i64 @vwpreduce_uadd_nxv2i32(i64 signext %s, <vscale x 2 x i32> %v
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1663,8 +1663,8 @@ define signext i64 @vpreduce_umax_nxv2i64(i64 signext %s, <vscale x 2 x i64> %v,
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1697,8 +1697,8 @@ define signext i64 @vpreduce_smax_nxv2i64(i64 signext %s, <vscale x 2 x i64> %v,
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1731,8 +1731,8 @@ define signext i64 @vpreduce_umin_nxv2i64(i64 signext %s, <vscale x 2 x i64> %v,
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1765,8 +1765,8 @@ define signext i64 @vpreduce_smin_nxv2i64(i64 signext %s, <vscale x 2 x i64> %v,
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1799,8 +1799,8 @@ define signext i64 @vpreduce_and_nxv2i64(i64 signext %s, <vscale x 2 x i64> %v,
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1833,8 +1833,8 @@ define signext i64 @vpreduce_or_nxv2i64(i64 signext %s, <vscale x 2 x i64> %v, <
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1867,8 +1867,8 @@ define signext i64 @vpreduce_xor_nxv2i64(i64 signext %s, <vscale x 2 x i64> %v,
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1901,8 +1901,8 @@ define signext i64 @vpreduce_add_nxv4i64(i64 signext %s, <vscale x 4 x i64> %v,
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1933,8 +1933,8 @@ define signext i64 @vpwreduce_add_nxv4i32(i64 signext %s, <vscale x 4 x i32> %v,
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1967,8 +1967,8 @@ define signext i64 @vpwreduce_uadd_nxv4i32(i64 signext %s, <vscale x 4 x i32> %v
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -2003,8 +2003,8 @@ define signext i64 @vpreduce_umax_nxv4i64(i64 signext %s, <vscale x 4 x i64> %v,
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -2037,8 +2037,8 @@ define signext i64 @vpreduce_smax_nxv4i64(i64 signext %s, <vscale x 4 x i64> %v,
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -2071,8 +2071,8 @@ define signext i64 @vpreduce_umin_nxv4i64(i64 signext %s, <vscale x 4 x i64> %v,
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -2105,8 +2105,8 @@ define signext i64 @vpreduce_smin_nxv4i64(i64 signext %s, <vscale x 4 x i64> %v,
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -2139,8 +2139,8 @@ define signext i64 @vpreduce_and_nxv4i64(i64 signext %s, <vscale x 4 x i64> %v,
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -2173,8 +2173,8 @@ define signext i64 @vpreduce_or_nxv4i64(i64 signext %s, <vscale x 4 x i64> %v, <
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -2207,8 +2207,8 @@ define signext i64 @vpreduce_xor_nxv4i64(i64 signext %s, <vscale x 4 x i64> %v,
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll
index 3a6ae5fdb21073..8a3a47f2348867 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll
@@ -939,8 +939,8 @@ define <vscale x 1 x i64> @vrem_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -966,10 +966,10 @@ define <vscale x 1 x i64> @vrem_vi_nxv1i64_0(<vscale x 1 x i64> %va) {
; RV32-V-NEXT: .cfi_def_cfa_offset 16
; RV32-V-NEXT: lui a0, 748983
; RV32-V-NEXT: addi a0, a0, -586
+; RV32-V-NEXT: lui a1, 898779
+; RV32-V-NEXT: addi a1, a1, 1755
+; RV32-V-NEXT: sw a1, 8(sp)
; RV32-V-NEXT: sw a0, 12(sp)
-; RV32-V-NEXT: lui a0, 898779
-; RV32-V-NEXT: addi a0, a0, 1755
-; RV32-V-NEXT: sw a0, 8(sp)
; RV32-V-NEXT: addi a0, sp, 8
; RV32-V-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-V-NEXT: vlse64.v v9, (a0), zero
@@ -1022,8 +1022,8 @@ define <vscale x 2 x i64> @vrem_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1049,10 +1049,10 @@ define <vscale x 2 x i64> @vrem_vi_nxv2i64_0(<vscale x 2 x i64> %va) {
; RV32-V-NEXT: .cfi_def_cfa_offset 16
; RV32-V-NEXT: lui a0, 748983
; RV32-V-NEXT: addi a0, a0, -586
+; RV32-V-NEXT: lui a1, 898779
+; RV32-V-NEXT: addi a1, a1, 1755
+; RV32-V-NEXT: sw a1, 8(sp)
; RV32-V-NEXT: sw a0, 12(sp)
-; RV32-V-NEXT: lui a0, 898779
-; RV32-V-NEXT: addi a0, a0, 1755
-; RV32-V-NEXT: sw a0, 8(sp)
; RV32-V-NEXT: addi a0, sp, 8
; RV32-V-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-V-NEXT: vlse64.v v10, (a0), zero
@@ -1105,8 +1105,8 @@ define <vscale x 4 x i64> @vrem_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1132,10 +1132,10 @@ define <vscale x 4 x i64> @vrem_vi_nxv4i64_0(<vscale x 4 x i64> %va) {
; RV32-V-NEXT: .cfi_def_cfa_offset 16
; RV32-V-NEXT: lui a0, 748983
; RV32-V-NEXT: addi a0, a0, -586
+; RV32-V-NEXT: lui a1, 898779
+; RV32-V-NEXT: addi a1, a1, 1755
+; RV32-V-NEXT: sw a1, 8(sp)
; RV32-V-NEXT: sw a0, 12(sp)
-; RV32-V-NEXT: lui a0, 898779
-; RV32-V-NEXT: addi a0, a0, 1755
-; RV32-V-NEXT: sw a0, 8(sp)
; RV32-V-NEXT: addi a0, sp, 8
; RV32-V-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-V-NEXT: vlse64.v v12, (a0), zero
@@ -1188,8 +1188,8 @@ define <vscale x 8 x i64> @vrem_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1215,10 +1215,10 @@ define <vscale x 8 x i64> @vrem_vi_nxv8i64_0(<vscale x 8 x i64> %va) {
; RV32-V-NEXT: .cfi_def_cfa_offset 16
; RV32-V-NEXT: lui a0, 748983
; RV32-V-NEXT: addi a0, a0, -586
+; RV32-V-NEXT: lui a1, 898779
+; RV32-V-NEXT: addi a1, a1, 1755
+; RV32-V-NEXT: sw a1, 8(sp)
; RV32-V-NEXT: sw a0, 12(sp)
-; RV32-V-NEXT: lui a0, 898779
-; RV32-V-NEXT: addi a0, a0, 1755
-; RV32-V-NEXT: sw a0, 8(sp)
; RV32-V-NEXT: addi a0, sp, 8
; RV32-V-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-V-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vrem-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vrem-vp.ll
index 2ef96f4b3896fc..48b89907c57a3b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vrem-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vrem-vp.ll
@@ -892,8 +892,8 @@ define <vscale x 1 x i64> @vrem_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b, <vsca
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -918,8 +918,8 @@ define <vscale x 1 x i64> @vrem_vx_nxv1i64_unmasked(<vscale x 1 x i64> %va, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -966,8 +966,8 @@ define <vscale x 2 x i64> @vrem_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b, <vsca
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -992,8 +992,8 @@ define <vscale x 2 x i64> @vrem_vx_nxv2i64_unmasked(<vscale x 2 x i64> %va, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1040,8 +1040,8 @@ define <vscale x 4 x i64> @vrem_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b, <vsca
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1066,8 +1066,8 @@ define <vscale x 4 x i64> @vrem_vx_nxv4i64_unmasked(<vscale x 4 x i64> %va, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1114,8 +1114,8 @@ define <vscale x 8 x i64> @vrem_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <vsca
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1140,8 +1140,8 @@ define <vscale x 8 x i64> @vrem_vx_nxv8i64_unmasked(<vscale x 8 x i64> %va, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vrem.ll b/llvm/test/CodeGen/RISCV/rvv/vrem.ll
index 15692419dd76ce..3c6488e84fd52e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vrem.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vrem.ll
@@ -1898,8 +1898,8 @@ define <vscale x 1 x i64> @intrinsic_vrem_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i
; RV32-LABEL: intrinsic_vrem_vx_nxv1i64_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1934,8 +1934,8 @@ define <vscale x 1 x i64> @intrinsic_vrem_mask_vx_nxv1i64_nxv1i64_i64(<vscale x
; RV32-LABEL: intrinsic_vrem_mask_vx_nxv1i64_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1969,8 +1969,8 @@ define <vscale x 2 x i64> @intrinsic_vrem_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i
; RV32-LABEL: intrinsic_vrem_vx_nxv2i64_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -2005,8 +2005,8 @@ define <vscale x 2 x i64> @intrinsic_vrem_mask_vx_nxv2i64_nxv2i64_i64(<vscale x
; RV32-LABEL: intrinsic_vrem_mask_vx_nxv2i64_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -2040,8 +2040,8 @@ define <vscale x 4 x i64> @intrinsic_vrem_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i
; RV32-LABEL: intrinsic_vrem_vx_nxv4i64_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -2076,8 +2076,8 @@ define <vscale x 4 x i64> @intrinsic_vrem_mask_vx_nxv4i64_nxv4i64_i64(<vscale x
; RV32-LABEL: intrinsic_vrem_mask_vx_nxv4i64_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2111,8 +2111,8 @@ define <vscale x 8 x i64> @intrinsic_vrem_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x i
; RV32-LABEL: intrinsic_vrem_vx_nxv8i64_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2147,8 +2147,8 @@ define <vscale x 8 x i64> @intrinsic_vrem_mask_vx_nxv8i64_nxv8i64_i64(<vscale x
; RV32-LABEL: intrinsic_vrem_mask_vx_nxv8i64_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; RV32-NEXT: vlse64.v v24, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode.ll
index ed40f5af4fa4ca..2013f1794e7000 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode.ll
@@ -678,8 +678,8 @@ define <vscale x 1 x i64> @vremu_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -704,9 +704,9 @@ define <vscale x 1 x i64> @vremu_vi_nxv1i64_0(<vscale x 1 x i64> %va) {
; RV32-V-NEXT: addi sp, sp, -16
; RV32-V-NEXT: .cfi_def_cfa_offset 16
; RV32-V-NEXT: lui a0, 131072
+; RV32-V-NEXT: li a1, 1
+; RV32-V-NEXT: sw a1, 8(sp)
; RV32-V-NEXT: sw a0, 12(sp)
-; RV32-V-NEXT: li a0, 1
-; RV32-V-NEXT: sw a0, 8(sp)
; RV32-V-NEXT: addi a0, sp, 8
; RV32-V-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-V-NEXT: vlse64.v v9, (a0), zero
@@ -783,8 +783,8 @@ define <vscale x 2 x i64> @vremu_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -809,9 +809,9 @@ define <vscale x 2 x i64> @vremu_vi_nxv2i64_0(<vscale x 2 x i64> %va) {
; RV32-V-NEXT: addi sp, sp, -16
; RV32-V-NEXT: .cfi_def_cfa_offset 16
; RV32-V-NEXT: lui a0, 131072
+; RV32-V-NEXT: li a1, 1
+; RV32-V-NEXT: sw a1, 8(sp)
; RV32-V-NEXT: sw a0, 12(sp)
-; RV32-V-NEXT: li a0, 1
-; RV32-V-NEXT: sw a0, 8(sp)
; RV32-V-NEXT: addi a0, sp, 8
; RV32-V-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-V-NEXT: vlse64.v v10, (a0), zero
@@ -888,8 +888,8 @@ define <vscale x 4 x i64> @vremu_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -914,9 +914,9 @@ define <vscale x 4 x i64> @vremu_vi_nxv4i64_0(<vscale x 4 x i64> %va) {
; RV32-V-NEXT: addi sp, sp, -16
; RV32-V-NEXT: .cfi_def_cfa_offset 16
; RV32-V-NEXT: lui a0, 131072
+; RV32-V-NEXT: li a1, 1
+; RV32-V-NEXT: sw a1, 8(sp)
; RV32-V-NEXT: sw a0, 12(sp)
-; RV32-V-NEXT: li a0, 1
-; RV32-V-NEXT: sw a0, 8(sp)
; RV32-V-NEXT: addi a0, sp, 8
; RV32-V-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-V-NEXT: vlse64.v v12, (a0), zero
@@ -993,8 +993,8 @@ define <vscale x 8 x i64> @vremu_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1019,9 +1019,9 @@ define <vscale x 8 x i64> @vremu_vi_nxv8i64_0(<vscale x 8 x i64> %va) {
; RV32-V-NEXT: addi sp, sp, -16
; RV32-V-NEXT: .cfi_def_cfa_offset 16
; RV32-V-NEXT: lui a0, 131072
+; RV32-V-NEXT: li a1, 1
+; RV32-V-NEXT: sw a1, 8(sp)
; RV32-V-NEXT: sw a0, 12(sp)
-; RV32-V-NEXT: li a0, 1
-; RV32-V-NEXT: sw a0, 8(sp)
; RV32-V-NEXT: addi a0, sp, 8
; RV32-V-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-V-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vremu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vremu-vp.ll
index 1f1ed4a1269acb..20d33f6cadf223 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vremu-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vremu-vp.ll
@@ -891,8 +891,8 @@ define <vscale x 1 x i64> @vremu_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b, <vsc
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -917,8 +917,8 @@ define <vscale x 1 x i64> @vremu_vx_nxv1i64_unmasked(<vscale x 1 x i64> %va, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -965,8 +965,8 @@ define <vscale x 2 x i64> @vremu_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b, <vsc
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -991,8 +991,8 @@ define <vscale x 2 x i64> @vremu_vx_nxv2i64_unmasked(<vscale x 2 x i64> %va, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1039,8 +1039,8 @@ define <vscale x 4 x i64> @vremu_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b, <vsc
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1065,8 +1065,8 @@ define <vscale x 4 x i64> @vremu_vx_nxv4i64_unmasked(<vscale x 4 x i64> %va, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1113,8 +1113,8 @@ define <vscale x 8 x i64> @vremu_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <vsc
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1139,8 +1139,8 @@ define <vscale x 8 x i64> @vremu_vx_nxv8i64_unmasked(<vscale x 8 x i64> %va, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vremu.ll b/llvm/test/CodeGen/RISCV/rvv/vremu.ll
index 3a090a51e62624..777407c144b515 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vremu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vremu.ll
@@ -1898,8 +1898,8 @@ define <vscale x 1 x i64> @intrinsic_vremu_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x
; RV32-LABEL: intrinsic_vremu_vx_nxv1i64_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1934,8 +1934,8 @@ define <vscale x 1 x i64> @intrinsic_vremu_mask_vx_nxv1i64_nxv1i64_i64(<vscale x
; RV32-LABEL: intrinsic_vremu_mask_vx_nxv1i64_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1969,8 +1969,8 @@ define <vscale x 2 x i64> @intrinsic_vremu_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x
; RV32-LABEL: intrinsic_vremu_vx_nxv2i64_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -2005,8 +2005,8 @@ define <vscale x 2 x i64> @intrinsic_vremu_mask_vx_nxv2i64_nxv2i64_i64(<vscale x
; RV32-LABEL: intrinsic_vremu_mask_vx_nxv2i64_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -2040,8 +2040,8 @@ define <vscale x 4 x i64> @intrinsic_vremu_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x
; RV32-LABEL: intrinsic_vremu_vx_nxv4i64_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -2076,8 +2076,8 @@ define <vscale x 4 x i64> @intrinsic_vremu_mask_vx_nxv4i64_nxv4i64_i64(<vscale x
; RV32-LABEL: intrinsic_vremu_mask_vx_nxv4i64_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2111,8 +2111,8 @@ define <vscale x 8 x i64> @intrinsic_vremu_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x
; RV32-LABEL: intrinsic_vremu_vx_nxv8i64_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2147,8 +2147,8 @@ define <vscale x 8 x i64> @intrinsic_vremu_mask_vx_nxv8i64_nxv8i64_i64(<vscale x
; RV32-LABEL: intrinsic_vremu_mask_vx_nxv8i64_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; RV32-NEXT: vlse64.v v24, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vrsub-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vrsub-sdnode.ll
index e97b1f41ad3d36..e7b9f716a70d5f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vrsub-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vrsub-sdnode.ll
@@ -403,8 +403,8 @@ define <vscale x 1 x i64> @vrsub_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -438,8 +438,8 @@ define <vscale x 2 x i64> @vrsub_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -473,8 +473,8 @@ define <vscale x 4 x i64> @vrsub_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -508,8 +508,8 @@ define <vscale x 8 x i64> @vrsub_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vrsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vrsub-vp.ll
index be372c9aa54d5d..f211c900ee5d3a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vrsub-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vrsub-vp.ll
@@ -839,8 +839,8 @@ define <vscale x 1 x i64> @vrsub_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b, <vsc
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -865,8 +865,8 @@ define <vscale x 1 x i64> @vrsub_vx_nxv1i64_unmasked(<vscale x 1 x i64> %va, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -913,8 +913,8 @@ define <vscale x 2 x i64> @vrsub_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b, <vsc
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -939,8 +939,8 @@ define <vscale x 2 x i64> @vrsub_vx_nxv2i64_unmasked(<vscale x 2 x i64> %va, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -987,8 +987,8 @@ define <vscale x 4 x i64> @vrsub_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b, <vsc
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1013,8 +1013,8 @@ define <vscale x 4 x i64> @vrsub_vx_nxv4i64_unmasked(<vscale x 4 x i64> %va, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1061,8 +1061,8 @@ define <vscale x 8 x i64> @vrsub_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <vsc
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1087,8 +1087,8 @@ define <vscale x 8 x i64> @vrsub_vx_nxv8i64_unmasked(<vscale x 8 x i64> %va, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vrsub.ll b/llvm/test/CodeGen/RISCV/rvv/vrsub.ll
index 1a8db00a1c9591..19d6afbd0e57ef 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vrsub.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vrsub.ll
@@ -842,8 +842,8 @@ define <vscale x 1 x i64> @intrinsic_vrsub_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x
; RV32-LABEL: intrinsic_vrsub_vx_nxv1i64_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -877,8 +877,8 @@ define <vscale x 1 x i64> @intrinsic_vrsub_mask_vx_nxv1i64_nxv1i64_i64(<vscale x
; RV32-LABEL: intrinsic_vrsub_mask_vx_nxv1i64_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -912,8 +912,8 @@ define <vscale x 2 x i64> @intrinsic_vrsub_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x
; RV32-LABEL: intrinsic_vrsub_vx_nxv2i64_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -947,8 +947,8 @@ define <vscale x 2 x i64> @intrinsic_vrsub_mask_vx_nxv2i64_nxv2i64_i64(<vscale x
; RV32-LABEL: intrinsic_vrsub_mask_vx_nxv2i64_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -982,8 +982,8 @@ define <vscale x 4 x i64> @intrinsic_vrsub_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x
; RV32-LABEL: intrinsic_vrsub_vx_nxv4i64_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1017,8 +1017,8 @@ define <vscale x 4 x i64> @intrinsic_vrsub_mask_vx_nxv4i64_nxv4i64_i64(<vscale x
; RV32-LABEL: intrinsic_vrsub_mask_vx_nxv4i64_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1052,8 +1052,8 @@ define <vscale x 8 x i64> @intrinsic_vrsub_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x
; RV32-LABEL: intrinsic_vrsub_vx_nxv8i64_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1087,8 +1087,8 @@ define <vscale x 8 x i64> @intrinsic_vrsub_mask_vx_nxv8i64_nxv8i64_i64(<vscale x
; RV32-LABEL: intrinsic_vrsub_mask_vx_nxv8i64_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; RV32-NEXT: vlse64.v v24, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vsadd-sdnode.ll
index 6a8b801254057b..2961bdece3cef2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsadd-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsadd-sdnode.ll
@@ -633,8 +633,8 @@ define <vscale x 1 x i64> @sadd_nxv1i64_vx(<vscale x 1 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -680,8 +680,8 @@ define <vscale x 2 x i64> @sadd_nxv2i64_vx(<vscale x 2 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -727,8 +727,8 @@ define <vscale x 4 x i64> @sadd_nxv4i64_vx(<vscale x 4 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -774,8 +774,8 @@ define <vscale x 8 x i64> @sadd_nxv8i64_vx(<vscale x 8 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vsadd-vp.ll
index 71b91f56e89a5e..7c6167bd5bc7ba 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsadd-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsadd-vp.ll
@@ -1423,8 +1423,8 @@ define <vscale x 1 x i64> @vsadd_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b, <vsc
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1449,8 +1449,8 @@ define <vscale x 1 x i64> @vsadd_vx_nxv1i64_unmasked(<vscale x 1 x i64> %va, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1517,8 +1517,8 @@ define <vscale x 2 x i64> @vsadd_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b, <vsc
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1543,8 +1543,8 @@ define <vscale x 2 x i64> @vsadd_vx_nxv2i64_unmasked(<vscale x 2 x i64> %va, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1611,8 +1611,8 @@ define <vscale x 4 x i64> @vsadd_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b, <vsc
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1637,8 +1637,8 @@ define <vscale x 4 x i64> @vsadd_vx_nxv4i64_unmasked(<vscale x 4 x i64> %va, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1705,8 +1705,8 @@ define <vscale x 8 x i64> @vsadd_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <vsc
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1731,8 +1731,8 @@ define <vscale x 8 x i64> @vsadd_vx_nxv8i64_unmasked(<vscale x 8 x i64> %va, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsadd.ll b/llvm/test/CodeGen/RISCV/rvv/vsadd.ll
index a108d98c1731b3..445942f8ef2353 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsadd.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsadd.ll
@@ -1898,8 +1898,8 @@ define <vscale x 1 x i64> @intrinsic_vsadd_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x
; RV32-LABEL: intrinsic_vsadd_vx_nxv1i64_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1934,8 +1934,8 @@ define <vscale x 1 x i64> @intrinsic_vsadd_mask_vx_nxv1i64_nxv1i64_i64(<vscale x
; RV32-LABEL: intrinsic_vsadd_mask_vx_nxv1i64_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1969,8 +1969,8 @@ define <vscale x 2 x i64> @intrinsic_vsadd_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x
; RV32-LABEL: intrinsic_vsadd_vx_nxv2i64_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -2005,8 +2005,8 @@ define <vscale x 2 x i64> @intrinsic_vsadd_mask_vx_nxv2i64_nxv2i64_i64(<vscale x
; RV32-LABEL: intrinsic_vsadd_mask_vx_nxv2i64_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -2040,8 +2040,8 @@ define <vscale x 4 x i64> @intrinsic_vsadd_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x
; RV32-LABEL: intrinsic_vsadd_vx_nxv4i64_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -2076,8 +2076,8 @@ define <vscale x 4 x i64> @intrinsic_vsadd_mask_vx_nxv4i64_nxv4i64_i64(<vscale x
; RV32-LABEL: intrinsic_vsadd_mask_vx_nxv4i64_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2111,8 +2111,8 @@ define <vscale x 8 x i64> @intrinsic_vsadd_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x
; RV32-LABEL: intrinsic_vsadd_vx_nxv8i64_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2147,8 +2147,8 @@ define <vscale x 8 x i64> @intrinsic_vsadd_mask_vx_nxv8i64_nxv8i64_i64(<vscale x
; RV32-LABEL: intrinsic_vsadd_mask_vx_nxv8i64_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; RV32-NEXT: vlse64.v v24, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsaddu-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vsaddu-sdnode.ll
index 4fe765c34ba6ce..3a595db88ef85b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsaddu-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsaddu-sdnode.ll
@@ -633,8 +633,8 @@ define <vscale x 1 x i64> @uadd_nxv1i64_vx(<vscale x 1 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -680,8 +680,8 @@ define <vscale x 2 x i64> @uadd_nxv2i64_vx(<vscale x 2 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -727,8 +727,8 @@ define <vscale x 4 x i64> @uadd_nxv4i64_vx(<vscale x 4 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -774,8 +774,8 @@ define <vscale x 8 x i64> @uadd_nxv8i64_vx(<vscale x 8 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsaddu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vsaddu-vp.ll
index 454a4ebab04a28..916cbdc7e62207 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsaddu-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsaddu-vp.ll
@@ -1422,8 +1422,8 @@ define <vscale x 1 x i64> @vsaddu_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b, <vs
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1448,8 +1448,8 @@ define <vscale x 1 x i64> @vsaddu_vx_nxv1i64_unmasked(<vscale x 1 x i64> %va, i6
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1516,8 +1516,8 @@ define <vscale x 2 x i64> @vsaddu_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b, <vs
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1542,8 +1542,8 @@ define <vscale x 2 x i64> @vsaddu_vx_nxv2i64_unmasked(<vscale x 2 x i64> %va, i6
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1610,8 +1610,8 @@ define <vscale x 4 x i64> @vsaddu_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b, <vs
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1636,8 +1636,8 @@ define <vscale x 4 x i64> @vsaddu_vx_nxv4i64_unmasked(<vscale x 4 x i64> %va, i6
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1704,8 +1704,8 @@ define <vscale x 8 x i64> @vsaddu_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <vs
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1730,8 +1730,8 @@ define <vscale x 8 x i64> @vsaddu_vx_nxv8i64_unmasked(<vscale x 8 x i64> %va, i6
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsaddu.ll b/llvm/test/CodeGen/RISCV/rvv/vsaddu.ll
index 57a89d6fe7d234..37f93fd90e5360 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsaddu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsaddu.ll
@@ -1898,8 +1898,8 @@ define <vscale x 1 x i64> @intrinsic_vsaddu_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x
; RV32-LABEL: intrinsic_vsaddu_vx_nxv1i64_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1934,8 +1934,8 @@ define <vscale x 1 x i64> @intrinsic_vsaddu_mask_vx_nxv1i64_nxv1i64_i64(<vscale
; RV32-LABEL: intrinsic_vsaddu_mask_vx_nxv1i64_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1969,8 +1969,8 @@ define <vscale x 2 x i64> @intrinsic_vsaddu_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x
; RV32-LABEL: intrinsic_vsaddu_vx_nxv2i64_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -2005,8 +2005,8 @@ define <vscale x 2 x i64> @intrinsic_vsaddu_mask_vx_nxv2i64_nxv2i64_i64(<vscale
; RV32-LABEL: intrinsic_vsaddu_mask_vx_nxv2i64_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -2040,8 +2040,8 @@ define <vscale x 4 x i64> @intrinsic_vsaddu_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x
; RV32-LABEL: intrinsic_vsaddu_vx_nxv4i64_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -2076,8 +2076,8 @@ define <vscale x 4 x i64> @intrinsic_vsaddu_mask_vx_nxv4i64_nxv4i64_i64(<vscale
; RV32-LABEL: intrinsic_vsaddu_mask_vx_nxv4i64_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2111,8 +2111,8 @@ define <vscale x 8 x i64> @intrinsic_vsaddu_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x
; RV32-LABEL: intrinsic_vsaddu_vx_nxv8i64_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2147,8 +2147,8 @@ define <vscale x 8 x i64> @intrinsic_vsaddu_mask_vx_nxv8i64_nxv8i64_i64(<vscale
; RV32-LABEL: intrinsic_vsaddu_mask_vx_nxv8i64_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; RV32-NEXT: vlse64.v v24, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsbc.ll b/llvm/test/CodeGen/RISCV/rvv/vsbc.ll
index 178935a1df32ea..effc04b87dee21 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsbc.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsbc.ll
@@ -975,8 +975,8 @@ define <vscale x 1 x i64> @intrinsic_vsbc_vxm_nxv1i64_nxv1i64_i64(<vscale x 1 x
; RV32-LABEL: intrinsic_vsbc_vxm_nxv1i64_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1011,8 +1011,8 @@ define <vscale x 2 x i64> @intrinsic_vsbc_vxm_nxv2i64_nxv2i64_i64(<vscale x 2 x
; RV32-LABEL: intrinsic_vsbc_vxm_nxv2i64_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1047,8 +1047,8 @@ define <vscale x 4 x i64> @intrinsic_vsbc_vxm_nxv4i64_nxv4i64_i64(<vscale x 4 x
; RV32-LABEL: intrinsic_vsbc_vxm_nxv4i64_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1083,8 +1083,8 @@ define <vscale x 8 x i64> @intrinsic_vsbc_vxm_nxv8i64_nxv8i64_i64(<vscale x 8 x
; RV32-LABEL: intrinsic_vsbc_vxm_nxv8i64_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-int.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-int.ll
index 2715ec78bd7949..a92904cea5a0f5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vselect-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vselect-int.ll
@@ -627,8 +627,8 @@ define <vscale x 1 x i64> @vmerge_xv_nxv1i64(<vscale x 1 x i64> %va, i64 %b, <vs
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, mu
; RV32-NEXT: vlse64.v v8, (a0), zero, v0.t
@@ -671,8 +671,8 @@ define <vscale x 2 x i64> @vmerge_xv_nxv2i64(<vscale x 2 x i64> %va, i64 %b, <vs
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, mu
; RV32-NEXT: vlse64.v v8, (a0), zero, v0.t
@@ -715,8 +715,8 @@ define <vscale x 4 x i64> @vmerge_xv_nxv4i64(<vscale x 4 x i64> %va, i64 %b, <vs
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, mu
; RV32-NEXT: vlse64.v v8, (a0), zero, v0.t
@@ -759,8 +759,8 @@ define <vscale x 8 x i64> @vmerge_xv_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <vs
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, mu
; RV32-NEXT: vlse64.v v8, (a0), zero, v0.t
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsmul.ll b/llvm/test/CodeGen/RISCV/rvv/vsmul.ll
index bc53bce889ddb7..8592f01cdfbbb4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsmul.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsmul.ll
@@ -1942,8 +1942,8 @@ define <vscale x 1 x i64> @intrinsic_vsmul_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x
; RV32-LABEL: intrinsic_vsmul_vx_nxv1i64_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1979,8 +1979,8 @@ define <vscale x 1 x i64> @intrinsic_vsmul_mask_vx_nxv1i64_nxv1i64_i64(<vscale x
; RV32-LABEL: intrinsic_vsmul_mask_vx_nxv1i64_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -2016,8 +2016,8 @@ define <vscale x 2 x i64> @intrinsic_vsmul_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x
; RV32-LABEL: intrinsic_vsmul_vx_nxv2i64_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -2053,8 +2053,8 @@ define <vscale x 2 x i64> @intrinsic_vsmul_mask_vx_nxv2i64_nxv2i64_i64(<vscale x
; RV32-LABEL: intrinsic_vsmul_mask_vx_nxv2i64_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -2090,8 +2090,8 @@ define <vscale x 4 x i64> @intrinsic_vsmul_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x
; RV32-LABEL: intrinsic_vsmul_vx_nxv4i64_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -2127,8 +2127,8 @@ define <vscale x 4 x i64> @intrinsic_vsmul_mask_vx_nxv4i64_nxv4i64_i64(<vscale x
; RV32-LABEL: intrinsic_vsmul_mask_vx_nxv4i64_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2164,8 +2164,8 @@ define <vscale x 8 x i64> @intrinsic_vsmul_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x
; RV32-LABEL: intrinsic_vsmul_vx_nxv8i64_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2201,8 +2201,8 @@ define <vscale x 8 x i64> @intrinsic_vsmul_mask_vx_nxv8i64_nxv8i64_i64(<vscale x
; RV32-LABEL: intrinsic_vsmul_mask_vx_nxv8i64_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; RV32-NEXT: vlse64.v v24, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsplats-i64.ll b/llvm/test/CodeGen/RISCV/rvv/vsplats-i64.ll
index 721d6ef26d61e1..5a180e2c8f7614 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsplats-i64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsplats-i64.ll
@@ -37,10 +37,10 @@ define <vscale x 8 x i64> @vsplat_nxv8i64_4() {
; RV32V: # %bb.0:
; RV32V-NEXT: addi sp, sp, -16
; RV32V-NEXT: .cfi_def_cfa_offset 16
-; RV32V-NEXT: sw zero, 12(sp)
; RV32V-NEXT: lui a0, 1028096
; RV32V-NEXT: addi a0, a0, -1281
; RV32V-NEXT: sw a0, 8(sp)
+; RV32V-NEXT: sw zero, 12(sp)
; RV32V-NEXT: addi a0, sp, 8
; RV32V-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32V-NEXT: vlse64.v v8, (a0), zero
@@ -63,8 +63,8 @@ define <vscale x 8 x i64> @vsplat_nxv8i64_5(i64 %a) {
; RV32V: # %bb.0:
; RV32V-NEXT: addi sp, sp, -16
; RV32V-NEXT: .cfi_def_cfa_offset 16
-; RV32V-NEXT: sw a1, 12(sp)
; RV32V-NEXT: sw a0, 8(sp)
+; RV32V-NEXT: sw a1, 12(sp)
; RV32V-NEXT: addi a0, sp, 8
; RV32V-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32V-NEXT: vlse64.v v8, (a0), zero
@@ -137,10 +137,10 @@ define <vscale x 8 x i64> @vadd_vx_nxv8i64_10(<vscale x 8 x i64> %v) {
; RV32V: # %bb.0:
; RV32V-NEXT: addi sp, sp, -16
; RV32V-NEXT: .cfi_def_cfa_offset 16
-; RV32V-NEXT: sw zero, 12(sp)
; RV32V-NEXT: lui a0, 1028096
; RV32V-NEXT: addi a0, a0, -1281
; RV32V-NEXT: sw a0, 8(sp)
+; RV32V-NEXT: sw zero, 12(sp)
; RV32V-NEXT: addi a0, sp, 8
; RV32V-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32V-NEXT: vlse64.v v16, (a0), zero
@@ -166,10 +166,10 @@ define <vscale x 8 x i64> @vadd_vx_nxv8i64_11(<vscale x 8 x i64> %v) {
; RV32V-NEXT: addi sp, sp, -16
; RV32V-NEXT: .cfi_def_cfa_offset 16
; RV32V-NEXT: li a0, 1
+; RV32V-NEXT: lui a1, 1028096
+; RV32V-NEXT: addi a1, a1, -1281
+; RV32V-NEXT: sw a1, 8(sp)
; RV32V-NEXT: sw a0, 12(sp)
-; RV32V-NEXT: lui a0, 1028096
-; RV32V-NEXT: addi a0, a0, -1281
-; RV32V-NEXT: sw a0, 8(sp)
; RV32V-NEXT: addi a0, sp, 8
; RV32V-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32V-NEXT: vlse64.v v16, (a0), zero
@@ -194,8 +194,8 @@ define <vscale x 8 x i64> @vadd_vx_nxv8i64_12(<vscale x 8 x i64> %v, i64 %a) {
; RV32V: # %bb.0:
; RV32V-NEXT: addi sp, sp, -16
; RV32V-NEXT: .cfi_def_cfa_offset 16
-; RV32V-NEXT: sw a1, 12(sp)
; RV32V-NEXT: sw a0, 8(sp)
+; RV32V-NEXT: sw a1, 12(sp)
; RV32V-NEXT: addi a0, sp, 8
; RV32V-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32V-NEXT: vlse64.v v16, (a0), zero
@@ -238,8 +238,8 @@ define <vscale x 8 x i64> @vsplat_nxv8i64_14(i32 %a) {
; RV32V: # %bb.0:
; RV32V-NEXT: addi sp, sp, -16
; RV32V-NEXT: .cfi_def_cfa_offset 16
-; RV32V-NEXT: sw zero, 12(sp)
; RV32V-NEXT: sw a0, 8(sp)
+; RV32V-NEXT: sw zero, 12(sp)
; RV32V-NEXT: addi a0, sp, 8
; RV32V-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32V-NEXT: vlse64.v v8, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vssub-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vssub-sdnode.ll
index c043858c029473..632e28607b9be1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vssub-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vssub-sdnode.ll
@@ -651,8 +651,8 @@ define <vscale x 1 x i64> @ssub_nxv1i64_vx(<vscale x 1 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -699,8 +699,8 @@ define <vscale x 2 x i64> @ssub_nxv2i64_vx(<vscale x 2 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -747,8 +747,8 @@ define <vscale x 4 x i64> @ssub_nxv4i64_vx(<vscale x 4 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -795,8 +795,8 @@ define <vscale x 8 x i64> @ssub_nxv8i64_vx(<vscale x 8 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll
index 613b58b0f1b88a..2f9786e237084d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll
@@ -1467,8 +1467,8 @@ define <vscale x 1 x i64> @vssub_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b, <vsc
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1493,8 +1493,8 @@ define <vscale x 1 x i64> @vssub_vx_nxv1i64_unmasked(<vscale x 1 x i64> %va, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1563,8 +1563,8 @@ define <vscale x 2 x i64> @vssub_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b, <vsc
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1589,8 +1589,8 @@ define <vscale x 2 x i64> @vssub_vx_nxv2i64_unmasked(<vscale x 2 x i64> %va, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1659,8 +1659,8 @@ define <vscale x 4 x i64> @vssub_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b, <vsc
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1685,8 +1685,8 @@ define <vscale x 4 x i64> @vssub_vx_nxv4i64_unmasked(<vscale x 4 x i64> %va, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1755,8 +1755,8 @@ define <vscale x 8 x i64> @vssub_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <vsc
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1781,8 +1781,8 @@ define <vscale x 8 x i64> @vssub_vx_nxv8i64_unmasked(<vscale x 8 x i64> %va, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vssub.ll b/llvm/test/CodeGen/RISCV/rvv/vssub.ll
index 50fca5e832af50..2ac94f3c55a8d1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vssub.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vssub.ll
@@ -1898,8 +1898,8 @@ define <vscale x 1 x i64> @intrinsic_vssub_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x
; RV32-LABEL: intrinsic_vssub_vx_nxv1i64_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1934,8 +1934,8 @@ define <vscale x 1 x i64> @intrinsic_vssub_mask_vx_nxv1i64_nxv1i64_i64(<vscale x
; RV32-LABEL: intrinsic_vssub_mask_vx_nxv1i64_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1969,8 +1969,8 @@ define <vscale x 2 x i64> @intrinsic_vssub_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x
; RV32-LABEL: intrinsic_vssub_vx_nxv2i64_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -2005,8 +2005,8 @@ define <vscale x 2 x i64> @intrinsic_vssub_mask_vx_nxv2i64_nxv2i64_i64(<vscale x
; RV32-LABEL: intrinsic_vssub_mask_vx_nxv2i64_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -2040,8 +2040,8 @@ define <vscale x 4 x i64> @intrinsic_vssub_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x
; RV32-LABEL: intrinsic_vssub_vx_nxv4i64_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -2076,8 +2076,8 @@ define <vscale x 4 x i64> @intrinsic_vssub_mask_vx_nxv4i64_nxv4i64_i64(<vscale x
; RV32-LABEL: intrinsic_vssub_mask_vx_nxv4i64_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2111,8 +2111,8 @@ define <vscale x 8 x i64> @intrinsic_vssub_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x
; RV32-LABEL: intrinsic_vssub_vx_nxv8i64_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2147,8 +2147,8 @@ define <vscale x 8 x i64> @intrinsic_vssub_mask_vx_nxv8i64_nxv8i64_i64(<vscale x
; RV32-LABEL: intrinsic_vssub_mask_vx_nxv8i64_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; RV32-NEXT: vlse64.v v24, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vssubu-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vssubu-sdnode.ll
index 5349548a213bc7..83ffb81382f195 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vssubu-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vssubu-sdnode.ll
@@ -651,8 +651,8 @@ define <vscale x 1 x i64> @usub_nxv1i64_vx(<vscale x 1 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -699,8 +699,8 @@ define <vscale x 2 x i64> @usub_nxv2i64_vx(<vscale x 2 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -747,8 +747,8 @@ define <vscale x 4 x i64> @usub_nxv4i64_vx(<vscale x 4 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -795,8 +795,8 @@ define <vscale x 8 x i64> @usub_nxv8i64_vx(<vscale x 8 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll
index 8c729d7d9bfb6e..dea0a82eca375d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll
@@ -1465,8 +1465,8 @@ define <vscale x 1 x i64> @vssubu_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b, <vs
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1491,8 +1491,8 @@ define <vscale x 1 x i64> @vssubu_vx_nxv1i64_unmasked(<vscale x 1 x i64> %va, i6
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1561,8 +1561,8 @@ define <vscale x 2 x i64> @vssubu_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b, <vs
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1587,8 +1587,8 @@ define <vscale x 2 x i64> @vssubu_vx_nxv2i64_unmasked(<vscale x 2 x i64> %va, i6
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1657,8 +1657,8 @@ define <vscale x 4 x i64> @vssubu_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b, <vs
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1683,8 +1683,8 @@ define <vscale x 4 x i64> @vssubu_vx_nxv4i64_unmasked(<vscale x 4 x i64> %va, i6
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1753,8 +1753,8 @@ define <vscale x 8 x i64> @vssubu_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <vs
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1779,8 +1779,8 @@ define <vscale x 8 x i64> @vssubu_vx_nxv8i64_unmasked(<vscale x 8 x i64> %va, i6
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vssubu.ll b/llvm/test/CodeGen/RISCV/rvv/vssubu.ll
index db1b4ce34e9b38..9827b20b5ca0a9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vssubu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vssubu.ll
@@ -1898,8 +1898,8 @@ define <vscale x 1 x i64> @intrinsic_vssubu_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x
; RV32-LABEL: intrinsic_vssubu_vx_nxv1i64_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1934,8 +1934,8 @@ define <vscale x 1 x i64> @intrinsic_vssubu_mask_vx_nxv1i64_nxv1i64_i64(<vscale
; RV32-LABEL: intrinsic_vssubu_mask_vx_nxv1i64_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1969,8 +1969,8 @@ define <vscale x 2 x i64> @intrinsic_vssubu_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x
; RV32-LABEL: intrinsic_vssubu_vx_nxv2i64_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -2005,8 +2005,8 @@ define <vscale x 2 x i64> @intrinsic_vssubu_mask_vx_nxv2i64_nxv2i64_i64(<vscale
; RV32-LABEL: intrinsic_vssubu_mask_vx_nxv2i64_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -2040,8 +2040,8 @@ define <vscale x 4 x i64> @intrinsic_vssubu_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x
; RV32-LABEL: intrinsic_vssubu_vx_nxv4i64_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -2076,8 +2076,8 @@ define <vscale x 4 x i64> @intrinsic_vssubu_mask_vx_nxv4i64_nxv4i64_i64(<vscale
; RV32-LABEL: intrinsic_vssubu_mask_vx_nxv4i64_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2111,8 +2111,8 @@ define <vscale x 8 x i64> @intrinsic_vssubu_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x
; RV32-LABEL: intrinsic_vssubu_vx_nxv8i64_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2147,8 +2147,8 @@ define <vscale x 8 x i64> @intrinsic_vssubu_mask_vx_nxv8i64_nxv8i64_i64(<vscale
; RV32-LABEL: intrinsic_vssubu_mask_vx_nxv8i64_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; RV32-NEXT: vlse64.v v24, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsub-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vsub-sdnode.ll
index c2173c9a291fcf..e2b0a11e16883d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsub-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsub-sdnode.ll
@@ -622,8 +622,8 @@ define <vscale x 1 x i64> @vsub_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -668,8 +668,8 @@ define <vscale x 2 x i64> @vsub_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -714,8 +714,8 @@ define <vscale x 4 x i64> @vsub_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -760,8 +760,8 @@ define <vscale x 8 x i64> @vsub_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -796,10 +796,10 @@ define <vscale x 8 x i64> @vsub_xx_nxv8i64(i64 %a, i64 %b) nounwind {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: sub a4, a0, a2
-; RV32-NEXT: sw a4, 8(sp)
; RV32-NEXT: sltu a0, a0, a2
; RV32-NEXT: sub a1, a1, a3
; RV32-NEXT: sub a1, a1, a0
+; RV32-NEXT: sw a4, 8(sp)
; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vsub-vp.ll
index a2b9285fedeaf7..634f5cd0a9a0e1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsub-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsub-vp.ll
@@ -919,8 +919,8 @@ define <vscale x 1 x i64> @vsub_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b, <vsca
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -945,8 +945,8 @@ define <vscale x 1 x i64> @vsub_vx_nxv1i64_unmasked(<vscale x 1 x i64> %va, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -993,8 +993,8 @@ define <vscale x 2 x i64> @vsub_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b, <vsca
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1019,8 +1019,8 @@ define <vscale x 2 x i64> @vsub_vx_nxv2i64_unmasked(<vscale x 2 x i64> %va, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1067,8 +1067,8 @@ define <vscale x 4 x i64> @vsub_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b, <vsca
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1093,8 +1093,8 @@ define <vscale x 4 x i64> @vsub_vx_nxv4i64_unmasked(<vscale x 4 x i64> %va, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1141,8 +1141,8 @@ define <vscale x 8 x i64> @vsub_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <vsca
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1167,8 +1167,8 @@ define <vscale x 8 x i64> @vsub_vx_nxv8i64_unmasked(<vscale x 8 x i64> %va, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsub.ll b/llvm/test/CodeGen/RISCV/rvv/vsub.ll
index c7e153d86fce3a..ecf23722917d3b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsub.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsub.ll
@@ -1858,8 +1858,8 @@ define <vscale x 1 x i64> @intrinsic_vsub_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i
; RV32-LABEL: intrinsic_vsub_vx_nxv1i64_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1893,8 +1893,8 @@ define <vscale x 1 x i64> @intrinsic_vsub_mask_vx_nxv1i64_nxv1i64_i64(<vscale x
; RV32-LABEL: intrinsic_vsub_mask_vx_nxv1i64_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1928,8 +1928,8 @@ define <vscale x 2 x i64> @intrinsic_vsub_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i
; RV32-LABEL: intrinsic_vsub_vx_nxv2i64_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1963,8 +1963,8 @@ define <vscale x 2 x i64> @intrinsic_vsub_mask_vx_nxv2i64_nxv2i64_i64(<vscale x
; RV32-LABEL: intrinsic_vsub_mask_vx_nxv2i64_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1998,8 +1998,8 @@ define <vscale x 4 x i64> @intrinsic_vsub_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i
; RV32-LABEL: intrinsic_vsub_vx_nxv4i64_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -2033,8 +2033,8 @@ define <vscale x 4 x i64> @intrinsic_vsub_mask_vx_nxv4i64_nxv4i64_i64(<vscale x
; RV32-LABEL: intrinsic_vsub_mask_vx_nxv4i64_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2068,8 +2068,8 @@ define <vscale x 8 x i64> @intrinsic_vsub_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x i
; RV32-LABEL: intrinsic_vsub_vx_nxv8i64_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2103,8 +2103,8 @@ define <vscale x 8 x i64> @intrinsic_vsub_mask_vx_nxv8i64_nxv8i64_i64(<vscale x
; RV32-LABEL: intrinsic_vsub_mask_vx_nxv8i64_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; RV32-NEXT: vlse64.v v24, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll
index b7ce0e3f196f05..0d643189e89cec 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll
@@ -1470,8 +1470,8 @@ define <vscale x 8 x i64> @vwadd_vx_splat_zext(<vscale x 8 x i32> %va, i32 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1530,8 +1530,8 @@ define <vscale x 8 x i64> @vwadd_wx_splat_zext(<vscale x 8 x i64> %va, i32 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vxor-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vxor-sdnode.ll
index b03a105610dfdf..e98bd1a1ad7820 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vxor-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vxor-sdnode.ll
@@ -971,8 +971,8 @@ define <vscale x 1 x i64> @vxor_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1037,8 +1037,8 @@ define <vscale x 2 x i64> @vxor_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1103,8 +1103,8 @@ define <vscale x 4 x i64> @vxor_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1169,8 +1169,8 @@ define <vscale x 8 x i64> @vxor_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -1225,9 +1225,9 @@ define <vscale x 8 x i64> @vxor_xx_nxv8i64(i64 %a, i64 %b) nounwind {
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: xor a1, a1, a3
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: xor a0, a0, a2
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v8, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vxor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vxor-vp.ll
index f2235b4fdc94ba..3ad0c22894561a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vxor-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vxor-vp.ll
@@ -1691,8 +1691,8 @@ define <vscale x 1 x i64> @vxor_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b, <vsca
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1717,8 +1717,8 @@ define <vscale x 1 x i64> @vxor_vx_nxv1i64_unmasked(<vscale x 1 x i64> %va, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1805,8 +1805,8 @@ define <vscale x 2 x i64> @vxor_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b, <vsca
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1831,8 +1831,8 @@ define <vscale x 2 x i64> @vxor_vx_nxv2i64_unmasked(<vscale x 2 x i64> %va, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1919,8 +1919,8 @@ define <vscale x 4 x i64> @vxor_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b, <vsca
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -1945,8 +1945,8 @@ define <vscale x 4 x i64> @vxor_vx_nxv4i64_unmasked(<vscale x 4 x i64> %va, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -2033,8 +2033,8 @@ define <vscale x 8 x i64> @vxor_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <vsca
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2059,8 +2059,8 @@ define <vscale x 8 x i64> @vxor_vx_nxv8i64_unmasked(<vscale x 8 x i64> %va, i64
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/rvv/vxor.ll b/llvm/test/CodeGen/RISCV/rvv/vxor.ll
index b08d4530d0085c..a3cb2f42c8ecf8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vxor.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vxor.ll
@@ -1898,8 +1898,8 @@ define <vscale x 1 x i64> @intrinsic_vxor_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i
; RV32-LABEL: intrinsic_vxor_vx_nxv1i64_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a0), zero
@@ -1934,8 +1934,8 @@ define <vscale x 1 x i64> @intrinsic_vxor_mask_vx_nxv1i64_nxv1i64_i64(<vscale x
; RV32-LABEL: intrinsic_vxor_mask_vx_nxv1i64_nxv1i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -1969,8 +1969,8 @@ define <vscale x 2 x i64> @intrinsic_vxor_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i
; RV32-LABEL: intrinsic_vxor_vx_nxv2i64_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a0), zero
@@ -2005,8 +2005,8 @@ define <vscale x 2 x i64> @intrinsic_vxor_mask_vx_nxv2i64_nxv2i64_i64(<vscale x
; RV32-LABEL: intrinsic_vxor_mask_vx_nxv2i64_nxv2i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -2040,8 +2040,8 @@ define <vscale x 4 x i64> @intrinsic_vxor_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i
; RV32-LABEL: intrinsic_vxor_vx_nxv4i64_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a0), zero
@@ -2076,8 +2076,8 @@ define <vscale x 4 x i64> @intrinsic_vxor_mask_vx_nxv4i64_nxv4i64_i64(<vscale x
; RV32-LABEL: intrinsic_vxor_mask_vx_nxv4i64_nxv4i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2111,8 +2111,8 @@ define <vscale x 8 x i64> @intrinsic_vxor_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x i
; RV32-LABEL: intrinsic_vxor_vx_nxv8i64_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a0), zero
@@ -2147,8 +2147,8 @@ define <vscale x 8 x i64> @intrinsic_vxor_mask_vx_nxv8i64_nxv8i64_i64(<vscale x
; RV32-LABEL: intrinsic_vxor_mask_vx_nxv8i64_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; RV32-NEXT: vlse64.v v24, (a0), zero
diff --git a/llvm/test/CodeGen/RISCV/select-optimize-multiple.ll b/llvm/test/CodeGen/RISCV/select-optimize-multiple.ll
index f4c3b072c8fe1e..005a01bf1000a0 100644
--- a/llvm/test/CodeGen/RISCV/select-optimize-multiple.ll
+++ b/llvm/test/CodeGen/RISCV/select-optimize-multiple.ll
@@ -70,10 +70,10 @@ define i128 @cmovcc128(i64 signext %a, i128 %b, i128 %c) nounwind {
; RV32I-NEXT: addi a3, a3, 12
; RV32I-NEXT: .LBB1_10: # %entry
; RV32I-NEXT: lw a1, 0(a3)
-; RV32I-NEXT: sw a1, 12(a0)
-; RV32I-NEXT: sw a6, 8(a0)
-; RV32I-NEXT: sw a5, 4(a0)
; RV32I-NEXT: sw a2, 0(a0)
+; RV32I-NEXT: sw a5, 4(a0)
+; RV32I-NEXT: sw a6, 8(a0)
+; RV32I-NEXT: sw a1, 12(a0)
; RV32I-NEXT: ret
;
; RV64I-LABEL: cmovcc128:
@@ -153,10 +153,10 @@ define i128 @cmov128(i1 %a, i128 %b, i128 %c) nounwind {
; RV32I-NEXT: addi a2, a2, 12
; RV32I-NEXT: .LBB3_10: # %entry
; RV32I-NEXT: lw a1, 0(a2)
-; RV32I-NEXT: sw a1, 12(a0)
-; RV32I-NEXT: sw a6, 8(a0)
-; RV32I-NEXT: sw a5, 4(a0)
; RV32I-NEXT: sw a4, 0(a0)
+; RV32I-NEXT: sw a5, 4(a0)
+; RV32I-NEXT: sw a6, 8(a0)
+; RV32I-NEXT: sw a1, 12(a0)
; RV32I-NEXT: ret
;
; RV64I-LABEL: cmov128:
@@ -221,8 +221,8 @@ define double @cmovdouble(i1 %a, double %b, double %c) nounwind {
; RV32I-NEXT: sw a3, 8(sp)
; RV32I-NEXT: sw a4, 12(sp)
; RV32I-NEXT: fld fa5, 8(sp)
-; RV32I-NEXT: sw a1, 8(sp)
; RV32I-NEXT: andi a0, a0, 1
+; RV32I-NEXT: sw a1, 8(sp)
; RV32I-NEXT: sw a2, 12(sp)
; RV32I-NEXT: beqz a0, .LBB5_2
; RV32I-NEXT: # %bb.1:
diff --git a/llvm/test/CodeGen/RISCV/shifts.ll b/llvm/test/CodeGen/RISCV/shifts.ll
index dcc99ebaa5514b..2bec1fca5c43b4 100644
--- a/llvm/test/CodeGen/RISCV/shifts.ll
+++ b/llvm/test/CodeGen/RISCV/shifts.ll
@@ -157,14 +157,14 @@ define i128 @lshr128(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: lw a4, 4(a1)
; RV32I-NEXT: lw a5, 8(a1)
; RV32I-NEXT: lw a1, 12(a1)
-; RV32I-NEXT: sw zero, 28(sp)
-; RV32I-NEXT: sw zero, 24(sp)
-; RV32I-NEXT: sw zero, 20(sp)
; RV32I-NEXT: sw zero, 16(sp)
-; RV32I-NEXT: sw a1, 12(sp)
-; RV32I-NEXT: sw a5, 8(sp)
-; RV32I-NEXT: sw a4, 4(sp)
+; RV32I-NEXT: sw zero, 20(sp)
+; RV32I-NEXT: sw zero, 24(sp)
+; RV32I-NEXT: sw zero, 28(sp)
; RV32I-NEXT: sw a3, 0(sp)
+; RV32I-NEXT: sw a4, 4(sp)
+; RV32I-NEXT: sw a5, 8(sp)
+; RV32I-NEXT: sw a1, 12(sp)
; RV32I-NEXT: srli a1, a2, 3
; RV32I-NEXT: andi a1, a1, 12
; RV32I-NEXT: mv a3, sp
@@ -188,10 +188,10 @@ define i128 @lshr128(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: sll a6, a6, a7
; RV32I-NEXT: or a5, a5, a6
; RV32I-NEXT: srl a1, a1, a2
-; RV32I-NEXT: sw a1, 12(a0)
-; RV32I-NEXT: sw a5, 8(a0)
-; RV32I-NEXT: sw a4, 4(a0)
; RV32I-NEXT: sw a3, 0(a0)
+; RV32I-NEXT: sw a4, 4(a0)
+; RV32I-NEXT: sw a5, 8(a0)
+; RV32I-NEXT: sw a1, 12(a0)
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
@@ -221,20 +221,20 @@ define i128 @ashr128(i128 %a, i128 %b) nounwind {
; RV32I-LABEL: ashr128:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: lw a3, 8(a1)
-; RV32I-NEXT: lw a4, 12(a1)
-; RV32I-NEXT: lw a5, 0(a1)
-; RV32I-NEXT: lw a1, 4(a1)
; RV32I-NEXT: lw a2, 0(a2)
-; RV32I-NEXT: sw a4, 12(sp)
-; RV32I-NEXT: sw a3, 8(sp)
-; RV32I-NEXT: sw a1, 4(sp)
-; RV32I-NEXT: sw a5, 0(sp)
-; RV32I-NEXT: srai a4, a4, 31
-; RV32I-NEXT: sw a4, 28(sp)
-; RV32I-NEXT: sw a4, 24(sp)
-; RV32I-NEXT: sw a4, 20(sp)
-; RV32I-NEXT: sw a4, 16(sp)
+; RV32I-NEXT: lw a3, 0(a1)
+; RV32I-NEXT: lw a4, 4(a1)
+; RV32I-NEXT: lw a5, 8(a1)
+; RV32I-NEXT: lw a1, 12(a1)
+; RV32I-NEXT: sw a3, 0(sp)
+; RV32I-NEXT: sw a4, 4(sp)
+; RV32I-NEXT: sw a5, 8(sp)
+; RV32I-NEXT: sw a1, 12(sp)
+; RV32I-NEXT: srai a1, a1, 31
+; RV32I-NEXT: sw a1, 16(sp)
+; RV32I-NEXT: sw a1, 20(sp)
+; RV32I-NEXT: sw a1, 24(sp)
+; RV32I-NEXT: sw a1, 28(sp)
; RV32I-NEXT: srli a1, a2, 3
; RV32I-NEXT: andi a1, a1, 12
; RV32I-NEXT: mv a3, sp
@@ -258,10 +258,10 @@ define i128 @ashr128(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: sll a6, a6, a7
; RV32I-NEXT: or a5, a5, a6
; RV32I-NEXT: sra a1, a1, a2
-; RV32I-NEXT: sw a1, 12(a0)
-; RV32I-NEXT: sw a5, 8(a0)
-; RV32I-NEXT: sw a4, 4(a0)
; RV32I-NEXT: sw a3, 0(a0)
+; RV32I-NEXT: sw a4, 4(a0)
+; RV32I-NEXT: sw a5, 8(a0)
+; RV32I-NEXT: sw a1, 12(a0)
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
@@ -296,14 +296,14 @@ define i128 @shl128(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: lw a4, 4(a1)
; RV32I-NEXT: lw a5, 8(a1)
; RV32I-NEXT: lw a1, 12(a1)
-; RV32I-NEXT: sw zero, 12(sp)
-; RV32I-NEXT: sw zero, 8(sp)
-; RV32I-NEXT: sw zero, 4(sp)
; RV32I-NEXT: sw zero, 0(sp)
-; RV32I-NEXT: sw a1, 28(sp)
-; RV32I-NEXT: sw a5, 24(sp)
-; RV32I-NEXT: sw a4, 20(sp)
+; RV32I-NEXT: sw zero, 4(sp)
+; RV32I-NEXT: sw zero, 8(sp)
+; RV32I-NEXT: sw zero, 12(sp)
; RV32I-NEXT: sw a3, 16(sp)
+; RV32I-NEXT: sw a4, 20(sp)
+; RV32I-NEXT: sw a5, 24(sp)
+; RV32I-NEXT: sw a1, 28(sp)
; RV32I-NEXT: srli a1, a2, 3
; RV32I-NEXT: andi a1, a1, 12
; RV32I-NEXT: addi a3, sp, 16
@@ -328,9 +328,9 @@ define i128 @shl128(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: or a3, a3, a5
; RV32I-NEXT: sll a2, a4, a2
; RV32I-NEXT: sw a2, 0(a0)
-; RV32I-NEXT: sw a3, 12(a0)
-; RV32I-NEXT: sw a1, 8(a0)
; RV32I-NEXT: sw a6, 4(a0)
+; RV32I-NEXT: sw a1, 8(a0)
+; RV32I-NEXT: sw a3, 12(a0)
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
@@ -446,10 +446,10 @@ define i128 @fshr128_minsize(i128 %a, i128 %b) minsize nounwind {
; RV32I-NEXT: slli a5, a5, 1
; RV32I-NEXT: sll a2, a5, t2
; RV32I-NEXT: or a1, a2, a1
-; RV32I-NEXT: sw a1, 12(a0)
-; RV32I-NEXT: sw a3, 8(a0)
-; RV32I-NEXT: sw t0, 4(a0)
; RV32I-NEXT: sw a7, 0(a0)
+; RV32I-NEXT: sw t0, 4(a0)
+; RV32I-NEXT: sw a3, 8(a0)
+; RV32I-NEXT: sw a1, 12(a0)
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB10_13:
; RV32I-NEXT: mv a5, t0
diff --git a/llvm/test/CodeGen/RISCV/split-offsets.ll b/llvm/test/CodeGen/RISCV/split-offsets.ll
index 8d065daa2067c4..284b559eeec5f5 100644
--- a/llvm/test/CodeGen/RISCV/split-offsets.ll
+++ b/llvm/test/CodeGen/RISCV/split-offsets.ll
@@ -17,8 +17,8 @@ define void @test1(ptr %sp, ptr %t, i32 %n) {
; RV32I-NEXT: add a1, a1, a2
; RV32I-NEXT: add a0, a0, a2
; RV32I-NEXT: li a2, 2
-; RV32I-NEXT: sw a2, 0(a0)
; RV32I-NEXT: li a3, 1
+; RV32I-NEXT: sw a2, 0(a0)
; RV32I-NEXT: sw a3, 4(a0)
; RV32I-NEXT: sw a3, 0(a1)
; RV32I-NEXT: sw a2, 4(a1)
@@ -32,8 +32,8 @@ define void @test1(ptr %sp, ptr %t, i32 %n) {
; RV64I-NEXT: add a1, a1, a2
; RV64I-NEXT: add a0, a0, a2
; RV64I-NEXT: li a2, 2
-; RV64I-NEXT: sw a2, 0(a0)
; RV64I-NEXT: li a3, 1
+; RV64I-NEXT: sw a2, 0(a0)
; RV64I-NEXT: sw a3, 4(a0)
; RV64I-NEXT: sw a3, 0(a1)
; RV64I-NEXT: sw a2, 4(a1)
@@ -128,9 +128,9 @@ define void @test3(ptr %t) {
; RV32I-NEXT: addi a1, a1, -1920
; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: li a1, 2
+; RV32I-NEXT: li a2, 3
; RV32I-NEXT: sw a1, 4(a0)
-; RV32I-NEXT: li a1, 3
-; RV32I-NEXT: sw a1, 8(a0)
+; RV32I-NEXT: sw a2, 8(a0)
; RV32I-NEXT: ret
;
; RV64I-LABEL: test3:
@@ -139,9 +139,9 @@ define void @test3(ptr %t) {
; RV64I-NEXT: addiw a1, a1, -1920
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: li a1, 2
+; RV64I-NEXT: li a2, 3
; RV64I-NEXT: sw a1, 4(a0)
-; RV64I-NEXT: li a1, 3
-; RV64I-NEXT: sw a1, 8(a0)
+; RV64I-NEXT: sw a2, 8(a0)
; RV64I-NEXT: ret
entry:
%splitgep = getelementptr i8, ptr %t, i64 80000
diff --git a/llvm/test/CodeGen/RISCV/split-store.ll b/llvm/test/CodeGen/RISCV/split-store.ll
index 367d3fe2c595fe..a9d29f1840cfc1 100644
--- a/llvm/test/CodeGen/RISCV/split-store.ll
+++ b/llvm/test/CodeGen/RISCV/split-store.ll
@@ -117,14 +117,14 @@ define void @int8_float_pair(i8 signext %tmp1, float %tmp2, ptr %ref.tmp) {
define void @int32_int32_pair(i32 %tmp1, i32 %tmp2, ptr %ref.tmp) {
; RV32-LABEL: int32_int32_pair:
; RV32: # %bb.0:
-; RV32-NEXT: sw a1, 4(a2)
; RV32-NEXT: sw a0, 0(a2)
+; RV32-NEXT: sw a1, 4(a2)
; RV32-NEXT: ret
;
; RV32D-LABEL: int32_int32_pair:
; RV32D: # %bb.0:
-; RV32D-NEXT: sw a1, 4(a2)
; RV32D-NEXT: sw a0, 0(a2)
+; RV32D-NEXT: sw a1, 4(a2)
; RV32D-NEXT: ret
;
; RV64-LABEL: int32_int32_pair:
diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
index 5d00e90366c3be..307a0531cf0296 100644
--- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
@@ -354,19 +354,19 @@ define void @test_srem_vec(ptr %X) nounwind {
; RV32-NEXT: neg a3, a2
; RV32-NEXT: addi a1, a1, -1
; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: sw a3, 0(s0)
-; RV32-NEXT: andi a3, a0, 7
-; RV32-NEXT: sb a3, 12(s0)
-; RV32-NEXT: slli a3, a1, 1
-; RV32-NEXT: or a2, a3, a2
-; RV32-NEXT: sw a2, 4(s0)
-; RV32-NEXT: srli a2, a1, 31
+; RV32-NEXT: andi a4, a0, 7
+; RV32-NEXT: slli a5, a1, 1
+; RV32-NEXT: or a2, a5, a2
+; RV32-NEXT: srli a5, a1, 31
; RV32-NEXT: andi a1, a1, 1
; RV32-NEXT: slli a1, a1, 1
; RV32-NEXT: slli a0, a0, 2
-; RV32-NEXT: or a0, a2, a0
+; RV32-NEXT: or a0, a5, a0
; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: sw a3, 0(s0)
+; RV32-NEXT: sw a2, 4(s0)
; RV32-NEXT: sw a0, 8(s0)
+; RV32-NEXT: sb a4, 12(s0)
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -431,15 +431,15 @@ define void @test_srem_vec(ptr %X) nounwind {
; RV64-NEXT: slli a4, a2, 31
; RV64-NEXT: srli a4, a4, 62
; RV64-NEXT: or a3, a4, a3
-; RV64-NEXT: sw a3, 8(s0)
; RV64-NEXT: slli a1, a1, 29
; RV64-NEXT: srli a1, a1, 61
-; RV64-NEXT: sb a1, 12(s0)
; RV64-NEXT: slli a0, a0, 31
; RV64-NEXT: srli a0, a0, 31
; RV64-NEXT: slli a2, a2, 33
; RV64-NEXT: or a0, a0, a2
; RV64-NEXT: sd a0, 0(s0)
+; RV64-NEXT: sw a3, 8(s0)
+; RV64-NEXT: sb a1, 12(s0)
; RV64-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -506,19 +506,19 @@ define void @test_srem_vec(ptr %X) nounwind {
; RV32M-NEXT: neg a3, a2
; RV32M-NEXT: addi a1, a1, -1
; RV32M-NEXT: addi a0, a0, -1
-; RV32M-NEXT: sw a3, 0(s0)
-; RV32M-NEXT: andi a3, a0, 7
-; RV32M-NEXT: sb a3, 12(s0)
-; RV32M-NEXT: slli a3, a1, 1
-; RV32M-NEXT: or a2, a3, a2
-; RV32M-NEXT: sw a2, 4(s0)
-; RV32M-NEXT: srli a2, a1, 31
+; RV32M-NEXT: andi a4, a0, 7
+; RV32M-NEXT: slli a5, a1, 1
+; RV32M-NEXT: or a2, a5, a2
+; RV32M-NEXT: srli a5, a1, 31
; RV32M-NEXT: andi a1, a1, 1
; RV32M-NEXT: slli a1, a1, 1
; RV32M-NEXT: slli a0, a0, 2
-; RV32M-NEXT: or a0, a2, a0
+; RV32M-NEXT: or a0, a5, a0
; RV32M-NEXT: or a0, a0, a1
+; RV32M-NEXT: sw a3, 0(s0)
+; RV32M-NEXT: sw a2, 4(s0)
; RV32M-NEXT: sw a0, 8(s0)
+; RV32M-NEXT: sb a4, 12(s0)
; RV32M-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32M-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32M-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -586,14 +586,14 @@ define void @test_srem_vec(ptr %X) nounwind {
; RV64M-NEXT: slli a1, a1, 31
; RV64M-NEXT: srli a1, a1, 31
; RV64M-NEXT: or a1, a1, a4
-; RV64M-NEXT: sd a1, 0(a0)
-; RV64M-NEXT: slli a1, a2, 2
+; RV64M-NEXT: slli a4, a2, 2
; RV64M-NEXT: slli a3, a3, 31
; RV64M-NEXT: srli a3, a3, 62
-; RV64M-NEXT: or a1, a3, a1
-; RV64M-NEXT: sw a1, 8(a0)
+; RV64M-NEXT: or a3, a3, a4
; RV64M-NEXT: slli a2, a2, 29
; RV64M-NEXT: srli a2, a2, 61
+; RV64M-NEXT: sd a1, 0(a0)
+; RV64M-NEXT: sw a3, 8(a0)
; RV64M-NEXT: sb a2, 12(a0)
; RV64M-NEXT: ret
;
@@ -629,9 +629,9 @@ define void @test_srem_vec(ptr %X) nounwind {
; RV32MV-NEXT: slli a1, a3, 31
; RV32MV-NEXT: srai a1, a1, 31
; RV32MV-NEXT: li a2, 1
+; RV32MV-NEXT: li a3, -1
+; RV32MV-NEXT: sw a3, 16(sp)
; RV32MV-NEXT: sw a2, 20(sp)
-; RV32MV-NEXT: li a2, -1
-; RV32MV-NEXT: sw a2, 16(sp)
; RV32MV-NEXT: li a2, 6
; RV32MV-NEXT: li a3, 0
; RV32MV-NEXT: call __moddi3
@@ -683,35 +683,35 @@ define void @test_srem_vec(ptr %X) nounwind {
; RV32MV-NEXT: vmsne.vv v0, v8, v12
; RV32MV-NEXT: vmv.v.i v8, 0
; RV32MV-NEXT: vmerge.vim v8, v8, -1, v0
-; RV32MV-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32MV-NEXT: vse32.v v8, (s0)
+; RV32MV-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32MV-NEXT: vslidedown.vi v10, v8, 1
; RV32MV-NEXT: vmv.x.s a0, v10
; RV32MV-NEXT: vslidedown.vi v10, v8, 2
; RV32MV-NEXT: vmv.x.s a1, v10
; RV32MV-NEXT: slli a2, a1, 1
; RV32MV-NEXT: sub a2, a2, a0
-; RV32MV-NEXT: sw a2, 4(s0)
; RV32MV-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32MV-NEXT: vslidedown.vi v10, v8, 4
; RV32MV-NEXT: vmv.x.s a0, v10
-; RV32MV-NEXT: srli a2, a0, 30
+; RV32MV-NEXT: srli a3, a0, 30
; RV32MV-NEXT: vslidedown.vi v10, v8, 5
-; RV32MV-NEXT: vmv.x.s a3, v10
-; RV32MV-NEXT: slli a3, a3, 2
-; RV32MV-NEXT: or a2, a3, a2
-; RV32MV-NEXT: andi a2, a2, 7
-; RV32MV-NEXT: sb a2, 12(s0)
-; RV32MV-NEXT: srli a1, a1, 31
+; RV32MV-NEXT: vmv.x.s a4, v10
+; RV32MV-NEXT: slli a4, a4, 2
+; RV32MV-NEXT: or a3, a4, a3
; RV32MV-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32MV-NEXT: vse32.v v8, (s0)
+; RV32MV-NEXT: andi a3, a3, 7
+; RV32MV-NEXT: srli a1, a1, 31
; RV32MV-NEXT: vslidedown.vi v8, v8, 3
-; RV32MV-NEXT: vmv.x.s a2, v8
-; RV32MV-NEXT: andi a2, a2, 1
-; RV32MV-NEXT: slli a2, a2, 1
; RV32MV-NEXT: slli a0, a0, 2
; RV32MV-NEXT: or a0, a1, a0
-; RV32MV-NEXT: or a0, a0, a2
+; RV32MV-NEXT: vmv.x.s a1, v8
+; RV32MV-NEXT: andi a1, a1, 1
+; RV32MV-NEXT: slli a1, a1, 1
+; RV32MV-NEXT: or a0, a0, a1
+; RV32MV-NEXT: sw a2, 4(s0)
; RV32MV-NEXT: sw a0, 8(s0)
+; RV32MV-NEXT: sb a3, 12(s0)
; RV32MV-NEXT: csrr a0, vlenb
; RV32MV-NEXT: slli a0, a0, 1
; RV32MV-NEXT: add sp, sp, a0
@@ -785,20 +785,20 @@ define void @test_srem_vec(ptr %X) nounwind {
; RV64MV-NEXT: vmv.x.s a2, v10
; RV64MV-NEXT: slli a3, a2, 31
; RV64MV-NEXT: srli a3, a3, 61
-; RV64MV-NEXT: sb a3, 12(a0)
-; RV64MV-NEXT: vmv.x.s a3, v8
-; RV64MV-NEXT: and a1, a3, a1
+; RV64MV-NEXT: vmv.x.s a4, v8
+; RV64MV-NEXT: and a1, a4, a1
; RV64MV-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64MV-NEXT: vslidedown.vi v8, v8, 1
-; RV64MV-NEXT: vmv.x.s a3, v8
-; RV64MV-NEXT: slli a4, a3, 33
-; RV64MV-NEXT: or a1, a1, a4
-; RV64MV-NEXT: sd a1, 0(a0)
+; RV64MV-NEXT: vmv.x.s a4, v8
+; RV64MV-NEXT: slli a5, a4, 33
+; RV64MV-NEXT: or a1, a1, a5
; RV64MV-NEXT: slli a2, a2, 2
-; RV64MV-NEXT: slli a3, a3, 31
-; RV64MV-NEXT: srli a3, a3, 62
-; RV64MV-NEXT: or a2, a3, a2
+; RV64MV-NEXT: slli a4, a4, 31
+; RV64MV-NEXT: srli a4, a4, 62
+; RV64MV-NEXT: or a2, a4, a2
+; RV64MV-NEXT: sd a1, 0(a0)
; RV64MV-NEXT: sw a2, 8(a0)
+; RV64MV-NEXT: sb a3, 12(a0)
; RV64MV-NEXT: ret
%ld = load <3 x i33>, ptr %X
%srem = srem <3 x i33> %ld, <i33 6, i33 7, i33 -5>
diff --git a/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll b/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll
index 90443051d4b574..c7e57021b90dc1 100644
--- a/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll
+++ b/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll
@@ -38,10 +38,10 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) nounwind {
; RV32I-NEXT: li a1, -1003
; RV32I-NEXT: mv a0, s2
; RV32I-NEXT: call __modsi3
-; RV32I-NEXT: sh a0, 6(s3)
-; RV32I-NEXT: sh s1, 4(s3)
-; RV32I-NEXT: sh s0, 2(s3)
; RV32I-NEXT: sh s4, 0(s3)
+; RV32I-NEXT: sh s0, 2(s3)
+; RV32I-NEXT: sh s1, 4(s3)
+; RV32I-NEXT: sh a0, 6(s3)
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -95,10 +95,10 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) nounwind {
; RV32IM-NEXT: li a6, -1003
; RV32IM-NEXT: mul a5, a5, a6
; RV32IM-NEXT: sub a1, a1, a5
-; RV32IM-NEXT: sh a1, 6(a0)
-; RV32IM-NEXT: sh a4, 4(a0)
-; RV32IM-NEXT: sh a3, 2(a0)
; RV32IM-NEXT: sh a2, 0(a0)
+; RV32IM-NEXT: sh a3, 2(a0)
+; RV32IM-NEXT: sh a4, 4(a0)
+; RV32IM-NEXT: sh a1, 6(a0)
; RV32IM-NEXT: ret
;
; RV64I-LABEL: fold_srem_vec_1:
@@ -130,10 +130,10 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) nounwind {
; RV64I-NEXT: li a1, -1003
; RV64I-NEXT: mv a0, s2
; RV64I-NEXT: call __moddi3
-; RV64I-NEXT: sh a0, 6(s3)
-; RV64I-NEXT: sh s1, 4(s3)
-; RV64I-NEXT: sh s0, 2(s3)
; RV64I-NEXT: sh s4, 0(s3)
+; RV64I-NEXT: sh s0, 2(s3)
+; RV64I-NEXT: sh s1, 4(s3)
+; RV64I-NEXT: sh a0, 6(s3)
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -187,10 +187,10 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) nounwind {
; RV64IM-NEXT: li a6, -1003
; RV64IM-NEXT: mul a2, a2, a6
; RV64IM-NEXT: subw a1, a1, a2
-; RV64IM-NEXT: sh a1, 6(a0)
-; RV64IM-NEXT: sh a5, 4(a0)
-; RV64IM-NEXT: sh a4, 2(a0)
; RV64IM-NEXT: sh a3, 0(a0)
+; RV64IM-NEXT: sh a4, 2(a0)
+; RV64IM-NEXT: sh a5, 4(a0)
+; RV64IM-NEXT: sh a1, 6(a0)
; RV64IM-NEXT: ret
%1 = srem <4 x i16> %x, <i16 95, i16 -124, i16 98, i16 -1003>
ret <4 x i16> %1
@@ -226,10 +226,10 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) nounwind {
; RV32I-NEXT: li a1, 95
; RV32I-NEXT: mv a0, s2
; RV32I-NEXT: call __modsi3
-; RV32I-NEXT: sh a0, 6(s3)
-; RV32I-NEXT: sh s1, 4(s3)
-; RV32I-NEXT: sh s0, 2(s3)
; RV32I-NEXT: sh s4, 0(s3)
+; RV32I-NEXT: sh s0, 2(s3)
+; RV32I-NEXT: sh s1, 4(s3)
+; RV32I-NEXT: sh a0, 6(s3)
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -276,10 +276,10 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) nounwind {
; RV32IM-NEXT: add a5, a5, a6
; RV32IM-NEXT: mul a5, a5, a7
; RV32IM-NEXT: sub a1, a1, a5
-; RV32IM-NEXT: sh a1, 6(a0)
-; RV32IM-NEXT: sh a4, 4(a0)
-; RV32IM-NEXT: sh a3, 2(a0)
; RV32IM-NEXT: sh a2, 0(a0)
+; RV32IM-NEXT: sh a3, 2(a0)
+; RV32IM-NEXT: sh a4, 4(a0)
+; RV32IM-NEXT: sh a1, 6(a0)
; RV32IM-NEXT: ret
;
; RV64I-LABEL: fold_srem_vec_2:
@@ -311,10 +311,10 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) nounwind {
; RV64I-NEXT: li a1, 95
; RV64I-NEXT: mv a0, s2
; RV64I-NEXT: call __moddi3
-; RV64I-NEXT: sh a0, 6(s3)
-; RV64I-NEXT: sh s1, 4(s3)
-; RV64I-NEXT: sh s0, 2(s3)
; RV64I-NEXT: sh s4, 0(s3)
+; RV64I-NEXT: sh s0, 2(s3)
+; RV64I-NEXT: sh s1, 4(s3)
+; RV64I-NEXT: sh a0, 6(s3)
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -361,10 +361,10 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) nounwind {
; RV64IM-NEXT: add a2, a2, a6
; RV64IM-NEXT: mul a2, a2, a7
; RV64IM-NEXT: subw a1, a1, a2
-; RV64IM-NEXT: sh a1, 6(a0)
-; RV64IM-NEXT: sh a5, 4(a0)
-; RV64IM-NEXT: sh a4, 2(a0)
; RV64IM-NEXT: sh a3, 0(a0)
+; RV64IM-NEXT: sh a4, 2(a0)
+; RV64IM-NEXT: sh a5, 4(a0)
+; RV64IM-NEXT: sh a1, 6(a0)
; RV64IM-NEXT: ret
%1 = srem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
ret <4 x i16> %1
@@ -426,10 +426,10 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) nounwind {
; RV32I-NEXT: add s2, s7, s2
; RV32I-NEXT: add s3, s6, s3
; RV32I-NEXT: add s4, s5, s4
-; RV32I-NEXT: sh s4, 6(s0)
-; RV32I-NEXT: sh s3, 4(s0)
-; RV32I-NEXT: sh s2, 2(s0)
; RV32I-NEXT: sh a0, 0(s0)
+; RV32I-NEXT: sh s2, 2(s0)
+; RV32I-NEXT: sh s3, 4(s0)
+; RV32I-NEXT: sh s4, 6(s0)
; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
@@ -484,10 +484,10 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) nounwind {
; RV32IM-NEXT: sub a1, a1, t2
; RV32IM-NEXT: add a2, a2, a6
; RV32IM-NEXT: sub a2, a2, t0
-; RV32IM-NEXT: sh a2, 6(a0)
-; RV32IM-NEXT: sh a1, 4(a0)
-; RV32IM-NEXT: sh a4, 2(a0)
; RV32IM-NEXT: sh a3, 0(a0)
+; RV32IM-NEXT: sh a4, 2(a0)
+; RV32IM-NEXT: sh a1, 4(a0)
+; RV32IM-NEXT: sh a2, 6(a0)
; RV32IM-NEXT: ret
;
; RV64I-LABEL: combine_srem_sdiv:
@@ -543,10 +543,10 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) nounwind {
; RV64I-NEXT: add s2, s7, s2
; RV64I-NEXT: add s3, s6, s3
; RV64I-NEXT: add s4, s5, s4
-; RV64I-NEXT: sh s4, 6(s0)
-; RV64I-NEXT: sh s3, 4(s0)
-; RV64I-NEXT: sh s2, 2(s0)
; RV64I-NEXT: sh a0, 0(s0)
+; RV64I-NEXT: sh s2, 2(s0)
+; RV64I-NEXT: sh s3, 4(s0)
+; RV64I-NEXT: sh s4, 6(s0)
; RV64I-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 56(sp) # 8-byte Folded Reload
@@ -601,10 +601,10 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) nounwind {
; RV64IM-NEXT: subw a1, a1, t2
; RV64IM-NEXT: add a2, a2, a6
; RV64IM-NEXT: subw a2, a2, t0
-; RV64IM-NEXT: sh a2, 6(a0)
-; RV64IM-NEXT: sh a1, 4(a0)
-; RV64IM-NEXT: sh a4, 2(a0)
; RV64IM-NEXT: sh a3, 0(a0)
+; RV64IM-NEXT: sh a4, 2(a0)
+; RV64IM-NEXT: sh a1, 4(a0)
+; RV64IM-NEXT: sh a2, 6(a0)
; RV64IM-NEXT: ret
%1 = srem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
%2 = sdiv <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
@@ -641,10 +641,10 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) nounwind {
; RV32I-NEXT: sub s3, a4, a1
; RV32I-NEXT: li a1, 95
; RV32I-NEXT: call __modsi3
-; RV32I-NEXT: sh a0, 6(s0)
-; RV32I-NEXT: sh s3, 4(s0)
-; RV32I-NEXT: sh s2, 2(s0)
; RV32I-NEXT: sh s1, 0(s0)
+; RV32I-NEXT: sh s2, 2(s0)
+; RV32I-NEXT: sh s3, 4(s0)
+; RV32I-NEXT: sh a0, 6(s0)
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -681,9 +681,9 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) nounwind {
; RV32IM-NEXT: add a5, a3, a5
; RV32IM-NEXT: andi a5, a5, -8
; RV32IM-NEXT: sub a3, a3, a5
-; RV32IM-NEXT: sh a3, 4(a0)
-; RV32IM-NEXT: sh a2, 2(a0)
; RV32IM-NEXT: sh a1, 0(a0)
+; RV32IM-NEXT: sh a2, 2(a0)
+; RV32IM-NEXT: sh a3, 4(a0)
; RV32IM-NEXT: sh a4, 6(a0)
; RV32IM-NEXT: ret
;
@@ -714,10 +714,10 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) nounwind {
; RV64I-NEXT: subw s3, a4, a1
; RV64I-NEXT: li a1, 95
; RV64I-NEXT: call __moddi3
-; RV64I-NEXT: sh a0, 6(s0)
-; RV64I-NEXT: sh s3, 4(s0)
-; RV64I-NEXT: sh s2, 2(s0)
; RV64I-NEXT: sh s1, 0(s0)
+; RV64I-NEXT: sh s2, 2(s0)
+; RV64I-NEXT: sh s3, 4(s0)
+; RV64I-NEXT: sh a0, 6(s0)
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -754,9 +754,9 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) nounwind {
; RV64IM-NEXT: add a3, a4, a3
; RV64IM-NEXT: andi a3, a3, -8
; RV64IM-NEXT: subw a4, a4, a3
-; RV64IM-NEXT: sh a4, 4(a0)
-; RV64IM-NEXT: sh a5, 2(a0)
; RV64IM-NEXT: sh a1, 0(a0)
+; RV64IM-NEXT: sh a5, 2(a0)
+; RV64IM-NEXT: sh a4, 4(a0)
; RV64IM-NEXT: sh a2, 6(a0)
; RV64IM-NEXT: ret
%1 = srem <4 x i16> %x, <i16 64, i16 32, i16 8, i16 95>
@@ -789,10 +789,10 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) nounwind {
; RV32I-NEXT: addi a1, a0, 1327
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: call __modsi3
-; RV32I-NEXT: sh a0, 6(s2)
-; RV32I-NEXT: sh s0, 4(s2)
-; RV32I-NEXT: sh s3, 2(s2)
; RV32I-NEXT: sh zero, 0(s2)
+; RV32I-NEXT: sh s3, 2(s2)
+; RV32I-NEXT: sh s0, 4(s2)
+; RV32I-NEXT: sh a0, 6(s2)
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -837,9 +837,9 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) nounwind {
; RV32IM-NEXT: mul a4, a4, a5
; RV32IM-NEXT: sub a1, a1, a4
; RV32IM-NEXT: sh zero, 0(a0)
-; RV32IM-NEXT: sh a1, 6(a0)
-; RV32IM-NEXT: sh a3, 4(a0)
; RV32IM-NEXT: sh a2, 2(a0)
+; RV32IM-NEXT: sh a3, 4(a0)
+; RV32IM-NEXT: sh a1, 6(a0)
; RV32IM-NEXT: ret
;
; RV64I-LABEL: dont_fold_srem_one:
@@ -866,10 +866,10 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) nounwind {
; RV64I-NEXT: addiw a1, a0, 1327
; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: call __moddi3
-; RV64I-NEXT: sh a0, 6(s2)
-; RV64I-NEXT: sh s0, 4(s2)
-; RV64I-NEXT: sh s3, 2(s2)
; RV64I-NEXT: sh zero, 0(s2)
+; RV64I-NEXT: sh s3, 2(s2)
+; RV64I-NEXT: sh s0, 4(s2)
+; RV64I-NEXT: sh a0, 6(s2)
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -913,9 +913,9 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) nounwind {
; RV64IM-NEXT: mul a2, a2, a5
; RV64IM-NEXT: subw a1, a1, a2
; RV64IM-NEXT: sh zero, 0(a0)
-; RV64IM-NEXT: sh a1, 6(a0)
; RV64IM-NEXT: sh a4, 2(a0)
; RV64IM-NEXT: sh a3, 4(a0)
+; RV64IM-NEXT: sh a1, 6(a0)
; RV64IM-NEXT: ret
%1 = srem <4 x i16> %x, <i16 1, i16 654, i16 23, i16 5423>
ret <4 x i16> %1
@@ -947,10 +947,10 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) nounwind {
; RV32I-NEXT: addi a1, a0, 1327
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: call __modsi3
-; RV32I-NEXT: sh a0, 6(s0)
-; RV32I-NEXT: sh s2, 4(s0)
; RV32I-NEXT: sh zero, 0(s0)
; RV32I-NEXT: sh s3, 2(s0)
+; RV32I-NEXT: sh s2, 4(s0)
+; RV32I-NEXT: sh a0, 6(s0)
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -991,8 +991,8 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) nounwind {
; RV32IM-NEXT: sub a2, a2, a4
; RV32IM-NEXT: sh zero, 0(a0)
; RV32IM-NEXT: sh a2, 2(a0)
-; RV32IM-NEXT: sh a1, 6(a0)
; RV32IM-NEXT: sh a3, 4(a0)
+; RV32IM-NEXT: sh a1, 6(a0)
; RV32IM-NEXT: ret
;
; RV64I-LABEL: dont_fold_urem_i16_smax:
@@ -1019,10 +1019,10 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) nounwind {
; RV64I-NEXT: addiw a1, a0, 1327
; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: call __moddi3
-; RV64I-NEXT: sh a0, 6(s0)
-; RV64I-NEXT: sh s2, 4(s0)
; RV64I-NEXT: sh zero, 0(s0)
; RV64I-NEXT: sh s3, 2(s0)
+; RV64I-NEXT: sh s2, 4(s0)
+; RV64I-NEXT: sh a0, 6(s0)
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -1063,8 +1063,8 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) nounwind {
; RV64IM-NEXT: subw a4, a4, a2
; RV64IM-NEXT: sh zero, 0(a0)
; RV64IM-NEXT: sh a4, 2(a0)
-; RV64IM-NEXT: sh a1, 6(a0)
; RV64IM-NEXT: sh a3, 4(a0)
+; RV64IM-NEXT: sh a1, 6(a0)
; RV64IM-NEXT: ret
%1 = srem <4 x i16> %x, <i16 1, i16 32768, i16 23, i16 5423>
ret <4 x i16> %1
@@ -1085,15 +1085,15 @@ define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) nounwind {
; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s8, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: lw s0, 16(a1)
-; RV32I-NEXT: lw s1, 20(a1)
-; RV32I-NEXT: lw s2, 24(a1)
-; RV32I-NEXT: lw s3, 28(a1)
+; RV32I-NEXT: lw s1, 16(a1)
+; RV32I-NEXT: lw s2, 20(a1)
+; RV32I-NEXT: lw s3, 24(a1)
+; RV32I-NEXT: lw s4, 28(a1)
; RV32I-NEXT: lw a3, 0(a1)
; RV32I-NEXT: lw a4, 4(a1)
-; RV32I-NEXT: lw s4, 8(a1)
-; RV32I-NEXT: lw s5, 12(a1)
-; RV32I-NEXT: mv s6, a0
+; RV32I-NEXT: lw s5, 8(a1)
+; RV32I-NEXT: lw s6, 12(a1)
+; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: li a2, 1
; RV32I-NEXT: mv a0, a3
; RV32I-NEXT: mv a1, a4
@@ -1102,33 +1102,33 @@ define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) nounwind {
; RV32I-NEXT: mv s7, a0
; RV32I-NEXT: mv s8, a1
; RV32I-NEXT: li a2, 654
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: mv a1, s5
+; RV32I-NEXT: mv a0, s5
+; RV32I-NEXT: mv a1, s6
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: call __moddi3
-; RV32I-NEXT: mv s4, a0
-; RV32I-NEXT: mv s5, a1
+; RV32I-NEXT: mv s5, a0
+; RV32I-NEXT: mv s6, a1
; RV32I-NEXT: li a2, 23
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: mv a1, s1
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: mv a1, s2
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: call __moddi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: mv s1, a1
+; RV32I-NEXT: mv s1, a0
+; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: lui a0, 1
; RV32I-NEXT: addi a2, a0, 1327
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: mv a1, s3
+; RV32I-NEXT: mv a0, s3
+; RV32I-NEXT: mv a1, s4
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: call __moddi3
-; RV32I-NEXT: sw a1, 28(s6)
-; RV32I-NEXT: sw a0, 24(s6)
-; RV32I-NEXT: sw s1, 20(s6)
-; RV32I-NEXT: sw s0, 16(s6)
-; RV32I-NEXT: sw s5, 12(s6)
-; RV32I-NEXT: sw s4, 8(s6)
-; RV32I-NEXT: sw s8, 4(s6)
-; RV32I-NEXT: sw s7, 0(s6)
+; RV32I-NEXT: sw s1, 16(s0)
+; RV32I-NEXT: sw s2, 20(s0)
+; RV32I-NEXT: sw a0, 24(s0)
+; RV32I-NEXT: sw a1, 28(s0)
+; RV32I-NEXT: sw s7, 0(s0)
+; RV32I-NEXT: sw s8, 4(s0)
+; RV32I-NEXT: sw s5, 8(s0)
+; RV32I-NEXT: sw s6, 12(s0)
; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
@@ -1155,15 +1155,15 @@ define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) nounwind {
; RV32IM-NEXT: sw s6, 16(sp) # 4-byte Folded Spill
; RV32IM-NEXT: sw s7, 12(sp) # 4-byte Folded Spill
; RV32IM-NEXT: sw s8, 8(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw s0, 16(a1)
-; RV32IM-NEXT: lw s1, 20(a1)
-; RV32IM-NEXT: lw s2, 24(a1)
-; RV32IM-NEXT: lw s3, 28(a1)
+; RV32IM-NEXT: lw s1, 16(a1)
+; RV32IM-NEXT: lw s2, 20(a1)
+; RV32IM-NEXT: lw s3, 24(a1)
+; RV32IM-NEXT: lw s4, 28(a1)
; RV32IM-NEXT: lw a3, 0(a1)
; RV32IM-NEXT: lw a4, 4(a1)
-; RV32IM-NEXT: lw s4, 8(a1)
-; RV32IM-NEXT: lw s5, 12(a1)
-; RV32IM-NEXT: mv s6, a0
+; RV32IM-NEXT: lw s5, 8(a1)
+; RV32IM-NEXT: lw s6, 12(a1)
+; RV32IM-NEXT: mv s0, a0
; RV32IM-NEXT: li a2, 1
; RV32IM-NEXT: mv a0, a3
; RV32IM-NEXT: mv a1, a4
@@ -1172,33 +1172,33 @@ define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) nounwind {
; RV32IM-NEXT: mv s7, a0
; RV32IM-NEXT: mv s8, a1
; RV32IM-NEXT: li a2, 654
-; RV32IM-NEXT: mv a0, s4
-; RV32IM-NEXT: mv a1, s5
+; RV32IM-NEXT: mv a0, s5
+; RV32IM-NEXT: mv a1, s6
; RV32IM-NEXT: li a3, 0
; RV32IM-NEXT: call __moddi3
-; RV32IM-NEXT: mv s4, a0
-; RV32IM-NEXT: mv s5, a1
+; RV32IM-NEXT: mv s5, a0
+; RV32IM-NEXT: mv s6, a1
; RV32IM-NEXT: li a2, 23
-; RV32IM-NEXT: mv a0, s0
-; RV32IM-NEXT: mv a1, s1
+; RV32IM-NEXT: mv a0, s1
+; RV32IM-NEXT: mv a1, s2
; RV32IM-NEXT: li a3, 0
; RV32IM-NEXT: call __moddi3
-; RV32IM-NEXT: mv s0, a0
-; RV32IM-NEXT: mv s1, a1
+; RV32IM-NEXT: mv s1, a0
+; RV32IM-NEXT: mv s2, a1
; RV32IM-NEXT: lui a0, 1
; RV32IM-NEXT: addi a2, a0, 1327
-; RV32IM-NEXT: mv a0, s2
-; RV32IM-NEXT: mv a1, s3
+; RV32IM-NEXT: mv a0, s3
+; RV32IM-NEXT: mv a1, s4
; RV32IM-NEXT: li a3, 0
; RV32IM-NEXT: call __moddi3
-; RV32IM-NEXT: sw a1, 28(s6)
-; RV32IM-NEXT: sw a0, 24(s6)
-; RV32IM-NEXT: sw s1, 20(s6)
-; RV32IM-NEXT: sw s0, 16(s6)
-; RV32IM-NEXT: sw s5, 12(s6)
-; RV32IM-NEXT: sw s4, 8(s6)
-; RV32IM-NEXT: sw s8, 4(s6)
-; RV32IM-NEXT: sw s7, 0(s6)
+; RV32IM-NEXT: sw s1, 16(s0)
+; RV32IM-NEXT: sw s2, 20(s0)
+; RV32IM-NEXT: sw a0, 24(s0)
+; RV32IM-NEXT: sw a1, 28(s0)
+; RV32IM-NEXT: sw s7, 0(s0)
+; RV32IM-NEXT: sw s8, 4(s0)
+; RV32IM-NEXT: sw s5, 8(s0)
+; RV32IM-NEXT: sw s6, 12(s0)
; RV32IM-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
; RV32IM-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
; RV32IM-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
@@ -1236,10 +1236,10 @@ define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) nounwind {
; RV64I-NEXT: addiw a1, a0, 1327
; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: call __moddi3
-; RV64I-NEXT: sd a0, 24(s2)
-; RV64I-NEXT: sd s0, 16(s2)
-; RV64I-NEXT: sd s3, 8(s2)
; RV64I-NEXT: sd zero, 0(s2)
+; RV64I-NEXT: sd s3, 8(s2)
+; RV64I-NEXT: sd s0, 16(s2)
+; RV64I-NEXT: sd a0, 24(s2)
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -1283,9 +1283,9 @@ define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) nounwind {
; RV64IM-NEXT: mul a2, a2, a5
; RV64IM-NEXT: sub a1, a1, a2
; RV64IM-NEXT: sd zero, 0(a0)
-; RV64IM-NEXT: sd a1, 24(a0)
; RV64IM-NEXT: sd a4, 8(a0)
; RV64IM-NEXT: sd a3, 16(a0)
+; RV64IM-NEXT: sd a1, 24(a0)
; RV64IM-NEXT: ret
%1 = srem <4 x i64> %x, <i64 1, i64 654, i64 23, i64 5423>
ret <4 x i64> %1
diff --git a/llvm/test/CodeGen/RISCV/stack-slot-size.ll b/llvm/test/CodeGen/RISCV/stack-slot-size.ll
index 1388eaac3a67e7..71ee6d8160a9da 100644
--- a/llvm/test/CodeGen/RISCV/stack-slot-size.ll
+++ b/llvm/test/CodeGen/RISCV/stack-slot-size.ll
@@ -21,11 +21,11 @@ define i32 @caller129() nounwind {
; RV32I-NEXT: li a0, 42
; RV32I-NEXT: sw a0, 24(sp)
; RV32I-NEXT: sw zero, 16(sp)
-; RV32I-NEXT: sw zero, 12(sp)
-; RV32I-NEXT: sw zero, 8(sp)
-; RV32I-NEXT: sw zero, 4(sp)
; RV32I-NEXT: mv a0, sp
; RV32I-NEXT: sw zero, 0(sp)
+; RV32I-NEXT: sw zero, 4(sp)
+; RV32I-NEXT: sw zero, 8(sp)
+; RV32I-NEXT: sw zero, 12(sp)
; RV32I-NEXT: call callee129
; RV32I-NEXT: lw a0, 24(sp)
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
@@ -38,10 +38,10 @@ define i32 @caller129() nounwind {
; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
; RV64I-NEXT: li a0, 42
; RV64I-NEXT: sw a0, 36(sp)
-; RV64I-NEXT: sd zero, 16(sp)
-; RV64I-NEXT: sd zero, 8(sp)
; RV64I-NEXT: mv a0, sp
; RV64I-NEXT: sd zero, 0(sp)
+; RV64I-NEXT: sd zero, 8(sp)
+; RV64I-NEXT: sd zero, 16(sp)
; RV64I-NEXT: call callee129
; RV64I-NEXT: lw a0, 36(sp)
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
@@ -62,11 +62,11 @@ define i32 @caller160() nounwind {
; RV32I-NEXT: li a0, 42
; RV32I-NEXT: sw a0, 24(sp)
; RV32I-NEXT: sw zero, 16(sp)
-; RV32I-NEXT: sw zero, 12(sp)
-; RV32I-NEXT: sw zero, 8(sp)
-; RV32I-NEXT: sw zero, 4(sp)
; RV32I-NEXT: mv a0, sp
; RV32I-NEXT: sw zero, 0(sp)
+; RV32I-NEXT: sw zero, 4(sp)
+; RV32I-NEXT: sw zero, 8(sp)
+; RV32I-NEXT: sw zero, 12(sp)
; RV32I-NEXT: call callee160
; RV32I-NEXT: lw a0, 24(sp)
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
@@ -79,10 +79,10 @@ define i32 @caller160() nounwind {
; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
; RV64I-NEXT: li a0, 42
; RV64I-NEXT: sw a0, 36(sp)
-; RV64I-NEXT: sd zero, 16(sp)
-; RV64I-NEXT: sd zero, 8(sp)
; RV64I-NEXT: mv a0, sp
; RV64I-NEXT: sd zero, 0(sp)
+; RV64I-NEXT: sd zero, 8(sp)
+; RV64I-NEXT: sd zero, 16(sp)
; RV64I-NEXT: call callee160
; RV64I-NEXT: lw a0, 36(sp)
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
@@ -102,13 +102,13 @@ define i32 @caller161() nounwind {
; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT: li a0, 42
; RV32I-NEXT: sw a0, 24(sp)
-; RV32I-NEXT: sw zero, 20(sp)
; RV32I-NEXT: sw zero, 16(sp)
-; RV32I-NEXT: sw zero, 12(sp)
-; RV32I-NEXT: sw zero, 8(sp)
-; RV32I-NEXT: sw zero, 4(sp)
+; RV32I-NEXT: sw zero, 20(sp)
; RV32I-NEXT: mv a0, sp
; RV32I-NEXT: sw zero, 0(sp)
+; RV32I-NEXT: sw zero, 4(sp)
+; RV32I-NEXT: sw zero, 8(sp)
+; RV32I-NEXT: sw zero, 12(sp)
; RV32I-NEXT: call callee161
; RV32I-NEXT: lw a0, 24(sp)
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
@@ -121,10 +121,10 @@ define i32 @caller161() nounwind {
; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
; RV64I-NEXT: li a0, 42
; RV64I-NEXT: sw a0, 36(sp)
-; RV64I-NEXT: sd zero, 16(sp)
-; RV64I-NEXT: sd zero, 8(sp)
; RV64I-NEXT: mv a0, sp
; RV64I-NEXT: sd zero, 0(sp)
+; RV64I-NEXT: sd zero, 8(sp)
+; RV64I-NEXT: sd zero, 16(sp)
; RV64I-NEXT: call callee161
; RV64I-NEXT: lw a0, 36(sp)
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/stack-store-check.ll b/llvm/test/CodeGen/RISCV/stack-store-check.ll
index b51a759a87b859..052ccbf6e06f36 100644
--- a/llvm/test/CodeGen/RISCV/stack-store-check.ll
+++ b/llvm/test/CodeGen/RISCV/stack-store-check.ll
@@ -14,292 +14,294 @@
define void @main() local_unnamed_addr nounwind {
; CHECK-LABEL: main:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -688
-; CHECK-NEXT: sw ra, 684(sp) # 4-byte Folded Spill
-; CHECK-NEXT: sw s0, 680(sp) # 4-byte Folded Spill
-; CHECK-NEXT: sw s1, 676(sp) # 4-byte Folded Spill
-; CHECK-NEXT: sw s2, 672(sp) # 4-byte Folded Spill
-; CHECK-NEXT: sw s3, 668(sp) # 4-byte Folded Spill
-; CHECK-NEXT: sw s4, 664(sp) # 4-byte Folded Spill
-; CHECK-NEXT: sw s5, 660(sp) # 4-byte Folded Spill
-; CHECK-NEXT: sw s6, 656(sp) # 4-byte Folded Spill
-; CHECK-NEXT: sw s7, 652(sp) # 4-byte Folded Spill
-; CHECK-NEXT: sw s8, 648(sp) # 4-byte Folded Spill
-; CHECK-NEXT: sw s9, 644(sp) # 4-byte Folded Spill
-; CHECK-NEXT: sw s10, 640(sp) # 4-byte Folded Spill
-; CHECK-NEXT: sw s11, 636(sp) # 4-byte Folded Spill
+; CHECK-NEXT: addi sp, sp, -704
+; CHECK-NEXT: sw ra, 700(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw s0, 696(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw s1, 692(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw s2, 688(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw s3, 684(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw s4, 680(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw s5, 676(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw s6, 672(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw s7, 668(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw s8, 664(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw s9, 660(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw s10, 656(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw s11, 652(sp) # 4-byte Folded Spill
; CHECK-NEXT: lui a0, %hi(U)
-; CHECK-NEXT: lw s6, %lo(U)(a0)
-; CHECK-NEXT: lw s7, %lo(U+4)(a0)
-; CHECK-NEXT: lw s8, %lo(U+8)(a0)
-; CHECK-NEXT: lw s0, %lo(U+12)(a0)
-; CHECK-NEXT: sw zero, 612(sp)
-; CHECK-NEXT: sw zero, 608(sp)
-; CHECK-NEXT: sw zero, 604(sp)
-; CHECK-NEXT: sw zero, 600(sp)
-; CHECK-NEXT: sw s0, 596(sp)
-; CHECK-NEXT: sw s8, 592(sp)
-; CHECK-NEXT: sw s7, 588(sp)
-; CHECK-NEXT: addi a0, sp, 616
-; CHECK-NEXT: addi a1, sp, 600
-; CHECK-NEXT: addi a2, sp, 584
-; CHECK-NEXT: sw s6, 584(sp)
+; CHECK-NEXT: lw s9, %lo(U)(a0)
+; CHECK-NEXT: lw s10, %lo(U+4)(a0)
+; CHECK-NEXT: lw s11, %lo(U+8)(a0)
+; CHECK-NEXT: lw s5, %lo(U+12)(a0)
+; CHECK-NEXT: sw zero, 616(sp)
+; CHECK-NEXT: sw zero, 620(sp)
+; CHECK-NEXT: sw zero, 624(sp)
+; CHECK-NEXT: sw zero, 628(sp)
+; CHECK-NEXT: addi a0, sp, 632
+; CHECK-NEXT: addi a1, sp, 616
+; CHECK-NEXT: addi a2, sp, 600
+; CHECK-NEXT: sw s9, 600(sp)
+; CHECK-NEXT: sw s10, 604(sp)
+; CHECK-NEXT: sw s11, 608(sp)
+; CHECK-NEXT: sw s5, 612(sp)
; CHECK-NEXT: call __subtf3
-; CHECK-NEXT: lw s1, 616(sp)
-; CHECK-NEXT: lw s2, 620(sp)
-; CHECK-NEXT: lw s3, 624(sp)
-; CHECK-NEXT: lw s4, 628(sp)
-; CHECK-NEXT: sw s0, 548(sp)
-; CHECK-NEXT: sw s8, 544(sp)
-; CHECK-NEXT: sw s7, 540(sp)
-; CHECK-NEXT: sw s6, 536(sp)
-; CHECK-NEXT: sw s4, 564(sp)
-; CHECK-NEXT: sw s3, 560(sp)
-; CHECK-NEXT: sw s2, 556(sp)
-; CHECK-NEXT: addi a0, sp, 568
-; CHECK-NEXT: addi a1, sp, 552
-; CHECK-NEXT: addi a2, sp, 536
-; CHECK-NEXT: sw s1, 552(sp)
+; CHECK-NEXT: lw s1, 632(sp)
+; CHECK-NEXT: lw s2, 636(sp)
+; CHECK-NEXT: lw s3, 640(sp)
+; CHECK-NEXT: lw s4, 644(sp)
+; CHECK-NEXT: sw s9, 552(sp)
+; CHECK-NEXT: sw s10, 556(sp)
+; CHECK-NEXT: sw s11, 560(sp)
+; CHECK-NEXT: sw s5, 564(sp)
+; CHECK-NEXT: addi a0, sp, 584
+; CHECK-NEXT: addi a1, sp, 568
+; CHECK-NEXT: addi a2, sp, 552
+; CHECK-NEXT: sw s1, 568(sp)
+; CHECK-NEXT: sw s2, 572(sp)
+; CHECK-NEXT: sw s3, 576(sp)
+; CHECK-NEXT: sw s4, 580(sp)
; CHECK-NEXT: call __subtf3
-; CHECK-NEXT: lw a0, 568(sp)
-; CHECK-NEXT: sw a0, 40(sp) # 4-byte Folded Spill
-; CHECK-NEXT: lw a0, 572(sp)
-; CHECK-NEXT: sw a0, 28(sp) # 4-byte Folded Spill
-; CHECK-NEXT: lw a0, 576(sp)
-; CHECK-NEXT: sw a0, 20(sp) # 4-byte Folded Spill
-; CHECK-NEXT: lw a0, 580(sp)
+; CHECK-NEXT: lw a0, 584(sp)
+; CHECK-NEXT: sw a0, 52(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw a0, 588(sp)
; CHECK-NEXT: sw a0, 48(sp) # 4-byte Folded Spill
-; CHECK-NEXT: sw zero, 500(sp)
-; CHECK-NEXT: sw zero, 496(sp)
-; CHECK-NEXT: sw zero, 492(sp)
-; CHECK-NEXT: sw zero, 488(sp)
-; CHECK-NEXT: sw s0, 516(sp)
-; CHECK-NEXT: sw s8, 512(sp)
-; CHECK-NEXT: sw s7, 508(sp)
-; CHECK-NEXT: addi a0, sp, 520
-; CHECK-NEXT: addi a1, sp, 504
-; CHECK-NEXT: addi a2, sp, 488
-; CHECK-NEXT: sw s6, 504(sp)
+; CHECK-NEXT: lw a0, 592(sp)
+; CHECK-NEXT: sw a0, 44(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw a0, 596(sp)
+; CHECK-NEXT: sw a0, 40(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw zero, 504(sp)
+; CHECK-NEXT: sw zero, 508(sp)
+; CHECK-NEXT: sw zero, 512(sp)
+; CHECK-NEXT: sw zero, 516(sp)
+; CHECK-NEXT: addi a0, sp, 536
+; CHECK-NEXT: addi a1, sp, 520
+; CHECK-NEXT: addi a2, sp, 504
+; CHECK-NEXT: sw s9, 520(sp)
+; CHECK-NEXT: sw s10, 524(sp)
+; CHECK-NEXT: sw s11, 528(sp)
+; CHECK-NEXT: sw s5, 532(sp)
; CHECK-NEXT: call __addtf3
-; CHECK-NEXT: lw s9, 520(sp)
-; CHECK-NEXT: lw s11, 524(sp)
-; CHECK-NEXT: lw s5, 528(sp)
-; CHECK-NEXT: lw s10, 532(sp)
-; CHECK-NEXT: sw s10, 16(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw s0, 536(sp)
+; CHECK-NEXT: lw s6, 540(sp)
+; CHECK-NEXT: lw s7, 544(sp)
+; CHECK-NEXT: lw s8, 548(sp)
; CHECK-NEXT: lui a0, %hi(Y1)
; CHECK-NEXT: lw a1, %lo(Y1)(a0)
-; CHECK-NEXT: sw a1, 52(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw a1, 20(sp) # 4-byte Folded Spill
; CHECK-NEXT: lw a2, %lo(Y1+4)(a0)
-; CHECK-NEXT: sw a2, 12(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw a2, 16(sp) # 4-byte Folded Spill
; CHECK-NEXT: lw a3, %lo(Y1+8)(a0)
-; CHECK-NEXT: sw a3, 8(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw a3, 12(sp) # 4-byte Folded Spill
; CHECK-NEXT: lw a0, %lo(Y1+12)(a0)
-; CHECK-NEXT: sw a0, 4(sp) # 4-byte Folded Spill
-; CHECK-NEXT: sw a0, 308(sp)
-; CHECK-NEXT: sw a3, 304(sp)
-; CHECK-NEXT: sw a2, 300(sp)
-; CHECK-NEXT: sw a1, 296(sp)
-; CHECK-NEXT: sw s4, 324(sp)
-; CHECK-NEXT: sw s3, 320(sp)
-; CHECK-NEXT: sw s2, 316(sp)
-; CHECK-NEXT: addi a0, sp, 328
-; CHECK-NEXT: addi a1, sp, 312
-; CHECK-NEXT: addi a2, sp, 296
-; CHECK-NEXT: sw s1, 312(sp)
+; CHECK-NEXT: sw a0, 8(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw a1, 312(sp)
+; CHECK-NEXT: sw a2, 316(sp)
+; CHECK-NEXT: sw a3, 320(sp)
+; CHECK-NEXT: sw a0, 324(sp)
+; CHECK-NEXT: addi a0, sp, 344
+; CHECK-NEXT: addi a1, sp, 328
+; CHECK-NEXT: addi a2, sp, 312
+; CHECK-NEXT: sw s1, 328(sp)
+; CHECK-NEXT: sw s2, 332(sp)
+; CHECK-NEXT: sw s3, 336(sp)
+; CHECK-NEXT: sw s4, 340(sp)
; CHECK-NEXT: call __multf3
-; CHECK-NEXT: lw a0, 328(sp)
-; CHECK-NEXT: sw a0, 44(sp) # 4-byte Folded Spill
-; CHECK-NEXT: lw a0, 332(sp)
-; CHECK-NEXT: sw a0, 36(sp) # 4-byte Folded Spill
-; CHECK-NEXT: lw a0, 336(sp)
-; CHECK-NEXT: sw a0, 32(sp) # 4-byte Folded Spill
-; CHECK-NEXT: lw a0, 340(sp)
-; CHECK-NEXT: sw a0, 24(sp) # 4-byte Folded Spill
-; CHECK-NEXT: sw s0, 468(sp)
-; CHECK-NEXT: sw s8, 464(sp)
-; CHECK-NEXT: sw s7, 460(sp)
-; CHECK-NEXT: sw s6, 456(sp)
-; CHECK-NEXT: sw s10, 452(sp)
-; CHECK-NEXT: sw s5, 448(sp)
-; CHECK-NEXT: sw s11, 444(sp)
-; CHECK-NEXT: addi a0, sp, 472
-; CHECK-NEXT: addi a1, sp, 456
-; CHECK-NEXT: addi a2, sp, 440
-; CHECK-NEXT: sw s9, 440(sp)
+; CHECK-NEXT: lw a0, 344(sp)
+; CHECK-NEXT: sw a0, 68(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw a0, 348(sp)
+; CHECK-NEXT: sw a0, 64(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw a0, 352(sp)
+; CHECK-NEXT: sw a0, 60(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw a0, 356(sp)
+; CHECK-NEXT: sw a0, 56(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw s9, 472(sp)
+; CHECK-NEXT: sw s10, 476(sp)
+; CHECK-NEXT: sw s11, 480(sp)
+; CHECK-NEXT: sw s5, 484(sp)
+; CHECK-NEXT: addi a0, sp, 488
+; CHECK-NEXT: addi a1, sp, 472
+; CHECK-NEXT: addi a2, sp, 456
+; CHECK-NEXT: sw s0, 456(sp)
+; CHECK-NEXT: sw s6, 460(sp)
+; CHECK-NEXT: sw s7, 464(sp)
+; CHECK-NEXT: sw s8, 468(sp)
; CHECK-NEXT: call __addtf3
-; CHECK-NEXT: lw a3, 472(sp)
-; CHECK-NEXT: lw a0, 476(sp)
-; CHECK-NEXT: lw a1, 480(sp)
-; CHECK-NEXT: lw a2, 484(sp)
-; CHECK-NEXT: sw zero, 420(sp)
-; CHECK-NEXT: sw zero, 416(sp)
-; CHECK-NEXT: sw zero, 412(sp)
-; CHECK-NEXT: sw zero, 408(sp)
-; CHECK-NEXT: sw a2, 404(sp)
-; CHECK-NEXT: sw a1, 400(sp)
-; CHECK-NEXT: sw a0, 396(sp)
-; CHECK-NEXT: addi a0, sp, 424
-; CHECK-NEXT: addi a1, sp, 408
-; CHECK-NEXT: addi a2, sp, 392
-; CHECK-NEXT: sw a3, 392(sp)
+; CHECK-NEXT: lw a3, 488(sp)
+; CHECK-NEXT: lw a4, 492(sp)
+; CHECK-NEXT: lw a5, 496(sp)
+; CHECK-NEXT: lw a6, 500(sp)
+; CHECK-NEXT: sw zero, 424(sp)
+; CHECK-NEXT: sw zero, 428(sp)
+; CHECK-NEXT: sw zero, 432(sp)
+; CHECK-NEXT: sw zero, 436(sp)
+; CHECK-NEXT: addi a0, sp, 440
+; CHECK-NEXT: addi a1, sp, 424
+; CHECK-NEXT: addi a2, sp, 408
+; CHECK-NEXT: sw a3, 408(sp)
+; CHECK-NEXT: sw a4, 412(sp)
+; CHECK-NEXT: sw a5, 416(sp)
+; CHECK-NEXT: sw a6, 420(sp)
; CHECK-NEXT: call __subtf3
-; CHECK-NEXT: lw a0, 432(sp)
-; CHECK-NEXT: lw a1, 436(sp)
-; CHECK-NEXT: lw a2, 424(sp)
-; CHECK-NEXT: lw a3, 428(sp)
+; CHECK-NEXT: lw a0, 448(sp)
+; CHECK-NEXT: lw a1, 452(sp)
+; CHECK-NEXT: lw a2, 440(sp)
+; CHECK-NEXT: lw a3, 444(sp)
; CHECK-NEXT: lui a4, %hi(X)
; CHECK-NEXT: sw a1, %lo(X+12)(a4)
; CHECK-NEXT: sw a0, %lo(X+8)(a4)
; CHECK-NEXT: sw a3, %lo(X+4)(a4)
; CHECK-NEXT: sw a2, %lo(X)(a4)
-; CHECK-NEXT: lw s8, 4(sp) # 4-byte Folded Reload
-; CHECK-NEXT: sw s8, 212(sp)
-; CHECK-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; CHECK-NEXT: sw s4, 208(sp)
-; CHECK-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; CHECK-NEXT: sw s3, 204(sp)
-; CHECK-NEXT: lw a0, 52(sp) # 4-byte Folded Reload
-; CHECK-NEXT: sw a0, 200(sp)
-; CHECK-NEXT: lw a0, 48(sp) # 4-byte Folded Reload
-; CHECK-NEXT: sw a0, 228(sp)
-; CHECK-NEXT: lw s10, 20(sp) # 4-byte Folded Reload
+; CHECK-NEXT: lw s5, 20(sp) # 4-byte Folded Reload
+; CHECK-NEXT: sw s5, 216(sp)
+; CHECK-NEXT: lw s9, 16(sp) # 4-byte Folded Reload
+; CHECK-NEXT: sw s9, 220(sp)
+; CHECK-NEXT: lw s10, 12(sp) # 4-byte Folded Reload
; CHECK-NEXT: sw s10, 224(sp)
-; CHECK-NEXT: lw s2, 28(sp) # 4-byte Folded Reload
-; CHECK-NEXT: sw s2, 220(sp)
-; CHECK-NEXT: addi a0, sp, 232
-; CHECK-NEXT: addi a1, sp, 216
-; CHECK-NEXT: addi a2, sp, 200
-; CHECK-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
-; CHECK-NEXT: sw s0, 216(sp)
+; CHECK-NEXT: lw s11, 8(sp) # 4-byte Folded Reload
+; CHECK-NEXT: sw s11, 228(sp)
+; CHECK-NEXT: addi a0, sp, 248
+; CHECK-NEXT: addi a1, sp, 232
+; CHECK-NEXT: addi a2, sp, 216
+; CHECK-NEXT: lw s1, 52(sp) # 4-byte Folded Reload
+; CHECK-NEXT: sw s1, 232(sp)
+; CHECK-NEXT: lw s2, 48(sp) # 4-byte Folded Reload
+; CHECK-NEXT: sw s2, 236(sp)
+; CHECK-NEXT: lw s3, 44(sp) # 4-byte Folded Reload
+; CHECK-NEXT: sw s3, 240(sp)
+; CHECK-NEXT: lw s4, 40(sp) # 4-byte Folded Reload
+; CHECK-NEXT: sw s4, 244(sp)
; CHECK-NEXT: call __multf3
-; CHECK-NEXT: lw s1, 232(sp)
-; CHECK-NEXT: lw a0, 236(sp)
-; CHECK-NEXT: sw a0, 0(sp) # 4-byte Folded Spill
-; CHECK-NEXT: lw s6, 240(sp)
-; CHECK-NEXT: lw s7, 244(sp)
-; CHECK-NEXT: sw zero, 356(sp)
-; CHECK-NEXT: sw zero, 352(sp)
-; CHECK-NEXT: sw zero, 348(sp)
-; CHECK-NEXT: sw zero, 344(sp)
-; CHECK-NEXT: lw a0, 16(sp) # 4-byte Folded Reload
-; CHECK-NEXT: sw a0, 372(sp)
-; CHECK-NEXT: sw s5, 368(sp)
-; CHECK-NEXT: sw s11, 364(sp)
-; CHECK-NEXT: addi a0, sp, 376
-; CHECK-NEXT: addi a1, sp, 360
-; CHECK-NEXT: addi a2, sp, 344
-; CHECK-NEXT: sw s9, 360(sp)
+; CHECK-NEXT: lw a0, 248(sp)
+; CHECK-NEXT: sw a0, 36(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw a0, 252(sp)
+; CHECK-NEXT: sw a0, 32(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw a0, 256(sp)
+; CHECK-NEXT: sw a0, 28(sp) # 4-byte Folded Spill
+; CHECK-NEXT: lw a0, 260(sp)
+; CHECK-NEXT: sw a0, 24(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sw zero, 360(sp)
+; CHECK-NEXT: sw zero, 364(sp)
+; CHECK-NEXT: sw zero, 368(sp)
+; CHECK-NEXT: sw zero, 372(sp)
+; CHECK-NEXT: addi a0, sp, 392
+; CHECK-NEXT: addi a1, sp, 376
+; CHECK-NEXT: addi a2, sp, 360
+; CHECK-NEXT: sw s0, 376(sp)
+; CHECK-NEXT: sw s6, 380(sp)
+; CHECK-NEXT: sw s7, 384(sp)
+; CHECK-NEXT: sw s8, 388(sp)
; CHECK-NEXT: call __multf3
-; CHECK-NEXT: lw a0, 384(sp)
-; CHECK-NEXT: lw a1, 388(sp)
-; CHECK-NEXT: lw a2, 376(sp)
-; CHECK-NEXT: lw a3, 380(sp)
+; CHECK-NEXT: lw a0, 400(sp)
+; CHECK-NEXT: lw a1, 404(sp)
+; CHECK-NEXT: lw a2, 392(sp)
+; CHECK-NEXT: lw a3, 396(sp)
; CHECK-NEXT: lui a4, %hi(S)
; CHECK-NEXT: sw a1, %lo(S+12)(a4)
; CHECK-NEXT: sw a0, %lo(S+8)(a4)
; CHECK-NEXT: sw a3, %lo(S+4)(a4)
; CHECK-NEXT: sw a2, %lo(S)(a4)
-; CHECK-NEXT: lw a0, 48(sp) # 4-byte Folded Reload
-; CHECK-NEXT: sw a0, 260(sp)
-; CHECK-NEXT: sw s10, 256(sp)
-; CHECK-NEXT: sw s2, 252(sp)
-; CHECK-NEXT: sw s0, 248(sp)
-; CHECK-NEXT: lw a0, 24(sp) # 4-byte Folded Reload
-; CHECK-NEXT: sw a0, 276(sp)
-; CHECK-NEXT: lw a0, 32(sp) # 4-byte Folded Reload
-; CHECK-NEXT: sw a0, 272(sp)
-; CHECK-NEXT: lw a0, 36(sp) # 4-byte Folded Reload
-; CHECK-NEXT: sw a0, 268(sp)
-; CHECK-NEXT: addi a0, sp, 280
-; CHECK-NEXT: addi a1, sp, 264
-; CHECK-NEXT: addi a2, sp, 248
-; CHECK-NEXT: lw a3, 44(sp) # 4-byte Folded Reload
-; CHECK-NEXT: sw a3, 264(sp)
+; CHECK-NEXT: sw s1, 264(sp)
+; CHECK-NEXT: sw s2, 268(sp)
+; CHECK-NEXT: sw s3, 272(sp)
+; CHECK-NEXT: sw s4, 276(sp)
+; CHECK-NEXT: addi a0, sp, 296
+; CHECK-NEXT: addi a1, sp, 280
+; CHECK-NEXT: addi a2, sp, 264
+; CHECK-NEXT: lw a3, 68(sp) # 4-byte Folded Reload
+; CHECK-NEXT: sw a3, 280(sp)
+; CHECK-NEXT: lw a3, 64(sp) # 4-byte Folded Reload
+; CHECK-NEXT: sw a3, 284(sp)
+; CHECK-NEXT: lw a3, 60(sp) # 4-byte Folded Reload
+; CHECK-NEXT: sw a3, 288(sp)
+; CHECK-NEXT: lw a3, 56(sp) # 4-byte Folded Reload
+; CHECK-NEXT: sw a3, 292(sp)
; CHECK-NEXT: call __subtf3
-; CHECK-NEXT: lw a0, 288(sp)
-; CHECK-NEXT: lw a1, 292(sp)
-; CHECK-NEXT: lw a2, 280(sp)
-; CHECK-NEXT: lw a3, 284(sp)
+; CHECK-NEXT: lw a0, 304(sp)
+; CHECK-NEXT: lw a1, 308(sp)
+; CHECK-NEXT: lw a2, 296(sp)
+; CHECK-NEXT: lw a3, 300(sp)
; CHECK-NEXT: lui a4, %hi(T)
; CHECK-NEXT: sw a1, %lo(T+12)(a4)
; CHECK-NEXT: sw a0, %lo(T+8)(a4)
; CHECK-NEXT: sw a3, %lo(T+4)(a4)
; CHECK-NEXT: sw a2, %lo(T)(a4)
-; CHECK-NEXT: sw zero, 164(sp)
-; CHECK-NEXT: sw zero, 160(sp)
-; CHECK-NEXT: sw zero, 156(sp)
-; CHECK-NEXT: sw zero, 152(sp)
-; CHECK-NEXT: sw s7, 180(sp)
-; CHECK-NEXT: sw s6, 176(sp)
-; CHECK-NEXT: lw a0, 0(sp) # 4-byte Folded Reload
-; CHECK-NEXT: sw a0, 172(sp)
-; CHECK-NEXT: addi a0, sp, 184
-; CHECK-NEXT: addi a1, sp, 168
-; CHECK-NEXT: addi a2, sp, 152
-; CHECK-NEXT: sw s1, 168(sp)
+; CHECK-NEXT: sw zero, 168(sp)
+; CHECK-NEXT: sw zero, 172(sp)
+; CHECK-NEXT: sw zero, 176(sp)
+; CHECK-NEXT: sw zero, 180(sp)
+; CHECK-NEXT: addi a0, sp, 200
+; CHECK-NEXT: addi a1, sp, 184
+; CHECK-NEXT: addi a2, sp, 168
+; CHECK-NEXT: lw a3, 36(sp) # 4-byte Folded Reload
+; CHECK-NEXT: sw a3, 184(sp)
+; CHECK-NEXT: lw a3, 32(sp) # 4-byte Folded Reload
+; CHECK-NEXT: sw a3, 188(sp)
+; CHECK-NEXT: lw a3, 28(sp) # 4-byte Folded Reload
+; CHECK-NEXT: sw a3, 192(sp)
+; CHECK-NEXT: lw a3, 24(sp) # 4-byte Folded Reload
+; CHECK-NEXT: sw a3, 196(sp)
; CHECK-NEXT: call __addtf3
-; CHECK-NEXT: lw a0, 192(sp)
-; CHECK-NEXT: lw a1, 196(sp)
-; CHECK-NEXT: lw a2, 184(sp)
-; CHECK-NEXT: lw a3, 188(sp)
+; CHECK-NEXT: lw a0, 208(sp)
+; CHECK-NEXT: lw a1, 212(sp)
+; CHECK-NEXT: lw a2, 200(sp)
+; CHECK-NEXT: lw a3, 204(sp)
; CHECK-NEXT: lui a4, %hi(Y)
; CHECK-NEXT: sw a1, %lo(Y+12)(a4)
; CHECK-NEXT: sw a0, %lo(Y+8)(a4)
; CHECK-NEXT: sw a3, %lo(Y+4)(a4)
; CHECK-NEXT: sw a2, %lo(Y)(a4)
-; CHECK-NEXT: sw zero, 116(sp)
-; CHECK-NEXT: sw zero, 112(sp)
-; CHECK-NEXT: sw zero, 108(sp)
-; CHECK-NEXT: sw zero, 104(sp)
-; CHECK-NEXT: sw s8, 132(sp)
-; CHECK-NEXT: sw s4, 128(sp)
-; CHECK-NEXT: sw s3, 124(sp)
-; CHECK-NEXT: addi a0, sp, 136
-; CHECK-NEXT: addi a1, sp, 120
-; CHECK-NEXT: addi a2, sp, 104
-; CHECK-NEXT: lw a3, 52(sp) # 4-byte Folded Reload
-; CHECK-NEXT: sw a3, 120(sp)
+; CHECK-NEXT: sw zero, 120(sp)
+; CHECK-NEXT: sw zero, 124(sp)
+; CHECK-NEXT: sw zero, 128(sp)
+; CHECK-NEXT: sw zero, 132(sp)
+; CHECK-NEXT: addi a0, sp, 152
+; CHECK-NEXT: addi a1, sp, 136
+; CHECK-NEXT: addi a2, sp, 120
+; CHECK-NEXT: sw s5, 136(sp)
+; CHECK-NEXT: sw s9, 140(sp)
+; CHECK-NEXT: sw s10, 144(sp)
+; CHECK-NEXT: sw s11, 148(sp)
; CHECK-NEXT: call __multf3
-; CHECK-NEXT: lw a3, 136(sp)
-; CHECK-NEXT: lw a0, 140(sp)
-; CHECK-NEXT: lw a1, 144(sp)
-; CHECK-NEXT: lw a2, 148(sp)
-; CHECK-NEXT: lui a4, 786400
-; CHECK-NEXT: sw a4, 68(sp)
-; CHECK-NEXT: sw zero, 64(sp)
-; CHECK-NEXT: sw zero, 60(sp)
-; CHECK-NEXT: sw zero, 56(sp)
-; CHECK-NEXT: sw a2, 84(sp)
-; CHECK-NEXT: sw a1, 80(sp)
-; CHECK-NEXT: sw a0, 76(sp)
-; CHECK-NEXT: addi a0, sp, 88
-; CHECK-NEXT: addi a1, sp, 72
-; CHECK-NEXT: addi a2, sp, 56
-; CHECK-NEXT: sw a3, 72(sp)
+; CHECK-NEXT: lw a3, 152(sp)
+; CHECK-NEXT: lw a4, 156(sp)
+; CHECK-NEXT: lw a5, 160(sp)
+; CHECK-NEXT: lw a6, 164(sp)
+; CHECK-NEXT: lui a0, 786400
+; CHECK-NEXT: sw zero, 72(sp)
+; CHECK-NEXT: sw zero, 76(sp)
+; CHECK-NEXT: sw zero, 80(sp)
+; CHECK-NEXT: sw a0, 84(sp)
+; CHECK-NEXT: addi a0, sp, 104
+; CHECK-NEXT: addi a1, sp, 88
+; CHECK-NEXT: addi a2, sp, 72
+; CHECK-NEXT: sw a3, 88(sp)
+; CHECK-NEXT: sw a4, 92(sp)
+; CHECK-NEXT: sw a5, 96(sp)
+; CHECK-NEXT: sw a6, 100(sp)
; CHECK-NEXT: call __addtf3
-; CHECK-NEXT: lw a0, 96(sp)
-; CHECK-NEXT: lw a1, 100(sp)
-; CHECK-NEXT: lw a2, 88(sp)
-; CHECK-NEXT: lw a3, 92(sp)
+; CHECK-NEXT: lw a0, 112(sp)
+; CHECK-NEXT: lw a1, 116(sp)
+; CHECK-NEXT: lw a2, 104(sp)
+; CHECK-NEXT: lw a3, 108(sp)
; CHECK-NEXT: lui a4, %hi(Y1)
; CHECK-NEXT: sw a0, %lo(Y1+8)(a4)
; CHECK-NEXT: sw a1, %lo(Y1+12)(a4)
; CHECK-NEXT: sw a2, %lo(Y1)(a4)
; CHECK-NEXT: sw a3, %lo(Y1+4)(a4)
-; CHECK-NEXT: lw ra, 684(sp) # 4-byte Folded Reload
-; CHECK-NEXT: lw s0, 680(sp) # 4-byte Folded Reload
-; CHECK-NEXT: lw s1, 676(sp) # 4-byte Folded Reload
-; CHECK-NEXT: lw s2, 672(sp) # 4-byte Folded Reload
-; CHECK-NEXT: lw s3, 668(sp) # 4-byte Folded Reload
-; CHECK-NEXT: lw s4, 664(sp) # 4-byte Folded Reload
-; CHECK-NEXT: lw s5, 660(sp) # 4-byte Folded Reload
-; CHECK-NEXT: lw s6, 656(sp) # 4-byte Folded Reload
-; CHECK-NEXT: lw s7, 652(sp) # 4-byte Folded Reload
-; CHECK-NEXT: lw s8, 648(sp) # 4-byte Folded Reload
-; CHECK-NEXT: lw s9, 644(sp) # 4-byte Folded Reload
-; CHECK-NEXT: lw s10, 640(sp) # 4-byte Folded Reload
-; CHECK-NEXT: lw s11, 636(sp) # 4-byte Folded Reload
-; CHECK-NEXT: addi sp, sp, 688
+; CHECK-NEXT: lw ra, 700(sp) # 4-byte Folded Reload
+; CHECK-NEXT: lw s0, 696(sp) # 4-byte Folded Reload
+; CHECK-NEXT: lw s1, 692(sp) # 4-byte Folded Reload
+; CHECK-NEXT: lw s2, 688(sp) # 4-byte Folded Reload
+; CHECK-NEXT: lw s3, 684(sp) # 4-byte Folded Reload
+; CHECK-NEXT: lw s4, 680(sp) # 4-byte Folded Reload
+; CHECK-NEXT: lw s5, 676(sp) # 4-byte Folded Reload
+; CHECK-NEXT: lw s6, 672(sp) # 4-byte Folded Reload
+; CHECK-NEXT: lw s7, 668(sp) # 4-byte Folded Reload
+; CHECK-NEXT: lw s8, 664(sp) # 4-byte Folded Reload
+; CHECK-NEXT: lw s9, 660(sp) # 4-byte Folded Reload
+; CHECK-NEXT: lw s10, 656(sp) # 4-byte Folded Reload
+; CHECK-NEXT: lw s11, 652(sp) # 4-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 704
; CHECK-NEXT: ret
%1 = load fp128, ptr @U, align 16
%2 = fsub fp128 0xL00000000000000000000000000000000, %1
diff --git a/llvm/test/CodeGen/RISCV/tail-calls.ll b/llvm/test/CodeGen/RISCV/tail-calls.ll
index 4dd6ed68ff9811..f7ef01b0958d87 100644
--- a/llvm/test/CodeGen/RISCV/tail-calls.ll
+++ b/llvm/test/CodeGen/RISCV/tail-calls.ll
@@ -213,12 +213,12 @@ define i32 @caller_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g
; CHECK-NEXT: lw t3, 44(sp)
; CHECK-NEXT: lw t4, 48(sp)
; CHECK-NEXT: lw t5, 52(sp)
-; CHECK-NEXT: sw t5, 20(sp)
; CHECK-NEXT: sw t4, 16(sp)
-; CHECK-NEXT: sw t3, 12(sp)
-; CHECK-NEXT: sw t2, 8(sp)
-; CHECK-NEXT: sw t1, 4(sp)
+; CHECK-NEXT: sw t5, 20(sp)
; CHECK-NEXT: sw t0, 0(sp)
+; CHECK-NEXT: sw t1, 4(sp)
+; CHECK-NEXT: sw t2, 8(sp)
+; CHECK-NEXT: sw t3, 12(sp)
; CHECK-NEXT: call callee_args
; CHECK-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 32
@@ -235,15 +235,15 @@ define i32 @caller_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g
; CHECK-LARGE-ZICFILP-NEXT: lw t4, 44(sp)
; CHECK-LARGE-ZICFILP-NEXT: lw t2, 48(sp)
; CHECK-LARGE-ZICFILP-NEXT: lw t5, 52(sp)
-; CHECK-LARGE-ZICFILP-NEXT: sw t5, 20(sp)
; CHECK-LARGE-ZICFILP-NEXT: sw t2, 16(sp)
+; CHECK-LARGE-ZICFILP-NEXT: sw t5, 20(sp)
; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi8:
; CHECK-LARGE-ZICFILP-NEXT: auipc t2, %pcrel_hi(.LCPI6_0)
; CHECK-LARGE-ZICFILP-NEXT: lw t2, %pcrel_lo(.Lpcrel_hi8)(t2)
-; CHECK-LARGE-ZICFILP-NEXT: sw t4, 12(sp)
-; CHECK-LARGE-ZICFILP-NEXT: sw t3, 8(sp)
-; CHECK-LARGE-ZICFILP-NEXT: sw t1, 4(sp)
; CHECK-LARGE-ZICFILP-NEXT: sw t0, 0(sp)
+; CHECK-LARGE-ZICFILP-NEXT: sw t1, 4(sp)
+; CHECK-LARGE-ZICFILP-NEXT: sw t3, 8(sp)
+; CHECK-LARGE-ZICFILP-NEXT: sw t4, 12(sp)
; CHECK-LARGE-ZICFILP-NEXT: jalr t2
; CHECK-LARGE-ZICFILP-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, 32
@@ -260,12 +260,12 @@ define void @caller_indirect_args() nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi sp, sp, -32
; CHECK-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; CHECK-NEXT: lui a0, 262128
-; CHECK-NEXT: sw a0, 12(sp)
-; CHECK-NEXT: sw zero, 8(sp)
-; CHECK-NEXT: sw zero, 4(sp)
+; CHECK-NEXT: lui a1, 262128
; CHECK-NEXT: mv a0, sp
; CHECK-NEXT: sw zero, 0(sp)
+; CHECK-NEXT: sw zero, 4(sp)
+; CHECK-NEXT: sw zero, 8(sp)
+; CHECK-NEXT: sw a1, 12(sp)
; CHECK-NEXT: call callee_indirect_args
; CHECK-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 32
@@ -276,15 +276,15 @@ define void @caller_indirect_args() nounwind {
; CHECK-LARGE-ZICFILP-NEXT: lpad 0
; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, -32
; CHECK-LARGE-ZICFILP-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; CHECK-LARGE-ZICFILP-NEXT: lui a0, 262128
-; CHECK-LARGE-ZICFILP-NEXT: sw a0, 12(sp)
+; CHECK-LARGE-ZICFILP-NEXT: lui a1, 262128
; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi9:
; CHECK-LARGE-ZICFILP-NEXT: auipc a0, %pcrel_hi(.LCPI7_0)
; CHECK-LARGE-ZICFILP-NEXT: lw t2, %pcrel_lo(.Lpcrel_hi9)(a0)
-; CHECK-LARGE-ZICFILP-NEXT: sw zero, 8(sp)
-; CHECK-LARGE-ZICFILP-NEXT: sw zero, 4(sp)
; CHECK-LARGE-ZICFILP-NEXT: mv a0, sp
; CHECK-LARGE-ZICFILP-NEXT: sw zero, 0(sp)
+; CHECK-LARGE-ZICFILP-NEXT: sw zero, 4(sp)
+; CHECK-LARGE-ZICFILP-NEXT: sw zero, 8(sp)
+; CHECK-LARGE-ZICFILP-NEXT: sw a1, 12(sp)
; CHECK-LARGE-ZICFILP-NEXT: jalr t2
; CHECK-LARGE-ZICFILP-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, 32
diff --git a/llvm/test/CodeGen/RISCV/unaligned-load-store.ll b/llvm/test/CodeGen/RISCV/unaligned-load-store.ll
index 0ca9fc065ff2d4..a2f5e446b63bcd 100644
--- a/llvm/test/CodeGen/RISCV/unaligned-load-store.ll
+++ b/llvm/test/CodeGen/RISCV/unaligned-load-store.ll
@@ -247,9 +247,9 @@ define void @store_i8(ptr %p, i8 %v) {
define void @store_i16(ptr %p, i16 %v) {
; SLOW-LABEL: store_i16:
; SLOW: # %bb.0:
+; SLOW-NEXT: srli a2, a1, 8
; SLOW-NEXT: sb a1, 0(a0)
-; SLOW-NEXT: srli a1, a1, 8
-; SLOW-NEXT: sb a1, 1(a0)
+; SLOW-NEXT: sb a2, 1(a0)
; SLOW-NEXT: ret
;
; FAST-LABEL: store_i16:
@@ -263,18 +263,18 @@ define void @store_i16(ptr %p, i16 %v) {
define void @store_i24(ptr %p, i24 %v) {
; SLOW-LABEL: store_i24:
; SLOW: # %bb.0:
-; SLOW-NEXT: sb a1, 0(a0)
; SLOW-NEXT: srli a2, a1, 8
+; SLOW-NEXT: srli a3, a1, 16
+; SLOW-NEXT: sb a1, 0(a0)
; SLOW-NEXT: sb a2, 1(a0)
-; SLOW-NEXT: srli a1, a1, 16
-; SLOW-NEXT: sb a1, 2(a0)
+; SLOW-NEXT: sb a3, 2(a0)
; SLOW-NEXT: ret
;
; FAST-LABEL: store_i24:
; FAST: # %bb.0:
+; FAST-NEXT: srli a2, a1, 16
; FAST-NEXT: sh a1, 0(a0)
-; FAST-NEXT: srli a1, a1, 16
-; FAST-NEXT: sb a1, 2(a0)
+; FAST-NEXT: sb a2, 2(a0)
; FAST-NEXT: ret
store i24 %v, ptr %p, align 1
ret void
@@ -283,13 +283,13 @@ define void @store_i24(ptr %p, i24 %v) {
define void @store_i32(ptr %p, i32 %v) {
; SLOW-LABEL: store_i32:
; SLOW: # %bb.0:
-; SLOW-NEXT: sb a1, 0(a0)
; SLOW-NEXT: srli a2, a1, 24
+; SLOW-NEXT: srli a3, a1, 16
+; SLOW-NEXT: srli a4, a1, 8
+; SLOW-NEXT: sb a1, 0(a0)
+; SLOW-NEXT: sb a4, 1(a0)
+; SLOW-NEXT: sb a3, 2(a0)
; SLOW-NEXT: sb a2, 3(a0)
-; SLOW-NEXT: srli a2, a1, 16
-; SLOW-NEXT: sb a2, 2(a0)
-; SLOW-NEXT: srli a1, a1, 8
-; SLOW-NEXT: sb a1, 1(a0)
; SLOW-NEXT: ret
;
; FAST-LABEL: store_i32:
@@ -303,82 +303,82 @@ define void @store_i32(ptr %p, i32 %v) {
define void @store_i64(ptr %p, i64 %v) {
; RV32I-LABEL: store_i64:
; RV32I: # %bb.0:
-; RV32I-NEXT: sb a2, 4(a0)
-; RV32I-NEXT: sb a1, 0(a0)
; RV32I-NEXT: srli a3, a2, 24
+; RV32I-NEXT: srli a4, a2, 16
+; RV32I-NEXT: srli a5, a2, 8
+; RV32I-NEXT: sb a2, 4(a0)
+; RV32I-NEXT: sb a5, 5(a0)
+; RV32I-NEXT: sb a4, 6(a0)
; RV32I-NEXT: sb a3, 7(a0)
-; RV32I-NEXT: srli a3, a2, 16
-; RV32I-NEXT: sb a3, 6(a0)
-; RV32I-NEXT: srli a2, a2, 8
-; RV32I-NEXT: sb a2, 5(a0)
; RV32I-NEXT: srli a2, a1, 24
+; RV32I-NEXT: srli a3, a1, 16
+; RV32I-NEXT: srli a4, a1, 8
+; RV32I-NEXT: sb a1, 0(a0)
+; RV32I-NEXT: sb a4, 1(a0)
+; RV32I-NEXT: sb a3, 2(a0)
; RV32I-NEXT: sb a2, 3(a0)
-; RV32I-NEXT: srli a2, a1, 16
-; RV32I-NEXT: sb a2, 2(a0)
-; RV32I-NEXT: srli a1, a1, 8
-; RV32I-NEXT: sb a1, 1(a0)
; RV32I-NEXT: ret
;
; RV64I-LABEL: store_i64:
; RV64I: # %bb.0:
-; RV64I-NEXT: sb a1, 0(a0)
; RV64I-NEXT: srli a2, a1, 56
+; RV64I-NEXT: srli a3, a1, 48
+; RV64I-NEXT: srli a4, a1, 40
+; RV64I-NEXT: srli a5, a1, 32
+; RV64I-NEXT: sb a5, 4(a0)
+; RV64I-NEXT: sb a4, 5(a0)
+; RV64I-NEXT: sb a3, 6(a0)
; RV64I-NEXT: sb a2, 7(a0)
-; RV64I-NEXT: srli a2, a1, 48
-; RV64I-NEXT: sb a2, 6(a0)
-; RV64I-NEXT: srli a2, a1, 40
-; RV64I-NEXT: sb a2, 5(a0)
-; RV64I-NEXT: srli a2, a1, 32
-; RV64I-NEXT: sb a2, 4(a0)
; RV64I-NEXT: srli a2, a1, 24
+; RV64I-NEXT: srli a3, a1, 16
+; RV64I-NEXT: srli a4, a1, 8
+; RV64I-NEXT: sb a1, 0(a0)
+; RV64I-NEXT: sb a4, 1(a0)
+; RV64I-NEXT: sb a3, 2(a0)
; RV64I-NEXT: sb a2, 3(a0)
-; RV64I-NEXT: srli a2, a1, 16
-; RV64I-NEXT: sb a2, 2(a0)
-; RV64I-NEXT: srli a1, a1, 8
-; RV64I-NEXT: sb a1, 1(a0)
; RV64I-NEXT: ret
;
; RV32IZBKB-LABEL: store_i64:
; RV32IZBKB: # %bb.0:
-; RV32IZBKB-NEXT: sb a2, 4(a0)
-; RV32IZBKB-NEXT: sb a1, 0(a0)
; RV32IZBKB-NEXT: srli a3, a2, 24
+; RV32IZBKB-NEXT: srli a4, a2, 16
+; RV32IZBKB-NEXT: srli a5, a2, 8
+; RV32IZBKB-NEXT: sb a2, 4(a0)
+; RV32IZBKB-NEXT: sb a5, 5(a0)
+; RV32IZBKB-NEXT: sb a4, 6(a0)
; RV32IZBKB-NEXT: sb a3, 7(a0)
-; RV32IZBKB-NEXT: srli a3, a2, 16
-; RV32IZBKB-NEXT: sb a3, 6(a0)
-; RV32IZBKB-NEXT: srli a2, a2, 8
-; RV32IZBKB-NEXT: sb a2, 5(a0)
; RV32IZBKB-NEXT: srli a2, a1, 24
+; RV32IZBKB-NEXT: srli a3, a1, 16
+; RV32IZBKB-NEXT: srli a4, a1, 8
+; RV32IZBKB-NEXT: sb a1, 0(a0)
+; RV32IZBKB-NEXT: sb a4, 1(a0)
+; RV32IZBKB-NEXT: sb a3, 2(a0)
; RV32IZBKB-NEXT: sb a2, 3(a0)
-; RV32IZBKB-NEXT: srli a2, a1, 16
-; RV32IZBKB-NEXT: sb a2, 2(a0)
-; RV32IZBKB-NEXT: srli a1, a1, 8
-; RV32IZBKB-NEXT: sb a1, 1(a0)
; RV32IZBKB-NEXT: ret
;
; RV64IZBKB-LABEL: store_i64:
; RV64IZBKB: # %bb.0:
-; RV64IZBKB-NEXT: sb a1, 0(a0)
; RV64IZBKB-NEXT: srli a2, a1, 56
+; RV64IZBKB-NEXT: srli a3, a1, 48
+; RV64IZBKB-NEXT: srli a4, a1, 40
+; RV64IZBKB-NEXT: srli a5, a1, 32
+; RV64IZBKB-NEXT: sb a5, 4(a0)
+; RV64IZBKB-NEXT: sb a4, 5(a0)
+; RV64IZBKB-NEXT: sb a3, 6(a0)
; RV64IZBKB-NEXT: sb a2, 7(a0)
-; RV64IZBKB-NEXT: srli a2, a1, 48
-; RV64IZBKB-NEXT: sb a2, 6(a0)
-; RV64IZBKB-NEXT: srli a2, a1, 40
-; RV64IZBKB-NEXT: sb a2, 5(a0)
-; RV64IZBKB-NEXT: srli a2, a1, 32
-; RV64IZBKB-NEXT: sb a2, 4(a0)
; RV64IZBKB-NEXT: srli a2, a1, 24
+; RV64IZBKB-NEXT: srli a3, a1, 16
+; RV64IZBKB-NEXT: srli a4, a1, 8
+; RV64IZBKB-NEXT: sb a1, 0(a0)
+; RV64IZBKB-NEXT: sb a4, 1(a0)
+; RV64IZBKB-NEXT: sb a3, 2(a0)
; RV64IZBKB-NEXT: sb a2, 3(a0)
-; RV64IZBKB-NEXT: srli a2, a1, 16
-; RV64IZBKB-NEXT: sb a2, 2(a0)
-; RV64IZBKB-NEXT: srli a1, a1, 8
-; RV64IZBKB-NEXT: sb a1, 1(a0)
; RV64IZBKB-NEXT: ret
;
; RV32I-FAST-LABEL: store_i64:
; RV32I-FAST: # %bb.0:
-; RV32I-FAST-NEXT: sw a2, 4(a0)
; RV32I-FAST-NEXT: sw a1, 0(a0)
+; RV32I-FAST-NEXT: sw a2, 4(a0)
; RV32I-FAST-NEXT: ret
;
; RV64I-FAST-LABEL: store_i64:
@@ -543,31 +543,31 @@ define void @store_large_constant(ptr %x) {
; SLOW-LABEL: store_large_constant:
; SLOW: # %bb.0:
; SLOW-NEXT: li a1, -2
+; SLOW-NEXT: li a2, 220
+; SLOW-NEXT: li a3, 186
+; SLOW-NEXT: li a4, 152
+; SLOW-NEXT: sb a4, 4(a0)
+; SLOW-NEXT: sb a3, 5(a0)
+; SLOW-NEXT: sb a2, 6(a0)
; SLOW-NEXT: sb a1, 7(a0)
-; SLOW-NEXT: li a1, 220
-; SLOW-NEXT: sb a1, 6(a0)
-; SLOW-NEXT: li a1, 186
-; SLOW-NEXT: sb a1, 5(a0)
-; SLOW-NEXT: li a1, 152
-; SLOW-NEXT: sb a1, 4(a0)
; SLOW-NEXT: li a1, 118
+; SLOW-NEXT: li a2, 84
+; SLOW-NEXT: li a3, 50
+; SLOW-NEXT: li a4, 16
+; SLOW-NEXT: sb a4, 0(a0)
+; SLOW-NEXT: sb a3, 1(a0)
+; SLOW-NEXT: sb a2, 2(a0)
; SLOW-NEXT: sb a1, 3(a0)
-; SLOW-NEXT: li a1, 84
-; SLOW-NEXT: sb a1, 2(a0)
-; SLOW-NEXT: li a1, 50
-; SLOW-NEXT: sb a1, 1(a0)
-; SLOW-NEXT: li a1, 16
-; SLOW-NEXT: sb a1, 0(a0)
; SLOW-NEXT: ret
;
; RV32I-FAST-LABEL: store_large_constant:
; RV32I-FAST: # %bb.0:
; RV32I-FAST-NEXT: lui a1, 1043916
; RV32I-FAST-NEXT: addi a1, a1, -1384
+; RV32I-FAST-NEXT: lui a2, 484675
+; RV32I-FAST-NEXT: addi a2, a2, 528
+; RV32I-FAST-NEXT: sw a2, 0(a0)
; RV32I-FAST-NEXT: sw a1, 4(a0)
-; RV32I-FAST-NEXT: lui a1, 484675
-; RV32I-FAST-NEXT: addi a1, a1, 528
-; RV32I-FAST-NEXT: sw a1, 0(a0)
; RV32I-FAST-NEXT: ret
;
; RV64I-FAST-LABEL: store_large_constant:
diff --git a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
index 5a5ae66b5fa767..0ee067b673da9a 100644
--- a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
@@ -362,14 +362,14 @@ define void @test_urem_vec(ptr %X) nounwind {
; RV32-NEXT: addi s1, s1, -1
; RV32-NEXT: slli a1, s1, 21
; RV32-NEXT: srli a1, a1, 31
-; RV32-NEXT: sb a1, 4(s0)
-; RV32-NEXT: andi a1, s3, 2047
+; RV32-NEXT: andi a2, s3, 2047
; RV32-NEXT: andi a0, a0, 2047
; RV32-NEXT: slli a0, a0, 11
; RV32-NEXT: slli s1, s1, 22
; RV32-NEXT: or a0, a0, s1
-; RV32-NEXT: or a0, a1, a0
+; RV32-NEXT: or a0, a2, a0
; RV32-NEXT: sw a0, 0(s0)
+; RV32-NEXT: sb a1, 4(s0)
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -423,10 +423,10 @@ define void @test_urem_vec(ptr %X) nounwind {
; RV64-NEXT: slli a0, a0, 22
; RV64-NEXT: or a0, a2, a0
; RV64-NEXT: or a0, a1, a0
+; RV64-NEXT: slli a1, a0, 31
+; RV64-NEXT: srli a1, a1, 63
; RV64-NEXT: sw a0, 0(s0)
-; RV64-NEXT: slli a0, a0, 31
-; RV64-NEXT: srli a0, a0, 63
-; RV64-NEXT: sb a0, 4(s0)
+; RV64-NEXT: sb a1, 4(s0)
; RV64-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -467,7 +467,6 @@ define void @test_urem_vec(ptr %X) nounwind {
; RV32M-NEXT: addi a1, a1, -1
; RV32M-NEXT: slli a4, a1, 21
; RV32M-NEXT: srli a4, a4, 31
-; RV32M-NEXT: sb a4, 4(a0)
; RV32M-NEXT: andi a2, a2, 2047
; RV32M-NEXT: andi a3, a3, 2047
; RV32M-NEXT: slli a3, a3, 11
@@ -475,6 +474,7 @@ define void @test_urem_vec(ptr %X) nounwind {
; RV32M-NEXT: or a1, a3, a1
; RV32M-NEXT: or a1, a2, a1
; RV32M-NEXT: sw a1, 0(a0)
+; RV32M-NEXT: sb a4, 4(a0)
; RV32M-NEXT: ret
;
; RV64M-LABEL: test_urem_vec:
@@ -513,10 +513,10 @@ define void @test_urem_vec(ptr %X) nounwind {
; RV64M-NEXT: slli a2, a2, 22
; RV64M-NEXT: or a2, a3, a2
; RV64M-NEXT: or a1, a1, a2
+; RV64M-NEXT: slli a2, a1, 31
+; RV64M-NEXT: srli a2, a2, 63
; RV64M-NEXT: sw a1, 0(a0)
-; RV64M-NEXT: slli a1, a1, 31
-; RV64M-NEXT: srli a1, a1, 63
-; RV64M-NEXT: sb a1, 4(a0)
+; RV64M-NEXT: sb a2, 4(a0)
; RV64M-NEXT: ret
;
; RV32MV-LABEL: test_urem_vec:
@@ -568,17 +568,17 @@ define void @test_urem_vec(ptr %X) nounwind {
; RV32MV-NEXT: vmv.x.s a1, v9
; RV32MV-NEXT: slli a2, a1, 21
; RV32MV-NEXT: srli a2, a2, 31
-; RV32MV-NEXT: sb a2, 4(a0)
-; RV32MV-NEXT: vmv.x.s a2, v8
-; RV32MV-NEXT: andi a2, a2, 2047
+; RV32MV-NEXT: vmv.x.s a3, v8
+; RV32MV-NEXT: andi a3, a3, 2047
; RV32MV-NEXT: vslidedown.vi v8, v8, 1
+; RV32MV-NEXT: slli a1, a1, 22
+; RV32MV-NEXT: or a1, a3, a1
; RV32MV-NEXT: vmv.x.s a3, v8
; RV32MV-NEXT: andi a3, a3, 2047
; RV32MV-NEXT: slli a3, a3, 11
-; RV32MV-NEXT: slli a1, a1, 22
-; RV32MV-NEXT: or a1, a2, a1
; RV32MV-NEXT: or a1, a1, a3
; RV32MV-NEXT: sw a1, 0(a0)
+; RV32MV-NEXT: sb a2, 4(a0)
; RV32MV-NEXT: ret
;
; RV64MV-LABEL: test_urem_vec:
@@ -636,10 +636,10 @@ define void @test_urem_vec(ptr %X) nounwind {
; RV64MV-NEXT: slli a3, a3, 22
; RV64MV-NEXT: or a1, a1, a3
; RV64MV-NEXT: or a1, a1, a2
+; RV64MV-NEXT: slli a2, a1, 31
+; RV64MV-NEXT: srli a2, a2, 63
; RV64MV-NEXT: sw a1, 0(a0)
-; RV64MV-NEXT: slli a1, a1, 31
-; RV64MV-NEXT: srli a1, a1, 63
-; RV64MV-NEXT: sb a1, 4(a0)
+; RV64MV-NEXT: sb a2, 4(a0)
; RV64MV-NEXT: ret
%ld = load <3 x i11>, ptr %X
%urem = urem <3 x i11> %ld, <i11 6, i11 7, i11 -5>
diff --git a/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll b/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll
index b0e790ed606350..01f06474f78c23 100644
--- a/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll
+++ b/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll
@@ -39,10 +39,10 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) nounwind {
; RV32I-NEXT: li a1, 1003
; RV32I-NEXT: mv a0, s2
; RV32I-NEXT: call __umodsi3
-; RV32I-NEXT: sh a0, 6(s3)
-; RV32I-NEXT: sh s1, 4(s3)
-; RV32I-NEXT: sh s0, 2(s3)
; RV32I-NEXT: sh s4, 0(s3)
+; RV32I-NEXT: sh s0, 2(s3)
+; RV32I-NEXT: sh s1, 4(s3)
+; RV32I-NEXT: sh a0, 6(s3)
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -83,10 +83,10 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) nounwind {
; RV32IM-NEXT: li a6, 1003
; RV32IM-NEXT: mul a5, a5, a6
; RV32IM-NEXT: sub a1, a1, a5
-; RV32IM-NEXT: sh a1, 6(a0)
-; RV32IM-NEXT: sh a4, 4(a0)
; RV32IM-NEXT: sh a2, 0(a0)
; RV32IM-NEXT: sh a3, 2(a0)
+; RV32IM-NEXT: sh a4, 4(a0)
+; RV32IM-NEXT: sh a1, 6(a0)
; RV32IM-NEXT: ret
;
; RV64I-LABEL: fold_urem_vec_1:
@@ -118,10 +118,10 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) nounwind {
; RV64I-NEXT: li a1, 1003
; RV64I-NEXT: mv a0, s2
; RV64I-NEXT: call __umoddi3
-; RV64I-NEXT: sh a0, 6(s3)
-; RV64I-NEXT: sh s1, 4(s3)
-; RV64I-NEXT: sh s0, 2(s3)
; RV64I-NEXT: sh s4, 0(s3)
+; RV64I-NEXT: sh s0, 2(s3)
+; RV64I-NEXT: sh s1, 4(s3)
+; RV64I-NEXT: sh a0, 6(s3)
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -162,10 +162,10 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) nounwind {
; RV64IM-NEXT: li a6, 1003
; RV64IM-NEXT: mul a3, a3, a6
; RV64IM-NEXT: subw a1, a1, a3
-; RV64IM-NEXT: sh a1, 6(a0)
-; RV64IM-NEXT: sh a5, 4(a0)
; RV64IM-NEXT: sh a4, 0(a0)
; RV64IM-NEXT: sh a2, 2(a0)
+; RV64IM-NEXT: sh a5, 4(a0)
+; RV64IM-NEXT: sh a1, 6(a0)
; RV64IM-NEXT: ret
%1 = urem <4 x i16> %x, <i16 95, i16 124, i16 98, i16 1003>
ret <4 x i16> %1
@@ -201,10 +201,10 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) nounwind {
; RV32I-NEXT: li a1, 95
; RV32I-NEXT: mv a0, s2
; RV32I-NEXT: call __umodsi3
-; RV32I-NEXT: sh a0, 6(s3)
-; RV32I-NEXT: sh s1, 4(s3)
-; RV32I-NEXT: sh s0, 2(s3)
; RV32I-NEXT: sh s4, 0(s3)
+; RV32I-NEXT: sh s0, 2(s3)
+; RV32I-NEXT: sh s1, 4(s3)
+; RV32I-NEXT: sh a0, 6(s3)
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -235,10 +235,10 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) nounwind {
; RV32IM-NEXT: mulhu a5, a1, a5
; RV32IM-NEXT: mul a5, a5, a7
; RV32IM-NEXT: sub a1, a1, a5
-; RV32IM-NEXT: sh a1, 6(a0)
-; RV32IM-NEXT: sh a4, 4(a0)
-; RV32IM-NEXT: sh a3, 2(a0)
; RV32IM-NEXT: sh a2, 0(a0)
+; RV32IM-NEXT: sh a3, 2(a0)
+; RV32IM-NEXT: sh a4, 4(a0)
+; RV32IM-NEXT: sh a1, 6(a0)
; RV32IM-NEXT: ret
;
; RV64I-LABEL: fold_urem_vec_2:
@@ -270,10 +270,10 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) nounwind {
; RV64I-NEXT: li a1, 95
; RV64I-NEXT: mv a0, s2
; RV64I-NEXT: call __umoddi3
-; RV64I-NEXT: sh a0, 6(s3)
-; RV64I-NEXT: sh s1, 4(s3)
-; RV64I-NEXT: sh s0, 2(s3)
; RV64I-NEXT: sh s4, 0(s3)
+; RV64I-NEXT: sh s0, 2(s3)
+; RV64I-NEXT: sh s1, 4(s3)
+; RV64I-NEXT: sh a0, 6(s3)
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -304,10 +304,10 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) nounwind {
; RV64IM-NEXT: mulhu a2, a1, a2
; RV64IM-NEXT: mul a2, a2, a7
; RV64IM-NEXT: subw a1, a1, a2
-; RV64IM-NEXT: sh a1, 6(a0)
-; RV64IM-NEXT: sh a5, 4(a0)
-; RV64IM-NEXT: sh a4, 2(a0)
; RV64IM-NEXT: sh a3, 0(a0)
+; RV64IM-NEXT: sh a4, 2(a0)
+; RV64IM-NEXT: sh a5, 4(a0)
+; RV64IM-NEXT: sh a1, 6(a0)
; RV64IM-NEXT: ret
%1 = urem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
ret <4 x i16> %1
@@ -369,10 +369,10 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) nounwind {
; RV32I-NEXT: add s2, s7, s2
; RV32I-NEXT: add s3, s6, s3
; RV32I-NEXT: add s4, s5, s4
-; RV32I-NEXT: sh s4, 6(s0)
-; RV32I-NEXT: sh s3, 4(s0)
-; RV32I-NEXT: sh s2, 2(s0)
; RV32I-NEXT: sh a0, 0(s0)
+; RV32I-NEXT: sh s2, 2(s0)
+; RV32I-NEXT: sh s3, 4(s0)
+; RV32I-NEXT: sh s4, 6(s0)
; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
@@ -411,10 +411,10 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) nounwind {
; RV32IM-NEXT: sub a1, a1, t2
; RV32IM-NEXT: add a2, a2, a6
; RV32IM-NEXT: sub a2, a2, t0
-; RV32IM-NEXT: sh a2, 6(a0)
-; RV32IM-NEXT: sh a1, 4(a0)
-; RV32IM-NEXT: sh a4, 2(a0)
; RV32IM-NEXT: sh a3, 0(a0)
+; RV32IM-NEXT: sh a4, 2(a0)
+; RV32IM-NEXT: sh a1, 4(a0)
+; RV32IM-NEXT: sh a2, 6(a0)
; RV32IM-NEXT: ret
;
; RV64I-LABEL: combine_urem_udiv:
@@ -470,10 +470,10 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) nounwind {
; RV64I-NEXT: add s2, s7, s2
; RV64I-NEXT: add s3, s6, s3
; RV64I-NEXT: add s4, s5, s4
-; RV64I-NEXT: sh s4, 6(s0)
-; RV64I-NEXT: sh s3, 4(s0)
-; RV64I-NEXT: sh s2, 2(s0)
; RV64I-NEXT: sh a0, 0(s0)
+; RV64I-NEXT: sh s2, 2(s0)
+; RV64I-NEXT: sh s3, 4(s0)
+; RV64I-NEXT: sh s4, 6(s0)
; RV64I-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 56(sp) # 8-byte Folded Reload
@@ -512,10 +512,10 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) nounwind {
; RV64IM-NEXT: subw a1, a1, t2
; RV64IM-NEXT: add a2, a2, a6
; RV64IM-NEXT: subw a2, a2, t0
-; RV64IM-NEXT: sh a2, 6(a0)
-; RV64IM-NEXT: sh a1, 4(a0)
-; RV64IM-NEXT: sh a4, 2(a0)
; RV64IM-NEXT: sh a3, 0(a0)
+; RV64IM-NEXT: sh a4, 2(a0)
+; RV64IM-NEXT: sh a1, 4(a0)
+; RV64IM-NEXT: sh a2, 6(a0)
; RV64IM-NEXT: ret
%1 = urem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
%2 = udiv <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
@@ -544,10 +544,10 @@ define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) nounwind {
; RV32I-NEXT: andi a1, s1, 63
; RV32I-NEXT: andi a2, s2, 31
; RV32I-NEXT: andi a3, s3, 7
-; RV32I-NEXT: sh a0, 6(s0)
-; RV32I-NEXT: sh a3, 4(s0)
-; RV32I-NEXT: sh a2, 2(s0)
; RV32I-NEXT: sh a1, 0(s0)
+; RV32I-NEXT: sh a2, 2(s0)
+; RV32I-NEXT: sh a3, 4(s0)
+; RV32I-NEXT: sh a0, 6(s0)
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -571,9 +571,9 @@ define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) nounwind {
; RV32IM-NEXT: andi a1, a1, 63
; RV32IM-NEXT: andi a2, a2, 31
; RV32IM-NEXT: andi a3, a3, 7
-; RV32IM-NEXT: sh a3, 4(a0)
-; RV32IM-NEXT: sh a2, 2(a0)
; RV32IM-NEXT: sh a1, 0(a0)
+; RV32IM-NEXT: sh a2, 2(a0)
+; RV32IM-NEXT: sh a3, 4(a0)
; RV32IM-NEXT: sh a4, 6(a0)
; RV32IM-NEXT: ret
;
@@ -596,10 +596,10 @@ define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) nounwind {
; RV64I-NEXT: andi a1, s1, 63
; RV64I-NEXT: andi a2, s2, 31
; RV64I-NEXT: andi a3, s3, 7
-; RV64I-NEXT: sh a0, 6(s0)
-; RV64I-NEXT: sh a3, 4(s0)
-; RV64I-NEXT: sh a2, 2(s0)
; RV64I-NEXT: sh a1, 0(s0)
+; RV64I-NEXT: sh a2, 2(s0)
+; RV64I-NEXT: sh a3, 4(s0)
+; RV64I-NEXT: sh a0, 6(s0)
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -623,9 +623,9 @@ define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) nounwind {
; RV64IM-NEXT: andi a1, a1, 63
; RV64IM-NEXT: andi a5, a5, 31
; RV64IM-NEXT: andi a4, a4, 7
-; RV64IM-NEXT: sh a4, 4(a0)
-; RV64IM-NEXT: sh a5, 2(a0)
; RV64IM-NEXT: sh a1, 0(a0)
+; RV64IM-NEXT: sh a5, 2(a0)
+; RV64IM-NEXT: sh a4, 4(a0)
; RV64IM-NEXT: sh a2, 6(a0)
; RV64IM-NEXT: ret
%1 = urem <4 x i16> %x, <i16 64, i16 32, i16 8, i16 95>
@@ -658,10 +658,10 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) nounwind {
; RV32I-NEXT: addi a1, a0, 1327
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: call __umodsi3
-; RV32I-NEXT: sh a0, 6(s2)
-; RV32I-NEXT: sh s0, 4(s2)
-; RV32I-NEXT: sh s3, 2(s2)
; RV32I-NEXT: sh zero, 0(s2)
+; RV32I-NEXT: sh s3, 2(s2)
+; RV32I-NEXT: sh s0, 4(s2)
+; RV32I-NEXT: sh a0, 6(s2)
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -695,9 +695,9 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) nounwind {
; RV32IM-NEXT: mul a4, a4, a5
; RV32IM-NEXT: sub a1, a1, a4
; RV32IM-NEXT: sh zero, 0(a0)
-; RV32IM-NEXT: sh a1, 6(a0)
-; RV32IM-NEXT: sh a3, 4(a0)
; RV32IM-NEXT: sh a2, 2(a0)
+; RV32IM-NEXT: sh a3, 4(a0)
+; RV32IM-NEXT: sh a1, 6(a0)
; RV32IM-NEXT: ret
;
; RV64I-LABEL: dont_fold_urem_one:
@@ -724,10 +724,10 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) nounwind {
; RV64I-NEXT: addiw a1, a0, 1327
; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: call __umoddi3
-; RV64I-NEXT: sh a0, 6(s2)
-; RV64I-NEXT: sh s0, 4(s2)
-; RV64I-NEXT: sh s3, 2(s2)
; RV64I-NEXT: sh zero, 0(s2)
+; RV64I-NEXT: sh s3, 2(s2)
+; RV64I-NEXT: sh s0, 4(s2)
+; RV64I-NEXT: sh a0, 6(s2)
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -761,9 +761,9 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) nounwind {
; RV64IM-NEXT: mul a2, a2, a5
; RV64IM-NEXT: subw a1, a1, a2
; RV64IM-NEXT: sh zero, 0(a0)
-; RV64IM-NEXT: sh a1, 6(a0)
-; RV64IM-NEXT: sh a4, 4(a0)
; RV64IM-NEXT: sh a3, 2(a0)
+; RV64IM-NEXT: sh a4, 4(a0)
+; RV64IM-NEXT: sh a1, 6(a0)
; RV64IM-NEXT: ret
%1 = urem <4 x i16> %x, <i16 1, i16 654, i16 23, i16 5423>
ret <4 x i16> %1
@@ -793,15 +793,15 @@ define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) nounwind {
; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s8, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: lw s0, 16(a1)
-; RV32I-NEXT: lw s1, 20(a1)
-; RV32I-NEXT: lw s2, 24(a1)
-; RV32I-NEXT: lw s3, 28(a1)
+; RV32I-NEXT: lw s1, 16(a1)
+; RV32I-NEXT: lw s2, 20(a1)
+; RV32I-NEXT: lw s3, 24(a1)
+; RV32I-NEXT: lw s4, 28(a1)
; RV32I-NEXT: lw a3, 0(a1)
; RV32I-NEXT: lw a4, 4(a1)
-; RV32I-NEXT: lw s4, 8(a1)
-; RV32I-NEXT: lw s5, 12(a1)
-; RV32I-NEXT: mv s6, a0
+; RV32I-NEXT: lw s5, 8(a1)
+; RV32I-NEXT: lw s6, 12(a1)
+; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: li a2, 1
; RV32I-NEXT: mv a0, a3
; RV32I-NEXT: mv a1, a4
@@ -810,33 +810,33 @@ define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) nounwind {
; RV32I-NEXT: mv s7, a0
; RV32I-NEXT: mv s8, a1
; RV32I-NEXT: li a2, 654
-; RV32I-NEXT: mv a0, s4
-; RV32I-NEXT: mv a1, s5
+; RV32I-NEXT: mv a0, s5
+; RV32I-NEXT: mv a1, s6
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: call __umoddi3
-; RV32I-NEXT: mv s4, a0
-; RV32I-NEXT: mv s5, a1
+; RV32I-NEXT: mv s5, a0
+; RV32I-NEXT: mv s6, a1
; RV32I-NEXT: li a2, 23
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: mv a1, s1
+; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: mv a1, s2
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: call __umoddi3
-; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: mv s1, a1
+; RV32I-NEXT: mv s1, a0
+; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: lui a0, 1
; RV32I-NEXT: addi a2, a0, 1327
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: mv a1, s3
+; RV32I-NEXT: mv a0, s3
+; RV32I-NEXT: mv a1, s4
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: call __umoddi3
-; RV32I-NEXT: sw a1, 28(s6)
-; RV32I-NEXT: sw a0, 24(s6)
-; RV32I-NEXT: sw s1, 20(s6)
-; RV32I-NEXT: sw s0, 16(s6)
-; RV32I-NEXT: sw s5, 12(s6)
-; RV32I-NEXT: sw s4, 8(s6)
-; RV32I-NEXT: sw s8, 4(s6)
-; RV32I-NEXT: sw s7, 0(s6)
+; RV32I-NEXT: sw s1, 16(s0)
+; RV32I-NEXT: sw s2, 20(s0)
+; RV32I-NEXT: sw a0, 24(s0)
+; RV32I-NEXT: sw a1, 28(s0)
+; RV32I-NEXT: sw s7, 0(s0)
+; RV32I-NEXT: sw s8, 4(s0)
+; RV32I-NEXT: sw s5, 8(s0)
+; RV32I-NEXT: sw s6, 12(s0)
; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
@@ -863,15 +863,15 @@ define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) nounwind {
; RV32IM-NEXT: sw s6, 16(sp) # 4-byte Folded Spill
; RV32IM-NEXT: sw s7, 12(sp) # 4-byte Folded Spill
; RV32IM-NEXT: sw s8, 8(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw s0, 16(a1)
-; RV32IM-NEXT: lw s1, 20(a1)
-; RV32IM-NEXT: lw s2, 24(a1)
-; RV32IM-NEXT: lw s3, 28(a1)
+; RV32IM-NEXT: lw s1, 16(a1)
+; RV32IM-NEXT: lw s2, 20(a1)
+; RV32IM-NEXT: lw s3, 24(a1)
+; RV32IM-NEXT: lw s4, 28(a1)
; RV32IM-NEXT: lw a3, 0(a1)
; RV32IM-NEXT: lw a4, 4(a1)
-; RV32IM-NEXT: lw s4, 8(a1)
-; RV32IM-NEXT: lw s5, 12(a1)
-; RV32IM-NEXT: mv s6, a0
+; RV32IM-NEXT: lw s5, 8(a1)
+; RV32IM-NEXT: lw s6, 12(a1)
+; RV32IM-NEXT: mv s0, a0
; RV32IM-NEXT: li a2, 1
; RV32IM-NEXT: mv a0, a3
; RV32IM-NEXT: mv a1, a4
@@ -880,33 +880,33 @@ define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) nounwind {
; RV32IM-NEXT: mv s7, a0
; RV32IM-NEXT: mv s8, a1
; RV32IM-NEXT: li a2, 654
-; RV32IM-NEXT: mv a0, s4
-; RV32IM-NEXT: mv a1, s5
+; RV32IM-NEXT: mv a0, s5
+; RV32IM-NEXT: mv a1, s6
; RV32IM-NEXT: li a3, 0
; RV32IM-NEXT: call __umoddi3
-; RV32IM-NEXT: mv s4, a0
-; RV32IM-NEXT: mv s5, a1
+; RV32IM-NEXT: mv s5, a0
+; RV32IM-NEXT: mv s6, a1
; RV32IM-NEXT: li a2, 23
-; RV32IM-NEXT: mv a0, s0
-; RV32IM-NEXT: mv a1, s1
+; RV32IM-NEXT: mv a0, s1
+; RV32IM-NEXT: mv a1, s2
; RV32IM-NEXT: li a3, 0
; RV32IM-NEXT: call __umoddi3
-; RV32IM-NEXT: mv s0, a0
-; RV32IM-NEXT: mv s1, a1
+; RV32IM-NEXT: mv s1, a0
+; RV32IM-NEXT: mv s2, a1
; RV32IM-NEXT: lui a0, 1
; RV32IM-NEXT: addi a2, a0, 1327
-; RV32IM-NEXT: mv a0, s2
-; RV32IM-NEXT: mv a1, s3
+; RV32IM-NEXT: mv a0, s3
+; RV32IM-NEXT: mv a1, s4
; RV32IM-NEXT: li a3, 0
; RV32IM-NEXT: call __umoddi3
-; RV32IM-NEXT: sw a1, 28(s6)
-; RV32IM-NEXT: sw a0, 24(s6)
-; RV32IM-NEXT: sw s1, 20(s6)
-; RV32IM-NEXT: sw s0, 16(s6)
-; RV32IM-NEXT: sw s5, 12(s6)
-; RV32IM-NEXT: sw s4, 8(s6)
-; RV32IM-NEXT: sw s8, 4(s6)
-; RV32IM-NEXT: sw s7, 0(s6)
+; RV32IM-NEXT: sw s1, 16(s0)
+; RV32IM-NEXT: sw s2, 20(s0)
+; RV32IM-NEXT: sw a0, 24(s0)
+; RV32IM-NEXT: sw a1, 28(s0)
+; RV32IM-NEXT: sw s7, 0(s0)
+; RV32IM-NEXT: sw s8, 4(s0)
+; RV32IM-NEXT: sw s5, 8(s0)
+; RV32IM-NEXT: sw s6, 12(s0)
; RV32IM-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
; RV32IM-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
; RV32IM-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
@@ -944,10 +944,10 @@ define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) nounwind {
; RV64I-NEXT: addiw a1, a0, 1327
; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: call __umoddi3
-; RV64I-NEXT: sd a0, 24(s2)
-; RV64I-NEXT: sd s0, 16(s2)
-; RV64I-NEXT: sd s3, 8(s2)
; RV64I-NEXT: sd zero, 0(s2)
+; RV64I-NEXT: sd s3, 8(s2)
+; RV64I-NEXT: sd s0, 16(s2)
+; RV64I-NEXT: sd a0, 24(s2)
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -988,9 +988,9 @@ define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) nounwind {
; RV64IM-NEXT: mul a2, a2, a5
; RV64IM-NEXT: sub a1, a1, a2
; RV64IM-NEXT: sd zero, 0(a0)
-; RV64IM-NEXT: sd a1, 24(a0)
; RV64IM-NEXT: sd a4, 8(a0)
; RV64IM-NEXT: sd a3, 16(a0)
+; RV64IM-NEXT: sd a1, 24(a0)
; RV64IM-NEXT: ret
%1 = urem <4 x i64> %x, <i64 1, i64 654, i64 23, i64 5423>
ret <4 x i64> %1
diff --git a/llvm/test/CodeGen/RISCV/vararg-ilp32e.ll b/llvm/test/CodeGen/RISCV/vararg-ilp32e.ll
index 18bb4f5ad0f0c0..7c4c98a12f4b81 100644
--- a/llvm/test/CodeGen/RISCV/vararg-ilp32e.ll
+++ b/llvm/test/CodeGen/RISCV/vararg-ilp32e.ll
@@ -64,10 +64,10 @@ define void @va_double(i32 %n, ...) {
; ILP32E-NEXT: sw ra, 4(sp) # 4-byte Folded Spill
; ILP32E-NEXT: .cfi_offset ra, -28
; ILP32E-NEXT: sw a5, 28(sp)
-; ILP32E-NEXT: sw a4, 24(sp)
-; ILP32E-NEXT: sw a3, 20(sp)
-; ILP32E-NEXT: sw a2, 16(sp)
; ILP32E-NEXT: sw a1, 12(sp)
+; ILP32E-NEXT: sw a2, 16(sp)
+; ILP32E-NEXT: sw a3, 20(sp)
+; ILP32E-NEXT: sw a4, 24(sp)
; ILP32E-NEXT: addi a0, sp, 12
; ILP32E-NEXT: sw a0, 0(sp)
; ILP32E-NEXT: addi a0, sp, 19
@@ -98,10 +98,10 @@ define void @va_double(i32 %n, ...) {
; ILP32E-WITHFP-NEXT: addi s0, sp, 12
; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 24
; ILP32E-WITHFP-NEXT: sw a5, 20(s0)
-; ILP32E-WITHFP-NEXT: sw a4, 16(s0)
-; ILP32E-WITHFP-NEXT: sw a3, 12(s0)
-; ILP32E-WITHFP-NEXT: sw a2, 8(s0)
; ILP32E-WITHFP-NEXT: sw a1, 4(s0)
+; ILP32E-WITHFP-NEXT: sw a2, 8(s0)
+; ILP32E-WITHFP-NEXT: sw a3, 12(s0)
+; ILP32E-WITHFP-NEXT: sw a4, 16(s0)
; ILP32E-WITHFP-NEXT: addi a0, s0, 4
; ILP32E-WITHFP-NEXT: sw a0, -12(s0)
; ILP32E-WITHFP-NEXT: addi a0, s0, 11
diff --git a/llvm/test/CodeGen/RISCV/vararg.ll b/llvm/test/CodeGen/RISCV/vararg.ll
index d2c30c54390702..c53e5a78d1988c 100644
--- a/llvm/test/CodeGen/RISCV/vararg.ll
+++ b/llvm/test/CodeGen/RISCV/vararg.ll
@@ -55,13 +55,13 @@ define i32 @va1(ptr %fmt, ...) {
; ILP32-ILP32F-FPELIM-NEXT: addi sp, sp, -48
; ILP32-ILP32F-FPELIM-NEXT: .cfi_def_cfa_offset 48
; ILP32-ILP32F-FPELIM-NEXT: mv a0, a1
-; ILP32-ILP32F-FPELIM-NEXT: sw a7, 44(sp)
-; ILP32-ILP32F-FPELIM-NEXT: sw a6, 40(sp)
; ILP32-ILP32F-FPELIM-NEXT: sw a5, 36(sp)
-; ILP32-ILP32F-FPELIM-NEXT: sw a4, 32(sp)
-; ILP32-ILP32F-FPELIM-NEXT: sw a3, 28(sp)
-; ILP32-ILP32F-FPELIM-NEXT: sw a2, 24(sp)
+; ILP32-ILP32F-FPELIM-NEXT: sw a6, 40(sp)
+; ILP32-ILP32F-FPELIM-NEXT: sw a7, 44(sp)
; ILP32-ILP32F-FPELIM-NEXT: sw a1, 20(sp)
+; ILP32-ILP32F-FPELIM-NEXT: sw a2, 24(sp)
+; ILP32-ILP32F-FPELIM-NEXT: sw a3, 28(sp)
+; ILP32-ILP32F-FPELIM-NEXT: sw a4, 32(sp)
; ILP32-ILP32F-FPELIM-NEXT: addi a1, sp, 24
; ILP32-ILP32F-FPELIM-NEXT: sw a1, 12(sp)
; ILP32-ILP32F-FPELIM-NEXT: addi sp, sp, 48
@@ -78,13 +78,13 @@ define i32 @va1(ptr %fmt, ...) {
; ILP32-ILP32F-WITHFP-NEXT: addi s0, sp, 16
; ILP32-ILP32F-WITHFP-NEXT: .cfi_def_cfa s0, 32
; ILP32-ILP32F-WITHFP-NEXT: mv a0, a1
-; ILP32-ILP32F-WITHFP-NEXT: sw a7, 28(s0)
-; ILP32-ILP32F-WITHFP-NEXT: sw a6, 24(s0)
; ILP32-ILP32F-WITHFP-NEXT: sw a5, 20(s0)
-; ILP32-ILP32F-WITHFP-NEXT: sw a4, 16(s0)
-; ILP32-ILP32F-WITHFP-NEXT: sw a3, 12(s0)
-; ILP32-ILP32F-WITHFP-NEXT: sw a2, 8(s0)
+; ILP32-ILP32F-WITHFP-NEXT: sw a6, 24(s0)
+; ILP32-ILP32F-WITHFP-NEXT: sw a7, 28(s0)
; ILP32-ILP32F-WITHFP-NEXT: sw a1, 4(s0)
+; ILP32-ILP32F-WITHFP-NEXT: sw a2, 8(s0)
+; ILP32-ILP32F-WITHFP-NEXT: sw a3, 12(s0)
+; ILP32-ILP32F-WITHFP-NEXT: sw a4, 16(s0)
; ILP32-ILP32F-WITHFP-NEXT: addi a1, s0, 8
; ILP32-ILP32F-WITHFP-NEXT: sw a1, -12(s0)
; ILP32-ILP32F-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
@@ -97,13 +97,13 @@ define i32 @va1(ptr %fmt, ...) {
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, -48
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: .cfi_def_cfa_offset 48
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: mv a0, a1
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a7, 44(sp)
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a6, 40(sp)
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a5, 36(sp)
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a4, 32(sp)
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a3, 28(sp)
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a2, 24(sp)
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a6, 40(sp)
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a7, 44(sp)
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a1, 20(sp)
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a2, 24(sp)
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a3, 28(sp)
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a4, 32(sp)
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a1, sp, 24
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a1, 12(sp)
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 48
@@ -115,10 +115,10 @@ define i32 @va1(ptr %fmt, ...) {
; ILP32E-FPELIM-NEXT: .cfi_def_cfa_offset 28
; ILP32E-FPELIM-NEXT: mv a0, a1
; ILP32E-FPELIM-NEXT: sw a5, 24(sp)
-; ILP32E-FPELIM-NEXT: sw a4, 20(sp)
-; ILP32E-FPELIM-NEXT: sw a3, 16(sp)
-; ILP32E-FPELIM-NEXT: sw a2, 12(sp)
; ILP32E-FPELIM-NEXT: sw a1, 8(sp)
+; ILP32E-FPELIM-NEXT: sw a2, 12(sp)
+; ILP32E-FPELIM-NEXT: sw a3, 16(sp)
+; ILP32E-FPELIM-NEXT: sw a4, 20(sp)
; ILP32E-FPELIM-NEXT: addi a1, sp, 12
; ILP32E-FPELIM-NEXT: sw a1, 0(sp)
; ILP32E-FPELIM-NEXT: addi sp, sp, 28
@@ -136,10 +136,10 @@ define i32 @va1(ptr %fmt, ...) {
; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 24
; ILP32E-WITHFP-NEXT: mv a0, a1
; ILP32E-WITHFP-NEXT: sw a5, 20(s0)
-; ILP32E-WITHFP-NEXT: sw a4, 16(s0)
-; ILP32E-WITHFP-NEXT: sw a3, 12(s0)
-; ILP32E-WITHFP-NEXT: sw a2, 8(s0)
; ILP32E-WITHFP-NEXT: sw a1, 4(s0)
+; ILP32E-WITHFP-NEXT: sw a2, 8(s0)
+; ILP32E-WITHFP-NEXT: sw a3, 12(s0)
+; ILP32E-WITHFP-NEXT: sw a4, 16(s0)
; ILP32E-WITHFP-NEXT: addi a1, s0, 8
; ILP32E-WITHFP-NEXT: sw a1, -12(s0)
; ILP32E-WITHFP-NEXT: lw ra, 8(sp) # 4-byte Folded Reload
@@ -152,15 +152,15 @@ define i32 @va1(ptr %fmt, ...) {
; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -80
; LP64-LP64F-LP64D-FPELIM-NEXT: .cfi_def_cfa_offset 80
; LP64-LP64F-LP64D-FPELIM-NEXT: sd a1, 24(sp)
-; LP64-LP64F-LP64D-FPELIM-NEXT: sd a7, 72(sp)
-; LP64-LP64F-LP64D-FPELIM-NEXT: sd a6, 64(sp)
; LP64-LP64F-LP64D-FPELIM-NEXT: addi a0, sp, 28
; LP64-LP64F-LP64D-FPELIM-NEXT: sd a0, 8(sp)
; LP64-LP64F-LP64D-FPELIM-NEXT: lw a0, 24(sp)
; LP64-LP64F-LP64D-FPELIM-NEXT: sd a5, 56(sp)
-; LP64-LP64F-LP64D-FPELIM-NEXT: sd a4, 48(sp)
-; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 40(sp)
+; LP64-LP64F-LP64D-FPELIM-NEXT: sd a6, 64(sp)
+; LP64-LP64F-LP64D-FPELIM-NEXT: sd a7, 72(sp)
; LP64-LP64F-LP64D-FPELIM-NEXT: sd a2, 32(sp)
+; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 40(sp)
+; LP64-LP64F-LP64D-FPELIM-NEXT: sd a4, 48(sp)
; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, 80
; LP64-LP64F-LP64D-FPELIM-NEXT: ret
;
@@ -175,15 +175,15 @@ define i32 @va1(ptr %fmt, ...) {
; LP64-LP64F-LP64D-WITHFP-NEXT: addi s0, sp, 32
; LP64-LP64F-LP64D-WITHFP-NEXT: .cfi_def_cfa s0, 64
; LP64-LP64F-LP64D-WITHFP-NEXT: sd a1, 8(s0)
-; LP64-LP64F-LP64D-WITHFP-NEXT: sd a7, 56(s0)
-; LP64-LP64F-LP64D-WITHFP-NEXT: sd a6, 48(s0)
; LP64-LP64F-LP64D-WITHFP-NEXT: addi a0, s0, 12
; LP64-LP64F-LP64D-WITHFP-NEXT: sd a0, -24(s0)
; LP64-LP64F-LP64D-WITHFP-NEXT: lw a0, 8(s0)
; LP64-LP64F-LP64D-WITHFP-NEXT: sd a5, 40(s0)
-; LP64-LP64F-LP64D-WITHFP-NEXT: sd a4, 32(s0)
-; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, 24(s0)
+; LP64-LP64F-LP64D-WITHFP-NEXT: sd a6, 48(s0)
+; LP64-LP64F-LP64D-WITHFP-NEXT: sd a7, 56(s0)
; LP64-LP64F-LP64D-WITHFP-NEXT: sd a2, 16(s0)
+; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, 24(s0)
+; LP64-LP64F-LP64D-WITHFP-NEXT: sd a4, 32(s0)
; LP64-LP64F-LP64D-WITHFP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, 96
@@ -193,14 +193,14 @@ define i32 @va1(ptr %fmt, ...) {
; LP64E-FPELIM: # %bb.0:
; LP64E-FPELIM-NEXT: addi sp, sp, -56
; LP64E-FPELIM-NEXT: .cfi_def_cfa_offset 56
-; LP64E-FPELIM-NEXT: sd a1, 16(sp)
; LP64E-FPELIM-NEXT: addi a0, sp, 20
; LP64E-FPELIM-NEXT: sd a0, 0(sp)
+; LP64E-FPELIM-NEXT: sd a1, 16(sp)
; LP64E-FPELIM-NEXT: lw a0, 16(sp)
; LP64E-FPELIM-NEXT: sd a5, 48(sp)
-; LP64E-FPELIM-NEXT: sd a4, 40(sp)
-; LP64E-FPELIM-NEXT: sd a3, 32(sp)
; LP64E-FPELIM-NEXT: sd a2, 24(sp)
+; LP64E-FPELIM-NEXT: sd a3, 32(sp)
+; LP64E-FPELIM-NEXT: sd a4, 40(sp)
; LP64E-FPELIM-NEXT: addi sp, sp, 56
; LP64E-FPELIM-NEXT: ret
;
@@ -214,14 +214,14 @@ define i32 @va1(ptr %fmt, ...) {
; LP64E-WITHFP-NEXT: .cfi_offset s0, -64
; LP64E-WITHFP-NEXT: addi s0, sp, 24
; LP64E-WITHFP-NEXT: .cfi_def_cfa s0, 48
-; LP64E-WITHFP-NEXT: sd a1, 8(s0)
; LP64E-WITHFP-NEXT: addi a0, s0, 12
; LP64E-WITHFP-NEXT: sd a0, -24(s0)
+; LP64E-WITHFP-NEXT: sd a1, 8(s0)
; LP64E-WITHFP-NEXT: lw a0, 8(s0)
; LP64E-WITHFP-NEXT: sd a5, 40(s0)
-; LP64E-WITHFP-NEXT: sd a4, 32(s0)
-; LP64E-WITHFP-NEXT: sd a3, 24(s0)
; LP64E-WITHFP-NEXT: sd a2, 16(s0)
+; LP64E-WITHFP-NEXT: sd a3, 24(s0)
+; LP64E-WITHFP-NEXT: sd a4, 32(s0)
; LP64E-WITHFP-NEXT: ld ra, 16(sp) # 8-byte Folded Reload
; LP64E-WITHFP-NEXT: ld s0, 8(sp) # 8-byte Folded Reload
; LP64E-WITHFP-NEXT: addi sp, sp, 72
@@ -241,13 +241,13 @@ define i32 @va1_va_arg(ptr %fmt, ...) nounwind {
; ILP32-ILP32F-FPELIM: # %bb.0:
; ILP32-ILP32F-FPELIM-NEXT: addi sp, sp, -48
; ILP32-ILP32F-FPELIM-NEXT: mv a0, a1
-; ILP32-ILP32F-FPELIM-NEXT: sw a7, 44(sp)
-; ILP32-ILP32F-FPELIM-NEXT: sw a6, 40(sp)
; ILP32-ILP32F-FPELIM-NEXT: sw a5, 36(sp)
-; ILP32-ILP32F-FPELIM-NEXT: sw a4, 32(sp)
-; ILP32-ILP32F-FPELIM-NEXT: sw a3, 28(sp)
-; ILP32-ILP32F-FPELIM-NEXT: sw a2, 24(sp)
+; ILP32-ILP32F-FPELIM-NEXT: sw a6, 40(sp)
+; ILP32-ILP32F-FPELIM-NEXT: sw a7, 44(sp)
; ILP32-ILP32F-FPELIM-NEXT: sw a1, 20(sp)
+; ILP32-ILP32F-FPELIM-NEXT: sw a2, 24(sp)
+; ILP32-ILP32F-FPELIM-NEXT: sw a3, 28(sp)
+; ILP32-ILP32F-FPELIM-NEXT: sw a4, 32(sp)
; ILP32-ILP32F-FPELIM-NEXT: addi a1, sp, 24
; ILP32-ILP32F-FPELIM-NEXT: sw a1, 12(sp)
; ILP32-ILP32F-FPELIM-NEXT: addi sp, sp, 48
@@ -260,13 +260,13 @@ define i32 @va1_va_arg(ptr %fmt, ...) nounwind {
; ILP32-ILP32F-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; ILP32-ILP32F-WITHFP-NEXT: addi s0, sp, 16
; ILP32-ILP32F-WITHFP-NEXT: mv a0, a1
-; ILP32-ILP32F-WITHFP-NEXT: sw a7, 28(s0)
-; ILP32-ILP32F-WITHFP-NEXT: sw a6, 24(s0)
; ILP32-ILP32F-WITHFP-NEXT: sw a5, 20(s0)
-; ILP32-ILP32F-WITHFP-NEXT: sw a4, 16(s0)
-; ILP32-ILP32F-WITHFP-NEXT: sw a3, 12(s0)
-; ILP32-ILP32F-WITHFP-NEXT: sw a2, 8(s0)
+; ILP32-ILP32F-WITHFP-NEXT: sw a6, 24(s0)
+; ILP32-ILP32F-WITHFP-NEXT: sw a7, 28(s0)
; ILP32-ILP32F-WITHFP-NEXT: sw a1, 4(s0)
+; ILP32-ILP32F-WITHFP-NEXT: sw a2, 8(s0)
+; ILP32-ILP32F-WITHFP-NEXT: sw a3, 12(s0)
+; ILP32-ILP32F-WITHFP-NEXT: sw a4, 16(s0)
; ILP32-ILP32F-WITHFP-NEXT: addi a1, s0, 8
; ILP32-ILP32F-WITHFP-NEXT: sw a1, -12(s0)
; ILP32-ILP32F-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
@@ -278,13 +278,13 @@ define i32 @va1_va_arg(ptr %fmt, ...) nounwind {
; RV32D-ILP32-ILP32F-ILP32D-FPELIM: # %bb.0:
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, -48
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: mv a0, a1
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a7, 44(sp)
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a6, 40(sp)
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a5, 36(sp)
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a4, 32(sp)
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a3, 28(sp)
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a2, 24(sp)
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a6, 40(sp)
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a7, 44(sp)
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a1, 20(sp)
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a2, 24(sp)
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a3, 28(sp)
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a4, 32(sp)
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a1, sp, 24
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a1, 12(sp)
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 48
@@ -295,10 +295,10 @@ define i32 @va1_va_arg(ptr %fmt, ...) nounwind {
; ILP32E-FPELIM-NEXT: addi sp, sp, -28
; ILP32E-FPELIM-NEXT: mv a0, a1
; ILP32E-FPELIM-NEXT: sw a5, 24(sp)
-; ILP32E-FPELIM-NEXT: sw a4, 20(sp)
-; ILP32E-FPELIM-NEXT: sw a3, 16(sp)
-; ILP32E-FPELIM-NEXT: sw a2, 12(sp)
; ILP32E-FPELIM-NEXT: sw a1, 8(sp)
+; ILP32E-FPELIM-NEXT: sw a2, 12(sp)
+; ILP32E-FPELIM-NEXT: sw a3, 16(sp)
+; ILP32E-FPELIM-NEXT: sw a4, 20(sp)
; ILP32E-FPELIM-NEXT: addi a1, sp, 12
; ILP32E-FPELIM-NEXT: sw a1, 0(sp)
; ILP32E-FPELIM-NEXT: addi sp, sp, 28
@@ -312,10 +312,10 @@ define i32 @va1_va_arg(ptr %fmt, ...) nounwind {
; ILP32E-WITHFP-NEXT: addi s0, sp, 12
; ILP32E-WITHFP-NEXT: mv a0, a1
; ILP32E-WITHFP-NEXT: sw a5, 20(s0)
-; ILP32E-WITHFP-NEXT: sw a4, 16(s0)
-; ILP32E-WITHFP-NEXT: sw a3, 12(s0)
-; ILP32E-WITHFP-NEXT: sw a2, 8(s0)
; ILP32E-WITHFP-NEXT: sw a1, 4(s0)
+; ILP32E-WITHFP-NEXT: sw a2, 8(s0)
+; ILP32E-WITHFP-NEXT: sw a3, 12(s0)
+; ILP32E-WITHFP-NEXT: sw a4, 16(s0)
; ILP32E-WITHFP-NEXT: addi a1, s0, 8
; ILP32E-WITHFP-NEXT: sw a1, -12(s0)
; ILP32E-WITHFP-NEXT: lw ra, 8(sp) # 4-byte Folded Reload
@@ -327,13 +327,13 @@ define i32 @va1_va_arg(ptr %fmt, ...) nounwind {
; LP64-LP64F-LP64D-FPELIM: # %bb.0:
; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -80
; LP64-LP64F-LP64D-FPELIM-NEXT: mv a0, a1
-; LP64-LP64F-LP64D-FPELIM-NEXT: sd a7, 72(sp)
-; LP64-LP64F-LP64D-FPELIM-NEXT: sd a6, 64(sp)
; LP64-LP64F-LP64D-FPELIM-NEXT: sd a5, 56(sp)
-; LP64-LP64F-LP64D-FPELIM-NEXT: sd a4, 48(sp)
-; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 40(sp)
-; LP64-LP64F-LP64D-FPELIM-NEXT: sd a2, 32(sp)
+; LP64-LP64F-LP64D-FPELIM-NEXT: sd a6, 64(sp)
+; LP64-LP64F-LP64D-FPELIM-NEXT: sd a7, 72(sp)
; LP64-LP64F-LP64D-FPELIM-NEXT: sd a1, 24(sp)
+; LP64-LP64F-LP64D-FPELIM-NEXT: sd a2, 32(sp)
+; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 40(sp)
+; LP64-LP64F-LP64D-FPELIM-NEXT: sd a4, 48(sp)
; LP64-LP64F-LP64D-FPELIM-NEXT: addi a1, sp, 32
; LP64-LP64F-LP64D-FPELIM-NEXT: sd a1, 8(sp)
; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, 80
@@ -346,13 +346,13 @@ define i32 @va1_va_arg(ptr %fmt, ...) nounwind {
; LP64-LP64F-LP64D-WITHFP-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; LP64-LP64F-LP64D-WITHFP-NEXT: addi s0, sp, 32
; LP64-LP64F-LP64D-WITHFP-NEXT: mv a0, a1
-; LP64-LP64F-LP64D-WITHFP-NEXT: sd a7, 56(s0)
-; LP64-LP64F-LP64D-WITHFP-NEXT: sd a6, 48(s0)
; LP64-LP64F-LP64D-WITHFP-NEXT: sd a5, 40(s0)
-; LP64-LP64F-LP64D-WITHFP-NEXT: sd a4, 32(s0)
-; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, 24(s0)
-; LP64-LP64F-LP64D-WITHFP-NEXT: sd a2, 16(s0)
+; LP64-LP64F-LP64D-WITHFP-NEXT: sd a6, 48(s0)
+; LP64-LP64F-LP64D-WITHFP-NEXT: sd a7, 56(s0)
; LP64-LP64F-LP64D-WITHFP-NEXT: sd a1, 8(s0)
+; LP64-LP64F-LP64D-WITHFP-NEXT: sd a2, 16(s0)
+; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, 24(s0)
+; LP64-LP64F-LP64D-WITHFP-NEXT: sd a4, 32(s0)
; LP64-LP64F-LP64D-WITHFP-NEXT: addi a1, s0, 16
; LP64-LP64F-LP64D-WITHFP-NEXT: sd a1, -24(s0)
; LP64-LP64F-LP64D-WITHFP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
@@ -365,10 +365,10 @@ define i32 @va1_va_arg(ptr %fmt, ...) nounwind {
; LP64E-FPELIM-NEXT: addi sp, sp, -56
; LP64E-FPELIM-NEXT: mv a0, a1
; LP64E-FPELIM-NEXT: sd a5, 48(sp)
-; LP64E-FPELIM-NEXT: sd a4, 40(sp)
-; LP64E-FPELIM-NEXT: sd a3, 32(sp)
-; LP64E-FPELIM-NEXT: sd a2, 24(sp)
; LP64E-FPELIM-NEXT: sd a1, 16(sp)
+; LP64E-FPELIM-NEXT: sd a2, 24(sp)
+; LP64E-FPELIM-NEXT: sd a3, 32(sp)
+; LP64E-FPELIM-NEXT: sd a4, 40(sp)
; LP64E-FPELIM-NEXT: addi a1, sp, 24
; LP64E-FPELIM-NEXT: sd a1, 0(sp)
; LP64E-FPELIM-NEXT: addi sp, sp, 56
@@ -382,10 +382,10 @@ define i32 @va1_va_arg(ptr %fmt, ...) nounwind {
; LP64E-WITHFP-NEXT: addi s0, sp, 24
; LP64E-WITHFP-NEXT: mv a0, a1
; LP64E-WITHFP-NEXT: sd a5, 40(s0)
-; LP64E-WITHFP-NEXT: sd a4, 32(s0)
-; LP64E-WITHFP-NEXT: sd a3, 24(s0)
-; LP64E-WITHFP-NEXT: sd a2, 16(s0)
; LP64E-WITHFP-NEXT: sd a1, 8(s0)
+; LP64E-WITHFP-NEXT: sd a2, 16(s0)
+; LP64E-WITHFP-NEXT: sd a3, 24(s0)
+; LP64E-WITHFP-NEXT: sd a4, 32(s0)
; LP64E-WITHFP-NEXT: addi a1, s0, 16
; LP64E-WITHFP-NEXT: sd a1, -24(s0)
; LP64E-WITHFP-NEXT: ld ra, 16(sp) # 8-byte Folded Reload
@@ -410,13 +410,13 @@ define i32 @va1_va_arg_alloca(ptr %fmt, ...) nounwind {
; ILP32-ILP32F-FPELIM-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; ILP32-ILP32F-FPELIM-NEXT: addi s0, sp, 16
; ILP32-ILP32F-FPELIM-NEXT: mv s1, a1
-; ILP32-ILP32F-FPELIM-NEXT: sw a7, 28(s0)
-; ILP32-ILP32F-FPELIM-NEXT: sw a6, 24(s0)
; ILP32-ILP32F-FPELIM-NEXT: sw a5, 20(s0)
-; ILP32-ILP32F-FPELIM-NEXT: sw a4, 16(s0)
-; ILP32-ILP32F-FPELIM-NEXT: sw a3, 12(s0)
-; ILP32-ILP32F-FPELIM-NEXT: sw a2, 8(s0)
+; ILP32-ILP32F-FPELIM-NEXT: sw a6, 24(s0)
+; ILP32-ILP32F-FPELIM-NEXT: sw a7, 28(s0)
; ILP32-ILP32F-FPELIM-NEXT: sw a1, 4(s0)
+; ILP32-ILP32F-FPELIM-NEXT: sw a2, 8(s0)
+; ILP32-ILP32F-FPELIM-NEXT: sw a3, 12(s0)
+; ILP32-ILP32F-FPELIM-NEXT: sw a4, 16(s0)
; ILP32-ILP32F-FPELIM-NEXT: addi a0, s0, 8
; ILP32-ILP32F-FPELIM-NEXT: sw a0, -16(s0)
; ILP32-ILP32F-FPELIM-NEXT: addi a0, a1, 15
@@ -440,13 +440,13 @@ define i32 @va1_va_arg_alloca(ptr %fmt, ...) nounwind {
; ILP32-ILP32F-WITHFP-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; ILP32-ILP32F-WITHFP-NEXT: addi s0, sp, 16
; ILP32-ILP32F-WITHFP-NEXT: mv s1, a1
-; ILP32-ILP32F-WITHFP-NEXT: sw a7, 28(s0)
-; ILP32-ILP32F-WITHFP-NEXT: sw a6, 24(s0)
; ILP32-ILP32F-WITHFP-NEXT: sw a5, 20(s0)
-; ILP32-ILP32F-WITHFP-NEXT: sw a4, 16(s0)
-; ILP32-ILP32F-WITHFP-NEXT: sw a3, 12(s0)
-; ILP32-ILP32F-WITHFP-NEXT: sw a2, 8(s0)
+; ILP32-ILP32F-WITHFP-NEXT: sw a6, 24(s0)
+; ILP32-ILP32F-WITHFP-NEXT: sw a7, 28(s0)
; ILP32-ILP32F-WITHFP-NEXT: sw a1, 4(s0)
+; ILP32-ILP32F-WITHFP-NEXT: sw a2, 8(s0)
+; ILP32-ILP32F-WITHFP-NEXT: sw a3, 12(s0)
+; ILP32-ILP32F-WITHFP-NEXT: sw a4, 16(s0)
; ILP32-ILP32F-WITHFP-NEXT: addi a0, s0, 8
; ILP32-ILP32F-WITHFP-NEXT: sw a0, -16(s0)
; ILP32-ILP32F-WITHFP-NEXT: addi a0, a1, 15
@@ -470,13 +470,13 @@ define i32 @va1_va_arg_alloca(ptr %fmt, ...) nounwind {
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi s0, sp, 16
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: mv s1, a1
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a7, 28(s0)
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a6, 24(s0)
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a5, 20(s0)
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a4, 16(s0)
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a3, 12(s0)
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a2, 8(s0)
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a6, 24(s0)
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a7, 28(s0)
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a1, 4(s0)
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a2, 8(s0)
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a3, 12(s0)
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a4, 16(s0)
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a0, s0, 8
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a0, -16(s0)
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a0, a1, 15
@@ -501,10 +501,10 @@ define i32 @va1_va_arg_alloca(ptr %fmt, ...) nounwind {
; ILP32E-FPELIM-NEXT: addi s0, sp, 16
; ILP32E-FPELIM-NEXT: mv s1, a1
; ILP32E-FPELIM-NEXT: sw a5, 20(s0)
-; ILP32E-FPELIM-NEXT: sw a4, 16(s0)
-; ILP32E-FPELIM-NEXT: sw a3, 12(s0)
-; ILP32E-FPELIM-NEXT: sw a2, 8(s0)
; ILP32E-FPELIM-NEXT: sw a1, 4(s0)
+; ILP32E-FPELIM-NEXT: sw a2, 8(s0)
+; ILP32E-FPELIM-NEXT: sw a3, 12(s0)
+; ILP32E-FPELIM-NEXT: sw a4, 16(s0)
; ILP32E-FPELIM-NEXT: addi a0, s0, 8
; ILP32E-FPELIM-NEXT: sw a0, -16(s0)
; ILP32E-FPELIM-NEXT: addi a0, a1, 3
@@ -529,10 +529,10 @@ define i32 @va1_va_arg_alloca(ptr %fmt, ...) nounwind {
; ILP32E-WITHFP-NEXT: addi s0, sp, 16
; ILP32E-WITHFP-NEXT: mv s1, a1
; ILP32E-WITHFP-NEXT: sw a5, 20(s0)
-; ILP32E-WITHFP-NEXT: sw a4, 16(s0)
-; ILP32E-WITHFP-NEXT: sw a3, 12(s0)
-; ILP32E-WITHFP-NEXT: sw a2, 8(s0)
; ILP32E-WITHFP-NEXT: sw a1, 4(s0)
+; ILP32E-WITHFP-NEXT: sw a2, 8(s0)
+; ILP32E-WITHFP-NEXT: sw a3, 12(s0)
+; ILP32E-WITHFP-NEXT: sw a4, 16(s0)
; ILP32E-WITHFP-NEXT: addi a0, s0, 8
; ILP32E-WITHFP-NEXT: sw a0, -16(s0)
; ILP32E-WITHFP-NEXT: addi a0, a1, 3
@@ -556,13 +556,13 @@ define i32 @va1_va_arg_alloca(ptr %fmt, ...) nounwind {
; LP64-LP64F-LP64D-FPELIM-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
; LP64-LP64F-LP64D-FPELIM-NEXT: addi s0, sp, 32
; LP64-LP64F-LP64D-FPELIM-NEXT: mv s1, a1
-; LP64-LP64F-LP64D-FPELIM-NEXT: sd a7, 56(s0)
-; LP64-LP64F-LP64D-FPELIM-NEXT: sd a6, 48(s0)
; LP64-LP64F-LP64D-FPELIM-NEXT: sd a5, 40(s0)
-; LP64-LP64F-LP64D-FPELIM-NEXT: sd a4, 32(s0)
-; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 24(s0)
-; LP64-LP64F-LP64D-FPELIM-NEXT: sd a2, 16(s0)
+; LP64-LP64F-LP64D-FPELIM-NEXT: sd a6, 48(s0)
+; LP64-LP64F-LP64D-FPELIM-NEXT: sd a7, 56(s0)
; LP64-LP64F-LP64D-FPELIM-NEXT: sd a1, 8(s0)
+; LP64-LP64F-LP64D-FPELIM-NEXT: sd a2, 16(s0)
+; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 24(s0)
+; LP64-LP64F-LP64D-FPELIM-NEXT: sd a4, 32(s0)
; LP64-LP64F-LP64D-FPELIM-NEXT: addi a0, s0, 16
; LP64-LP64F-LP64D-FPELIM-NEXT: sd a0, -32(s0)
; LP64-LP64F-LP64D-FPELIM-NEXT: slli a0, a1, 32
@@ -588,13 +588,13 @@ define i32 @va1_va_arg_alloca(ptr %fmt, ...) nounwind {
; LP64-LP64F-LP64D-WITHFP-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
; LP64-LP64F-LP64D-WITHFP-NEXT: addi s0, sp, 32
; LP64-LP64F-LP64D-WITHFP-NEXT: mv s1, a1
-; LP64-LP64F-LP64D-WITHFP-NEXT: sd a7, 56(s0)
-; LP64-LP64F-LP64D-WITHFP-NEXT: sd a6, 48(s0)
; LP64-LP64F-LP64D-WITHFP-NEXT: sd a5, 40(s0)
-; LP64-LP64F-LP64D-WITHFP-NEXT: sd a4, 32(s0)
-; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, 24(s0)
-; LP64-LP64F-LP64D-WITHFP-NEXT: sd a2, 16(s0)
+; LP64-LP64F-LP64D-WITHFP-NEXT: sd a6, 48(s0)
+; LP64-LP64F-LP64D-WITHFP-NEXT: sd a7, 56(s0)
; LP64-LP64F-LP64D-WITHFP-NEXT: sd a1, 8(s0)
+; LP64-LP64F-LP64D-WITHFP-NEXT: sd a2, 16(s0)
+; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, 24(s0)
+; LP64-LP64F-LP64D-WITHFP-NEXT: sd a4, 32(s0)
; LP64-LP64F-LP64D-WITHFP-NEXT: addi a0, s0, 16
; LP64-LP64F-LP64D-WITHFP-NEXT: sd a0, -32(s0)
; LP64-LP64F-LP64D-WITHFP-NEXT: slli a0, a1, 32
@@ -621,10 +621,10 @@ define i32 @va1_va_arg_alloca(ptr %fmt, ...) nounwind {
; LP64E-FPELIM-NEXT: addi s0, sp, 32
; LP64E-FPELIM-NEXT: mv s1, a1
; LP64E-FPELIM-NEXT: sd a5, 40(s0)
-; LP64E-FPELIM-NEXT: sd a4, 32(s0)
-; LP64E-FPELIM-NEXT: sd a3, 24(s0)
-; LP64E-FPELIM-NEXT: sd a2, 16(s0)
; LP64E-FPELIM-NEXT: sd a1, 8(s0)
+; LP64E-FPELIM-NEXT: sd a2, 16(s0)
+; LP64E-FPELIM-NEXT: sd a3, 24(s0)
+; LP64E-FPELIM-NEXT: sd a4, 32(s0)
; LP64E-FPELIM-NEXT: addi a0, s0, 16
; LP64E-FPELIM-NEXT: sd a0, -32(s0)
; LP64E-FPELIM-NEXT: slli a0, a1, 32
@@ -651,10 +651,10 @@ define i32 @va1_va_arg_alloca(ptr %fmt, ...) nounwind {
; LP64E-WITHFP-NEXT: addi s0, sp, 32
; LP64E-WITHFP-NEXT: mv s1, a1
; LP64E-WITHFP-NEXT: sd a5, 40(s0)
-; LP64E-WITHFP-NEXT: sd a4, 32(s0)
-; LP64E-WITHFP-NEXT: sd a3, 24(s0)
-; LP64E-WITHFP-NEXT: sd a2, 16(s0)
; LP64E-WITHFP-NEXT: sd a1, 8(s0)
+; LP64E-WITHFP-NEXT: sd a2, 16(s0)
+; LP64E-WITHFP-NEXT: sd a3, 24(s0)
+; LP64E-WITHFP-NEXT: sd a4, 32(s0)
; LP64E-WITHFP-NEXT: addi a0, s0, 16
; LP64E-WITHFP-NEXT: sd a0, -32(s0)
; LP64E-WITHFP-NEXT: slli a0, a1, 32
@@ -812,13 +812,13 @@ define i64 @va2(ptr %fmt, ...) nounwind {
; ILP32-ILP32F-FPELIM-LABEL: va2:
; ILP32-ILP32F-FPELIM: # %bb.0:
; ILP32-ILP32F-FPELIM-NEXT: addi sp, sp, -48
-; ILP32-ILP32F-FPELIM-NEXT: sw a7, 44(sp)
-; ILP32-ILP32F-FPELIM-NEXT: sw a6, 40(sp)
; ILP32-ILP32F-FPELIM-NEXT: sw a5, 36(sp)
-; ILP32-ILP32F-FPELIM-NEXT: sw a4, 32(sp)
-; ILP32-ILP32F-FPELIM-NEXT: sw a3, 28(sp)
-; ILP32-ILP32F-FPELIM-NEXT: sw a2, 24(sp)
+; ILP32-ILP32F-FPELIM-NEXT: sw a6, 40(sp)
+; ILP32-ILP32F-FPELIM-NEXT: sw a7, 44(sp)
; ILP32-ILP32F-FPELIM-NEXT: sw a1, 20(sp)
+; ILP32-ILP32F-FPELIM-NEXT: sw a2, 24(sp)
+; ILP32-ILP32F-FPELIM-NEXT: sw a3, 28(sp)
+; ILP32-ILP32F-FPELIM-NEXT: sw a4, 32(sp)
; ILP32-ILP32F-FPELIM-NEXT: addi a0, sp, 20
; ILP32-ILP32F-FPELIM-NEXT: sw a0, 12(sp)
; ILP32-ILP32F-FPELIM-NEXT: addi a0, sp, 27
@@ -836,13 +836,13 @@ define i64 @va2(ptr %fmt, ...) nounwind {
; ILP32-ILP32F-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; ILP32-ILP32F-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; ILP32-ILP32F-WITHFP-NEXT: addi s0, sp, 16
-; ILP32-ILP32F-WITHFP-NEXT: sw a7, 28(s0)
-; ILP32-ILP32F-WITHFP-NEXT: sw a6, 24(s0)
; ILP32-ILP32F-WITHFP-NEXT: sw a5, 20(s0)
-; ILP32-ILP32F-WITHFP-NEXT: sw a4, 16(s0)
-; ILP32-ILP32F-WITHFP-NEXT: sw a3, 12(s0)
-; ILP32-ILP32F-WITHFP-NEXT: sw a2, 8(s0)
+; ILP32-ILP32F-WITHFP-NEXT: sw a6, 24(s0)
+; ILP32-ILP32F-WITHFP-NEXT: sw a7, 28(s0)
; ILP32-ILP32F-WITHFP-NEXT: sw a1, 4(s0)
+; ILP32-ILP32F-WITHFP-NEXT: sw a2, 8(s0)
+; ILP32-ILP32F-WITHFP-NEXT: sw a3, 12(s0)
+; ILP32-ILP32F-WITHFP-NEXT: sw a4, 16(s0)
; ILP32-ILP32F-WITHFP-NEXT: addi a0, s0, 4
; ILP32-ILP32F-WITHFP-NEXT: sw a0, -12(s0)
; ILP32-ILP32F-WITHFP-NEXT: addi a0, s0, 11
@@ -859,13 +859,13 @@ define i64 @va2(ptr %fmt, ...) nounwind {
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-LABEL: va2:
; RV32D-ILP32-ILP32F-ILP32D-FPELIM: # %bb.0:
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, -48
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a7, 44(sp)
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a6, 40(sp)
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a5, 36(sp)
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a4, 32(sp)
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a3, 28(sp)
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a2, 24(sp)
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a6, 40(sp)
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a7, 44(sp)
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a1, 20(sp)
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a2, 24(sp)
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a3, 28(sp)
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a4, 32(sp)
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a0, sp, 20
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a0, 12(sp)
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a0, sp, 27
@@ -881,10 +881,10 @@ define i64 @va2(ptr %fmt, ...) nounwind {
; ILP32E-FPELIM: # %bb.0:
; ILP32E-FPELIM-NEXT: addi sp, sp, -28
; ILP32E-FPELIM-NEXT: sw a5, 24(sp)
-; ILP32E-FPELIM-NEXT: sw a4, 20(sp)
-; ILP32E-FPELIM-NEXT: sw a3, 16(sp)
-; ILP32E-FPELIM-NEXT: sw a2, 12(sp)
; ILP32E-FPELIM-NEXT: sw a1, 8(sp)
+; ILP32E-FPELIM-NEXT: sw a2, 12(sp)
+; ILP32E-FPELIM-NEXT: sw a3, 16(sp)
+; ILP32E-FPELIM-NEXT: sw a4, 20(sp)
; ILP32E-FPELIM-NEXT: addi a0, sp, 8
; ILP32E-FPELIM-NEXT: sw a0, 0(sp)
; ILP32E-FPELIM-NEXT: addi a0, sp, 15
@@ -903,10 +903,10 @@ define i64 @va2(ptr %fmt, ...) nounwind {
; ILP32E-WITHFP-NEXT: sw s0, 4(sp) # 4-byte Folded Spill
; ILP32E-WITHFP-NEXT: addi s0, sp, 12
; ILP32E-WITHFP-NEXT: sw a5, 20(s0)
-; ILP32E-WITHFP-NEXT: sw a4, 16(s0)
-; ILP32E-WITHFP-NEXT: sw a3, 12(s0)
-; ILP32E-WITHFP-NEXT: sw a2, 8(s0)
; ILP32E-WITHFP-NEXT: sw a1, 4(s0)
+; ILP32E-WITHFP-NEXT: sw a2, 8(s0)
+; ILP32E-WITHFP-NEXT: sw a3, 12(s0)
+; ILP32E-WITHFP-NEXT: sw a4, 16(s0)
; ILP32E-WITHFP-NEXT: addi a0, s0, 4
; ILP32E-WITHFP-NEXT: sw a0, -12(s0)
; ILP32E-WITHFP-NEXT: addi a0, s0, 11
@@ -924,13 +924,13 @@ define i64 @va2(ptr %fmt, ...) nounwind {
; LP64-LP64F-LP64D-FPELIM: # %bb.0:
; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -80
; LP64-LP64F-LP64D-FPELIM-NEXT: mv a0, a1
-; LP64-LP64F-LP64D-FPELIM-NEXT: sd a7, 72(sp)
-; LP64-LP64F-LP64D-FPELIM-NEXT: sd a6, 64(sp)
; LP64-LP64F-LP64D-FPELIM-NEXT: sd a5, 56(sp)
-; LP64-LP64F-LP64D-FPELIM-NEXT: sd a4, 48(sp)
-; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 40(sp)
-; LP64-LP64F-LP64D-FPELIM-NEXT: sd a2, 32(sp)
+; LP64-LP64F-LP64D-FPELIM-NEXT: sd a6, 64(sp)
+; LP64-LP64F-LP64D-FPELIM-NEXT: sd a7, 72(sp)
; LP64-LP64F-LP64D-FPELIM-NEXT: sd a1, 24(sp)
+; LP64-LP64F-LP64D-FPELIM-NEXT: sd a2, 32(sp)
+; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 40(sp)
+; LP64-LP64F-LP64D-FPELIM-NEXT: sd a4, 48(sp)
; LP64-LP64F-LP64D-FPELIM-NEXT: addi a1, sp, 39
; LP64-LP64F-LP64D-FPELIM-NEXT: sd a1, 8(sp)
; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, 80
@@ -943,13 +943,13 @@ define i64 @va2(ptr %fmt, ...) nounwind {
; LP64-LP64F-LP64D-WITHFP-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; LP64-LP64F-LP64D-WITHFP-NEXT: addi s0, sp, 32
; LP64-LP64F-LP64D-WITHFP-NEXT: mv a0, a1
-; LP64-LP64F-LP64D-WITHFP-NEXT: sd a7, 56(s0)
-; LP64-LP64F-LP64D-WITHFP-NEXT: sd a6, 48(s0)
; LP64-LP64F-LP64D-WITHFP-NEXT: sd a5, 40(s0)
-; LP64-LP64F-LP64D-WITHFP-NEXT: sd a4, 32(s0)
-; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, 24(s0)
-; LP64-LP64F-LP64D-WITHFP-NEXT: sd a2, 16(s0)
+; LP64-LP64F-LP64D-WITHFP-NEXT: sd a6, 48(s0)
+; LP64-LP64F-LP64D-WITHFP-NEXT: sd a7, 56(s0)
; LP64-LP64F-LP64D-WITHFP-NEXT: sd a1, 8(s0)
+; LP64-LP64F-LP64D-WITHFP-NEXT: sd a2, 16(s0)
+; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, 24(s0)
+; LP64-LP64F-LP64D-WITHFP-NEXT: sd a4, 32(s0)
; LP64-LP64F-LP64D-WITHFP-NEXT: addi a1, s0, 23
; LP64-LP64F-LP64D-WITHFP-NEXT: sd a1, -24(s0)
; LP64-LP64F-LP64D-WITHFP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
@@ -962,10 +962,10 @@ define i64 @va2(ptr %fmt, ...) nounwind {
; LP64E-FPELIM-NEXT: addi sp, sp, -56
; LP64E-FPELIM-NEXT: mv a0, a1
; LP64E-FPELIM-NEXT: sd a5, 48(sp)
-; LP64E-FPELIM-NEXT: sd a4, 40(sp)
-; LP64E-FPELIM-NEXT: sd a3, 32(sp)
-; LP64E-FPELIM-NEXT: sd a2, 24(sp)
; LP64E-FPELIM-NEXT: sd a1, 16(sp)
+; LP64E-FPELIM-NEXT: sd a2, 24(sp)
+; LP64E-FPELIM-NEXT: sd a3, 32(sp)
+; LP64E-FPELIM-NEXT: sd a4, 40(sp)
; LP64E-FPELIM-NEXT: addi a1, sp, 31
; LP64E-FPELIM-NEXT: sd a1, 0(sp)
; LP64E-FPELIM-NEXT: addi sp, sp, 56
@@ -979,10 +979,10 @@ define i64 @va2(ptr %fmt, ...) nounwind {
; LP64E-WITHFP-NEXT: addi s0, sp, 24
; LP64E-WITHFP-NEXT: mv a0, a1
; LP64E-WITHFP-NEXT: sd a5, 40(s0)
-; LP64E-WITHFP-NEXT: sd a4, 32(s0)
-; LP64E-WITHFP-NEXT: sd a3, 24(s0)
-; LP64E-WITHFP-NEXT: sd a2, 16(s0)
; LP64E-WITHFP-NEXT: sd a1, 8(s0)
+; LP64E-WITHFP-NEXT: sd a2, 16(s0)
+; LP64E-WITHFP-NEXT: sd a3, 24(s0)
+; LP64E-WITHFP-NEXT: sd a4, 32(s0)
; LP64E-WITHFP-NEXT: addi a1, s0, 23
; LP64E-WITHFP-NEXT: sd a1, -24(s0)
; LP64E-WITHFP-NEXT: ld ra, 16(sp) # 8-byte Folded Reload
@@ -1009,13 +1009,13 @@ define i64 @va2_va_arg(ptr %fmt, ...) nounwind {
; ILP32-ILP32F-FPELIM-LABEL: va2_va_arg:
; ILP32-ILP32F-FPELIM: # %bb.0:
; ILP32-ILP32F-FPELIM-NEXT: addi sp, sp, -48
-; ILP32-ILP32F-FPELIM-NEXT: sw a7, 44(sp)
-; ILP32-ILP32F-FPELIM-NEXT: sw a6, 40(sp)
; ILP32-ILP32F-FPELIM-NEXT: sw a5, 36(sp)
-; ILP32-ILP32F-FPELIM-NEXT: sw a4, 32(sp)
-; ILP32-ILP32F-FPELIM-NEXT: sw a3, 28(sp)
-; ILP32-ILP32F-FPELIM-NEXT: sw a2, 24(sp)
+; ILP32-ILP32F-FPELIM-NEXT: sw a6, 40(sp)
+; ILP32-ILP32F-FPELIM-NEXT: sw a7, 44(sp)
; ILP32-ILP32F-FPELIM-NEXT: sw a1, 20(sp)
+; ILP32-ILP32F-FPELIM-NEXT: sw a2, 24(sp)
+; ILP32-ILP32F-FPELIM-NEXT: sw a3, 28(sp)
+; ILP32-ILP32F-FPELIM-NEXT: sw a4, 32(sp)
; ILP32-ILP32F-FPELIM-NEXT: addi a0, sp, 27
; ILP32-ILP32F-FPELIM-NEXT: andi a1, a0, -8
; ILP32-ILP32F-FPELIM-NEXT: addi a0, a1, 4
@@ -1033,13 +1033,13 @@ define i64 @va2_va_arg(ptr %fmt, ...) nounwind {
; ILP32-ILP32F-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; ILP32-ILP32F-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; ILP32-ILP32F-WITHFP-NEXT: addi s0, sp, 16
-; ILP32-ILP32F-WITHFP-NEXT: sw a7, 28(s0)
-; ILP32-ILP32F-WITHFP-NEXT: sw a6, 24(s0)
; ILP32-ILP32F-WITHFP-NEXT: sw a5, 20(s0)
-; ILP32-ILP32F-WITHFP-NEXT: sw a4, 16(s0)
-; ILP32-ILP32F-WITHFP-NEXT: sw a3, 12(s0)
-; ILP32-ILP32F-WITHFP-NEXT: sw a2, 8(s0)
+; ILP32-ILP32F-WITHFP-NEXT: sw a6, 24(s0)
+; ILP32-ILP32F-WITHFP-NEXT: sw a7, 28(s0)
; ILP32-ILP32F-WITHFP-NEXT: sw a1, 4(s0)
+; ILP32-ILP32F-WITHFP-NEXT: sw a2, 8(s0)
+; ILP32-ILP32F-WITHFP-NEXT: sw a3, 12(s0)
+; ILP32-ILP32F-WITHFP-NEXT: sw a4, 16(s0)
; ILP32-ILP32F-WITHFP-NEXT: addi a0, s0, 11
; ILP32-ILP32F-WITHFP-NEXT: andi a1, a0, -8
; ILP32-ILP32F-WITHFP-NEXT: addi a0, a1, 4
@@ -1056,13 +1056,13 @@ define i64 @va2_va_arg(ptr %fmt, ...) nounwind {
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-LABEL: va2_va_arg:
; RV32D-ILP32-ILP32F-ILP32D-FPELIM: # %bb.0:
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, -48
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a7, 44(sp)
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a6, 40(sp)
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a5, 36(sp)
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a4, 32(sp)
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a3, 28(sp)
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a2, 24(sp)
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a6, 40(sp)
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a7, 44(sp)
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a1, 20(sp)
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a2, 24(sp)
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a3, 28(sp)
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a4, 32(sp)
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a0, sp, 27
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: andi a0, a0, -8
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a1, a0, 8
@@ -1078,10 +1078,10 @@ define i64 @va2_va_arg(ptr %fmt, ...) nounwind {
; ILP32E-FPELIM: # %bb.0:
; ILP32E-FPELIM-NEXT: addi sp, sp, -28
; ILP32E-FPELIM-NEXT: sw a5, 24(sp)
-; ILP32E-FPELIM-NEXT: sw a4, 20(sp)
-; ILP32E-FPELIM-NEXT: sw a3, 16(sp)
-; ILP32E-FPELIM-NEXT: sw a2, 12(sp)
; ILP32E-FPELIM-NEXT: sw a1, 8(sp)
+; ILP32E-FPELIM-NEXT: sw a2, 12(sp)
+; ILP32E-FPELIM-NEXT: sw a3, 16(sp)
+; ILP32E-FPELIM-NEXT: sw a4, 20(sp)
; ILP32E-FPELIM-NEXT: addi a0, sp, 15
; ILP32E-FPELIM-NEXT: andi a1, a0, -8
; ILP32E-FPELIM-NEXT: addi a0, a1, 4
@@ -1100,10 +1100,10 @@ define i64 @va2_va_arg(ptr %fmt, ...) nounwind {
; ILP32E-WITHFP-NEXT: sw s0, 4(sp) # 4-byte Folded Spill
; ILP32E-WITHFP-NEXT: addi s0, sp, 12
; ILP32E-WITHFP-NEXT: sw a5, 20(s0)
-; ILP32E-WITHFP-NEXT: sw a4, 16(s0)
-; ILP32E-WITHFP-NEXT: sw a3, 12(s0)
-; ILP32E-WITHFP-NEXT: sw a2, 8(s0)
; ILP32E-WITHFP-NEXT: sw a1, 4(s0)
+; ILP32E-WITHFP-NEXT: sw a2, 8(s0)
+; ILP32E-WITHFP-NEXT: sw a3, 12(s0)
+; ILP32E-WITHFP-NEXT: sw a4, 16(s0)
; ILP32E-WITHFP-NEXT: addi a0, s0, 11
; ILP32E-WITHFP-NEXT: andi a1, a0, -8
; ILP32E-WITHFP-NEXT: addi a0, a1, 4
@@ -1121,13 +1121,13 @@ define i64 @va2_va_arg(ptr %fmt, ...) nounwind {
; LP64-LP64F-LP64D-FPELIM: # %bb.0:
; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -80
; LP64-LP64F-LP64D-FPELIM-NEXT: mv a0, a1
-; LP64-LP64F-LP64D-FPELIM-NEXT: sd a7, 72(sp)
-; LP64-LP64F-LP64D-FPELIM-NEXT: sd a6, 64(sp)
; LP64-LP64F-LP64D-FPELIM-NEXT: sd a5, 56(sp)
-; LP64-LP64F-LP64D-FPELIM-NEXT: sd a4, 48(sp)
-; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 40(sp)
-; LP64-LP64F-LP64D-FPELIM-NEXT: sd a2, 32(sp)
+; LP64-LP64F-LP64D-FPELIM-NEXT: sd a6, 64(sp)
+; LP64-LP64F-LP64D-FPELIM-NEXT: sd a7, 72(sp)
; LP64-LP64F-LP64D-FPELIM-NEXT: sd a1, 24(sp)
+; LP64-LP64F-LP64D-FPELIM-NEXT: sd a2, 32(sp)
+; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 40(sp)
+; LP64-LP64F-LP64D-FPELIM-NEXT: sd a4, 48(sp)
; LP64-LP64F-LP64D-FPELIM-NEXT: addi a1, sp, 32
; LP64-LP64F-LP64D-FPELIM-NEXT: sd a1, 8(sp)
; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, 80
@@ -1140,13 +1140,13 @@ define i64 @va2_va_arg(ptr %fmt, ...) nounwind {
; LP64-LP64F-LP64D-WITHFP-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; LP64-LP64F-LP64D-WITHFP-NEXT: addi s0, sp, 32
; LP64-LP64F-LP64D-WITHFP-NEXT: mv a0, a1
-; LP64-LP64F-LP64D-WITHFP-NEXT: sd a7, 56(s0)
-; LP64-LP64F-LP64D-WITHFP-NEXT: sd a6, 48(s0)
; LP64-LP64F-LP64D-WITHFP-NEXT: sd a5, 40(s0)
-; LP64-LP64F-LP64D-WITHFP-NEXT: sd a4, 32(s0)
-; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, 24(s0)
-; LP64-LP64F-LP64D-WITHFP-NEXT: sd a2, 16(s0)
+; LP64-LP64F-LP64D-WITHFP-NEXT: sd a6, 48(s0)
+; LP64-LP64F-LP64D-WITHFP-NEXT: sd a7, 56(s0)
; LP64-LP64F-LP64D-WITHFP-NEXT: sd a1, 8(s0)
+; LP64-LP64F-LP64D-WITHFP-NEXT: sd a2, 16(s0)
+; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, 24(s0)
+; LP64-LP64F-LP64D-WITHFP-NEXT: sd a4, 32(s0)
; LP64-LP64F-LP64D-WITHFP-NEXT: addi a1, s0, 16
; LP64-LP64F-LP64D-WITHFP-NEXT: sd a1, -24(s0)
; LP64-LP64F-LP64D-WITHFP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
@@ -1159,10 +1159,10 @@ define i64 @va2_va_arg(ptr %fmt, ...) nounwind {
; LP64E-FPELIM-NEXT: addi sp, sp, -56
; LP64E-FPELIM-NEXT: mv a0, a1
; LP64E-FPELIM-NEXT: sd a5, 48(sp)
-; LP64E-FPELIM-NEXT: sd a4, 40(sp)
-; LP64E-FPELIM-NEXT: sd a3, 32(sp)
-; LP64E-FPELIM-NEXT: sd a2, 24(sp)
; LP64E-FPELIM-NEXT: sd a1, 16(sp)
+; LP64E-FPELIM-NEXT: sd a2, 24(sp)
+; LP64E-FPELIM-NEXT: sd a3, 32(sp)
+; LP64E-FPELIM-NEXT: sd a4, 40(sp)
; LP64E-FPELIM-NEXT: addi a1, sp, 24
; LP64E-FPELIM-NEXT: sd a1, 0(sp)
; LP64E-FPELIM-NEXT: addi sp, sp, 56
@@ -1176,10 +1176,10 @@ define i64 @va2_va_arg(ptr %fmt, ...) nounwind {
; LP64E-WITHFP-NEXT: addi s0, sp, 24
; LP64E-WITHFP-NEXT: mv a0, a1
; LP64E-WITHFP-NEXT: sd a5, 40(s0)
-; LP64E-WITHFP-NEXT: sd a4, 32(s0)
-; LP64E-WITHFP-NEXT: sd a3, 24(s0)
-; LP64E-WITHFP-NEXT: sd a2, 16(s0)
; LP64E-WITHFP-NEXT: sd a1, 8(s0)
+; LP64E-WITHFP-NEXT: sd a2, 16(s0)
+; LP64E-WITHFP-NEXT: sd a3, 24(s0)
+; LP64E-WITHFP-NEXT: sd a4, 32(s0)
; LP64E-WITHFP-NEXT: addi a1, s0, 16
; LP64E-WITHFP-NEXT: sd a1, -24(s0)
; LP64E-WITHFP-NEXT: ld ra, 16(sp) # 8-byte Folded Reload
@@ -1317,10 +1317,10 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind {
; ILP32-ILP32F-FPELIM: # %bb.0:
; ILP32-ILP32F-FPELIM-NEXT: addi sp, sp, -32
; ILP32-ILP32F-FPELIM-NEXT: sw a7, 28(sp)
-; ILP32-ILP32F-FPELIM-NEXT: sw a6, 24(sp)
-; ILP32-ILP32F-FPELIM-NEXT: sw a5, 20(sp)
-; ILP32-ILP32F-FPELIM-NEXT: sw a4, 16(sp)
; ILP32-ILP32F-FPELIM-NEXT: sw a3, 12(sp)
+; ILP32-ILP32F-FPELIM-NEXT: sw a4, 16(sp)
+; ILP32-ILP32F-FPELIM-NEXT: sw a5, 20(sp)
+; ILP32-ILP32F-FPELIM-NEXT: sw a6, 24(sp)
; ILP32-ILP32F-FPELIM-NEXT: addi a0, sp, 12
; ILP32-ILP32F-FPELIM-NEXT: sw a0, 4(sp)
; ILP32-ILP32F-FPELIM-NEXT: addi a0, sp, 19
@@ -1343,10 +1343,10 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind {
; ILP32-ILP32F-WITHFP-NEXT: sw s0, 16(sp) # 4-byte Folded Spill
; ILP32-ILP32F-WITHFP-NEXT: addi s0, sp, 24
; ILP32-ILP32F-WITHFP-NEXT: sw a7, 20(s0)
-; ILP32-ILP32F-WITHFP-NEXT: sw a6, 16(s0)
-; ILP32-ILP32F-WITHFP-NEXT: sw a5, 12(s0)
-; ILP32-ILP32F-WITHFP-NEXT: sw a4, 8(s0)
; ILP32-ILP32F-WITHFP-NEXT: sw a3, 4(s0)
+; ILP32-ILP32F-WITHFP-NEXT: sw a4, 8(s0)
+; ILP32-ILP32F-WITHFP-NEXT: sw a5, 12(s0)
+; ILP32-ILP32F-WITHFP-NEXT: sw a6, 16(s0)
; ILP32-ILP32F-WITHFP-NEXT: addi a0, s0, 4
; ILP32-ILP32F-WITHFP-NEXT: sw a0, -12(s0)
; ILP32-ILP32F-WITHFP-NEXT: addi a0, s0, 11
@@ -1368,10 +1368,10 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind {
; RV32D-ILP32-ILP32F-ILP32D-FPELIM: # %bb.0:
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, -32
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a7, 28(sp)
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a6, 24(sp)
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a5, 20(sp)
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a4, 16(sp)
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a3, 12(sp)
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a4, 16(sp)
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a5, 20(sp)
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a6, 24(sp)
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a0, sp, 12
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a0, 4(sp)
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a0, sp, 19
@@ -1390,9 +1390,9 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind {
; ILP32E-FPELIM-LABEL: va3:
; ILP32E-FPELIM: # %bb.0:
; ILP32E-FPELIM-NEXT: addi sp, sp, -20
-; ILP32E-FPELIM-NEXT: sw a5, 16(sp)
-; ILP32E-FPELIM-NEXT: sw a4, 12(sp)
; ILP32E-FPELIM-NEXT: sw a3, 8(sp)
+; ILP32E-FPELIM-NEXT: sw a4, 12(sp)
+; ILP32E-FPELIM-NEXT: sw a5, 16(sp)
; ILP32E-FPELIM-NEXT: addi a0, sp, 8
; ILP32E-FPELIM-NEXT: sw a0, 0(sp)
; ILP32E-FPELIM-NEXT: addi a0, sp, 15
@@ -1414,9 +1414,9 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind {
; ILP32E-WITHFP-NEXT: sw ra, 8(sp) # 4-byte Folded Spill
; ILP32E-WITHFP-NEXT: sw s0, 4(sp) # 4-byte Folded Spill
; ILP32E-WITHFP-NEXT: addi s0, sp, 12
-; ILP32E-WITHFP-NEXT: sw a5, 12(s0)
-; ILP32E-WITHFP-NEXT: sw a4, 8(s0)
; ILP32E-WITHFP-NEXT: sw a3, 4(s0)
+; ILP32E-WITHFP-NEXT: sw a4, 8(s0)
+; ILP32E-WITHFP-NEXT: sw a5, 12(s0)
; ILP32E-WITHFP-NEXT: addi a0, s0, 4
; ILP32E-WITHFP-NEXT: sw a0, -12(s0)
; ILP32E-WITHFP-NEXT: addi a0, s0, 11
@@ -1437,12 +1437,12 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind {
; LP64-LP64F-LP64D-FPELIM-LABEL: va3:
; LP64-LP64F-LP64D-FPELIM: # %bb.0:
; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -64
-; LP64-LP64F-LP64D-FPELIM-NEXT: sd a7, 56(sp)
; LP64-LP64F-LP64D-FPELIM-NEXT: sd a6, 48(sp)
-; LP64-LP64F-LP64D-FPELIM-NEXT: sd a5, 40(sp)
-; LP64-LP64F-LP64D-FPELIM-NEXT: sd a4, 32(sp)
-; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 24(sp)
+; LP64-LP64F-LP64D-FPELIM-NEXT: sd a7, 56(sp)
; LP64-LP64F-LP64D-FPELIM-NEXT: sd a2, 16(sp)
+; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 24(sp)
+; LP64-LP64F-LP64D-FPELIM-NEXT: sd a4, 32(sp)
+; LP64-LP64F-LP64D-FPELIM-NEXT: sd a5, 40(sp)
; LP64-LP64F-LP64D-FPELIM-NEXT: addi a3, sp, 31
; LP64-LP64F-LP64D-FPELIM-NEXT: add a0, a1, a2
; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 8(sp)
@@ -1455,12 +1455,12 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind {
; LP64-LP64F-LP64D-WITHFP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; LP64-LP64F-LP64D-WITHFP-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; LP64-LP64F-LP64D-WITHFP-NEXT: addi s0, sp, 32
-; LP64-LP64F-LP64D-WITHFP-NEXT: sd a7, 40(s0)
; LP64-LP64F-LP64D-WITHFP-NEXT: sd a6, 32(s0)
-; LP64-LP64F-LP64D-WITHFP-NEXT: sd a5, 24(s0)
-; LP64-LP64F-LP64D-WITHFP-NEXT: sd a4, 16(s0)
-; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, 8(s0)
+; LP64-LP64F-LP64D-WITHFP-NEXT: sd a7, 40(s0)
; LP64-LP64F-LP64D-WITHFP-NEXT: sd a2, 0(s0)
+; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, 8(s0)
+; LP64-LP64F-LP64D-WITHFP-NEXT: sd a4, 16(s0)
+; LP64-LP64F-LP64D-WITHFP-NEXT: sd a5, 24(s0)
; LP64-LP64F-LP64D-WITHFP-NEXT: addi a3, s0, 15
; LP64-LP64F-LP64D-WITHFP-NEXT: add a0, a1, a2
; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, -24(s0)
@@ -1472,10 +1472,10 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind {
; LP64E-FPELIM-LABEL: va3:
; LP64E-FPELIM: # %bb.0:
; LP64E-FPELIM-NEXT: addi sp, sp, -40
-; LP64E-FPELIM-NEXT: sd a5, 32(sp)
-; LP64E-FPELIM-NEXT: sd a4, 24(sp)
-; LP64E-FPELIM-NEXT: sd a3, 16(sp)
; LP64E-FPELIM-NEXT: sd a2, 8(sp)
+; LP64E-FPELIM-NEXT: sd a3, 16(sp)
+; LP64E-FPELIM-NEXT: sd a4, 24(sp)
+; LP64E-FPELIM-NEXT: sd a5, 32(sp)
; LP64E-FPELIM-NEXT: addi a3, sp, 23
; LP64E-FPELIM-NEXT: add a0, a1, a2
; LP64E-FPELIM-NEXT: sd a3, 0(sp)
@@ -1488,10 +1488,10 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind {
; LP64E-WITHFP-NEXT: sd ra, 16(sp) # 8-byte Folded Spill
; LP64E-WITHFP-NEXT: sd s0, 8(sp) # 8-byte Folded Spill
; LP64E-WITHFP-NEXT: addi s0, sp, 24
-; LP64E-WITHFP-NEXT: sd a5, 24(s0)
-; LP64E-WITHFP-NEXT: sd a4, 16(s0)
-; LP64E-WITHFP-NEXT: sd a3, 8(s0)
; LP64E-WITHFP-NEXT: sd a2, 0(s0)
+; LP64E-WITHFP-NEXT: sd a3, 8(s0)
+; LP64E-WITHFP-NEXT: sd a4, 16(s0)
+; LP64E-WITHFP-NEXT: sd a5, 24(s0)
; LP64E-WITHFP-NEXT: addi a3, s0, 15
; LP64E-WITHFP-NEXT: add a0, a1, a2
; LP64E-WITHFP-NEXT: sd a3, -24(s0)
@@ -1521,10 +1521,10 @@ define i64 @va3_va_arg(i32 %a, i64 %b, ...) nounwind {
; ILP32-ILP32F-FPELIM: # %bb.0:
; ILP32-ILP32F-FPELIM-NEXT: addi sp, sp, -32
; ILP32-ILP32F-FPELIM-NEXT: sw a7, 28(sp)
-; ILP32-ILP32F-FPELIM-NEXT: sw a6, 24(sp)
-; ILP32-ILP32F-FPELIM-NEXT: sw a5, 20(sp)
-; ILP32-ILP32F-FPELIM-NEXT: sw a4, 16(sp)
; ILP32-ILP32F-FPELIM-NEXT: sw a3, 12(sp)
+; ILP32-ILP32F-FPELIM-NEXT: sw a4, 16(sp)
+; ILP32-ILP32F-FPELIM-NEXT: sw a5, 20(sp)
+; ILP32-ILP32F-FPELIM-NEXT: sw a6, 24(sp)
; ILP32-ILP32F-FPELIM-NEXT: addi a0, sp, 19
; ILP32-ILP32F-FPELIM-NEXT: andi a0, a0, -8
; ILP32-ILP32F-FPELIM-NEXT: addi a3, a0, 4
@@ -1547,10 +1547,10 @@ define i64 @va3_va_arg(i32 %a, i64 %b, ...) nounwind {
; ILP32-ILP32F-WITHFP-NEXT: sw s0, 16(sp) # 4-byte Folded Spill
; ILP32-ILP32F-WITHFP-NEXT: addi s0, sp, 24
; ILP32-ILP32F-WITHFP-NEXT: sw a7, 20(s0)
-; ILP32-ILP32F-WITHFP-NEXT: sw a6, 16(s0)
-; ILP32-ILP32F-WITHFP-NEXT: sw a5, 12(s0)
-; ILP32-ILP32F-WITHFP-NEXT: sw a4, 8(s0)
; ILP32-ILP32F-WITHFP-NEXT: sw a3, 4(s0)
+; ILP32-ILP32F-WITHFP-NEXT: sw a4, 8(s0)
+; ILP32-ILP32F-WITHFP-NEXT: sw a5, 12(s0)
+; ILP32-ILP32F-WITHFP-NEXT: sw a6, 16(s0)
; ILP32-ILP32F-WITHFP-NEXT: addi a0, s0, 11
; ILP32-ILP32F-WITHFP-NEXT: andi a0, a0, -8
; ILP32-ILP32F-WITHFP-NEXT: addi a3, a0, 4
@@ -1572,10 +1572,10 @@ define i64 @va3_va_arg(i32 %a, i64 %b, ...) nounwind {
; RV32D-ILP32-ILP32F-ILP32D-FPELIM: # %bb.0:
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, -48
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a7, 44(sp)
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a6, 40(sp)
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a5, 36(sp)
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a4, 32(sp)
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a3, 28(sp)
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a4, 32(sp)
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a5, 36(sp)
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a6, 40(sp)
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a0, sp, 35
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: andi a0, a0, -8
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a3, a0, 8
@@ -1594,9 +1594,9 @@ define i64 @va3_va_arg(i32 %a, i64 %b, ...) nounwind {
; ILP32E-FPELIM-LABEL: va3_va_arg:
; ILP32E-FPELIM: # %bb.0:
; ILP32E-FPELIM-NEXT: addi sp, sp, -20
-; ILP32E-FPELIM-NEXT: sw a5, 16(sp)
-; ILP32E-FPELIM-NEXT: sw a4, 12(sp)
; ILP32E-FPELIM-NEXT: sw a3, 8(sp)
+; ILP32E-FPELIM-NEXT: sw a4, 12(sp)
+; ILP32E-FPELIM-NEXT: sw a5, 16(sp)
; ILP32E-FPELIM-NEXT: addi a0, sp, 15
; ILP32E-FPELIM-NEXT: andi a0, a0, -8
; ILP32E-FPELIM-NEXT: addi a3, a0, 4
@@ -1618,9 +1618,9 @@ define i64 @va3_va_arg(i32 %a, i64 %b, ...) nounwind {
; ILP32E-WITHFP-NEXT: sw ra, 8(sp) # 4-byte Folded Spill
; ILP32E-WITHFP-NEXT: sw s0, 4(sp) # 4-byte Folded Spill
; ILP32E-WITHFP-NEXT: addi s0, sp, 12
-; ILP32E-WITHFP-NEXT: sw a5, 12(s0)
-; ILP32E-WITHFP-NEXT: sw a4, 8(s0)
; ILP32E-WITHFP-NEXT: sw a3, 4(s0)
+; ILP32E-WITHFP-NEXT: sw a4, 8(s0)
+; ILP32E-WITHFP-NEXT: sw a5, 12(s0)
; ILP32E-WITHFP-NEXT: addi a0, s0, 11
; ILP32E-WITHFP-NEXT: andi a0, a0, -8
; ILP32E-WITHFP-NEXT: addi a3, a0, 4
@@ -1641,12 +1641,12 @@ define i64 @va3_va_arg(i32 %a, i64 %b, ...) nounwind {
; LP64-LP64F-LP64D-FPELIM-LABEL: va3_va_arg:
; LP64-LP64F-LP64D-FPELIM: # %bb.0:
; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -64
-; LP64-LP64F-LP64D-FPELIM-NEXT: sd a7, 56(sp)
; LP64-LP64F-LP64D-FPELIM-NEXT: sd a6, 48(sp)
-; LP64-LP64F-LP64D-FPELIM-NEXT: sd a5, 40(sp)
-; LP64-LP64F-LP64D-FPELIM-NEXT: sd a4, 32(sp)
-; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 24(sp)
+; LP64-LP64F-LP64D-FPELIM-NEXT: sd a7, 56(sp)
; LP64-LP64F-LP64D-FPELIM-NEXT: sd a2, 16(sp)
+; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 24(sp)
+; LP64-LP64F-LP64D-FPELIM-NEXT: sd a4, 32(sp)
+; LP64-LP64F-LP64D-FPELIM-NEXT: sd a5, 40(sp)
; LP64-LP64F-LP64D-FPELIM-NEXT: addi a3, sp, 24
; LP64-LP64F-LP64D-FPELIM-NEXT: add a0, a1, a2
; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 8(sp)
@@ -1659,12 +1659,12 @@ define i64 @va3_va_arg(i32 %a, i64 %b, ...) nounwind {
; LP64-LP64F-LP64D-WITHFP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; LP64-LP64F-LP64D-WITHFP-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; LP64-LP64F-LP64D-WITHFP-NEXT: addi s0, sp, 32
-; LP64-LP64F-LP64D-WITHFP-NEXT: sd a7, 40(s0)
; LP64-LP64F-LP64D-WITHFP-NEXT: sd a6, 32(s0)
-; LP64-LP64F-LP64D-WITHFP-NEXT: sd a5, 24(s0)
-; LP64-LP64F-LP64D-WITHFP-NEXT: sd a4, 16(s0)
-; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, 8(s0)
+; LP64-LP64F-LP64D-WITHFP-NEXT: sd a7, 40(s0)
; LP64-LP64F-LP64D-WITHFP-NEXT: sd a2, 0(s0)
+; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, 8(s0)
+; LP64-LP64F-LP64D-WITHFP-NEXT: sd a4, 16(s0)
+; LP64-LP64F-LP64D-WITHFP-NEXT: sd a5, 24(s0)
; LP64-LP64F-LP64D-WITHFP-NEXT: addi a3, s0, 8
; LP64-LP64F-LP64D-WITHFP-NEXT: add a0, a1, a2
; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, -24(s0)
@@ -1676,10 +1676,10 @@ define i64 @va3_va_arg(i32 %a, i64 %b, ...) nounwind {
; LP64E-FPELIM-LABEL: va3_va_arg:
; LP64E-FPELIM: # %bb.0:
; LP64E-FPELIM-NEXT: addi sp, sp, -40
-; LP64E-FPELIM-NEXT: sd a5, 32(sp)
-; LP64E-FPELIM-NEXT: sd a4, 24(sp)
-; LP64E-FPELIM-NEXT: sd a3, 16(sp)
; LP64E-FPELIM-NEXT: sd a2, 8(sp)
+; LP64E-FPELIM-NEXT: sd a3, 16(sp)
+; LP64E-FPELIM-NEXT: sd a4, 24(sp)
+; LP64E-FPELIM-NEXT: sd a5, 32(sp)
; LP64E-FPELIM-NEXT: addi a3, sp, 16
; LP64E-FPELIM-NEXT: add a0, a1, a2
; LP64E-FPELIM-NEXT: sd a3, 0(sp)
@@ -1692,10 +1692,10 @@ define i64 @va3_va_arg(i32 %a, i64 %b, ...) nounwind {
; LP64E-WITHFP-NEXT: sd ra, 16(sp) # 8-byte Folded Spill
; LP64E-WITHFP-NEXT: sd s0, 8(sp) # 8-byte Folded Spill
; LP64E-WITHFP-NEXT: addi s0, sp, 24
-; LP64E-WITHFP-NEXT: sd a5, 24(s0)
-; LP64E-WITHFP-NEXT: sd a4, 16(s0)
-; LP64E-WITHFP-NEXT: sd a3, 8(s0)
; LP64E-WITHFP-NEXT: sd a2, 0(s0)
+; LP64E-WITHFP-NEXT: sd a3, 8(s0)
+; LP64E-WITHFP-NEXT: sd a4, 16(s0)
+; LP64E-WITHFP-NEXT: sd a5, 24(s0)
; LP64E-WITHFP-NEXT: addi a3, s0, 8
; LP64E-WITHFP-NEXT: add a0, a1, a2
; LP64E-WITHFP-NEXT: sd a3, -24(s0)
@@ -1859,13 +1859,13 @@ define i32 @va4_va_copy(i32 %argno, ...) nounwind {
; ILP32-ILP32F-FPELIM-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; ILP32-ILP32F-FPELIM-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; ILP32-ILP32F-FPELIM-NEXT: mv s0, a1
-; ILP32-ILP32F-FPELIM-NEXT: sw a7, 44(sp)
-; ILP32-ILP32F-FPELIM-NEXT: sw a6, 40(sp)
; ILP32-ILP32F-FPELIM-NEXT: sw a5, 36(sp)
-; ILP32-ILP32F-FPELIM-NEXT: sw a4, 32(sp)
-; ILP32-ILP32F-FPELIM-NEXT: sw a3, 28(sp)
-; ILP32-ILP32F-FPELIM-NEXT: sw a2, 24(sp)
+; ILP32-ILP32F-FPELIM-NEXT: sw a6, 40(sp)
+; ILP32-ILP32F-FPELIM-NEXT: sw a7, 44(sp)
; ILP32-ILP32F-FPELIM-NEXT: sw a1, 20(sp)
+; ILP32-ILP32F-FPELIM-NEXT: sw a2, 24(sp)
+; ILP32-ILP32F-FPELIM-NEXT: sw a3, 28(sp)
+; ILP32-ILP32F-FPELIM-NEXT: sw a4, 32(sp)
; ILP32-ILP32F-FPELIM-NEXT: addi a0, sp, 24
; ILP32-ILP32F-FPELIM-NEXT: sw a0, 4(sp)
; ILP32-ILP32F-FPELIM-NEXT: sw a0, 0(sp)
@@ -1902,13 +1902,13 @@ define i32 @va4_va_copy(i32 %argno, ...) nounwind {
; ILP32-ILP32F-WITHFP-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; ILP32-ILP32F-WITHFP-NEXT: addi s0, sp, 32
; ILP32-ILP32F-WITHFP-NEXT: mv s1, a1
-; ILP32-ILP32F-WITHFP-NEXT: sw a7, 28(s0)
-; ILP32-ILP32F-WITHFP-NEXT: sw a6, 24(s0)
; ILP32-ILP32F-WITHFP-NEXT: sw a5, 20(s0)
-; ILP32-ILP32F-WITHFP-NEXT: sw a4, 16(s0)
-; ILP32-ILP32F-WITHFP-NEXT: sw a3, 12(s0)
-; ILP32-ILP32F-WITHFP-NEXT: sw a2, 8(s0)
+; ILP32-ILP32F-WITHFP-NEXT: sw a6, 24(s0)
+; ILP32-ILP32F-WITHFP-NEXT: sw a7, 28(s0)
; ILP32-ILP32F-WITHFP-NEXT: sw a1, 4(s0)
+; ILP32-ILP32F-WITHFP-NEXT: sw a2, 8(s0)
+; ILP32-ILP32F-WITHFP-NEXT: sw a3, 12(s0)
+; ILP32-ILP32F-WITHFP-NEXT: sw a4, 16(s0)
; ILP32-ILP32F-WITHFP-NEXT: addi a0, s0, 8
; ILP32-ILP32F-WITHFP-NEXT: sw a0, -16(s0)
; ILP32-ILP32F-WITHFP-NEXT: sw a0, -20(s0)
@@ -1944,13 +1944,13 @@ define i32 @va4_va_copy(i32 %argno, ...) nounwind {
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: mv s0, a1
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a7, 44(sp)
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a6, 40(sp)
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a5, 36(sp)
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a4, 32(sp)
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a3, 28(sp)
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a2, 24(sp)
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a6, 40(sp)
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a7, 44(sp)
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a1, 20(sp)
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a2, 24(sp)
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a3, 28(sp)
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a4, 32(sp)
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a0, sp, 24
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a0, 4(sp)
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a0, 0(sp)
@@ -1986,10 +1986,10 @@ define i32 @va4_va_copy(i32 %argno, ...) nounwind {
; ILP32E-FPELIM-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; ILP32E-FPELIM-NEXT: mv s0, a1
; ILP32E-FPELIM-NEXT: sw a5, 36(sp)
-; ILP32E-FPELIM-NEXT: sw a4, 32(sp)
-; ILP32E-FPELIM-NEXT: sw a3, 28(sp)
-; ILP32E-FPELIM-NEXT: sw a2, 24(sp)
; ILP32E-FPELIM-NEXT: sw a1, 20(sp)
+; ILP32E-FPELIM-NEXT: sw a2, 24(sp)
+; ILP32E-FPELIM-NEXT: sw a3, 28(sp)
+; ILP32E-FPELIM-NEXT: sw a4, 32(sp)
; ILP32E-FPELIM-NEXT: addi a0, sp, 24
; ILP32E-FPELIM-NEXT: sw a0, 4(sp)
; ILP32E-FPELIM-NEXT: sw a0, 0(sp)
@@ -2027,10 +2027,10 @@ define i32 @va4_va_copy(i32 %argno, ...) nounwind {
; ILP32E-WITHFP-NEXT: addi s0, sp, 20
; ILP32E-WITHFP-NEXT: mv s1, a1
; ILP32E-WITHFP-NEXT: sw a5, 20(s0)
-; ILP32E-WITHFP-NEXT: sw a4, 16(s0)
-; ILP32E-WITHFP-NEXT: sw a3, 12(s0)
-; ILP32E-WITHFP-NEXT: sw a2, 8(s0)
; ILP32E-WITHFP-NEXT: sw a1, 4(s0)
+; ILP32E-WITHFP-NEXT: sw a2, 8(s0)
+; ILP32E-WITHFP-NEXT: sw a3, 12(s0)
+; ILP32E-WITHFP-NEXT: sw a4, 16(s0)
; ILP32E-WITHFP-NEXT: addi a0, s0, 8
; ILP32E-WITHFP-NEXT: sw a0, -16(s0)
; ILP32E-WITHFP-NEXT: sw a0, -20(s0)
@@ -2066,13 +2066,13 @@ define i32 @va4_va_copy(i32 %argno, ...) nounwind {
; LP64-LP64F-LP64D-FPELIM-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; LP64-LP64F-LP64D-FPELIM-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; LP64-LP64F-LP64D-FPELIM-NEXT: mv s0, a1
-; LP64-LP64F-LP64D-FPELIM-NEXT: sd a7, 88(sp)
-; LP64-LP64F-LP64D-FPELIM-NEXT: sd a6, 80(sp)
; LP64-LP64F-LP64D-FPELIM-NEXT: sd a5, 72(sp)
-; LP64-LP64F-LP64D-FPELIM-NEXT: sd a4, 64(sp)
-; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 56(sp)
-; LP64-LP64F-LP64D-FPELIM-NEXT: sd a2, 48(sp)
+; LP64-LP64F-LP64D-FPELIM-NEXT: sd a6, 80(sp)
+; LP64-LP64F-LP64D-FPELIM-NEXT: sd a7, 88(sp)
; LP64-LP64F-LP64D-FPELIM-NEXT: sd a1, 40(sp)
+; LP64-LP64F-LP64D-FPELIM-NEXT: sd a2, 48(sp)
+; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 56(sp)
+; LP64-LP64F-LP64D-FPELIM-NEXT: sd a4, 64(sp)
; LP64-LP64F-LP64D-FPELIM-NEXT: addi a0, sp, 48
; LP64-LP64F-LP64D-FPELIM-NEXT: sd a0, 8(sp)
; LP64-LP64F-LP64D-FPELIM-NEXT: sd a0, 0(sp)
@@ -2109,13 +2109,13 @@ define i32 @va4_va_copy(i32 %argno, ...) nounwind {
; LP64-LP64F-LP64D-WITHFP-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; LP64-LP64F-LP64D-WITHFP-NEXT: addi s0, sp, 48
; LP64-LP64F-LP64D-WITHFP-NEXT: mv s1, a1
-; LP64-LP64F-LP64D-WITHFP-NEXT: sd a7, 56(s0)
-; LP64-LP64F-LP64D-WITHFP-NEXT: sd a6, 48(s0)
; LP64-LP64F-LP64D-WITHFP-NEXT: sd a5, 40(s0)
-; LP64-LP64F-LP64D-WITHFP-NEXT: sd a4, 32(s0)
-; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, 24(s0)
-; LP64-LP64F-LP64D-WITHFP-NEXT: sd a2, 16(s0)
+; LP64-LP64F-LP64D-WITHFP-NEXT: sd a6, 48(s0)
+; LP64-LP64F-LP64D-WITHFP-NEXT: sd a7, 56(s0)
; LP64-LP64F-LP64D-WITHFP-NEXT: sd a1, 8(s0)
+; LP64-LP64F-LP64D-WITHFP-NEXT: sd a2, 16(s0)
+; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, 24(s0)
+; LP64-LP64F-LP64D-WITHFP-NEXT: sd a4, 32(s0)
; LP64-LP64F-LP64D-WITHFP-NEXT: addi a0, s0, 16
; LP64-LP64F-LP64D-WITHFP-NEXT: sd a0, -32(s0)
; LP64-LP64F-LP64D-WITHFP-NEXT: sd a0, -40(s0)
@@ -2152,10 +2152,10 @@ define i32 @va4_va_copy(i32 %argno, ...) nounwind {
; LP64E-FPELIM-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; LP64E-FPELIM-NEXT: mv s0, a1
; LP64E-FPELIM-NEXT: sd a5, 72(sp)
-; LP64E-FPELIM-NEXT: sd a4, 64(sp)
-; LP64E-FPELIM-NEXT: sd a3, 56(sp)
-; LP64E-FPELIM-NEXT: sd a2, 48(sp)
; LP64E-FPELIM-NEXT: sd a1, 40(sp)
+; LP64E-FPELIM-NEXT: sd a2, 48(sp)
+; LP64E-FPELIM-NEXT: sd a3, 56(sp)
+; LP64E-FPELIM-NEXT: sd a4, 64(sp)
; LP64E-FPELIM-NEXT: addi a0, sp, 48
; LP64E-FPELIM-NEXT: sd a0, 8(sp)
; LP64E-FPELIM-NEXT: sd a0, 0(sp)
@@ -2193,10 +2193,10 @@ define i32 @va4_va_copy(i32 %argno, ...) nounwind {
; LP64E-WITHFP-NEXT: addi s0, sp, 40
; LP64E-WITHFP-NEXT: mv s1, a1
; LP64E-WITHFP-NEXT: sd a5, 40(s0)
-; LP64E-WITHFP-NEXT: sd a4, 32(s0)
-; LP64E-WITHFP-NEXT: sd a3, 24(s0)
-; LP64E-WITHFP-NEXT: sd a2, 16(s0)
; LP64E-WITHFP-NEXT: sd a1, 8(s0)
+; LP64E-WITHFP-NEXT: sd a2, 16(s0)
+; LP64E-WITHFP-NEXT: sd a3, 24(s0)
+; LP64E-WITHFP-NEXT: sd a4, 32(s0)
; LP64E-WITHFP-NEXT: addi a0, s0, 16
; LP64E-WITHFP-NEXT: sd a0, -32(s0)
; LP64E-WITHFP-NEXT: sd a0, -40(s0)
@@ -2255,30 +2255,27 @@ define void @va5_aligned_stack_caller() nounwind {
; ILP32-ILP32F-FPELIM-NEXT: addi sp, sp, -64
; ILP32-ILP32F-FPELIM-NEXT: sw ra, 60(sp) # 4-byte Folded Spill
; ILP32-ILP32F-FPELIM-NEXT: li a0, 17
+; ILP32-ILP32F-FPELIM-NEXT: li a1, 16
+; ILP32-ILP32F-FPELIM-NEXT: sw a1, 20(sp)
; ILP32-ILP32F-FPELIM-NEXT: sw a0, 24(sp)
-; ILP32-ILP32F-FPELIM-NEXT: li a0, 16
-; ILP32-ILP32F-FPELIM-NEXT: sw a0, 20(sp)
; ILP32-ILP32F-FPELIM-NEXT: li a0, 15
+; ILP32-ILP32F-FPELIM-NEXT: lui a1, 262236
+; ILP32-ILP32F-FPELIM-NEXT: addi a1, a1, 655
+; ILP32-ILP32F-FPELIM-NEXT: lui a2, 377487
+; ILP32-ILP32F-FPELIM-NEXT: addi a2, a2, 1475
+; ILP32-ILP32F-FPELIM-NEXT: li a3, 14
+; ILP32-ILP32F-FPELIM-NEXT: sw a3, 0(sp)
+; ILP32-ILP32F-FPELIM-NEXT: sw a2, 8(sp)
+; ILP32-ILP32F-FPELIM-NEXT: sw a1, 12(sp)
; ILP32-ILP32F-FPELIM-NEXT: sw a0, 16(sp)
-; ILP32-ILP32F-FPELIM-NEXT: lui a0, 262236
-; ILP32-ILP32F-FPELIM-NEXT: addi a0, a0, 655
-; ILP32-ILP32F-FPELIM-NEXT: sw a0, 12(sp)
-; ILP32-ILP32F-FPELIM-NEXT: lui a0, 377487
-; ILP32-ILP32F-FPELIM-NEXT: addi a0, a0, 1475
-; ILP32-ILP32F-FPELIM-NEXT: sw a0, 8(sp)
-; ILP32-ILP32F-FPELIM-NEXT: li a0, 14
-; ILP32-ILP32F-FPELIM-NEXT: sw a0, 0(sp)
; ILP32-ILP32F-FPELIM-NEXT: lui a0, 262153
-; ILP32-ILP32F-FPELIM-NEXT: addi a0, a0, 491
-; ILP32-ILP32F-FPELIM-NEXT: sw a0, 44(sp)
+; ILP32-ILP32F-FPELIM-NEXT: addi a5, a0, 491
; ILP32-ILP32F-FPELIM-NEXT: lui a0, 545260
-; ILP32-ILP32F-FPELIM-NEXT: addi a0, a0, -1967
-; ILP32-ILP32F-FPELIM-NEXT: sw a0, 40(sp)
+; ILP32-ILP32F-FPELIM-NEXT: addi t0, a0, -1967
; ILP32-ILP32F-FPELIM-NEXT: lui a0, 964690
-; ILP32-ILP32F-FPELIM-NEXT: addi a0, a0, -328
-; ILP32-ILP32F-FPELIM-NEXT: sw a0, 36(sp)
+; ILP32-ILP32F-FPELIM-NEXT: addi t1, a0, -328
; ILP32-ILP32F-FPELIM-NEXT: lui a0, 335544
-; ILP32-ILP32F-FPELIM-NEXT: addi a5, a0, 1311
+; ILP32-ILP32F-FPELIM-NEXT: addi t2, a0, 1311
; ILP32-ILP32F-FPELIM-NEXT: lui a0, 688509
; ILP32-ILP32F-FPELIM-NEXT: addi a6, a0, -2048
; ILP32-ILP32F-FPELIM-NEXT: li a0, 1
@@ -2287,7 +2284,10 @@ define void @va5_aligned_stack_caller() nounwind {
; ILP32-ILP32F-FPELIM-NEXT: li a3, 12
; ILP32-ILP32F-FPELIM-NEXT: li a4, 13
; ILP32-ILP32F-FPELIM-NEXT: li a7, 4
-; ILP32-ILP32F-FPELIM-NEXT: sw a5, 32(sp)
+; ILP32-ILP32F-FPELIM-NEXT: sw t2, 32(sp)
+; ILP32-ILP32F-FPELIM-NEXT: sw t1, 36(sp)
+; ILP32-ILP32F-FPELIM-NEXT: sw t0, 40(sp)
+; ILP32-ILP32F-FPELIM-NEXT: sw a5, 44(sp)
; ILP32-ILP32F-FPELIM-NEXT: call va5_aligned_stack_callee
; ILP32-ILP32F-FPELIM-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
; ILP32-ILP32F-FPELIM-NEXT: addi sp, sp, 64
@@ -2300,30 +2300,27 @@ define void @va5_aligned_stack_caller() nounwind {
; ILP32-ILP32F-WITHFP-NEXT: sw s0, 56(sp) # 4-byte Folded Spill
; ILP32-ILP32F-WITHFP-NEXT: addi s0, sp, 64
; ILP32-ILP32F-WITHFP-NEXT: li a0, 17
+; ILP32-ILP32F-WITHFP-NEXT: li a1, 16
+; ILP32-ILP32F-WITHFP-NEXT: sw a1, 20(sp)
; ILP32-ILP32F-WITHFP-NEXT: sw a0, 24(sp)
-; ILP32-ILP32F-WITHFP-NEXT: li a0, 16
-; ILP32-ILP32F-WITHFP-NEXT: sw a0, 20(sp)
; ILP32-ILP32F-WITHFP-NEXT: li a0, 15
+; ILP32-ILP32F-WITHFP-NEXT: lui a1, 262236
+; ILP32-ILP32F-WITHFP-NEXT: addi a1, a1, 655
+; ILP32-ILP32F-WITHFP-NEXT: lui a2, 377487
+; ILP32-ILP32F-WITHFP-NEXT: addi a2, a2, 1475
+; ILP32-ILP32F-WITHFP-NEXT: li a3, 14
+; ILP32-ILP32F-WITHFP-NEXT: sw a3, 0(sp)
+; ILP32-ILP32F-WITHFP-NEXT: sw a2, 8(sp)
+; ILP32-ILP32F-WITHFP-NEXT: sw a1, 12(sp)
; ILP32-ILP32F-WITHFP-NEXT: sw a0, 16(sp)
-; ILP32-ILP32F-WITHFP-NEXT: lui a0, 262236
-; ILP32-ILP32F-WITHFP-NEXT: addi a0, a0, 655
-; ILP32-ILP32F-WITHFP-NEXT: sw a0, 12(sp)
-; ILP32-ILP32F-WITHFP-NEXT: lui a0, 377487
-; ILP32-ILP32F-WITHFP-NEXT: addi a0, a0, 1475
-; ILP32-ILP32F-WITHFP-NEXT: sw a0, 8(sp)
-; ILP32-ILP32F-WITHFP-NEXT: li a0, 14
-; ILP32-ILP32F-WITHFP-NEXT: sw a0, 0(sp)
; ILP32-ILP32F-WITHFP-NEXT: lui a0, 262153
-; ILP32-ILP32F-WITHFP-NEXT: addi a0, a0, 491
-; ILP32-ILP32F-WITHFP-NEXT: sw a0, -20(s0)
+; ILP32-ILP32F-WITHFP-NEXT: addi a5, a0, 491
; ILP32-ILP32F-WITHFP-NEXT: lui a0, 545260
-; ILP32-ILP32F-WITHFP-NEXT: addi a0, a0, -1967
-; ILP32-ILP32F-WITHFP-NEXT: sw a0, -24(s0)
+; ILP32-ILP32F-WITHFP-NEXT: addi t0, a0, -1967
; ILP32-ILP32F-WITHFP-NEXT: lui a0, 964690
-; ILP32-ILP32F-WITHFP-NEXT: addi a0, a0, -328
-; ILP32-ILP32F-WITHFP-NEXT: sw a0, -28(s0)
+; ILP32-ILP32F-WITHFP-NEXT: addi t1, a0, -328
; ILP32-ILP32F-WITHFP-NEXT: lui a0, 335544
-; ILP32-ILP32F-WITHFP-NEXT: addi a5, a0, 1311
+; ILP32-ILP32F-WITHFP-NEXT: addi t2, a0, 1311
; ILP32-ILP32F-WITHFP-NEXT: lui a0, 688509
; ILP32-ILP32F-WITHFP-NEXT: addi a6, a0, -2048
; ILP32-ILP32F-WITHFP-NEXT: li a0, 1
@@ -2332,7 +2329,10 @@ define void @va5_aligned_stack_caller() nounwind {
; ILP32-ILP32F-WITHFP-NEXT: li a3, 12
; ILP32-ILP32F-WITHFP-NEXT: li a4, 13
; ILP32-ILP32F-WITHFP-NEXT: li a7, 4
-; ILP32-ILP32F-WITHFP-NEXT: sw a5, -32(s0)
+; ILP32-ILP32F-WITHFP-NEXT: sw t2, -32(s0)
+; ILP32-ILP32F-WITHFP-NEXT: sw t1, -28(s0)
+; ILP32-ILP32F-WITHFP-NEXT: sw t0, -24(s0)
+; ILP32-ILP32F-WITHFP-NEXT: sw a5, -20(s0)
; ILP32-ILP32F-WITHFP-NEXT: call va5_aligned_stack_callee
; ILP32-ILP32F-WITHFP-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
; ILP32-ILP32F-WITHFP-NEXT: lw s0, 56(sp) # 4-byte Folded Reload
@@ -2345,29 +2345,26 @@ define void @va5_aligned_stack_caller() nounwind {
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw ra, 60(sp) # 4-byte Folded Spill
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui a0, 262236
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a0, a0, 655
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui a1, 377487
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a1, a1, 1475
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: li a2, 17
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: li a3, 16
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a3, 20(sp)
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a2, 24(sp)
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: li a2, 15
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: li a3, 14
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a3, 0(sp)
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a1, 8(sp)
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a0, 12(sp)
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui a0, 377487
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a0, a0, 1475
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a0, 8(sp)
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: li a0, 17
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a0, 24(sp)
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: li a0, 16
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a0, 20(sp)
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: li a0, 15
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a0, 16(sp)
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: li a0, 14
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a0, 0(sp)
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a2, 16(sp)
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui a0, 262153
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a0, a0, 491
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a0, 44(sp)
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a5, a0, 491
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui a0, 545260
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a0, a0, -1967
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a0, 40(sp)
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi t0, a0, -1967
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui a0, 964690
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a0, a0, -328
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a0, 36(sp)
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi t1, a0, -328
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui a0, 335544
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a5, a0, 1311
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi t2, a0, 1311
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui a0, 688509
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a6, a0, -2048
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: li a0, 1
@@ -2376,7 +2373,10 @@ define void @va5_aligned_stack_caller() nounwind {
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: li a3, 12
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: li a4, 13
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: li a7, 4
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a5, 32(sp)
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw t2, 32(sp)
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw t1, 36(sp)
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw t0, 40(sp)
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a5, 44(sp)
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: call va5_aligned_stack_callee
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 64
@@ -2390,32 +2390,29 @@ define void @va5_aligned_stack_caller() nounwind {
; ILP32E-FPELIM-NEXT: addi s0, sp, 64
; ILP32E-FPELIM-NEXT: andi sp, sp, -16
; ILP32E-FPELIM-NEXT: li a0, 17
+; ILP32E-FPELIM-NEXT: li a1, 16
+; ILP32E-FPELIM-NEXT: li a2, 15
+; ILP32E-FPELIM-NEXT: sw a2, 16(sp)
+; ILP32E-FPELIM-NEXT: sw a1, 20(sp)
; ILP32E-FPELIM-NEXT: sw a0, 24(sp)
-; ILP32E-FPELIM-NEXT: li a0, 16
-; ILP32E-FPELIM-NEXT: sw a0, 20(sp)
-; ILP32E-FPELIM-NEXT: li a0, 15
-; ILP32E-FPELIM-NEXT: sw a0, 16(sp)
; ILP32E-FPELIM-NEXT: lui a0, 262236
; ILP32E-FPELIM-NEXT: addi a0, a0, 655
+; ILP32E-FPELIM-NEXT: lui a1, 377487
+; ILP32E-FPELIM-NEXT: addi a1, a1, 1475
+; ILP32E-FPELIM-NEXT: li a2, 14
+; ILP32E-FPELIM-NEXT: li a3, 4
+; ILP32E-FPELIM-NEXT: sw a3, 0(sp)
+; ILP32E-FPELIM-NEXT: sw a2, 4(sp)
+; ILP32E-FPELIM-NEXT: sw a1, 8(sp)
; ILP32E-FPELIM-NEXT: sw a0, 12(sp)
-; ILP32E-FPELIM-NEXT: lui a0, 377487
-; ILP32E-FPELIM-NEXT: addi a0, a0, 1475
-; ILP32E-FPELIM-NEXT: sw a0, 8(sp)
-; ILP32E-FPELIM-NEXT: li a0, 14
-; ILP32E-FPELIM-NEXT: sw a0, 4(sp)
-; ILP32E-FPELIM-NEXT: li a0, 4
-; ILP32E-FPELIM-NEXT: sw a0, 0(sp)
; ILP32E-FPELIM-NEXT: lui a0, 262153
-; ILP32E-FPELIM-NEXT: addi a0, a0, 491
-; ILP32E-FPELIM-NEXT: sw a0, 44(sp)
+; ILP32E-FPELIM-NEXT: addi a6, a0, 491
; ILP32E-FPELIM-NEXT: lui a0, 545260
-; ILP32E-FPELIM-NEXT: addi a0, a0, -1967
-; ILP32E-FPELIM-NEXT: sw a0, 40(sp)
+; ILP32E-FPELIM-NEXT: addi a7, a0, -1967
; ILP32E-FPELIM-NEXT: lui a0, 964690
-; ILP32E-FPELIM-NEXT: addi a0, a0, -328
-; ILP32E-FPELIM-NEXT: sw a0, 36(sp)
+; ILP32E-FPELIM-NEXT: addi t0, a0, -328
; ILP32E-FPELIM-NEXT: lui a0, 335544
-; ILP32E-FPELIM-NEXT: addi a6, a0, 1311
+; ILP32E-FPELIM-NEXT: addi t1, a0, 1311
; ILP32E-FPELIM-NEXT: lui a0, 688509
; ILP32E-FPELIM-NEXT: addi a5, a0, -2048
; ILP32E-FPELIM-NEXT: li a0, 1
@@ -2423,7 +2420,10 @@ define void @va5_aligned_stack_caller() nounwind {
; ILP32E-FPELIM-NEXT: addi a2, sp, 32
; ILP32E-FPELIM-NEXT: li a3, 12
; ILP32E-FPELIM-NEXT: li a4, 13
-; ILP32E-FPELIM-NEXT: sw a6, 32(sp)
+; ILP32E-FPELIM-NEXT: sw t1, 32(sp)
+; ILP32E-FPELIM-NEXT: sw t0, 36(sp)
+; ILP32E-FPELIM-NEXT: sw a7, 40(sp)
+; ILP32E-FPELIM-NEXT: sw a6, 44(sp)
; ILP32E-FPELIM-NEXT: call va5_aligned_stack_callee
; ILP32E-FPELIM-NEXT: addi sp, s0, -64
; ILP32E-FPELIM-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
@@ -2439,32 +2439,29 @@ define void @va5_aligned_stack_caller() nounwind {
; ILP32E-WITHFP-NEXT: addi s0, sp, 64
; ILP32E-WITHFP-NEXT: andi sp, sp, -16
; ILP32E-WITHFP-NEXT: li a0, 17
+; ILP32E-WITHFP-NEXT: li a1, 16
+; ILP32E-WITHFP-NEXT: li a2, 15
+; ILP32E-WITHFP-NEXT: sw a2, 16(sp)
+; ILP32E-WITHFP-NEXT: sw a1, 20(sp)
; ILP32E-WITHFP-NEXT: sw a0, 24(sp)
-; ILP32E-WITHFP-NEXT: li a0, 16
-; ILP32E-WITHFP-NEXT: sw a0, 20(sp)
-; ILP32E-WITHFP-NEXT: li a0, 15
-; ILP32E-WITHFP-NEXT: sw a0, 16(sp)
; ILP32E-WITHFP-NEXT: lui a0, 262236
; ILP32E-WITHFP-NEXT: addi a0, a0, 655
+; ILP32E-WITHFP-NEXT: lui a1, 377487
+; ILP32E-WITHFP-NEXT: addi a1, a1, 1475
+; ILP32E-WITHFP-NEXT: li a2, 14
+; ILP32E-WITHFP-NEXT: li a3, 4
+; ILP32E-WITHFP-NEXT: sw a3, 0(sp)
+; ILP32E-WITHFP-NEXT: sw a2, 4(sp)
+; ILP32E-WITHFP-NEXT: sw a1, 8(sp)
; ILP32E-WITHFP-NEXT: sw a0, 12(sp)
-; ILP32E-WITHFP-NEXT: lui a0, 377487
-; ILP32E-WITHFP-NEXT: addi a0, a0, 1475
-; ILP32E-WITHFP-NEXT: sw a0, 8(sp)
-; ILP32E-WITHFP-NEXT: li a0, 14
-; ILP32E-WITHFP-NEXT: sw a0, 4(sp)
-; ILP32E-WITHFP-NEXT: li a0, 4
-; ILP32E-WITHFP-NEXT: sw a0, 0(sp)
; ILP32E-WITHFP-NEXT: lui a0, 262153
-; ILP32E-WITHFP-NEXT: addi a0, a0, 491
-; ILP32E-WITHFP-NEXT: sw a0, 44(sp)
+; ILP32E-WITHFP-NEXT: addi a6, a0, 491
; ILP32E-WITHFP-NEXT: lui a0, 545260
-; ILP32E-WITHFP-NEXT: addi a0, a0, -1967
-; ILP32E-WITHFP-NEXT: sw a0, 40(sp)
+; ILP32E-WITHFP-NEXT: addi a7, a0, -1967
; ILP32E-WITHFP-NEXT: lui a0, 964690
-; ILP32E-WITHFP-NEXT: addi a0, a0, -328
-; ILP32E-WITHFP-NEXT: sw a0, 36(sp)
+; ILP32E-WITHFP-NEXT: addi t0, a0, -328
; ILP32E-WITHFP-NEXT: lui a0, 335544
-; ILP32E-WITHFP-NEXT: addi a6, a0, 1311
+; ILP32E-WITHFP-NEXT: addi t1, a0, 1311
; ILP32E-WITHFP-NEXT: lui a0, 688509
; ILP32E-WITHFP-NEXT: addi a5, a0, -2048
; ILP32E-WITHFP-NEXT: li a0, 1
@@ -2472,7 +2469,10 @@ define void @va5_aligned_stack_caller() nounwind {
; ILP32E-WITHFP-NEXT: addi a2, sp, 32
; ILP32E-WITHFP-NEXT: li a3, 12
; ILP32E-WITHFP-NEXT: li a4, 13
-; ILP32E-WITHFP-NEXT: sw a6, 32(sp)
+; ILP32E-WITHFP-NEXT: sw t1, 32(sp)
+; ILP32E-WITHFP-NEXT: sw t0, 36(sp)
+; ILP32E-WITHFP-NEXT: sw a7, 40(sp)
+; ILP32E-WITHFP-NEXT: sw a6, 44(sp)
; ILP32E-WITHFP-NEXT: call va5_aligned_stack_callee
; ILP32E-WITHFP-NEXT: addi sp, s0, -64
; ILP32E-WITHFP-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
@@ -2484,14 +2484,11 @@ define void @va5_aligned_stack_caller() nounwind {
; LP64-LP64F-LP64D-FPELIM: # %bb.0:
; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -48
; LP64-LP64F-LP64D-FPELIM-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; LP64-LP64F-LP64D-FPELIM-NEXT: li a0, 17
-; LP64-LP64F-LP64D-FPELIM-NEXT: sd a0, 24(sp)
-; LP64-LP64F-LP64D-FPELIM-NEXT: li a0, 16
-; LP64-LP64F-LP64D-FPELIM-NEXT: sd a0, 16(sp)
-; LP64-LP64F-LP64D-FPELIM-NEXT: li a0, 15
-; LP64-LP64F-LP64D-FPELIM-NEXT: sd a0, 8(sp)
+; LP64-LP64F-LP64D-FPELIM-NEXT: li t0, 17
+; LP64-LP64F-LP64D-FPELIM-NEXT: li t1, 16
+; LP64-LP64F-LP64D-FPELIM-NEXT: li t2, 15
; LP64-LP64F-LP64D-FPELIM-NEXT: lui a0, %hi(.LCPI11_0)
-; LP64-LP64F-LP64D-FPELIM-NEXT: ld t0, %lo(.LCPI11_0)(a0)
+; LP64-LP64F-LP64D-FPELIM-NEXT: ld t3, %lo(.LCPI11_0)(a0)
; LP64-LP64F-LP64D-FPELIM-NEXT: lui a0, %hi(.LCPI11_1)
; LP64-LP64F-LP64D-FPELIM-NEXT: ld a2, %lo(.LCPI11_1)(a0)
; LP64-LP64F-LP64D-FPELIM-NEXT: lui a0, %hi(.LCPI11_2)
@@ -2504,7 +2501,10 @@ define void @va5_aligned_stack_caller() nounwind {
; LP64-LP64F-LP64D-FPELIM-NEXT: li a4, 12
; LP64-LP64F-LP64D-FPELIM-NEXT: li a5, 13
; LP64-LP64F-LP64D-FPELIM-NEXT: li a7, 14
-; LP64-LP64F-LP64D-FPELIM-NEXT: sd t0, 0(sp)
+; LP64-LP64F-LP64D-FPELIM-NEXT: sd t3, 0(sp)
+; LP64-LP64F-LP64D-FPELIM-NEXT: sd t2, 8(sp)
+; LP64-LP64F-LP64D-FPELIM-NEXT: sd t1, 16(sp)
+; LP64-LP64F-LP64D-FPELIM-NEXT: sd t0, 24(sp)
; LP64-LP64F-LP64D-FPELIM-NEXT: call va5_aligned_stack_callee
; LP64-LP64F-LP64D-FPELIM-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, 48
@@ -2516,14 +2516,11 @@ define void @va5_aligned_stack_caller() nounwind {
; LP64-LP64F-LP64D-WITHFP-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
; LP64-LP64F-LP64D-WITHFP-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
; LP64-LP64F-LP64D-WITHFP-NEXT: addi s0, sp, 48
-; LP64-LP64F-LP64D-WITHFP-NEXT: li a0, 17
-; LP64-LP64F-LP64D-WITHFP-NEXT: sd a0, 24(sp)
-; LP64-LP64F-LP64D-WITHFP-NEXT: li a0, 16
-; LP64-LP64F-LP64D-WITHFP-NEXT: sd a0, 16(sp)
-; LP64-LP64F-LP64D-WITHFP-NEXT: li a0, 15
-; LP64-LP64F-LP64D-WITHFP-NEXT: sd a0, 8(sp)
+; LP64-LP64F-LP64D-WITHFP-NEXT: li t0, 17
+; LP64-LP64F-LP64D-WITHFP-NEXT: li t1, 16
+; LP64-LP64F-LP64D-WITHFP-NEXT: li t2, 15
; LP64-LP64F-LP64D-WITHFP-NEXT: lui a0, %hi(.LCPI11_0)
-; LP64-LP64F-LP64D-WITHFP-NEXT: ld t0, %lo(.LCPI11_0)(a0)
+; LP64-LP64F-LP64D-WITHFP-NEXT: ld t3, %lo(.LCPI11_0)(a0)
; LP64-LP64F-LP64D-WITHFP-NEXT: lui a0, %hi(.LCPI11_1)
; LP64-LP64F-LP64D-WITHFP-NEXT: ld a2, %lo(.LCPI11_1)(a0)
; LP64-LP64F-LP64D-WITHFP-NEXT: lui a0, %hi(.LCPI11_2)
@@ -2536,7 +2533,10 @@ define void @va5_aligned_stack_caller() nounwind {
; LP64-LP64F-LP64D-WITHFP-NEXT: li a4, 12
; LP64-LP64F-LP64D-WITHFP-NEXT: li a5, 13
; LP64-LP64F-LP64D-WITHFP-NEXT: li a7, 14
-; LP64-LP64F-LP64D-WITHFP-NEXT: sd t0, 0(sp)
+; LP64-LP64F-LP64D-WITHFP-NEXT: sd t3, 0(sp)
+; LP64-LP64F-LP64D-WITHFP-NEXT: sd t2, 8(sp)
+; LP64-LP64F-LP64D-WITHFP-NEXT: sd t1, 16(sp)
+; LP64-LP64F-LP64D-WITHFP-NEXT: sd t0, 24(sp)
; LP64-LP64F-LP64D-WITHFP-NEXT: call va5_aligned_stack_callee
; LP64-LP64F-LP64D-WITHFP-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
@@ -2548,19 +2548,16 @@ define void @va5_aligned_stack_caller() nounwind {
; LP64E-FPELIM-NEXT: addi sp, sp, -56
; LP64E-FPELIM-NEXT: sd ra, 48(sp) # 8-byte Folded Spill
; LP64E-FPELIM-NEXT: li a0, 17
+; LP64E-FPELIM-NEXT: li a1, 16
+; LP64E-FPELIM-NEXT: sd a1, 32(sp)
; LP64E-FPELIM-NEXT: sd a0, 40(sp)
-; LP64E-FPELIM-NEXT: li a0, 16
-; LP64E-FPELIM-NEXT: lui a1, %hi(.LCPI11_0)
-; LP64E-FPELIM-NEXT: ld a1, %lo(.LCPI11_0)(a1)
-; LP64E-FPELIM-NEXT: sd a0, 32(sp)
-; LP64E-FPELIM-NEXT: li a0, 15
-; LP64E-FPELIM-NEXT: sd a0, 24(sp)
-; LP64E-FPELIM-NEXT: sd a1, 16(sp)
-; LP64E-FPELIM-NEXT: li a0, 14
-; LP64E-FPELIM-NEXT: sd a0, 8(sp)
+; LP64E-FPELIM-NEXT: li a6, 15
+; LP64E-FPELIM-NEXT: lui a0, %hi(.LCPI11_0)
+; LP64E-FPELIM-NEXT: ld a7, %lo(.LCPI11_0)(a0)
+; LP64E-FPELIM-NEXT: li t0, 14
; LP64E-FPELIM-NEXT: lui a0, 2384
; LP64E-FPELIM-NEXT: addiw a0, a0, 761
-; LP64E-FPELIM-NEXT: slli a6, a0, 11
+; LP64E-FPELIM-NEXT: slli t1, a0, 11
; LP64E-FPELIM-NEXT: lui a0, %hi(.LCPI11_1)
; LP64E-FPELIM-NEXT: ld a2, %lo(.LCPI11_1)(a0)
; LP64E-FPELIM-NEXT: lui a0, %hi(.LCPI11_2)
@@ -2569,7 +2566,10 @@ define void @va5_aligned_stack_caller() nounwind {
; LP64E-FPELIM-NEXT: li a1, 11
; LP64E-FPELIM-NEXT: li a4, 12
; LP64E-FPELIM-NEXT: li a5, 13
-; LP64E-FPELIM-NEXT: sd a6, 0(sp)
+; LP64E-FPELIM-NEXT: sd t1, 0(sp)
+; LP64E-FPELIM-NEXT: sd t0, 8(sp)
+; LP64E-FPELIM-NEXT: sd a7, 16(sp)
+; LP64E-FPELIM-NEXT: sd a6, 24(sp)
; LP64E-FPELIM-NEXT: call va5_aligned_stack_callee
; LP64E-FPELIM-NEXT: ld ra, 48(sp) # 8-byte Folded Reload
; LP64E-FPELIM-NEXT: addi sp, sp, 56
@@ -2582,19 +2582,16 @@ define void @va5_aligned_stack_caller() nounwind {
; LP64E-WITHFP-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
; LP64E-WITHFP-NEXT: addi s0, sp, 64
; LP64E-WITHFP-NEXT: li a0, 17
+; LP64E-WITHFP-NEXT: li a1, 16
+; LP64E-WITHFP-NEXT: sd a1, 32(sp)
; LP64E-WITHFP-NEXT: sd a0, 40(sp)
-; LP64E-WITHFP-NEXT: li a0, 16
-; LP64E-WITHFP-NEXT: lui a1, %hi(.LCPI11_0)
-; LP64E-WITHFP-NEXT: ld a1, %lo(.LCPI11_0)(a1)
-; LP64E-WITHFP-NEXT: sd a0, 32(sp)
-; LP64E-WITHFP-NEXT: li a0, 15
-; LP64E-WITHFP-NEXT: sd a0, 24(sp)
-; LP64E-WITHFP-NEXT: sd a1, 16(sp)
-; LP64E-WITHFP-NEXT: li a0, 14
-; LP64E-WITHFP-NEXT: sd a0, 8(sp)
+; LP64E-WITHFP-NEXT: li a6, 15
+; LP64E-WITHFP-NEXT: lui a0, %hi(.LCPI11_0)
+; LP64E-WITHFP-NEXT: ld a7, %lo(.LCPI11_0)(a0)
+; LP64E-WITHFP-NEXT: li t0, 14
; LP64E-WITHFP-NEXT: lui a0, 2384
; LP64E-WITHFP-NEXT: addiw a0, a0, 761
-; LP64E-WITHFP-NEXT: slli a6, a0, 11
+; LP64E-WITHFP-NEXT: slli t1, a0, 11
; LP64E-WITHFP-NEXT: lui a0, %hi(.LCPI11_1)
; LP64E-WITHFP-NEXT: ld a2, %lo(.LCPI11_1)(a0)
; LP64E-WITHFP-NEXT: lui a0, %hi(.LCPI11_2)
@@ -2603,7 +2600,10 @@ define void @va5_aligned_stack_caller() nounwind {
; LP64E-WITHFP-NEXT: li a1, 11
; LP64E-WITHFP-NEXT: li a4, 12
; LP64E-WITHFP-NEXT: li a5, 13
-; LP64E-WITHFP-NEXT: sd a6, 0(sp)
+; LP64E-WITHFP-NEXT: sd t1, 0(sp)
+; LP64E-WITHFP-NEXT: sd t0, 8(sp)
+; LP64E-WITHFP-NEXT: sd a7, 16(sp)
+; LP64E-WITHFP-NEXT: sd a6, 24(sp)
; LP64E-WITHFP-NEXT: call va5_aligned_stack_callee
; LP64E-WITHFP-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
; LP64E-WITHFP-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
@@ -2623,14 +2623,14 @@ define i32 @va6_no_fixed_args(...) nounwind {
; ILP32-ILP32F-FPELIM-LABEL: va6_no_fixed_args:
; ILP32-ILP32F-FPELIM: # %bb.0:
; ILP32-ILP32F-FPELIM-NEXT: addi sp, sp, -48
-; ILP32-ILP32F-FPELIM-NEXT: sw a7, 44(sp)
-; ILP32-ILP32F-FPELIM-NEXT: sw a6, 40(sp)
-; ILP32-ILP32F-FPELIM-NEXT: sw a5, 36(sp)
; ILP32-ILP32F-FPELIM-NEXT: sw a4, 32(sp)
-; ILP32-ILP32F-FPELIM-NEXT: sw a3, 28(sp)
-; ILP32-ILP32F-FPELIM-NEXT: sw a2, 24(sp)
-; ILP32-ILP32F-FPELIM-NEXT: sw a1, 20(sp)
+; ILP32-ILP32F-FPELIM-NEXT: sw a5, 36(sp)
+; ILP32-ILP32F-FPELIM-NEXT: sw a6, 40(sp)
+; ILP32-ILP32F-FPELIM-NEXT: sw a7, 44(sp)
; ILP32-ILP32F-FPELIM-NEXT: sw a0, 16(sp)
+; ILP32-ILP32F-FPELIM-NEXT: sw a1, 20(sp)
+; ILP32-ILP32F-FPELIM-NEXT: sw a2, 24(sp)
+; ILP32-ILP32F-FPELIM-NEXT: sw a3, 28(sp)
; ILP32-ILP32F-FPELIM-NEXT: addi a1, sp, 20
; ILP32-ILP32F-FPELIM-NEXT: sw a1, 12(sp)
; ILP32-ILP32F-FPELIM-NEXT: addi sp, sp, 48
@@ -2642,14 +2642,14 @@ define i32 @va6_no_fixed_args(...) nounwind {
; ILP32-ILP32F-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; ILP32-ILP32F-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; ILP32-ILP32F-WITHFP-NEXT: addi s0, sp, 16
-; ILP32-ILP32F-WITHFP-NEXT: sw a7, 28(s0)
-; ILP32-ILP32F-WITHFP-NEXT: sw a6, 24(s0)
-; ILP32-ILP32F-WITHFP-NEXT: sw a5, 20(s0)
; ILP32-ILP32F-WITHFP-NEXT: sw a4, 16(s0)
-; ILP32-ILP32F-WITHFP-NEXT: sw a3, 12(s0)
-; ILP32-ILP32F-WITHFP-NEXT: sw a2, 8(s0)
-; ILP32-ILP32F-WITHFP-NEXT: sw a1, 4(s0)
+; ILP32-ILP32F-WITHFP-NEXT: sw a5, 20(s0)
+; ILP32-ILP32F-WITHFP-NEXT: sw a6, 24(s0)
+; ILP32-ILP32F-WITHFP-NEXT: sw a7, 28(s0)
; ILP32-ILP32F-WITHFP-NEXT: sw a0, 0(s0)
+; ILP32-ILP32F-WITHFP-NEXT: sw a1, 4(s0)
+; ILP32-ILP32F-WITHFP-NEXT: sw a2, 8(s0)
+; ILP32-ILP32F-WITHFP-NEXT: sw a3, 12(s0)
; ILP32-ILP32F-WITHFP-NEXT: addi a1, s0, 4
; ILP32-ILP32F-WITHFP-NEXT: sw a1, -12(s0)
; ILP32-ILP32F-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
@@ -2660,14 +2660,14 @@ define i32 @va6_no_fixed_args(...) nounwind {
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-LABEL: va6_no_fixed_args:
; RV32D-ILP32-ILP32F-ILP32D-FPELIM: # %bb.0:
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, -48
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a7, 44(sp)
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a6, 40(sp)
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a5, 36(sp)
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a4, 32(sp)
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a3, 28(sp)
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a2, 24(sp)
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a1, 20(sp)
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a5, 36(sp)
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a6, 40(sp)
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a7, 44(sp)
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a0, 16(sp)
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a1, 20(sp)
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a2, 24(sp)
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a3, 28(sp)
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a1, sp, 20
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a1, 12(sp)
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 48
@@ -2676,12 +2676,12 @@ define i32 @va6_no_fixed_args(...) nounwind {
; ILP32E-FPELIM-LABEL: va6_no_fixed_args:
; ILP32E-FPELIM: # %bb.0:
; ILP32E-FPELIM-NEXT: addi sp, sp, -28
-; ILP32E-FPELIM-NEXT: sw a5, 24(sp)
; ILP32E-FPELIM-NEXT: sw a4, 20(sp)
-; ILP32E-FPELIM-NEXT: sw a3, 16(sp)
-; ILP32E-FPELIM-NEXT: sw a2, 12(sp)
-; ILP32E-FPELIM-NEXT: sw a1, 8(sp)
+; ILP32E-FPELIM-NEXT: sw a5, 24(sp)
; ILP32E-FPELIM-NEXT: sw a0, 4(sp)
+; ILP32E-FPELIM-NEXT: sw a1, 8(sp)
+; ILP32E-FPELIM-NEXT: sw a2, 12(sp)
+; ILP32E-FPELIM-NEXT: sw a3, 16(sp)
; ILP32E-FPELIM-NEXT: addi a1, sp, 8
; ILP32E-FPELIM-NEXT: sw a1, 0(sp)
; ILP32E-FPELIM-NEXT: addi sp, sp, 28
@@ -2693,12 +2693,12 @@ define i32 @va6_no_fixed_args(...) nounwind {
; ILP32E-WITHFP-NEXT: sw ra, 8(sp) # 4-byte Folded Spill
; ILP32E-WITHFP-NEXT: sw s0, 4(sp) # 4-byte Folded Spill
; ILP32E-WITHFP-NEXT: addi s0, sp, 12
-; ILP32E-WITHFP-NEXT: sw a5, 20(s0)
; ILP32E-WITHFP-NEXT: sw a4, 16(s0)
-; ILP32E-WITHFP-NEXT: sw a3, 12(s0)
-; ILP32E-WITHFP-NEXT: sw a2, 8(s0)
-; ILP32E-WITHFP-NEXT: sw a1, 4(s0)
+; ILP32E-WITHFP-NEXT: sw a5, 20(s0)
; ILP32E-WITHFP-NEXT: sw a0, 0(s0)
+; ILP32E-WITHFP-NEXT: sw a1, 4(s0)
+; ILP32E-WITHFP-NEXT: sw a2, 8(s0)
+; ILP32E-WITHFP-NEXT: sw a3, 12(s0)
; ILP32E-WITHFP-NEXT: addi a1, s0, 4
; ILP32E-WITHFP-NEXT: sw a1, -12(s0)
; ILP32E-WITHFP-NEXT: lw ra, 8(sp) # 4-byte Folded Reload
@@ -2709,14 +2709,14 @@ define i32 @va6_no_fixed_args(...) nounwind {
; LP64-LP64F-LP64D-FPELIM-LABEL: va6_no_fixed_args:
; LP64-LP64F-LP64D-FPELIM: # %bb.0:
; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -80
-; LP64-LP64F-LP64D-FPELIM-NEXT: sd a7, 72(sp)
-; LP64-LP64F-LP64D-FPELIM-NEXT: sd a6, 64(sp)
-; LP64-LP64F-LP64D-FPELIM-NEXT: sd a5, 56(sp)
; LP64-LP64F-LP64D-FPELIM-NEXT: sd a4, 48(sp)
-; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 40(sp)
-; LP64-LP64F-LP64D-FPELIM-NEXT: sd a2, 32(sp)
-; LP64-LP64F-LP64D-FPELIM-NEXT: sd a1, 24(sp)
+; LP64-LP64F-LP64D-FPELIM-NEXT: sd a5, 56(sp)
+; LP64-LP64F-LP64D-FPELIM-NEXT: sd a6, 64(sp)
+; LP64-LP64F-LP64D-FPELIM-NEXT: sd a7, 72(sp)
; LP64-LP64F-LP64D-FPELIM-NEXT: sd a0, 16(sp)
+; LP64-LP64F-LP64D-FPELIM-NEXT: sd a1, 24(sp)
+; LP64-LP64F-LP64D-FPELIM-NEXT: sd a2, 32(sp)
+; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 40(sp)
; LP64-LP64F-LP64D-FPELIM-NEXT: addi a1, sp, 24
; LP64-LP64F-LP64D-FPELIM-NEXT: sd a1, 8(sp)
; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, 80
@@ -2728,14 +2728,14 @@ define i32 @va6_no_fixed_args(...) nounwind {
; LP64-LP64F-LP64D-WITHFP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; LP64-LP64F-LP64D-WITHFP-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; LP64-LP64F-LP64D-WITHFP-NEXT: addi s0, sp, 32
-; LP64-LP64F-LP64D-WITHFP-NEXT: sd a7, 56(s0)
-; LP64-LP64F-LP64D-WITHFP-NEXT: sd a6, 48(s0)
-; LP64-LP64F-LP64D-WITHFP-NEXT: sd a5, 40(s0)
; LP64-LP64F-LP64D-WITHFP-NEXT: sd a4, 32(s0)
-; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, 24(s0)
-; LP64-LP64F-LP64D-WITHFP-NEXT: sd a2, 16(s0)
-; LP64-LP64F-LP64D-WITHFP-NEXT: sd a1, 8(s0)
+; LP64-LP64F-LP64D-WITHFP-NEXT: sd a5, 40(s0)
+; LP64-LP64F-LP64D-WITHFP-NEXT: sd a6, 48(s0)
+; LP64-LP64F-LP64D-WITHFP-NEXT: sd a7, 56(s0)
; LP64-LP64F-LP64D-WITHFP-NEXT: sd a0, 0(s0)
+; LP64-LP64F-LP64D-WITHFP-NEXT: sd a1, 8(s0)
+; LP64-LP64F-LP64D-WITHFP-NEXT: sd a2, 16(s0)
+; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, 24(s0)
; LP64-LP64F-LP64D-WITHFP-NEXT: addi a1, s0, 8
; LP64-LP64F-LP64D-WITHFP-NEXT: sd a1, -24(s0)
; LP64-LP64F-LP64D-WITHFP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
@@ -2746,12 +2746,12 @@ define i32 @va6_no_fixed_args(...) nounwind {
; LP64E-FPELIM-LABEL: va6_no_fixed_args:
; LP64E-FPELIM: # %bb.0:
; LP64E-FPELIM-NEXT: addi sp, sp, -56
-; LP64E-FPELIM-NEXT: sd a5, 48(sp)
; LP64E-FPELIM-NEXT: sd a4, 40(sp)
-; LP64E-FPELIM-NEXT: sd a3, 32(sp)
-; LP64E-FPELIM-NEXT: sd a2, 24(sp)
-; LP64E-FPELIM-NEXT: sd a1, 16(sp)
+; LP64E-FPELIM-NEXT: sd a5, 48(sp)
; LP64E-FPELIM-NEXT: sd a0, 8(sp)
+; LP64E-FPELIM-NEXT: sd a1, 16(sp)
+; LP64E-FPELIM-NEXT: sd a2, 24(sp)
+; LP64E-FPELIM-NEXT: sd a3, 32(sp)
; LP64E-FPELIM-NEXT: addi a1, sp, 16
; LP64E-FPELIM-NEXT: sd a1, 0(sp)
; LP64E-FPELIM-NEXT: addi sp, sp, 56
@@ -2763,12 +2763,12 @@ define i32 @va6_no_fixed_args(...) nounwind {
; LP64E-WITHFP-NEXT: sd ra, 16(sp) # 8-byte Folded Spill
; LP64E-WITHFP-NEXT: sd s0, 8(sp) # 8-byte Folded Spill
; LP64E-WITHFP-NEXT: addi s0, sp, 24
-; LP64E-WITHFP-NEXT: sd a5, 40(s0)
; LP64E-WITHFP-NEXT: sd a4, 32(s0)
-; LP64E-WITHFP-NEXT: sd a3, 24(s0)
-; LP64E-WITHFP-NEXT: sd a2, 16(s0)
-; LP64E-WITHFP-NEXT: sd a1, 8(s0)
+; LP64E-WITHFP-NEXT: sd a5, 40(s0)
; LP64E-WITHFP-NEXT: sd a0, 0(s0)
+; LP64E-WITHFP-NEXT: sd a1, 8(s0)
+; LP64E-WITHFP-NEXT: sd a2, 16(s0)
+; LP64E-WITHFP-NEXT: sd a3, 24(s0)
; LP64E-WITHFP-NEXT: addi a1, s0, 8
; LP64E-WITHFP-NEXT: sd a1, -24(s0)
; LP64E-WITHFP-NEXT: ld ra, 16(sp) # 8-byte Folded Reload
@@ -2794,25 +2794,25 @@ define i32 @va_large_stack(ptr %fmt, ...) {
; ILP32-ILP32F-FPELIM-NEXT: mv a0, a1
; ILP32-ILP32F-FPELIM-NEXT: lui t0, 24414
; ILP32-ILP32F-FPELIM-NEXT: add t0, sp, t0
-; ILP32-ILP32F-FPELIM-NEXT: sw a7, 300(t0)
-; ILP32-ILP32F-FPELIM-NEXT: lui a7, 24414
-; ILP32-ILP32F-FPELIM-NEXT: add a7, sp, a7
-; ILP32-ILP32F-FPELIM-NEXT: sw a6, 296(a7)
-; ILP32-ILP32F-FPELIM-NEXT: lui a6, 24414
-; ILP32-ILP32F-FPELIM-NEXT: add a6, sp, a6
-; ILP32-ILP32F-FPELIM-NEXT: sw a5, 292(a6)
+; ILP32-ILP32F-FPELIM-NEXT: sw a5, 292(t0)
+; ILP32-ILP32F-FPELIM-NEXT: lui a5, 24414
+; ILP32-ILP32F-FPELIM-NEXT: add a5, sp, a5
+; ILP32-ILP32F-FPELIM-NEXT: sw a6, 296(a5)
+; ILP32-ILP32F-FPELIM-NEXT: lui a5, 24414
+; ILP32-ILP32F-FPELIM-NEXT: add a5, sp, a5
+; ILP32-ILP32F-FPELIM-NEXT: sw a7, 300(a5)
; ILP32-ILP32F-FPELIM-NEXT: lui a5, 24414
; ILP32-ILP32F-FPELIM-NEXT: add a5, sp, a5
-; ILP32-ILP32F-FPELIM-NEXT: sw a4, 288(a5)
-; ILP32-ILP32F-FPELIM-NEXT: lui a4, 24414
-; ILP32-ILP32F-FPELIM-NEXT: add a4, sp, a4
-; ILP32-ILP32F-FPELIM-NEXT: sw a3, 284(a4)
-; ILP32-ILP32F-FPELIM-NEXT: lui a3, 24414
-; ILP32-ILP32F-FPELIM-NEXT: add a3, sp, a3
-; ILP32-ILP32F-FPELIM-NEXT: sw a2, 280(a3)
-; ILP32-ILP32F-FPELIM-NEXT: lui a2, 24414
-; ILP32-ILP32F-FPELIM-NEXT: add a2, sp, a2
-; ILP32-ILP32F-FPELIM-NEXT: sw a1, 276(a2)
+; ILP32-ILP32F-FPELIM-NEXT: sw a1, 276(a5)
+; ILP32-ILP32F-FPELIM-NEXT: lui a1, 24414
+; ILP32-ILP32F-FPELIM-NEXT: add a1, sp, a1
+; ILP32-ILP32F-FPELIM-NEXT: sw a2, 280(a1)
+; ILP32-ILP32F-FPELIM-NEXT: lui a1, 24414
+; ILP32-ILP32F-FPELIM-NEXT: add a1, sp, a1
+; ILP32-ILP32F-FPELIM-NEXT: sw a3, 284(a1)
+; ILP32-ILP32F-FPELIM-NEXT: lui a1, 24414
+; ILP32-ILP32F-FPELIM-NEXT: add a1, sp, a1
+; ILP32-ILP32F-FPELIM-NEXT: sw a4, 288(a1)
; ILP32-ILP32F-FPELIM-NEXT: lui a1, 24414
; ILP32-ILP32F-FPELIM-NEXT: addi a1, a1, 280
; ILP32-ILP32F-FPELIM-NEXT: add a1, sp, a1
@@ -2836,13 +2836,13 @@ define i32 @va_large_stack(ptr %fmt, ...) {
; ILP32-ILP32F-WITHFP-NEXT: addi a0, a0, -1728
; ILP32-ILP32F-WITHFP-NEXT: sub sp, sp, a0
; ILP32-ILP32F-WITHFP-NEXT: mv a0, a1
-; ILP32-ILP32F-WITHFP-NEXT: sw a7, 28(s0)
-; ILP32-ILP32F-WITHFP-NEXT: sw a6, 24(s0)
; ILP32-ILP32F-WITHFP-NEXT: sw a5, 20(s0)
-; ILP32-ILP32F-WITHFP-NEXT: sw a4, 16(s0)
-; ILP32-ILP32F-WITHFP-NEXT: sw a3, 12(s0)
-; ILP32-ILP32F-WITHFP-NEXT: sw a2, 8(s0)
+; ILP32-ILP32F-WITHFP-NEXT: sw a6, 24(s0)
+; ILP32-ILP32F-WITHFP-NEXT: sw a7, 28(s0)
; ILP32-ILP32F-WITHFP-NEXT: sw a1, 4(s0)
+; ILP32-ILP32F-WITHFP-NEXT: sw a2, 8(s0)
+; ILP32-ILP32F-WITHFP-NEXT: sw a3, 12(s0)
+; ILP32-ILP32F-WITHFP-NEXT: sw a4, 16(s0)
; ILP32-ILP32F-WITHFP-NEXT: addi a1, s0, 8
; ILP32-ILP32F-WITHFP-NEXT: lui a2, 24414
; ILP32-ILP32F-WITHFP-NEXT: sub a2, s0, a2
@@ -2864,25 +2864,25 @@ define i32 @va_large_stack(ptr %fmt, ...) {
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: mv a0, a1
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui t0, 24414
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add t0, sp, t0
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a7, 300(t0)
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui a7, 24414
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add a7, sp, a7
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a6, 296(a7)
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui a6, 24414
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add a6, sp, a6
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a5, 292(a6)
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a5, 292(t0)
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui a5, 24414
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add a5, sp, a5
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a6, 296(a5)
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui a5, 24414
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add a5, sp, a5
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a7, 300(a5)
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui a5, 24414
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add a5, sp, a5
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a4, 288(a5)
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui a4, 24414
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add a4, sp, a4
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a3, 284(a4)
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui a3, 24414
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add a3, sp, a3
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a2, 280(a3)
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui a2, 24414
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add a2, sp, a2
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a1, 276(a2)
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a1, 276(a5)
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui a1, 24414
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add a1, sp, a1
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a2, 280(a1)
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui a1, 24414
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add a1, sp, a1
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a3, 284(a1)
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui a1, 24414
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add a1, sp, a1
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a4, 288(a1)
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui a1, 24414
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a1, a1, 280
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add a1, sp, a1
@@ -2904,16 +2904,16 @@ define i32 @va_large_stack(ptr %fmt, ...) {
; ILP32E-FPELIM-NEXT: sw a5, 284(a6)
; ILP32E-FPELIM-NEXT: lui a5, 24414
; ILP32E-FPELIM-NEXT: add a5, sp, a5
-; ILP32E-FPELIM-NEXT: sw a4, 280(a5)
-; ILP32E-FPELIM-NEXT: lui a4, 24414
-; ILP32E-FPELIM-NEXT: add a4, sp, a4
-; ILP32E-FPELIM-NEXT: sw a3, 276(a4)
-; ILP32E-FPELIM-NEXT: lui a3, 24414
-; ILP32E-FPELIM-NEXT: add a3, sp, a3
-; ILP32E-FPELIM-NEXT: sw a2, 272(a3)
-; ILP32E-FPELIM-NEXT: lui a2, 24414
-; ILP32E-FPELIM-NEXT: add a2, sp, a2
-; ILP32E-FPELIM-NEXT: sw a1, 268(a2)
+; ILP32E-FPELIM-NEXT: sw a1, 268(a5)
+; ILP32E-FPELIM-NEXT: lui a1, 24414
+; ILP32E-FPELIM-NEXT: add a1, sp, a1
+; ILP32E-FPELIM-NEXT: sw a2, 272(a1)
+; ILP32E-FPELIM-NEXT: lui a1, 24414
+; ILP32E-FPELIM-NEXT: add a1, sp, a1
+; ILP32E-FPELIM-NEXT: sw a3, 276(a1)
+; ILP32E-FPELIM-NEXT: lui a1, 24414
+; ILP32E-FPELIM-NEXT: add a1, sp, a1
+; ILP32E-FPELIM-NEXT: sw a4, 280(a1)
; ILP32E-FPELIM-NEXT: lui a1, 24414
; ILP32E-FPELIM-NEXT: addi a1, a1, 272
; ILP32E-FPELIM-NEXT: add a1, sp, a1
@@ -2938,10 +2938,10 @@ define i32 @va_large_stack(ptr %fmt, ...) {
; ILP32E-WITHFP-NEXT: sub sp, sp, a0
; ILP32E-WITHFP-NEXT: mv a0, a1
; ILP32E-WITHFP-NEXT: sw a5, 20(s0)
-; ILP32E-WITHFP-NEXT: sw a4, 16(s0)
-; ILP32E-WITHFP-NEXT: sw a3, 12(s0)
-; ILP32E-WITHFP-NEXT: sw a2, 8(s0)
; ILP32E-WITHFP-NEXT: sw a1, 4(s0)
+; ILP32E-WITHFP-NEXT: sw a2, 8(s0)
+; ILP32E-WITHFP-NEXT: sw a3, 12(s0)
+; ILP32E-WITHFP-NEXT: sw a4, 16(s0)
; ILP32E-WITHFP-NEXT: addi a1, s0, 8
; ILP32E-WITHFP-NEXT: lui a2, 24414
; ILP32E-WITHFP-NEXT: sub a2, s0, a2
@@ -2964,12 +2964,6 @@ define i32 @va_large_stack(ptr %fmt, ...) {
; LP64-LP64F-LP64D-FPELIM-NEXT: add a0, sp, a0
; LP64-LP64F-LP64D-FPELIM-NEXT: sd a1, 280(a0)
; LP64-LP64F-LP64D-FPELIM-NEXT: lui a0, 24414
-; LP64-LP64F-LP64D-FPELIM-NEXT: add a0, sp, a0
-; LP64-LP64F-LP64D-FPELIM-NEXT: sd a7, 328(a0)
-; LP64-LP64F-LP64D-FPELIM-NEXT: lui a0, 24414
-; LP64-LP64F-LP64D-FPELIM-NEXT: add a0, sp, a0
-; LP64-LP64F-LP64D-FPELIM-NEXT: sd a6, 320(a0)
-; LP64-LP64F-LP64D-FPELIM-NEXT: lui a0, 24414
; LP64-LP64F-LP64D-FPELIM-NEXT: addiw a0, a0, 284
; LP64-LP64F-LP64D-FPELIM-NEXT: add a0, sp, a0
; LP64-LP64F-LP64D-FPELIM-NEXT: sd a0, 8(sp)
@@ -2981,14 +2975,20 @@ define i32 @va_large_stack(ptr %fmt, ...) {
; LP64-LP64F-LP64D-FPELIM-NEXT: sd a5, 312(a1)
; LP64-LP64F-LP64D-FPELIM-NEXT: lui a1, 24414
; LP64-LP64F-LP64D-FPELIM-NEXT: add a1, sp, a1
-; LP64-LP64F-LP64D-FPELIM-NEXT: sd a4, 304(a1)
+; LP64-LP64F-LP64D-FPELIM-NEXT: sd a6, 320(a1)
; LP64-LP64F-LP64D-FPELIM-NEXT: lui a1, 24414
; LP64-LP64F-LP64D-FPELIM-NEXT: add a1, sp, a1
-; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 296(a1)
+; LP64-LP64F-LP64D-FPELIM-NEXT: sd a7, 328(a1)
; LP64-LP64F-LP64D-FPELIM-NEXT: lui a1, 24414
; LP64-LP64F-LP64D-FPELIM-NEXT: add a1, sp, a1
; LP64-LP64F-LP64D-FPELIM-NEXT: sd a2, 288(a1)
; LP64-LP64F-LP64D-FPELIM-NEXT: lui a1, 24414
+; LP64-LP64F-LP64D-FPELIM-NEXT: add a1, sp, a1
+; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 296(a1)
+; LP64-LP64F-LP64D-FPELIM-NEXT: lui a1, 24414
+; LP64-LP64F-LP64D-FPELIM-NEXT: add a1, sp, a1
+; LP64-LP64F-LP64D-FPELIM-NEXT: sd a4, 304(a1)
+; LP64-LP64F-LP64D-FPELIM-NEXT: lui a1, 24414
; LP64-LP64F-LP64D-FPELIM-NEXT: addiw a1, a1, 336
; LP64-LP64F-LP64D-FPELIM-NEXT: add sp, sp, a1
; LP64-LP64F-LP64D-FPELIM-NEXT: ret
@@ -3007,17 +3007,17 @@ define i32 @va_large_stack(ptr %fmt, ...) {
; LP64-LP64F-LP64D-WITHFP-NEXT: addiw a0, a0, -1680
; LP64-LP64F-LP64D-WITHFP-NEXT: sub sp, sp, a0
; LP64-LP64F-LP64D-WITHFP-NEXT: sd a1, 8(s0)
-; LP64-LP64F-LP64D-WITHFP-NEXT: sd a7, 56(s0)
-; LP64-LP64F-LP64D-WITHFP-NEXT: sd a6, 48(s0)
; LP64-LP64F-LP64D-WITHFP-NEXT: addi a0, s0, 12
; LP64-LP64F-LP64D-WITHFP-NEXT: lui a1, 24414
; LP64-LP64F-LP64D-WITHFP-NEXT: sub a1, s0, a1
; LP64-LP64F-LP64D-WITHFP-NEXT: sd a0, -288(a1)
; LP64-LP64F-LP64D-WITHFP-NEXT: lw a0, 8(s0)
; LP64-LP64F-LP64D-WITHFP-NEXT: sd a5, 40(s0)
-; LP64-LP64F-LP64D-WITHFP-NEXT: sd a4, 32(s0)
-; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, 24(s0)
+; LP64-LP64F-LP64D-WITHFP-NEXT: sd a6, 48(s0)
+; LP64-LP64F-LP64D-WITHFP-NEXT: sd a7, 56(s0)
; LP64-LP64F-LP64D-WITHFP-NEXT: sd a2, 16(s0)
+; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, 24(s0)
+; LP64-LP64F-LP64D-WITHFP-NEXT: sd a4, 32(s0)
; LP64-LP64F-LP64D-WITHFP-NEXT: lui a1, 24414
; LP64-LP64F-LP64D-WITHFP-NEXT: addiw a1, a1, -1680
; LP64-LP64F-LP64D-WITHFP-NEXT: add sp, sp, a1
@@ -3033,27 +3033,27 @@ define i32 @va_large_stack(ptr %fmt, ...) {
; LP64E-FPELIM-NEXT: sub sp, sp, a0
; LP64E-FPELIM-NEXT: .cfi_def_cfa_offset 100000064
; LP64E-FPELIM-NEXT: lui a0, 24414
-; LP64E-FPELIM-NEXT: add a0, sp, a0
-; LP64E-FPELIM-NEXT: sd a1, 280(a0)
-; LP64E-FPELIM-NEXT: lui a0, 24414
; LP64E-FPELIM-NEXT: addiw a0, a0, 284
; LP64E-FPELIM-NEXT: add a0, sp, a0
; LP64E-FPELIM-NEXT: sd a0, 8(sp)
; LP64E-FPELIM-NEXT: lui a0, 24414
; LP64E-FPELIM-NEXT: add a0, sp, a0
+; LP64E-FPELIM-NEXT: sd a1, 280(a0)
+; LP64E-FPELIM-NEXT: lui a0, 24414
+; LP64E-FPELIM-NEXT: add a0, sp, a0
; LP64E-FPELIM-NEXT: lw a0, 280(a0)
; LP64E-FPELIM-NEXT: lui a1, 24414
; LP64E-FPELIM-NEXT: add a1, sp, a1
; LP64E-FPELIM-NEXT: sd a5, 312(a1)
; LP64E-FPELIM-NEXT: lui a1, 24414
; LP64E-FPELIM-NEXT: add a1, sp, a1
-; LP64E-FPELIM-NEXT: sd a4, 304(a1)
+; LP64E-FPELIM-NEXT: sd a2, 288(a1)
; LP64E-FPELIM-NEXT: lui a1, 24414
; LP64E-FPELIM-NEXT: add a1, sp, a1
; LP64E-FPELIM-NEXT: sd a3, 296(a1)
; LP64E-FPELIM-NEXT: lui a1, 24414
; LP64E-FPELIM-NEXT: add a1, sp, a1
-; LP64E-FPELIM-NEXT: sd a2, 288(a1)
+; LP64E-FPELIM-NEXT: sd a4, 304(a1)
; LP64E-FPELIM-NEXT: lui a1, 24414
; LP64E-FPELIM-NEXT: addiw a1, a1, 320
; LP64E-FPELIM-NEXT: add sp, sp, a1
@@ -3072,16 +3072,16 @@ define i32 @va_large_stack(ptr %fmt, ...) {
; LP64E-WITHFP-NEXT: lui a0, 24414
; LP64E-WITHFP-NEXT: addiw a0, a0, -1704
; LP64E-WITHFP-NEXT: sub sp, sp, a0
-; LP64E-WITHFP-NEXT: sd a1, 8(s0)
; LP64E-WITHFP-NEXT: addi a0, s0, 12
-; LP64E-WITHFP-NEXT: lui a1, 24414
-; LP64E-WITHFP-NEXT: sub a1, s0, a1
-; LP64E-WITHFP-NEXT: sd a0, -288(a1)
+; LP64E-WITHFP-NEXT: lui a6, 24414
+; LP64E-WITHFP-NEXT: sub a6, s0, a6
+; LP64E-WITHFP-NEXT: sd a0, -288(a6)
+; LP64E-WITHFP-NEXT: sd a1, 8(s0)
; LP64E-WITHFP-NEXT: lw a0, 8(s0)
; LP64E-WITHFP-NEXT: sd a5, 40(s0)
-; LP64E-WITHFP-NEXT: sd a4, 32(s0)
-; LP64E-WITHFP-NEXT: sd a3, 24(s0)
; LP64E-WITHFP-NEXT: sd a2, 16(s0)
+; LP64E-WITHFP-NEXT: sd a3, 24(s0)
+; LP64E-WITHFP-NEXT: sd a4, 32(s0)
; LP64E-WITHFP-NEXT: lui a1, 24414
; LP64E-WITHFP-NEXT: addiw a1, a1, -1704
; LP64E-WITHFP-NEXT: add sp, sp, a1
diff --git a/llvm/test/CodeGen/RISCV/wide-scalar-shift-by-byte-multiple-legalization.ll b/llvm/test/CodeGen/RISCV/wide-scalar-shift-by-byte-multiple-legalization.ll
index 3e14317a004745..cae59c79aaaa8a 100644
--- a/llvm/test/CodeGen/RISCV/wide-scalar-shift-by-byte-multiple-legalization.ll
+++ b/llvm/test/CodeGen/RISCV/wide-scalar-shift-by-byte-multiple-legalization.ll
@@ -18,13 +18,13 @@ define void @lshr_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: or a0, a0, a3
; RV64I-NEXT: slli a1, a1, 3
; RV64I-NEXT: srlw a0, a0, a1
-; RV64I-NEXT: sb a0, 0(a2)
; RV64I-NEXT: srli a1, a0, 16
+; RV64I-NEXT: srli a3, a0, 24
+; RV64I-NEXT: srli a4, a0, 8
+; RV64I-NEXT: sb a0, 0(a2)
+; RV64I-NEXT: sb a4, 1(a2)
; RV64I-NEXT: sb a1, 2(a2)
-; RV64I-NEXT: srli a1, a0, 24
-; RV64I-NEXT: sb a1, 3(a2)
-; RV64I-NEXT: srli a0, a0, 8
-; RV64I-NEXT: sb a0, 1(a2)
+; RV64I-NEXT: sb a3, 3(a2)
; RV64I-NEXT: ret
;
; RV32I-LABEL: lshr_4bytes:
@@ -51,13 +51,13 @@ define void @lshr_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: or a1, a1, a4
; RV32I-NEXT: slli a1, a1, 3
; RV32I-NEXT: srl a0, a0, a1
-; RV32I-NEXT: sb a0, 0(a2)
; RV32I-NEXT: srli a1, a0, 16
+; RV32I-NEXT: srli a3, a0, 24
+; RV32I-NEXT: srli a4, a0, 8
+; RV32I-NEXT: sb a0, 0(a2)
+; RV32I-NEXT: sb a4, 1(a2)
; RV32I-NEXT: sb a1, 2(a2)
-; RV32I-NEXT: srli a1, a0, 24
-; RV32I-NEXT: sb a1, 3(a2)
-; RV32I-NEXT: srli a0, a0, 8
-; RV32I-NEXT: sb a0, 1(a2)
+; RV32I-NEXT: sb a3, 3(a2)
; RV32I-NEXT: ret
%src = load i32, ptr %src.ptr, align 1
%byteOff = load i32, ptr %byteOff.ptr, align 1
@@ -82,13 +82,13 @@ define void @shl_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: or a0, a0, a3
; RV64I-NEXT: slli a1, a1, 3
; RV64I-NEXT: sllw a0, a0, a1
-; RV64I-NEXT: sb a0, 0(a2)
; RV64I-NEXT: srli a1, a0, 16
+; RV64I-NEXT: srli a3, a0, 24
+; RV64I-NEXT: srli a4, a0, 8
+; RV64I-NEXT: sb a0, 0(a2)
+; RV64I-NEXT: sb a4, 1(a2)
; RV64I-NEXT: sb a1, 2(a2)
-; RV64I-NEXT: srli a1, a0, 24
-; RV64I-NEXT: sb a1, 3(a2)
-; RV64I-NEXT: srli a0, a0, 8
-; RV64I-NEXT: sb a0, 1(a2)
+; RV64I-NEXT: sb a3, 3(a2)
; RV64I-NEXT: ret
;
; RV32I-LABEL: shl_4bytes:
@@ -115,13 +115,13 @@ define void @shl_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: or a1, a1, a4
; RV32I-NEXT: slli a1, a1, 3
; RV32I-NEXT: sll a0, a0, a1
-; RV32I-NEXT: sb a0, 0(a2)
; RV32I-NEXT: srli a1, a0, 16
+; RV32I-NEXT: srli a3, a0, 24
+; RV32I-NEXT: srli a4, a0, 8
+; RV32I-NEXT: sb a0, 0(a2)
+; RV32I-NEXT: sb a4, 1(a2)
; RV32I-NEXT: sb a1, 2(a2)
-; RV32I-NEXT: srli a1, a0, 24
-; RV32I-NEXT: sb a1, 3(a2)
-; RV32I-NEXT: srli a0, a0, 8
-; RV32I-NEXT: sb a0, 1(a2)
+; RV32I-NEXT: sb a3, 3(a2)
; RV32I-NEXT: ret
%src = load i32, ptr %src.ptr, align 1
%byteOff = load i32, ptr %byteOff.ptr, align 1
@@ -146,13 +146,13 @@ define void @ashr_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: or a0, a0, a3
; RV64I-NEXT: slli a1, a1, 3
; RV64I-NEXT: sraw a0, a0, a1
-; RV64I-NEXT: sb a0, 0(a2)
; RV64I-NEXT: srli a1, a0, 16
+; RV64I-NEXT: srli a3, a0, 24
+; RV64I-NEXT: srli a4, a0, 8
+; RV64I-NEXT: sb a0, 0(a2)
+; RV64I-NEXT: sb a4, 1(a2)
; RV64I-NEXT: sb a1, 2(a2)
-; RV64I-NEXT: srli a1, a0, 24
-; RV64I-NEXT: sb a1, 3(a2)
-; RV64I-NEXT: srli a0, a0, 8
-; RV64I-NEXT: sb a0, 1(a2)
+; RV64I-NEXT: sb a3, 3(a2)
; RV64I-NEXT: ret
;
; RV32I-LABEL: ashr_4bytes:
@@ -179,13 +179,13 @@ define void @ashr_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: or a1, a1, a4
; RV32I-NEXT: slli a1, a1, 3
; RV32I-NEXT: sra a0, a0, a1
-; RV32I-NEXT: sb a0, 0(a2)
; RV32I-NEXT: srli a1, a0, 16
+; RV32I-NEXT: srli a3, a0, 24
+; RV32I-NEXT: srli a4, a0, 8
+; RV32I-NEXT: sb a0, 0(a2)
+; RV32I-NEXT: sb a4, 1(a2)
; RV32I-NEXT: sb a1, 2(a2)
-; RV32I-NEXT: srli a1, a0, 24
-; RV32I-NEXT: sb a1, 3(a2)
-; RV32I-NEXT: srli a0, a0, 8
-; RV32I-NEXT: sb a0, 1(a2)
+; RV32I-NEXT: sb a3, 3(a2)
; RV32I-NEXT: ret
%src = load i32, ptr %src.ptr, align 1
%byteOff = load i32, ptr %byteOff.ptr, align 1
@@ -244,21 +244,21 @@ define void @lshr_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: slli a3, a3, 35
; RV64I-NEXT: or a1, a3, a1
; RV64I-NEXT: srl a0, a0, a1
-; RV64I-NEXT: sb a0, 0(a2)
; RV64I-NEXT: srli a1, a0, 48
+; RV64I-NEXT: srli a3, a0, 56
+; RV64I-NEXT: srli a4, a0, 32
+; RV64I-NEXT: srli a5, a0, 40
+; RV64I-NEXT: sb a4, 4(a2)
+; RV64I-NEXT: sb a5, 5(a2)
; RV64I-NEXT: sb a1, 6(a2)
-; RV64I-NEXT: srli a1, a0, 56
-; RV64I-NEXT: sb a1, 7(a2)
-; RV64I-NEXT: srli a1, a0, 32
-; RV64I-NEXT: sb a1, 4(a2)
-; RV64I-NEXT: srli a1, a0, 40
-; RV64I-NEXT: sb a1, 5(a2)
+; RV64I-NEXT: sb a3, 7(a2)
; RV64I-NEXT: srli a1, a0, 16
+; RV64I-NEXT: srli a3, a0, 24
+; RV64I-NEXT: srli a4, a0, 8
+; RV64I-NEXT: sb a0, 0(a2)
+; RV64I-NEXT: sb a4, 1(a2)
; RV64I-NEXT: sb a1, 2(a2)
-; RV64I-NEXT: srli a1, a0, 24
-; RV64I-NEXT: sb a1, 3(a2)
-; RV64I-NEXT: srli a0, a0, 8
-; RV64I-NEXT: sb a0, 1(a2)
+; RV64I-NEXT: sb a3, 3(a2)
; RV64I-NEXT: ret
;
; RV32I-LABEL: lshr_8bytes:
@@ -309,20 +309,20 @@ define void @lshr_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: .LBB3_3:
; RV32I-NEXT: srai a4, a4, 31
; RV32I-NEXT: and a1, a4, a1
-; RV32I-NEXT: sb a1, 4(a2)
; RV32I-NEXT: srli a3, a1, 16
+; RV32I-NEXT: srli a4, a1, 24
+; RV32I-NEXT: srli a5, a1, 8
+; RV32I-NEXT: sb a1, 4(a2)
+; RV32I-NEXT: sb a5, 5(a2)
; RV32I-NEXT: sb a3, 6(a2)
-; RV32I-NEXT: srli a3, a1, 24
-; RV32I-NEXT: sb a3, 7(a2)
-; RV32I-NEXT: srli a1, a1, 8
-; RV32I-NEXT: sb a1, 5(a2)
-; RV32I-NEXT: sb a0, 0(a2)
+; RV32I-NEXT: sb a4, 7(a2)
; RV32I-NEXT: srli a1, a0, 16
+; RV32I-NEXT: srli a3, a0, 24
+; RV32I-NEXT: srli a4, a0, 8
+; RV32I-NEXT: sb a0, 0(a2)
+; RV32I-NEXT: sb a4, 1(a2)
; RV32I-NEXT: sb a1, 2(a2)
-; RV32I-NEXT: srli a1, a0, 24
-; RV32I-NEXT: sb a1, 3(a2)
-; RV32I-NEXT: srli a0, a0, 8
-; RV32I-NEXT: sb a0, 1(a2)
+; RV32I-NEXT: sb a3, 3(a2)
; RV32I-NEXT: ret
%src = load i64, ptr %src.ptr, align 1
%byteOff = load i64, ptr %byteOff.ptr, align 1
@@ -380,21 +380,21 @@ define void @shl_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: slli a3, a3, 35
; RV64I-NEXT: or a1, a3, a1
; RV64I-NEXT: sll a0, a0, a1
-; RV64I-NEXT: sb a0, 0(a2)
; RV64I-NEXT: srli a1, a0, 48
+; RV64I-NEXT: srli a3, a0, 56
+; RV64I-NEXT: srli a4, a0, 32
+; RV64I-NEXT: srli a5, a0, 40
+; RV64I-NEXT: sb a4, 4(a2)
+; RV64I-NEXT: sb a5, 5(a2)
; RV64I-NEXT: sb a1, 6(a2)
-; RV64I-NEXT: srli a1, a0, 56
-; RV64I-NEXT: sb a1, 7(a2)
-; RV64I-NEXT: srli a1, a0, 32
-; RV64I-NEXT: sb a1, 4(a2)
-; RV64I-NEXT: srli a1, a0, 40
-; RV64I-NEXT: sb a1, 5(a2)
+; RV64I-NEXT: sb a3, 7(a2)
; RV64I-NEXT: srli a1, a0, 16
+; RV64I-NEXT: srli a3, a0, 24
+; RV64I-NEXT: srli a4, a0, 8
+; RV64I-NEXT: sb a0, 0(a2)
+; RV64I-NEXT: sb a4, 1(a2)
; RV64I-NEXT: sb a1, 2(a2)
-; RV64I-NEXT: srli a1, a0, 24
-; RV64I-NEXT: sb a1, 3(a2)
-; RV64I-NEXT: srli a0, a0, 8
-; RV64I-NEXT: sb a0, 1(a2)
+; RV64I-NEXT: sb a3, 3(a2)
; RV64I-NEXT: ret
;
; RV32I-LABEL: shl_8bytes:
@@ -445,20 +445,20 @@ define void @shl_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: .LBB4_3:
; RV32I-NEXT: srai a4, a4, 31
; RV32I-NEXT: and a1, a4, a1
-; RV32I-NEXT: sb a1, 0(a2)
-; RV32I-NEXT: sb a0, 4(a2)
; RV32I-NEXT: srli a3, a1, 16
+; RV32I-NEXT: srli a4, a1, 24
+; RV32I-NEXT: srli a5, a1, 8
+; RV32I-NEXT: sb a1, 0(a2)
+; RV32I-NEXT: sb a5, 1(a2)
; RV32I-NEXT: sb a3, 2(a2)
-; RV32I-NEXT: srli a3, a1, 24
-; RV32I-NEXT: sb a3, 3(a2)
-; RV32I-NEXT: srli a1, a1, 8
-; RV32I-NEXT: sb a1, 1(a2)
+; RV32I-NEXT: sb a4, 3(a2)
; RV32I-NEXT: srli a1, a0, 16
+; RV32I-NEXT: srli a3, a0, 24
+; RV32I-NEXT: srli a4, a0, 8
+; RV32I-NEXT: sb a0, 4(a2)
+; RV32I-NEXT: sb a4, 5(a2)
; RV32I-NEXT: sb a1, 6(a2)
-; RV32I-NEXT: srli a1, a0, 24
-; RV32I-NEXT: sb a1, 7(a2)
-; RV32I-NEXT: srli a0, a0, 8
-; RV32I-NEXT: sb a0, 5(a2)
+; RV32I-NEXT: sb a3, 7(a2)
; RV32I-NEXT: ret
%src = load i64, ptr %src.ptr, align 1
%byteOff = load i64, ptr %byteOff.ptr, align 1
@@ -516,21 +516,21 @@ define void @ashr_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: slli a3, a3, 35
; RV64I-NEXT: or a1, a3, a1
; RV64I-NEXT: sra a0, a0, a1
-; RV64I-NEXT: sb a0, 0(a2)
; RV64I-NEXT: srli a1, a0, 48
+; RV64I-NEXT: srli a3, a0, 56
+; RV64I-NEXT: srli a4, a0, 32
+; RV64I-NEXT: srli a5, a0, 40
+; RV64I-NEXT: sb a4, 4(a2)
+; RV64I-NEXT: sb a5, 5(a2)
; RV64I-NEXT: sb a1, 6(a2)
-; RV64I-NEXT: srli a1, a0, 56
-; RV64I-NEXT: sb a1, 7(a2)
-; RV64I-NEXT: srli a1, a0, 32
-; RV64I-NEXT: sb a1, 4(a2)
-; RV64I-NEXT: srli a1, a0, 40
-; RV64I-NEXT: sb a1, 5(a2)
+; RV64I-NEXT: sb a3, 7(a2)
; RV64I-NEXT: srli a1, a0, 16
+; RV64I-NEXT: srli a3, a0, 24
+; RV64I-NEXT: srli a4, a0, 8
+; RV64I-NEXT: sb a0, 0(a2)
+; RV64I-NEXT: sb a4, 1(a2)
; RV64I-NEXT: sb a1, 2(a2)
-; RV64I-NEXT: srli a1, a0, 24
-; RV64I-NEXT: sb a1, 3(a2)
-; RV64I-NEXT: srli a0, a0, 8
-; RV64I-NEXT: sb a0, 1(a2)
+; RV64I-NEXT: sb a3, 3(a2)
; RV64I-NEXT: ret
;
; RV32I-LABEL: ashr_8bytes:
@@ -581,20 +581,20 @@ define void @ashr_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: sll a3, a3, a4
; RV32I-NEXT: or a0, a0, a3
; RV32I-NEXT: .LBB5_3:
-; RV32I-NEXT: sb a1, 4(a2)
; RV32I-NEXT: srli a3, a1, 16
+; RV32I-NEXT: srli a4, a1, 24
+; RV32I-NEXT: srli a5, a1, 8
+; RV32I-NEXT: sb a1, 4(a2)
+; RV32I-NEXT: sb a5, 5(a2)
; RV32I-NEXT: sb a3, 6(a2)
-; RV32I-NEXT: srli a3, a1, 24
-; RV32I-NEXT: sb a3, 7(a2)
-; RV32I-NEXT: srli a1, a1, 8
-; RV32I-NEXT: sb a1, 5(a2)
-; RV32I-NEXT: sb a0, 0(a2)
+; RV32I-NEXT: sb a4, 7(a2)
; RV32I-NEXT: srli a1, a0, 16
+; RV32I-NEXT: srli a3, a0, 24
+; RV32I-NEXT: srli a4, a0, 8
+; RV32I-NEXT: sb a0, 0(a2)
+; RV32I-NEXT: sb a4, 1(a2)
; RV32I-NEXT: sb a1, 2(a2)
-; RV32I-NEXT: srli a1, a0, 24
-; RV32I-NEXT: sb a1, 3(a2)
-; RV32I-NEXT: srli a0, a0, 8
-; RV32I-NEXT: sb a0, 1(a2)
+; RV32I-NEXT: sb a3, 3(a2)
; RV32I-NEXT: ret
%src = load i64, ptr %src.ptr, align 1
%byteOff = load i64, ptr %byteOff.ptr, align 1
@@ -689,36 +689,36 @@ define void @lshr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: .LBB6_3:
; RV64I-NEXT: srai a4, a4, 63
; RV64I-NEXT: and a1, a4, a1
-; RV64I-NEXT: sb a1, 8(a2)
; RV64I-NEXT: srli a3, a1, 56
+; RV64I-NEXT: srli a4, a1, 48
+; RV64I-NEXT: srli a5, a1, 40
+; RV64I-NEXT: srli a6, a1, 32
+; RV64I-NEXT: sb a6, 12(a2)
+; RV64I-NEXT: sb a5, 13(a2)
+; RV64I-NEXT: sb a4, 14(a2)
; RV64I-NEXT: sb a3, 15(a2)
-; RV64I-NEXT: srli a3, a1, 48
-; RV64I-NEXT: sb a3, 14(a2)
-; RV64I-NEXT: srli a3, a1, 40
-; RV64I-NEXT: sb a3, 13(a2)
-; RV64I-NEXT: srli a3, a1, 32
-; RV64I-NEXT: sb a3, 12(a2)
; RV64I-NEXT: srli a3, a1, 24
+; RV64I-NEXT: srli a4, a1, 16
+; RV64I-NEXT: srli a5, a1, 8
+; RV64I-NEXT: sb a1, 8(a2)
+; RV64I-NEXT: sb a5, 9(a2)
+; RV64I-NEXT: sb a4, 10(a2)
; RV64I-NEXT: sb a3, 11(a2)
-; RV64I-NEXT: srli a3, a1, 16
-; RV64I-NEXT: sb a3, 10(a2)
-; RV64I-NEXT: srli a1, a1, 8
-; RV64I-NEXT: sb a1, 9(a2)
-; RV64I-NEXT: sb a0, 0(a2)
; RV64I-NEXT: srli a1, a0, 56
+; RV64I-NEXT: srli a3, a0, 48
+; RV64I-NEXT: srli a4, a0, 40
+; RV64I-NEXT: srli a5, a0, 32
+; RV64I-NEXT: sb a5, 4(a2)
+; RV64I-NEXT: sb a4, 5(a2)
+; RV64I-NEXT: sb a3, 6(a2)
; RV64I-NEXT: sb a1, 7(a2)
-; RV64I-NEXT: srli a1, a0, 48
-; RV64I-NEXT: sb a1, 6(a2)
-; RV64I-NEXT: srli a1, a0, 40
-; RV64I-NEXT: sb a1, 5(a2)
-; RV64I-NEXT: srli a1, a0, 32
-; RV64I-NEXT: sb a1, 4(a2)
; RV64I-NEXT: srli a1, a0, 24
+; RV64I-NEXT: srli a3, a0, 16
+; RV64I-NEXT: srli a4, a0, 8
+; RV64I-NEXT: sb a0, 0(a2)
+; RV64I-NEXT: sb a4, 1(a2)
+; RV64I-NEXT: sb a3, 2(a2)
; RV64I-NEXT: sb a1, 3(a2)
-; RV64I-NEXT: srli a1, a0, 16
-; RV64I-NEXT: sb a1, 2(a2)
-; RV64I-NEXT: srli a0, a0, 8
-; RV64I-NEXT: sb a0, 1(a2)
; RV64I-NEXT: ret
;
; RV32I-LABEL: lshr_16bytes:
@@ -774,14 +774,14 @@ define void @lshr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: slli a1, a1, 24
; RV32I-NEXT: or a1, a1, a7
; RV32I-NEXT: or a1, a1, a6
-; RV32I-NEXT: sw zero, 28(sp)
-; RV32I-NEXT: sw zero, 24(sp)
-; RV32I-NEXT: sw zero, 20(sp)
; RV32I-NEXT: sw zero, 16(sp)
-; RV32I-NEXT: sw a0, 12(sp)
-; RV32I-NEXT: sw a5, 8(sp)
-; RV32I-NEXT: sw a4, 4(sp)
+; RV32I-NEXT: sw zero, 20(sp)
+; RV32I-NEXT: sw zero, 24(sp)
+; RV32I-NEXT: sw zero, 28(sp)
; RV32I-NEXT: sw a3, 0(sp)
+; RV32I-NEXT: sw a4, 4(sp)
+; RV32I-NEXT: sw a5, 8(sp)
+; RV32I-NEXT: sw a0, 12(sp)
; RV32I-NEXT: andi a0, a1, 12
; RV32I-NEXT: mv a3, sp
; RV32I-NEXT: add a0, a3, a0
@@ -805,34 +805,34 @@ define void @lshr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: sll a7, t1, a7
; RV32I-NEXT: or a7, a5, a7
; RV32I-NEXT: srl a0, a0, a1
-; RV32I-NEXT: sb a5, 8(a2)
-; RV32I-NEXT: sb a0, 12(a2)
-; RV32I-NEXT: sb a3, 0(a2)
-; RV32I-NEXT: sb a6, 4(a2)
; RV32I-NEXT: srli a1, a0, 16
+; RV32I-NEXT: srli t1, a0, 24
+; RV32I-NEXT: srli t2, a0, 8
+; RV32I-NEXT: sb a0, 12(a2)
+; RV32I-NEXT: sb t2, 13(a2)
; RV32I-NEXT: sb a1, 14(a2)
-; RV32I-NEXT: srli a1, a0, 24
-; RV32I-NEXT: sb a1, 15(a2)
-; RV32I-NEXT: srli a0, a0, 8
-; RV32I-NEXT: sb a0, 13(a2)
+; RV32I-NEXT: sb t1, 15(a2)
; RV32I-NEXT: srli a0, a7, 16
+; RV32I-NEXT: srli a1, a7, 24
+; RV32I-NEXT: srli a7, a7, 8
+; RV32I-NEXT: sb a5, 8(a2)
+; RV32I-NEXT: sb a7, 9(a2)
; RV32I-NEXT: sb a0, 10(a2)
-; RV32I-NEXT: srli a0, a7, 24
-; RV32I-NEXT: sb a0, 11(a2)
-; RV32I-NEXT: srli a0, a7, 8
-; RV32I-NEXT: sb a0, 9(a2)
+; RV32I-NEXT: sb a1, 11(a2)
; RV32I-NEXT: srli a0, a4, 16
-; RV32I-NEXT: sb a0, 2(a2)
-; RV32I-NEXT: srli a0, a4, 24
-; RV32I-NEXT: sb a0, 3(a2)
+; RV32I-NEXT: srli a1, a4, 24
; RV32I-NEXT: srli a4, a4, 8
+; RV32I-NEXT: sb a3, 0(a2)
; RV32I-NEXT: sb a4, 1(a2)
+; RV32I-NEXT: sb a0, 2(a2)
+; RV32I-NEXT: sb a1, 3(a2)
; RV32I-NEXT: srli a0, t0, 16
+; RV32I-NEXT: srli a1, t0, 24
+; RV32I-NEXT: srli a3, t0, 8
+; RV32I-NEXT: sb a6, 4(a2)
+; RV32I-NEXT: sb a3, 5(a2)
; RV32I-NEXT: sb a0, 6(a2)
-; RV32I-NEXT: srli a0, t0, 24
-; RV32I-NEXT: sb a0, 7(a2)
-; RV32I-NEXT: srli a0, t0, 8
-; RV32I-NEXT: sb a0, 5(a2)
+; RV32I-NEXT: sb a1, 7(a2)
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
%src = load i128, ptr %src.ptr, align 1
@@ -928,36 +928,36 @@ define void @lshr_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun
; RV64I-NEXT: .LBB7_3:
; RV64I-NEXT: srai a4, a4, 63
; RV64I-NEXT: and a1, a4, a1
-; RV64I-NEXT: sb a1, 8(a2)
; RV64I-NEXT: srli a3, a1, 56
+; RV64I-NEXT: srli a4, a1, 48
+; RV64I-NEXT: srli a5, a1, 40
+; RV64I-NEXT: srli a6, a1, 32
+; RV64I-NEXT: sb a6, 12(a2)
+; RV64I-NEXT: sb a5, 13(a2)
+; RV64I-NEXT: sb a4, 14(a2)
; RV64I-NEXT: sb a3, 15(a2)
-; RV64I-NEXT: srli a3, a1, 48
-; RV64I-NEXT: sb a3, 14(a2)
-; RV64I-NEXT: srli a3, a1, 40
-; RV64I-NEXT: sb a3, 13(a2)
-; RV64I-NEXT: srli a3, a1, 32
-; RV64I-NEXT: sb a3, 12(a2)
; RV64I-NEXT: srli a3, a1, 24
+; RV64I-NEXT: srli a4, a1, 16
+; RV64I-NEXT: srli a5, a1, 8
+; RV64I-NEXT: sb a1, 8(a2)
+; RV64I-NEXT: sb a5, 9(a2)
+; RV64I-NEXT: sb a4, 10(a2)
; RV64I-NEXT: sb a3, 11(a2)
-; RV64I-NEXT: srli a3, a1, 16
-; RV64I-NEXT: sb a3, 10(a2)
-; RV64I-NEXT: srli a1, a1, 8
-; RV64I-NEXT: sb a1, 9(a2)
-; RV64I-NEXT: sb a0, 0(a2)
; RV64I-NEXT: srli a1, a0, 56
+; RV64I-NEXT: srli a3, a0, 48
+; RV64I-NEXT: srli a4, a0, 40
+; RV64I-NEXT: srli a5, a0, 32
+; RV64I-NEXT: sb a5, 4(a2)
+; RV64I-NEXT: sb a4, 5(a2)
+; RV64I-NEXT: sb a3, 6(a2)
; RV64I-NEXT: sb a1, 7(a2)
-; RV64I-NEXT: srli a1, a0, 48
-; RV64I-NEXT: sb a1, 6(a2)
-; RV64I-NEXT: srli a1, a0, 40
-; RV64I-NEXT: sb a1, 5(a2)
-; RV64I-NEXT: srli a1, a0, 32
-; RV64I-NEXT: sb a1, 4(a2)
; RV64I-NEXT: srli a1, a0, 24
+; RV64I-NEXT: srli a3, a0, 16
+; RV64I-NEXT: srli a4, a0, 8
+; RV64I-NEXT: sb a0, 0(a2)
+; RV64I-NEXT: sb a4, 1(a2)
+; RV64I-NEXT: sb a3, 2(a2)
; RV64I-NEXT: sb a1, 3(a2)
-; RV64I-NEXT: srli a1, a0, 16
-; RV64I-NEXT: sb a1, 2(a2)
-; RV64I-NEXT: srli a0, a0, 8
-; RV64I-NEXT: sb a0, 1(a2)
; RV64I-NEXT: ret
;
; RV32I-LABEL: lshr_16bytes_wordOff:
@@ -1004,50 +1004,50 @@ define void @lshr_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun
; RV32I-NEXT: or a0, a0, a6
; RV32I-NEXT: or a0, a0, a7
; RV32I-NEXT: lbu a1, 0(a1)
-; RV32I-NEXT: sw zero, 28(sp)
-; RV32I-NEXT: sw zero, 24(sp)
-; RV32I-NEXT: sw zero, 20(sp)
; RV32I-NEXT: sw zero, 16(sp)
-; RV32I-NEXT: sw a0, 12(sp)
-; RV32I-NEXT: sw a5, 8(sp)
-; RV32I-NEXT: sw a4, 4(sp)
+; RV32I-NEXT: sw zero, 20(sp)
+; RV32I-NEXT: sw zero, 24(sp)
+; RV32I-NEXT: sw zero, 28(sp)
; RV32I-NEXT: sw a3, 0(sp)
+; RV32I-NEXT: sw a4, 4(sp)
+; RV32I-NEXT: sw a5, 8(sp)
+; RV32I-NEXT: sw a0, 12(sp)
; RV32I-NEXT: slli a1, a1, 2
; RV32I-NEXT: andi a1, a1, 12
; RV32I-NEXT: mv a0, sp
; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: lw a1, 8(a0)
-; RV32I-NEXT: lw a3, 12(a0)
+; RV32I-NEXT: lw a3, 4(a0)
; RV32I-NEXT: lw a4, 0(a0)
-; RV32I-NEXT: lw a0, 4(a0)
-; RV32I-NEXT: sb a1, 8(a2)
-; RV32I-NEXT: sb a3, 12(a2)
-; RV32I-NEXT: sb a4, 0(a2)
-; RV32I-NEXT: sb a0, 4(a2)
+; RV32I-NEXT: lw a0, 12(a0)
; RV32I-NEXT: srli a5, a1, 16
+; RV32I-NEXT: srli a6, a1, 24
+; RV32I-NEXT: srli a7, a1, 8
+; RV32I-NEXT: sb a1, 8(a2)
+; RV32I-NEXT: sb a7, 9(a2)
; RV32I-NEXT: sb a5, 10(a2)
-; RV32I-NEXT: srli a5, a1, 24
-; RV32I-NEXT: sb a5, 11(a2)
-; RV32I-NEXT: srli a1, a1, 8
-; RV32I-NEXT: sb a1, 9(a2)
-; RV32I-NEXT: srli a1, a3, 16
+; RV32I-NEXT: sb a6, 11(a2)
+; RV32I-NEXT: srli a1, a0, 16
+; RV32I-NEXT: srli a5, a0, 24
+; RV32I-NEXT: srli a6, a0, 8
+; RV32I-NEXT: sb a0, 12(a2)
+; RV32I-NEXT: sb a6, 13(a2)
; RV32I-NEXT: sb a1, 14(a2)
-; RV32I-NEXT: srli a1, a3, 24
-; RV32I-NEXT: sb a1, 15(a2)
-; RV32I-NEXT: srli a3, a3, 8
-; RV32I-NEXT: sb a3, 13(a2)
-; RV32I-NEXT: srli a1, a4, 16
-; RV32I-NEXT: sb a1, 2(a2)
+; RV32I-NEXT: sb a5, 15(a2)
+; RV32I-NEXT: srli a0, a4, 16
; RV32I-NEXT: srli a1, a4, 24
+; RV32I-NEXT: srli a5, a4, 8
+; RV32I-NEXT: sb a4, 0(a2)
+; RV32I-NEXT: sb a5, 1(a2)
+; RV32I-NEXT: sb a0, 2(a2)
; RV32I-NEXT: sb a1, 3(a2)
-; RV32I-NEXT: srli a4, a4, 8
-; RV32I-NEXT: sb a4, 1(a2)
-; RV32I-NEXT: srli a1, a0, 16
-; RV32I-NEXT: sb a1, 6(a2)
-; RV32I-NEXT: srli a1, a0, 24
+; RV32I-NEXT: srli a0, a3, 16
+; RV32I-NEXT: srli a1, a3, 24
+; RV32I-NEXT: srli a4, a3, 8
+; RV32I-NEXT: sb a3, 4(a2)
+; RV32I-NEXT: sb a4, 5(a2)
+; RV32I-NEXT: sb a0, 6(a2)
; RV32I-NEXT: sb a1, 7(a2)
-; RV32I-NEXT: srli a0, a0, 8
-; RV32I-NEXT: sb a0, 5(a2)
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
%src = load i128, ptr %src.ptr, align 1
@@ -1143,36 +1143,36 @@ define void @shl_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: .LBB8_3:
; RV64I-NEXT: srai a4, a4, 63
; RV64I-NEXT: and a1, a4, a1
-; RV64I-NEXT: sb a1, 0(a2)
-; RV64I-NEXT: sb a0, 8(a2)
; RV64I-NEXT: srli a3, a1, 56
+; RV64I-NEXT: srli a4, a1, 48
+; RV64I-NEXT: srli a5, a1, 40
+; RV64I-NEXT: srli a6, a1, 32
+; RV64I-NEXT: sb a6, 4(a2)
+; RV64I-NEXT: sb a5, 5(a2)
+; RV64I-NEXT: sb a4, 6(a2)
; RV64I-NEXT: sb a3, 7(a2)
-; RV64I-NEXT: srli a3, a1, 48
-; RV64I-NEXT: sb a3, 6(a2)
-; RV64I-NEXT: srli a3, a1, 40
-; RV64I-NEXT: sb a3, 5(a2)
-; RV64I-NEXT: srli a3, a1, 32
-; RV64I-NEXT: sb a3, 4(a2)
; RV64I-NEXT: srli a3, a1, 24
+; RV64I-NEXT: srli a4, a1, 16
+; RV64I-NEXT: srli a5, a1, 8
+; RV64I-NEXT: sb a1, 0(a2)
+; RV64I-NEXT: sb a5, 1(a2)
+; RV64I-NEXT: sb a4, 2(a2)
; RV64I-NEXT: sb a3, 3(a2)
-; RV64I-NEXT: srli a3, a1, 16
-; RV64I-NEXT: sb a3, 2(a2)
-; RV64I-NEXT: srli a1, a1, 8
-; RV64I-NEXT: sb a1, 1(a2)
; RV64I-NEXT: srli a1, a0, 56
+; RV64I-NEXT: srli a3, a0, 48
+; RV64I-NEXT: srli a4, a0, 40
+; RV64I-NEXT: srli a5, a0, 32
+; RV64I-NEXT: sb a5, 12(a2)
+; RV64I-NEXT: sb a4, 13(a2)
+; RV64I-NEXT: sb a3, 14(a2)
; RV64I-NEXT: sb a1, 15(a2)
-; RV64I-NEXT: srli a1, a0, 48
-; RV64I-NEXT: sb a1, 14(a2)
-; RV64I-NEXT: srli a1, a0, 40
-; RV64I-NEXT: sb a1, 13(a2)
-; RV64I-NEXT: srli a1, a0, 32
-; RV64I-NEXT: sb a1, 12(a2)
; RV64I-NEXT: srli a1, a0, 24
+; RV64I-NEXT: srli a3, a0, 16
+; RV64I-NEXT: srli a4, a0, 8
+; RV64I-NEXT: sb a0, 8(a2)
+; RV64I-NEXT: sb a4, 9(a2)
+; RV64I-NEXT: sb a3, 10(a2)
; RV64I-NEXT: sb a1, 11(a2)
-; RV64I-NEXT: srli a1, a0, 16
-; RV64I-NEXT: sb a1, 10(a2)
-; RV64I-NEXT: srli a0, a0, 8
-; RV64I-NEXT: sb a0, 9(a2)
; RV64I-NEXT: ret
;
; RV32I-LABEL: shl_16bytes:
@@ -1228,14 +1228,14 @@ define void @shl_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: slli a1, a1, 24
; RV32I-NEXT: or a1, a1, a7
; RV32I-NEXT: or a1, a1, a6
-; RV32I-NEXT: sw zero, 12(sp)
-; RV32I-NEXT: sw zero, 8(sp)
-; RV32I-NEXT: sw zero, 4(sp)
; RV32I-NEXT: sw zero, 0(sp)
-; RV32I-NEXT: sw a0, 28(sp)
-; RV32I-NEXT: sw a5, 24(sp)
-; RV32I-NEXT: sw a4, 20(sp)
+; RV32I-NEXT: sw zero, 4(sp)
+; RV32I-NEXT: sw zero, 8(sp)
+; RV32I-NEXT: sw zero, 12(sp)
; RV32I-NEXT: sw a3, 16(sp)
+; RV32I-NEXT: sw a4, 20(sp)
+; RV32I-NEXT: sw a5, 24(sp)
+; RV32I-NEXT: sw a0, 28(sp)
; RV32I-NEXT: andi a0, a1, 12
; RV32I-NEXT: addi a3, sp, 16
; RV32I-NEXT: sub a3, a3, a0
@@ -1259,34 +1259,34 @@ define void @shl_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: srl a4, a4, a7
; RV32I-NEXT: or a4, a5, a4
; RV32I-NEXT: sll a0, a0, a1
-; RV32I-NEXT: sb a0, 0(a2)
; RV32I-NEXT: srli a5, a5, 24
+; RV32I-NEXT: srli a1, a4, 16
+; RV32I-NEXT: srli a7, a4, 8
+; RV32I-NEXT: sb a4, 8(a2)
+; RV32I-NEXT: sb a7, 9(a2)
+; RV32I-NEXT: sb a1, 10(a2)
; RV32I-NEXT: sb a5, 11(a2)
; RV32I-NEXT: srli a3, a3, 24
+; RV32I-NEXT: srli a1, t1, 16
+; RV32I-NEXT: srli a4, t1, 8
+; RV32I-NEXT: sb t1, 12(a2)
+; RV32I-NEXT: sb a4, 13(a2)
+; RV32I-NEXT: sb a1, 14(a2)
; RV32I-NEXT: sb a3, 15(a2)
; RV32I-NEXT: srli a1, a0, 16
+; RV32I-NEXT: srli a3, a0, 24
+; RV32I-NEXT: srli a4, a0, 8
+; RV32I-NEXT: sb a0, 0(a2)
+; RV32I-NEXT: sb a4, 1(a2)
; RV32I-NEXT: sb a1, 2(a2)
-; RV32I-NEXT: srli a1, a0, 24
-; RV32I-NEXT: sb a1, 3(a2)
-; RV32I-NEXT: srli a0, a0, 8
-; RV32I-NEXT: sb a0, 1(a2)
+; RV32I-NEXT: sb a3, 3(a2)
; RV32I-NEXT: srli a0, a6, 24
-; RV32I-NEXT: sb a0, 7(a2)
-; RV32I-NEXT: sb a4, 8(a2)
-; RV32I-NEXT: sb t1, 12(a2)
+; RV32I-NEXT: srli a1, t0, 16
+; RV32I-NEXT: srli a3, t0, 8
; RV32I-NEXT: sb t0, 4(a2)
-; RV32I-NEXT: srli a0, a4, 16
-; RV32I-NEXT: sb a0, 10(a2)
-; RV32I-NEXT: srli a4, a4, 8
-; RV32I-NEXT: sb a4, 9(a2)
-; RV32I-NEXT: srli a0, t1, 16
-; RV32I-NEXT: sb a0, 14(a2)
-; RV32I-NEXT: srli a0, t1, 8
-; RV32I-NEXT: sb a0, 13(a2)
-; RV32I-NEXT: srli a0, t0, 16
-; RV32I-NEXT: sb a0, 6(a2)
-; RV32I-NEXT: srli a0, t0, 8
-; RV32I-NEXT: sb a0, 5(a2)
+; RV32I-NEXT: sb a3, 5(a2)
+; RV32I-NEXT: sb a1, 6(a2)
+; RV32I-NEXT: sb a0, 7(a2)
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
%src = load i128, ptr %src.ptr, align 1
@@ -1382,36 +1382,36 @@ define void @shl_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw
; RV64I-NEXT: .LBB9_3:
; RV64I-NEXT: srai a4, a4, 63
; RV64I-NEXT: and a1, a4, a1
-; RV64I-NEXT: sb a1, 0(a2)
-; RV64I-NEXT: sb a0, 8(a2)
; RV64I-NEXT: srli a3, a1, 56
+; RV64I-NEXT: srli a4, a1, 48
+; RV64I-NEXT: srli a5, a1, 40
+; RV64I-NEXT: srli a6, a1, 32
+; RV64I-NEXT: sb a6, 4(a2)
+; RV64I-NEXT: sb a5, 5(a2)
+; RV64I-NEXT: sb a4, 6(a2)
; RV64I-NEXT: sb a3, 7(a2)
-; RV64I-NEXT: srli a3, a1, 48
-; RV64I-NEXT: sb a3, 6(a2)
-; RV64I-NEXT: srli a3, a1, 40
-; RV64I-NEXT: sb a3, 5(a2)
-; RV64I-NEXT: srli a3, a1, 32
-; RV64I-NEXT: sb a3, 4(a2)
; RV64I-NEXT: srli a3, a1, 24
+; RV64I-NEXT: srli a4, a1, 16
+; RV64I-NEXT: srli a5, a1, 8
+; RV64I-NEXT: sb a1, 0(a2)
+; RV64I-NEXT: sb a5, 1(a2)
+; RV64I-NEXT: sb a4, 2(a2)
; RV64I-NEXT: sb a3, 3(a2)
-; RV64I-NEXT: srli a3, a1, 16
-; RV64I-NEXT: sb a3, 2(a2)
-; RV64I-NEXT: srli a1, a1, 8
-; RV64I-NEXT: sb a1, 1(a2)
; RV64I-NEXT: srli a1, a0, 56
+; RV64I-NEXT: srli a3, a0, 48
+; RV64I-NEXT: srli a4, a0, 40
+; RV64I-NEXT: srli a5, a0, 32
+; RV64I-NEXT: sb a5, 12(a2)
+; RV64I-NEXT: sb a4, 13(a2)
+; RV64I-NEXT: sb a3, 14(a2)
; RV64I-NEXT: sb a1, 15(a2)
-; RV64I-NEXT: srli a1, a0, 48
-; RV64I-NEXT: sb a1, 14(a2)
-; RV64I-NEXT: srli a1, a0, 40
-; RV64I-NEXT: sb a1, 13(a2)
-; RV64I-NEXT: srli a1, a0, 32
-; RV64I-NEXT: sb a1, 12(a2)
; RV64I-NEXT: srli a1, a0, 24
+; RV64I-NEXT: srli a3, a0, 16
+; RV64I-NEXT: srli a4, a0, 8
+; RV64I-NEXT: sb a0, 8(a2)
+; RV64I-NEXT: sb a4, 9(a2)
+; RV64I-NEXT: sb a3, 10(a2)
; RV64I-NEXT: sb a1, 11(a2)
-; RV64I-NEXT: srli a1, a0, 16
-; RV64I-NEXT: sb a1, 10(a2)
-; RV64I-NEXT: srli a0, a0, 8
-; RV64I-NEXT: sb a0, 9(a2)
; RV64I-NEXT: ret
;
; RV32I-LABEL: shl_16bytes_wordOff:
@@ -1458,50 +1458,50 @@ define void @shl_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw
; RV32I-NEXT: or a0, a0, a6
; RV32I-NEXT: or a0, a0, a7
; RV32I-NEXT: lbu a1, 0(a1)
-; RV32I-NEXT: sw zero, 12(sp)
-; RV32I-NEXT: sw zero, 8(sp)
-; RV32I-NEXT: sw zero, 4(sp)
; RV32I-NEXT: sw zero, 0(sp)
-; RV32I-NEXT: sw a0, 28(sp)
-; RV32I-NEXT: sw a5, 24(sp)
-; RV32I-NEXT: sw a4, 20(sp)
+; RV32I-NEXT: sw zero, 4(sp)
+; RV32I-NEXT: sw zero, 8(sp)
+; RV32I-NEXT: sw zero, 12(sp)
; RV32I-NEXT: sw a3, 16(sp)
+; RV32I-NEXT: sw a4, 20(sp)
+; RV32I-NEXT: sw a5, 24(sp)
+; RV32I-NEXT: sw a0, 28(sp)
; RV32I-NEXT: slli a1, a1, 2
; RV32I-NEXT: andi a1, a1, 12
; RV32I-NEXT: addi a0, sp, 16
; RV32I-NEXT: sub a0, a0, a1
; RV32I-NEXT: lw a1, 8(a0)
-; RV32I-NEXT: lw a3, 12(a0)
+; RV32I-NEXT: lw a3, 4(a0)
; RV32I-NEXT: lw a4, 0(a0)
-; RV32I-NEXT: lw a0, 4(a0)
-; RV32I-NEXT: sb a1, 8(a2)
-; RV32I-NEXT: sb a3, 12(a2)
-; RV32I-NEXT: sb a4, 0(a2)
-; RV32I-NEXT: sb a0, 4(a2)
+; RV32I-NEXT: lw a0, 12(a0)
; RV32I-NEXT: srli a5, a1, 16
+; RV32I-NEXT: srli a6, a1, 24
+; RV32I-NEXT: srli a7, a1, 8
+; RV32I-NEXT: sb a1, 8(a2)
+; RV32I-NEXT: sb a7, 9(a2)
; RV32I-NEXT: sb a5, 10(a2)
-; RV32I-NEXT: srli a5, a1, 24
-; RV32I-NEXT: sb a5, 11(a2)
-; RV32I-NEXT: srli a1, a1, 8
-; RV32I-NEXT: sb a1, 9(a2)
-; RV32I-NEXT: srli a1, a3, 16
+; RV32I-NEXT: sb a6, 11(a2)
+; RV32I-NEXT: srli a1, a0, 16
+; RV32I-NEXT: srli a5, a0, 24
+; RV32I-NEXT: srli a6, a0, 8
+; RV32I-NEXT: sb a0, 12(a2)
+; RV32I-NEXT: sb a6, 13(a2)
; RV32I-NEXT: sb a1, 14(a2)
-; RV32I-NEXT: srli a1, a3, 24
-; RV32I-NEXT: sb a1, 15(a2)
-; RV32I-NEXT: srli a3, a3, 8
-; RV32I-NEXT: sb a3, 13(a2)
-; RV32I-NEXT: srli a1, a4, 16
-; RV32I-NEXT: sb a1, 2(a2)
+; RV32I-NEXT: sb a5, 15(a2)
+; RV32I-NEXT: srli a0, a4, 16
; RV32I-NEXT: srli a1, a4, 24
+; RV32I-NEXT: srli a5, a4, 8
+; RV32I-NEXT: sb a4, 0(a2)
+; RV32I-NEXT: sb a5, 1(a2)
+; RV32I-NEXT: sb a0, 2(a2)
; RV32I-NEXT: sb a1, 3(a2)
-; RV32I-NEXT: srli a4, a4, 8
-; RV32I-NEXT: sb a4, 1(a2)
-; RV32I-NEXT: srli a1, a0, 16
-; RV32I-NEXT: sb a1, 6(a2)
-; RV32I-NEXT: srli a1, a0, 24
+; RV32I-NEXT: srli a0, a3, 16
+; RV32I-NEXT: srli a1, a3, 24
+; RV32I-NEXT: srli a4, a3, 8
+; RV32I-NEXT: sb a3, 4(a2)
+; RV32I-NEXT: sb a4, 5(a2)
+; RV32I-NEXT: sb a0, 6(a2)
; RV32I-NEXT: sb a1, 7(a2)
-; RV32I-NEXT: srli a0, a0, 8
-; RV32I-NEXT: sb a0, 5(a2)
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
%src = load i128, ptr %src.ptr, align 1
@@ -1598,36 +1598,36 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: sll a3, a3, a4
; RV64I-NEXT: or a0, a0, a3
; RV64I-NEXT: .LBB10_3:
-; RV64I-NEXT: sb a1, 8(a2)
; RV64I-NEXT: srli a3, a1, 56
+; RV64I-NEXT: srli a4, a1, 48
+; RV64I-NEXT: srli a5, a1, 40
+; RV64I-NEXT: srli a6, a1, 32
+; RV64I-NEXT: sb a6, 12(a2)
+; RV64I-NEXT: sb a5, 13(a2)
+; RV64I-NEXT: sb a4, 14(a2)
; RV64I-NEXT: sb a3, 15(a2)
-; RV64I-NEXT: srli a3, a1, 48
-; RV64I-NEXT: sb a3, 14(a2)
-; RV64I-NEXT: srli a3, a1, 40
-; RV64I-NEXT: sb a3, 13(a2)
-; RV64I-NEXT: srli a3, a1, 32
-; RV64I-NEXT: sb a3, 12(a2)
; RV64I-NEXT: srli a3, a1, 24
+; RV64I-NEXT: srli a4, a1, 16
+; RV64I-NEXT: srli a5, a1, 8
+; RV64I-NEXT: sb a1, 8(a2)
+; RV64I-NEXT: sb a5, 9(a2)
+; RV64I-NEXT: sb a4, 10(a2)
; RV64I-NEXT: sb a3, 11(a2)
-; RV64I-NEXT: srli a3, a1, 16
-; RV64I-NEXT: sb a3, 10(a2)
-; RV64I-NEXT: srli a1, a1, 8
-; RV64I-NEXT: sb a1, 9(a2)
-; RV64I-NEXT: sb a0, 0(a2)
; RV64I-NEXT: srli a1, a0, 56
+; RV64I-NEXT: srli a3, a0, 48
+; RV64I-NEXT: srli a4, a0, 40
+; RV64I-NEXT: srli a5, a0, 32
+; RV64I-NEXT: sb a5, 4(a2)
+; RV64I-NEXT: sb a4, 5(a2)
+; RV64I-NEXT: sb a3, 6(a2)
; RV64I-NEXT: sb a1, 7(a2)
-; RV64I-NEXT: srli a1, a0, 48
-; RV64I-NEXT: sb a1, 6(a2)
-; RV64I-NEXT: srli a1, a0, 40
-; RV64I-NEXT: sb a1, 5(a2)
-; RV64I-NEXT: srli a1, a0, 32
-; RV64I-NEXT: sb a1, 4(a2)
; RV64I-NEXT: srli a1, a0, 24
+; RV64I-NEXT: srli a3, a0, 16
+; RV64I-NEXT: srli a4, a0, 8
+; RV64I-NEXT: sb a0, 0(a2)
+; RV64I-NEXT: sb a4, 1(a2)
+; RV64I-NEXT: sb a3, 2(a2)
; RV64I-NEXT: sb a1, 3(a2)
-; RV64I-NEXT: srli a1, a0, 16
-; RV64I-NEXT: sb a1, 2(a2)
-; RV64I-NEXT: srli a0, a0, 8
-; RV64I-NEXT: sb a0, 1(a2)
; RV64I-NEXT: ret
;
; RV32I-LABEL: ashr_16bytes:
@@ -1684,14 +1684,14 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: or a1, a1, a7
; RV32I-NEXT: or a1, a1, t0
; RV32I-NEXT: srai a0, a0, 31
-; RV32I-NEXT: sw a0, 28(sp)
-; RV32I-NEXT: sw a0, 24(sp)
-; RV32I-NEXT: sw a0, 20(sp)
; RV32I-NEXT: sw a0, 16(sp)
-; RV32I-NEXT: sw a6, 12(sp)
-; RV32I-NEXT: sw a5, 8(sp)
-; RV32I-NEXT: sw a4, 4(sp)
+; RV32I-NEXT: sw a0, 20(sp)
+; RV32I-NEXT: sw a0, 24(sp)
+; RV32I-NEXT: sw a0, 28(sp)
; RV32I-NEXT: sw a3, 0(sp)
+; RV32I-NEXT: sw a4, 4(sp)
+; RV32I-NEXT: sw a5, 8(sp)
+; RV32I-NEXT: sw a6, 12(sp)
; RV32I-NEXT: andi a0, a1, 12
; RV32I-NEXT: mv a3, sp
; RV32I-NEXT: add a0, a3, a0
@@ -1715,34 +1715,34 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: sll a7, t1, a7
; RV32I-NEXT: or a7, a5, a7
; RV32I-NEXT: sra a0, a0, a1
-; RV32I-NEXT: sb a5, 8(a2)
-; RV32I-NEXT: sb a0, 12(a2)
-; RV32I-NEXT: sb a3, 0(a2)
-; RV32I-NEXT: sb a6, 4(a2)
; RV32I-NEXT: srli a1, a0, 16
+; RV32I-NEXT: srli t1, a0, 24
+; RV32I-NEXT: srli t2, a0, 8
+; RV32I-NEXT: sb a0, 12(a2)
+; RV32I-NEXT: sb t2, 13(a2)
; RV32I-NEXT: sb a1, 14(a2)
-; RV32I-NEXT: srli a1, a0, 24
-; RV32I-NEXT: sb a1, 15(a2)
-; RV32I-NEXT: srli a0, a0, 8
-; RV32I-NEXT: sb a0, 13(a2)
+; RV32I-NEXT: sb t1, 15(a2)
; RV32I-NEXT: srli a0, a7, 16
+; RV32I-NEXT: srli a1, a7, 24
+; RV32I-NEXT: srli a7, a7, 8
+; RV32I-NEXT: sb a5, 8(a2)
+; RV32I-NEXT: sb a7, 9(a2)
; RV32I-NEXT: sb a0, 10(a2)
-; RV32I-NEXT: srli a0, a7, 24
-; RV32I-NEXT: sb a0, 11(a2)
-; RV32I-NEXT: srli a0, a7, 8
-; RV32I-NEXT: sb a0, 9(a2)
+; RV32I-NEXT: sb a1, 11(a2)
; RV32I-NEXT: srli a0, a4, 16
-; RV32I-NEXT: sb a0, 2(a2)
-; RV32I-NEXT: srli a0, a4, 24
-; RV32I-NEXT: sb a0, 3(a2)
+; RV32I-NEXT: srli a1, a4, 24
; RV32I-NEXT: srli a4, a4, 8
+; RV32I-NEXT: sb a3, 0(a2)
; RV32I-NEXT: sb a4, 1(a2)
+; RV32I-NEXT: sb a0, 2(a2)
+; RV32I-NEXT: sb a1, 3(a2)
; RV32I-NEXT: srli a0, t0, 16
+; RV32I-NEXT: srli a1, t0, 24
+; RV32I-NEXT: srli a3, t0, 8
+; RV32I-NEXT: sb a6, 4(a2)
+; RV32I-NEXT: sb a3, 5(a2)
; RV32I-NEXT: sb a0, 6(a2)
-; RV32I-NEXT: srli a0, t0, 24
-; RV32I-NEXT: sb a0, 7(a2)
-; RV32I-NEXT: srli a0, t0, 8
-; RV32I-NEXT: sb a0, 5(a2)
+; RV32I-NEXT: sb a1, 7(a2)
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
%src = load i128, ptr %src.ptr, align 1
@@ -1838,36 +1838,36 @@ define void @ashr_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun
; RV64I-NEXT: sll a3, a3, a4
; RV64I-NEXT: or a0, a0, a3
; RV64I-NEXT: .LBB11_3:
-; RV64I-NEXT: sb a1, 8(a2)
; RV64I-NEXT: srli a3, a1, 56
+; RV64I-NEXT: srli a4, a1, 48
+; RV64I-NEXT: srli a5, a1, 40
+; RV64I-NEXT: srli a6, a1, 32
+; RV64I-NEXT: sb a6, 12(a2)
+; RV64I-NEXT: sb a5, 13(a2)
+; RV64I-NEXT: sb a4, 14(a2)
; RV64I-NEXT: sb a3, 15(a2)
-; RV64I-NEXT: srli a3, a1, 48
-; RV64I-NEXT: sb a3, 14(a2)
-; RV64I-NEXT: srli a3, a1, 40
-; RV64I-NEXT: sb a3, 13(a2)
-; RV64I-NEXT: srli a3, a1, 32
-; RV64I-NEXT: sb a3, 12(a2)
; RV64I-NEXT: srli a3, a1, 24
+; RV64I-NEXT: srli a4, a1, 16
+; RV64I-NEXT: srli a5, a1, 8
+; RV64I-NEXT: sb a1, 8(a2)
+; RV64I-NEXT: sb a5, 9(a2)
+; RV64I-NEXT: sb a4, 10(a2)
; RV64I-NEXT: sb a3, 11(a2)
-; RV64I-NEXT: srli a3, a1, 16
-; RV64I-NEXT: sb a3, 10(a2)
-; RV64I-NEXT: srli a1, a1, 8
-; RV64I-NEXT: sb a1, 9(a2)
-; RV64I-NEXT: sb a0, 0(a2)
; RV64I-NEXT: srli a1, a0, 56
+; RV64I-NEXT: srli a3, a0, 48
+; RV64I-NEXT: srli a4, a0, 40
+; RV64I-NEXT: srli a5, a0, 32
+; RV64I-NEXT: sb a5, 4(a2)
+; RV64I-NEXT: sb a4, 5(a2)
+; RV64I-NEXT: sb a3, 6(a2)
; RV64I-NEXT: sb a1, 7(a2)
-; RV64I-NEXT: srli a1, a0, 48
-; RV64I-NEXT: sb a1, 6(a2)
-; RV64I-NEXT: srli a1, a0, 40
-; RV64I-NEXT: sb a1, 5(a2)
-; RV64I-NEXT: srli a1, a0, 32
-; RV64I-NEXT: sb a1, 4(a2)
; RV64I-NEXT: srli a1, a0, 24
+; RV64I-NEXT: srli a3, a0, 16
+; RV64I-NEXT: srli a4, a0, 8
+; RV64I-NEXT: sb a0, 0(a2)
+; RV64I-NEXT: sb a4, 1(a2)
+; RV64I-NEXT: sb a3, 2(a2)
; RV64I-NEXT: sb a1, 3(a2)
-; RV64I-NEXT: srli a1, a0, 16
-; RV64I-NEXT: sb a1, 2(a2)
-; RV64I-NEXT: srli a0, a0, 8
-; RV64I-NEXT: sb a0, 1(a2)
; RV64I-NEXT: ret
;
; RV32I-LABEL: ashr_16bytes_wordOff:
@@ -1915,50 +1915,50 @@ define void @ashr_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun
; RV32I-NEXT: or a6, a6, a7
; RV32I-NEXT: lbu a1, 0(a1)
; RV32I-NEXT: srai a0, a0, 31
-; RV32I-NEXT: sw a0, 28(sp)
-; RV32I-NEXT: sw a0, 24(sp)
-; RV32I-NEXT: sw a0, 20(sp)
; RV32I-NEXT: sw a0, 16(sp)
-; RV32I-NEXT: sw a6, 12(sp)
-; RV32I-NEXT: sw a5, 8(sp)
-; RV32I-NEXT: sw a4, 4(sp)
+; RV32I-NEXT: sw a0, 20(sp)
+; RV32I-NEXT: sw a0, 24(sp)
+; RV32I-NEXT: sw a0, 28(sp)
; RV32I-NEXT: sw a3, 0(sp)
+; RV32I-NEXT: sw a4, 4(sp)
+; RV32I-NEXT: sw a5, 8(sp)
+; RV32I-NEXT: sw a6, 12(sp)
; RV32I-NEXT: slli a1, a1, 2
; RV32I-NEXT: andi a1, a1, 12
; RV32I-NEXT: mv a0, sp
; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: lw a1, 8(a0)
-; RV32I-NEXT: lw a3, 12(a0)
+; RV32I-NEXT: lw a3, 4(a0)
; RV32I-NEXT: lw a4, 0(a0)
-; RV32I-NEXT: lw a0, 4(a0)
-; RV32I-NEXT: sb a1, 8(a2)
-; RV32I-NEXT: sb a3, 12(a2)
-; RV32I-NEXT: sb a4, 0(a2)
-; RV32I-NEXT: sb a0, 4(a2)
+; RV32I-NEXT: lw a0, 12(a0)
; RV32I-NEXT: srli a5, a1, 16
+; RV32I-NEXT: srli a6, a1, 24
+; RV32I-NEXT: srli a7, a1, 8
+; RV32I-NEXT: sb a1, 8(a2)
+; RV32I-NEXT: sb a7, 9(a2)
; RV32I-NEXT: sb a5, 10(a2)
-; RV32I-NEXT: srli a5, a1, 24
-; RV32I-NEXT: sb a5, 11(a2)
-; RV32I-NEXT: srli a1, a1, 8
-; RV32I-NEXT: sb a1, 9(a2)
-; RV32I-NEXT: srli a1, a3, 16
+; RV32I-NEXT: sb a6, 11(a2)
+; RV32I-NEXT: srli a1, a0, 16
+; RV32I-NEXT: srli a5, a0, 24
+; RV32I-NEXT: srli a6, a0, 8
+; RV32I-NEXT: sb a0, 12(a2)
+; RV32I-NEXT: sb a6, 13(a2)
; RV32I-NEXT: sb a1, 14(a2)
-; RV32I-NEXT: srli a1, a3, 24
-; RV32I-NEXT: sb a1, 15(a2)
-; RV32I-NEXT: srli a3, a3, 8
-; RV32I-NEXT: sb a3, 13(a2)
-; RV32I-NEXT: srli a1, a4, 16
-; RV32I-NEXT: sb a1, 2(a2)
+; RV32I-NEXT: sb a5, 15(a2)
+; RV32I-NEXT: srli a0, a4, 16
; RV32I-NEXT: srli a1, a4, 24
+; RV32I-NEXT: srli a5, a4, 8
+; RV32I-NEXT: sb a4, 0(a2)
+; RV32I-NEXT: sb a5, 1(a2)
+; RV32I-NEXT: sb a0, 2(a2)
; RV32I-NEXT: sb a1, 3(a2)
-; RV32I-NEXT: srli a4, a4, 8
-; RV32I-NEXT: sb a4, 1(a2)
-; RV32I-NEXT: srli a1, a0, 16
-; RV32I-NEXT: sb a1, 6(a2)
-; RV32I-NEXT: srli a1, a0, 24
+; RV32I-NEXT: srli a0, a3, 16
+; RV32I-NEXT: srli a1, a3, 24
+; RV32I-NEXT: srli a4, a3, 8
+; RV32I-NEXT: sb a3, 4(a2)
+; RV32I-NEXT: sb a4, 5(a2)
+; RV32I-NEXT: sb a0, 6(a2)
; RV32I-NEXT: sb a1, 7(a2)
-; RV32I-NEXT: srli a0, a0, 8
-; RV32I-NEXT: sb a0, 5(a2)
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
%src = load i128, ptr %src.ptr, align 1
@@ -2083,97 +2083,97 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: or a1, a1, t0
; RV64I-NEXT: slli a1, a1, 32
; RV64I-NEXT: or a1, a1, a6
-; RV64I-NEXT: sd zero, 56(sp)
-; RV64I-NEXT: sd zero, 48(sp)
-; RV64I-NEXT: sd zero, 40(sp)
; RV64I-NEXT: sd zero, 32(sp)
-; RV64I-NEXT: sd a0, 24(sp)
-; RV64I-NEXT: sd a5, 16(sp)
-; RV64I-NEXT: sd a4, 8(sp)
+; RV64I-NEXT: sd zero, 40(sp)
+; RV64I-NEXT: sd zero, 48(sp)
+; RV64I-NEXT: sd zero, 56(sp)
; RV64I-NEXT: sd a3, 0(sp)
+; RV64I-NEXT: sd a4, 8(sp)
+; RV64I-NEXT: sd a5, 16(sp)
+; RV64I-NEXT: sd a0, 24(sp)
; RV64I-NEXT: andi a0, a1, 24
; RV64I-NEXT: mv a3, sp
; RV64I-NEXT: add a0, a3, a0
; RV64I-NEXT: ld a3, 0(a0)
; RV64I-NEXT: ld a4, 8(a0)
-; RV64I-NEXT: slli a1, a1, 3
-; RV64I-NEXT: ld a5, 16(a0)
-; RV64I-NEXT: ld a6, 24(a0)
-; RV64I-NEXT: srl a7, a4, a1
-; RV64I-NEXT: andi a0, a1, 56
-; RV64I-NEXT: xori t0, a0, 63
-; RV64I-NEXT: slli a0, a5, 1
-; RV64I-NEXT: sll a0, a0, t0
-; RV64I-NEXT: or a0, a7, a0
-; RV64I-NEXT: srl a3, a3, a1
+; RV64I-NEXT: slli a5, a1, 3
+; RV64I-NEXT: ld a6, 16(a0)
+; RV64I-NEXT: ld a7, 24(a0)
+; RV64I-NEXT: srl a0, a4, a5
+; RV64I-NEXT: andi a1, a5, 56
+; RV64I-NEXT: xori t0, a1, 63
+; RV64I-NEXT: slli a1, a6, 1
+; RV64I-NEXT: sll a1, a1, t0
+; RV64I-NEXT: or a1, a0, a1
+; RV64I-NEXT: srl a3, a3, a5
; RV64I-NEXT: slli a4, a4, 1
; RV64I-NEXT: sll a4, a4, t0
; RV64I-NEXT: or a4, a3, a4
-; RV64I-NEXT: srl a5, a5, a1
-; RV64I-NEXT: slli t1, a6, 1
+; RV64I-NEXT: srl a6, a6, a5
+; RV64I-NEXT: slli t1, a7, 1
; RV64I-NEXT: sll t0, t1, t0
-; RV64I-NEXT: or t0, a5, t0
-; RV64I-NEXT: srl a1, a6, a1
-; RV64I-NEXT: sb a5, 16(a2)
-; RV64I-NEXT: sb a1, 24(a2)
+; RV64I-NEXT: or t0, a6, t0
+; RV64I-NEXT: srl a5, a7, a5
+; RV64I-NEXT: srli a7, a5, 56
+; RV64I-NEXT: srli t1, a5, 48
+; RV64I-NEXT: srli t2, a5, 40
+; RV64I-NEXT: srli t3, a5, 32
+; RV64I-NEXT: sb t3, 28(a2)
+; RV64I-NEXT: sb t2, 29(a2)
+; RV64I-NEXT: sb t1, 30(a2)
+; RV64I-NEXT: sb a7, 31(a2)
+; RV64I-NEXT: srli a7, a5, 24
+; RV64I-NEXT: srli t1, a5, 16
+; RV64I-NEXT: srli t2, a5, 8
+; RV64I-NEXT: sb a5, 24(a2)
+; RV64I-NEXT: sb t2, 25(a2)
+; RV64I-NEXT: sb t1, 26(a2)
+; RV64I-NEXT: sb a7, 27(a2)
+; RV64I-NEXT: srli a5, t0, 56
+; RV64I-NEXT: srli a7, t0, 48
+; RV64I-NEXT: srli t1, t0, 40
+; RV64I-NEXT: srli t2, t0, 32
+; RV64I-NEXT: sb t2, 20(a2)
+; RV64I-NEXT: sb t1, 21(a2)
+; RV64I-NEXT: sb a7, 22(a2)
+; RV64I-NEXT: sb a5, 23(a2)
+; RV64I-NEXT: srli a5, t0, 24
+; RV64I-NEXT: srli a7, t0, 16
+; RV64I-NEXT: srli t0, t0, 8
+; RV64I-NEXT: sb a6, 16(a2)
+; RV64I-NEXT: sb t0, 17(a2)
+; RV64I-NEXT: sb a7, 18(a2)
+; RV64I-NEXT: sb a5, 19(a2)
+; RV64I-NEXT: srli a5, a4, 56
+; RV64I-NEXT: srli a6, a4, 48
+; RV64I-NEXT: srli a7, a4, 40
+; RV64I-NEXT: srli t0, a4, 32
+; RV64I-NEXT: sb t0, 4(a2)
+; RV64I-NEXT: sb a7, 5(a2)
+; RV64I-NEXT: sb a6, 6(a2)
+; RV64I-NEXT: sb a5, 7(a2)
+; RV64I-NEXT: srli a5, a4, 24
+; RV64I-NEXT: srli a6, a4, 16
+; RV64I-NEXT: srli a4, a4, 8
; RV64I-NEXT: sb a3, 0(a2)
-; RV64I-NEXT: sb a7, 8(a2)
+; RV64I-NEXT: sb a4, 1(a2)
+; RV64I-NEXT: sb a6, 2(a2)
+; RV64I-NEXT: sb a5, 3(a2)
; RV64I-NEXT: srli a3, a1, 56
-; RV64I-NEXT: sb a3, 31(a2)
-; RV64I-NEXT: srli a3, a1, 48
-; RV64I-NEXT: sb a3, 30(a2)
-; RV64I-NEXT: srli a3, a1, 40
-; RV64I-NEXT: sb a3, 29(a2)
-; RV64I-NEXT: srli a3, a1, 32
-; RV64I-NEXT: sb a3, 28(a2)
+; RV64I-NEXT: srli a4, a1, 48
+; RV64I-NEXT: srli a5, a1, 40
+; RV64I-NEXT: srli a6, a1, 32
+; RV64I-NEXT: sb a6, 12(a2)
+; RV64I-NEXT: sb a5, 13(a2)
+; RV64I-NEXT: sb a4, 14(a2)
+; RV64I-NEXT: sb a3, 15(a2)
; RV64I-NEXT: srli a3, a1, 24
-; RV64I-NEXT: sb a3, 27(a2)
-; RV64I-NEXT: srli a3, a1, 16
-; RV64I-NEXT: sb a3, 26(a2)
+; RV64I-NEXT: srli a4, a1, 16
; RV64I-NEXT: srli a1, a1, 8
-; RV64I-NEXT: sb a1, 25(a2)
-; RV64I-NEXT: srli a1, t0, 56
-; RV64I-NEXT: sb a1, 23(a2)
-; RV64I-NEXT: srli a1, t0, 48
-; RV64I-NEXT: sb a1, 22(a2)
-; RV64I-NEXT: srli a1, t0, 40
-; RV64I-NEXT: sb a1, 21(a2)
-; RV64I-NEXT: srli a1, t0, 32
-; RV64I-NEXT: sb a1, 20(a2)
-; RV64I-NEXT: srli a1, t0, 24
-; RV64I-NEXT: sb a1, 19(a2)
-; RV64I-NEXT: srli a1, t0, 16
-; RV64I-NEXT: sb a1, 18(a2)
-; RV64I-NEXT: srli a1, t0, 8
-; RV64I-NEXT: sb a1, 17(a2)
-; RV64I-NEXT: srli a1, a4, 56
-; RV64I-NEXT: sb a1, 7(a2)
-; RV64I-NEXT: srli a1, a4, 48
-; RV64I-NEXT: sb a1, 6(a2)
-; RV64I-NEXT: srli a1, a4, 40
-; RV64I-NEXT: sb a1, 5(a2)
-; RV64I-NEXT: srli a1, a4, 32
-; RV64I-NEXT: sb a1, 4(a2)
-; RV64I-NEXT: srli a1, a4, 24
-; RV64I-NEXT: sb a1, 3(a2)
-; RV64I-NEXT: srli a1, a4, 16
-; RV64I-NEXT: sb a1, 2(a2)
-; RV64I-NEXT: srli a4, a4, 8
-; RV64I-NEXT: sb a4, 1(a2)
-; RV64I-NEXT: srli a1, a0, 56
-; RV64I-NEXT: sb a1, 15(a2)
-; RV64I-NEXT: srli a1, a0, 48
-; RV64I-NEXT: sb a1, 14(a2)
-; RV64I-NEXT: srli a1, a0, 40
-; RV64I-NEXT: sb a1, 13(a2)
-; RV64I-NEXT: srli a1, a0, 32
-; RV64I-NEXT: sb a1, 12(a2)
-; RV64I-NEXT: srli a1, a0, 24
-; RV64I-NEXT: sb a1, 11(a2)
-; RV64I-NEXT: srli a1, a0, 16
-; RV64I-NEXT: sb a1, 10(a2)
-; RV64I-NEXT: srli a0, a0, 8
-; RV64I-NEXT: sb a0, 9(a2)
+; RV64I-NEXT: sb a0, 8(a2)
+; RV64I-NEXT: sb a1, 9(a2)
+; RV64I-NEXT: sb a4, 10(a2)
+; RV64I-NEXT: sb a3, 11(a2)
; RV64I-NEXT: addi sp, sp, 64
; RV64I-NEXT: ret
;
@@ -2183,6 +2183,7 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: sw s0, 76(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 72(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 68(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 64(sp) # 4-byte Folded Spill
; RV32I-NEXT: lbu a3, 1(a0)
; RV32I-NEXT: lbu a4, 0(a0)
; RV32I-NEXT: lbu a5, 2(a0)
@@ -2273,124 +2274,125 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: slli a1, a1, 24
; RV32I-NEXT: or a1, a1, t3
; RV32I-NEXT: or a1, a1, t2
-; RV32I-NEXT: sw zero, 60(sp)
-; RV32I-NEXT: sw zero, 56(sp)
-; RV32I-NEXT: sw zero, 52(sp)
; RV32I-NEXT: sw zero, 48(sp)
-; RV32I-NEXT: sw zero, 44(sp)
-; RV32I-NEXT: sw zero, 40(sp)
-; RV32I-NEXT: sw zero, 36(sp)
+; RV32I-NEXT: sw zero, 52(sp)
+; RV32I-NEXT: sw zero, 56(sp)
+; RV32I-NEXT: sw zero, 60(sp)
; RV32I-NEXT: sw zero, 32(sp)
-; RV32I-NEXT: sw a0, 28(sp)
-; RV32I-NEXT: sw t1, 24(sp)
-; RV32I-NEXT: sw t0, 20(sp)
+; RV32I-NEXT: sw zero, 36(sp)
+; RV32I-NEXT: sw zero, 40(sp)
+; RV32I-NEXT: sw zero, 44(sp)
; RV32I-NEXT: sw a7, 16(sp)
-; RV32I-NEXT: sw a6, 12(sp)
-; RV32I-NEXT: sw a5, 8(sp)
-; RV32I-NEXT: sw a4, 4(sp)
+; RV32I-NEXT: sw t0, 20(sp)
+; RV32I-NEXT: sw t1, 24(sp)
+; RV32I-NEXT: sw a0, 28(sp)
; RV32I-NEXT: sw a3, 0(sp)
+; RV32I-NEXT: sw a4, 4(sp)
+; RV32I-NEXT: sw a5, 8(sp)
+; RV32I-NEXT: sw a6, 12(sp)
; RV32I-NEXT: andi a0, a1, 28
; RV32I-NEXT: mv a3, sp
-; RV32I-NEXT: add a3, a3, a0
-; RV32I-NEXT: lw a6, 0(a3)
-; RV32I-NEXT: lw a7, 4(a3)
-; RV32I-NEXT: slli a5, a1, 3
-; RV32I-NEXT: lw t0, 8(a3)
-; RV32I-NEXT: lw t1, 12(a3)
-; RV32I-NEXT: srl a4, a7, a5
-; RV32I-NEXT: andi a0, a5, 24
-; RV32I-NEXT: xori t2, a0, 31
-; RV32I-NEXT: slli a0, t0, 1
-; RV32I-NEXT: sll a0, a0, t2
-; RV32I-NEXT: or a0, a4, a0
-; RV32I-NEXT: srl a6, a6, a5
-; RV32I-NEXT: slli a7, a7, 1
-; RV32I-NEXT: sll a1, a7, t2
-; RV32I-NEXT: or a1, a6, a1
-; RV32I-NEXT: srl a7, t1, a5
-; RV32I-NEXT: lw t3, 16(a3)
-; RV32I-NEXT: lw t4, 20(a3)
-; RV32I-NEXT: lw t5, 24(a3)
-; RV32I-NEXT: lw t6, 28(a3)
-; RV32I-NEXT: slli a3, t3, 1
-; RV32I-NEXT: sll a3, a3, t2
-; RV32I-NEXT: or a3, a7, a3
-; RV32I-NEXT: srl t0, t0, a5
-; RV32I-NEXT: slli t1, t1, 1
-; RV32I-NEXT: sll t1, t1, t2
-; RV32I-NEXT: or t1, t0, t1
-; RV32I-NEXT: srl s0, t4, a5
+; RV32I-NEXT: add a6, a3, a0
+; RV32I-NEXT: lw a3, 0(a6)
+; RV32I-NEXT: lw a4, 4(a6)
+; RV32I-NEXT: slli t1, a1, 3
+; RV32I-NEXT: lw a7, 8(a6)
+; RV32I-NEXT: lw t0, 12(a6)
+; RV32I-NEXT: srl a0, a4, t1
+; RV32I-NEXT: andi a1, t1, 24
+; RV32I-NEXT: xori t2, a1, 31
+; RV32I-NEXT: slli a1, a7, 1
+; RV32I-NEXT: sll a1, a1, t2
+; RV32I-NEXT: or a1, a0, a1
+; RV32I-NEXT: srl a3, a3, t1
+; RV32I-NEXT: slli a4, a4, 1
+; RV32I-NEXT: sll a4, a4, t2
+; RV32I-NEXT: or a4, a3, a4
+; RV32I-NEXT: srl a5, t0, t1
+; RV32I-NEXT: lw t3, 16(a6)
+; RV32I-NEXT: lw t4, 20(a6)
+; RV32I-NEXT: lw t5, 24(a6)
+; RV32I-NEXT: lw t6, 28(a6)
+; RV32I-NEXT: slli a6, t3, 1
+; RV32I-NEXT: sll a6, a6, t2
+; RV32I-NEXT: or a6, a5, a6
+; RV32I-NEXT: srl a7, a7, t1
+; RV32I-NEXT: slli t0, t0, 1
+; RV32I-NEXT: sll t0, t0, t2
+; RV32I-NEXT: or t0, a7, t0
+; RV32I-NEXT: srl s0, t4, t1
; RV32I-NEXT: slli s1, t5, 1
; RV32I-NEXT: sll s1, s1, t2
; RV32I-NEXT: or s1, s0, s1
-; RV32I-NEXT: srl t3, t3, a5
+; RV32I-NEXT: srl t3, t3, t1
; RV32I-NEXT: slli t4, t4, 1
; RV32I-NEXT: sll t4, t4, t2
; RV32I-NEXT: or t4, t3, t4
-; RV32I-NEXT: srl t5, t5, a5
+; RV32I-NEXT: srl t5, t5, t1
; RV32I-NEXT: slli s2, t6, 1
; RV32I-NEXT: sll t2, s2, t2
; RV32I-NEXT: or t2, t5, t2
-; RV32I-NEXT: srl a5, t6, a5
+; RV32I-NEXT: srl t1, t6, t1
+; RV32I-NEXT: srli t6, t1, 24
+; RV32I-NEXT: srli s2, t1, 16
+; RV32I-NEXT: srli s3, t1, 8
+; RV32I-NEXT: sb t1, 28(a2)
+; RV32I-NEXT: sb s3, 29(a2)
+; RV32I-NEXT: sb s2, 30(a2)
+; RV32I-NEXT: sb t6, 31(a2)
+; RV32I-NEXT: srli t1, t2, 24
+; RV32I-NEXT: srli t6, t2, 16
+; RV32I-NEXT: srli t2, t2, 8
; RV32I-NEXT: sb t5, 24(a2)
-; RV32I-NEXT: sb a5, 28(a2)
+; RV32I-NEXT: sb t2, 25(a2)
+; RV32I-NEXT: sb t6, 26(a2)
+; RV32I-NEXT: sb t1, 27(a2)
+; RV32I-NEXT: srli t1, t4, 24
+; RV32I-NEXT: srli t2, t4, 16
+; RV32I-NEXT: srli t4, t4, 8
; RV32I-NEXT: sb t3, 16(a2)
-; RV32I-NEXT: sb s0, 20(a2)
-; RV32I-NEXT: sb t0, 8(a2)
-; RV32I-NEXT: sb a7, 12(a2)
-; RV32I-NEXT: sb a6, 0(a2)
-; RV32I-NEXT: sb a4, 4(a2)
-; RV32I-NEXT: srli a4, a5, 24
-; RV32I-NEXT: sb a4, 31(a2)
-; RV32I-NEXT: srli a4, a5, 16
-; RV32I-NEXT: sb a4, 30(a2)
-; RV32I-NEXT: srli a5, a5, 8
-; RV32I-NEXT: sb a5, 29(a2)
-; RV32I-NEXT: srli a4, t2, 24
-; RV32I-NEXT: sb a4, 27(a2)
-; RV32I-NEXT: srli a4, t2, 16
-; RV32I-NEXT: sb a4, 26(a2)
-; RV32I-NEXT: srli a4, t2, 8
-; RV32I-NEXT: sb a4, 25(a2)
-; RV32I-NEXT: srli a4, t4, 24
-; RV32I-NEXT: sb a4, 19(a2)
-; RV32I-NEXT: srli a4, t4, 16
-; RV32I-NEXT: sb a4, 18(a2)
-; RV32I-NEXT: srli a4, t4, 8
-; RV32I-NEXT: sb a4, 17(a2)
-; RV32I-NEXT: srli a4, s1, 24
-; RV32I-NEXT: sb a4, 23(a2)
-; RV32I-NEXT: srli a4, s1, 16
-; RV32I-NEXT: sb a4, 22(a2)
+; RV32I-NEXT: sb t4, 17(a2)
+; RV32I-NEXT: sb t2, 18(a2)
+; RV32I-NEXT: sb t1, 19(a2)
+; RV32I-NEXT: srli t1, s1, 24
+; RV32I-NEXT: srli t2, s1, 16
; RV32I-NEXT: srli s1, s1, 8
+; RV32I-NEXT: sb s0, 20(a2)
; RV32I-NEXT: sb s1, 21(a2)
-; RV32I-NEXT: srli a4, t1, 24
-; RV32I-NEXT: sb a4, 11(a2)
-; RV32I-NEXT: srli a4, t1, 16
-; RV32I-NEXT: sb a4, 10(a2)
-; RV32I-NEXT: srli a4, t1, 8
-; RV32I-NEXT: sb a4, 9(a2)
-; RV32I-NEXT: srli a4, a3, 24
-; RV32I-NEXT: sb a4, 15(a2)
-; RV32I-NEXT: srli a4, a3, 16
-; RV32I-NEXT: sb a4, 14(a2)
-; RV32I-NEXT: srli a3, a3, 8
-; RV32I-NEXT: sb a3, 13(a2)
+; RV32I-NEXT: sb t2, 22(a2)
+; RV32I-NEXT: sb t1, 23(a2)
+; RV32I-NEXT: srli t1, t0, 24
+; RV32I-NEXT: srli t2, t0, 16
+; RV32I-NEXT: srli t0, t0, 8
+; RV32I-NEXT: sb a7, 8(a2)
+; RV32I-NEXT: sb t0, 9(a2)
+; RV32I-NEXT: sb t2, 10(a2)
+; RV32I-NEXT: sb t1, 11(a2)
+; RV32I-NEXT: srli a7, a6, 24
+; RV32I-NEXT: srli t0, a6, 16
+; RV32I-NEXT: srli a6, a6, 8
+; RV32I-NEXT: sb a5, 12(a2)
+; RV32I-NEXT: sb a6, 13(a2)
+; RV32I-NEXT: sb t0, 14(a2)
+; RV32I-NEXT: sb a7, 15(a2)
+; RV32I-NEXT: srli a5, a4, 24
+; RV32I-NEXT: srli a6, a4, 16
+; RV32I-NEXT: srli a4, a4, 8
+; RV32I-NEXT: sb a3, 0(a2)
+; RV32I-NEXT: sb a4, 1(a2)
+; RV32I-NEXT: sb a6, 2(a2)
+; RV32I-NEXT: sb a5, 3(a2)
; RV32I-NEXT: srli a3, a1, 24
-; RV32I-NEXT: sb a3, 3(a2)
-; RV32I-NEXT: srli a3, a1, 16
-; RV32I-NEXT: sb a3, 2(a2)
+; RV32I-NEXT: srli a4, a1, 16
; RV32I-NEXT: srli a1, a1, 8
-; RV32I-NEXT: sb a1, 1(a2)
-; RV32I-NEXT: srli a1, a0, 24
-; RV32I-NEXT: sb a1, 7(a2)
-; RV32I-NEXT: srli a1, a0, 16
-; RV32I-NEXT: sb a1, 6(a2)
-; RV32I-NEXT: srli a0, a0, 8
-; RV32I-NEXT: sb a0, 5(a2)
+; RV32I-NEXT: sb a0, 4(a2)
+; RV32I-NEXT: sb a1, 5(a2)
+; RV32I-NEXT: sb a4, 6(a2)
+; RV32I-NEXT: sb a3, 7(a2)
; RV32I-NEXT: lw s0, 76(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 72(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s2, 68(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 64(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 80
; RV32I-NEXT: ret
%src = load i256, ptr %src.ptr, align 1
@@ -2515,98 +2517,98 @@ define void @lshr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun
; RV64I-NEXT: or a1, a1, t0
; RV64I-NEXT: slli a1, a1, 32
; RV64I-NEXT: or a1, a1, a6
-; RV64I-NEXT: sd zero, 56(sp)
-; RV64I-NEXT: sd zero, 48(sp)
-; RV64I-NEXT: sd zero, 40(sp)
; RV64I-NEXT: sd zero, 32(sp)
-; RV64I-NEXT: sd a0, 24(sp)
-; RV64I-NEXT: sd a5, 16(sp)
-; RV64I-NEXT: sd a4, 8(sp)
+; RV64I-NEXT: sd zero, 40(sp)
+; RV64I-NEXT: sd zero, 48(sp)
+; RV64I-NEXT: sd zero, 56(sp)
; RV64I-NEXT: sd a3, 0(sp)
+; RV64I-NEXT: sd a4, 8(sp)
+; RV64I-NEXT: sd a5, 16(sp)
+; RV64I-NEXT: sd a0, 24(sp)
; RV64I-NEXT: slli a0, a1, 2
; RV64I-NEXT: andi a0, a0, 24
; RV64I-NEXT: mv a3, sp
; RV64I-NEXT: add a0, a3, a0
-; RV64I-NEXT: ld a3, 0(a0)
-; RV64I-NEXT: ld a4, 8(a0)
-; RV64I-NEXT: slli a1, a1, 5
-; RV64I-NEXT: ld a5, 16(a0)
-; RV64I-NEXT: ld a6, 24(a0)
-; RV64I-NEXT: srl a7, a4, a1
-; RV64I-NEXT: andi a0, a1, 32
-; RV64I-NEXT: xori t0, a0, 63
-; RV64I-NEXT: slli a0, a5, 1
-; RV64I-NEXT: sll a0, a0, t0
-; RV64I-NEXT: or a0, a7, a0
-; RV64I-NEXT: srl a3, a3, a1
-; RV64I-NEXT: slli a4, a4, 1
-; RV64I-NEXT: sll a4, a4, t0
-; RV64I-NEXT: or a4, a3, a4
-; RV64I-NEXT: srl a5, a5, a1
-; RV64I-NEXT: slli t1, a6, 1
-; RV64I-NEXT: sll t0, t1, t0
-; RV64I-NEXT: or t0, a5, t0
-; RV64I-NEXT: srl a1, a6, a1
+; RV64I-NEXT: ld a4, 0(a0)
+; RV64I-NEXT: ld a5, 8(a0)
+; RV64I-NEXT: slli a6, a1, 5
+; RV64I-NEXT: ld a7, 16(a0)
+; RV64I-NEXT: ld t0, 24(a0)
+; RV64I-NEXT: srl a3, a5, a6
+; RV64I-NEXT: andi a0, a6, 32
+; RV64I-NEXT: xori t1, a0, 63
+; RV64I-NEXT: slli a0, a7, 1
+; RV64I-NEXT: sll a0, a0, t1
+; RV64I-NEXT: or a0, a3, a0
+; RV64I-NEXT: srl t2, a4, a6
+; RV64I-NEXT: slli a5, a5, 1
+; RV64I-NEXT: sll a1, a5, t1
+; RV64I-NEXT: or a1, t2, a1
+; RV64I-NEXT: srl a5, a7, a6
+; RV64I-NEXT: slli a4, t0, 1
+; RV64I-NEXT: sll a4, a4, t1
+; RV64I-NEXT: or a4, a5, a4
+; RV64I-NEXT: srl a6, t0, a6
+; RV64I-NEXT: srli a7, a5, 24
+; RV64I-NEXT: srli t0, a5, 16
+; RV64I-NEXT: srli t1, a5, 8
; RV64I-NEXT: sb a5, 16(a2)
-; RV64I-NEXT: sb a1, 24(a2)
-; RV64I-NEXT: sb a3, 0(a2)
-; RV64I-NEXT: sb a7, 8(a2)
-; RV64I-NEXT: srli a6, a5, 24
-; RV64I-NEXT: sb a6, 19(a2)
-; RV64I-NEXT: srli a6, a5, 16
-; RV64I-NEXT: sb a6, 18(a2)
-; RV64I-NEXT: srli a5, a5, 8
-; RV64I-NEXT: sb a5, 17(a2)
-; RV64I-NEXT: srli a5, a1, 56
+; RV64I-NEXT: sb t1, 17(a2)
+; RV64I-NEXT: sb t0, 18(a2)
+; RV64I-NEXT: sb a7, 19(a2)
+; RV64I-NEXT: srli a5, a6, 56
+; RV64I-NEXT: srli a7, a6, 48
+; RV64I-NEXT: srli t0, a6, 40
+; RV64I-NEXT: srli t1, a6, 32
+; RV64I-NEXT: sb t1, 28(a2)
+; RV64I-NEXT: sb t0, 29(a2)
+; RV64I-NEXT: sb a7, 30(a2)
; RV64I-NEXT: sb a5, 31(a2)
-; RV64I-NEXT: srli a5, a1, 48
-; RV64I-NEXT: sb a5, 30(a2)
-; RV64I-NEXT: srli a5, a1, 40
-; RV64I-NEXT: sb a5, 29(a2)
-; RV64I-NEXT: srli a5, a1, 32
-; RV64I-NEXT: sb a5, 28(a2)
-; RV64I-NEXT: srli a5, a1, 24
+; RV64I-NEXT: srli a5, a6, 24
+; RV64I-NEXT: srli a7, a6, 16
+; RV64I-NEXT: srli t0, a6, 8
+; RV64I-NEXT: sb a6, 24(a2)
+; RV64I-NEXT: sb t0, 25(a2)
+; RV64I-NEXT: sb a7, 26(a2)
; RV64I-NEXT: sb a5, 27(a2)
-; RV64I-NEXT: srli a5, a1, 16
-; RV64I-NEXT: sb a5, 26(a2)
-; RV64I-NEXT: srli a1, a1, 8
-; RV64I-NEXT: sb a1, 25(a2)
-; RV64I-NEXT: srli a1, a3, 24
-; RV64I-NEXT: sb a1, 3(a2)
-; RV64I-NEXT: srli a1, a3, 16
-; RV64I-NEXT: sb a1, 2(a2)
-; RV64I-NEXT: srli a3, a3, 8
-; RV64I-NEXT: sb a3, 1(a2)
-; RV64I-NEXT: srli a1, a7, 24
-; RV64I-NEXT: sb a1, 11(a2)
-; RV64I-NEXT: srli a1, a7, 16
-; RV64I-NEXT: sb a1, 10(a2)
-; RV64I-NEXT: srli a1, a7, 8
-; RV64I-NEXT: sb a1, 9(a2)
-; RV64I-NEXT: srli a1, t0, 56
-; RV64I-NEXT: sb a1, 23(a2)
-; RV64I-NEXT: srli a1, t0, 48
-; RV64I-NEXT: sb a1, 22(a2)
-; RV64I-NEXT: srli a1, t0, 40
-; RV64I-NEXT: sb a1, 21(a2)
-; RV64I-NEXT: srli a1, t0, 32
-; RV64I-NEXT: sb a1, 20(a2)
-; RV64I-NEXT: srli a1, a4, 56
-; RV64I-NEXT: sb a1, 7(a2)
-; RV64I-NEXT: srli a1, a4, 48
-; RV64I-NEXT: sb a1, 6(a2)
-; RV64I-NEXT: srli a1, a4, 40
-; RV64I-NEXT: sb a1, 5(a2)
+; RV64I-NEXT: srli a5, t2, 24
+; RV64I-NEXT: srli a6, t2, 16
+; RV64I-NEXT: srli a7, t2, 8
+; RV64I-NEXT: sb t2, 0(a2)
+; RV64I-NEXT: sb a7, 1(a2)
+; RV64I-NEXT: sb a6, 2(a2)
+; RV64I-NEXT: sb a5, 3(a2)
+; RV64I-NEXT: srli a5, a3, 24
+; RV64I-NEXT: srli a6, a3, 16
+; RV64I-NEXT: srli a7, a3, 8
+; RV64I-NEXT: sb a3, 8(a2)
+; RV64I-NEXT: sb a7, 9(a2)
+; RV64I-NEXT: sb a6, 10(a2)
+; RV64I-NEXT: sb a5, 11(a2)
+; RV64I-NEXT: srli a3, a4, 56
+; RV64I-NEXT: srli a5, a4, 48
+; RV64I-NEXT: srli a6, a4, 40
; RV64I-NEXT: srli a4, a4, 32
-; RV64I-NEXT: sb a4, 4(a2)
+; RV64I-NEXT: sb a4, 20(a2)
+; RV64I-NEXT: sb a6, 21(a2)
+; RV64I-NEXT: sb a5, 22(a2)
+; RV64I-NEXT: sb a3, 23(a2)
+; RV64I-NEXT: srli a3, a1, 56
+; RV64I-NEXT: srli a4, a1, 48
+; RV64I-NEXT: srli a5, a1, 40
+; RV64I-NEXT: srli a1, a1, 32
+; RV64I-NEXT: sb a1, 4(a2)
+; RV64I-NEXT: sb a5, 5(a2)
+; RV64I-NEXT: sb a4, 6(a2)
+; RV64I-NEXT: sb a3, 7(a2)
; RV64I-NEXT: srli a1, a0, 56
-; RV64I-NEXT: sb a1, 15(a2)
-; RV64I-NEXT: srli a1, a0, 48
-; RV64I-NEXT: sb a1, 14(a2)
-; RV64I-NEXT: srli a1, a0, 40
-; RV64I-NEXT: sb a1, 13(a2)
+; RV64I-NEXT: srli a3, a0, 48
+; RV64I-NEXT: srli a4, a0, 40
; RV64I-NEXT: srli a0, a0, 32
; RV64I-NEXT: sb a0, 12(a2)
+; RV64I-NEXT: sb a4, 13(a2)
+; RV64I-NEXT: sb a3, 14(a2)
+; RV64I-NEXT: sb a1, 15(a2)
; RV64I-NEXT: addi sp, sp, 64
; RV64I-NEXT: ret
;
@@ -2694,90 +2696,90 @@ define void @lshr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun
; RV32I-NEXT: or a0, a0, t2
; RV32I-NEXT: or a0, a0, t3
; RV32I-NEXT: lbu a1, 0(a1)
-; RV32I-NEXT: sw zero, 60(sp)
-; RV32I-NEXT: sw zero, 56(sp)
-; RV32I-NEXT: sw zero, 52(sp)
; RV32I-NEXT: sw zero, 48(sp)
-; RV32I-NEXT: sw zero, 44(sp)
-; RV32I-NEXT: sw zero, 40(sp)
-; RV32I-NEXT: sw zero, 36(sp)
+; RV32I-NEXT: sw zero, 52(sp)
+; RV32I-NEXT: sw zero, 56(sp)
+; RV32I-NEXT: sw zero, 60(sp)
; RV32I-NEXT: sw zero, 32(sp)
-; RV32I-NEXT: sw a0, 28(sp)
-; RV32I-NEXT: sw t1, 24(sp)
-; RV32I-NEXT: sw t0, 20(sp)
+; RV32I-NEXT: sw zero, 36(sp)
+; RV32I-NEXT: sw zero, 40(sp)
+; RV32I-NEXT: sw zero, 44(sp)
; RV32I-NEXT: sw a7, 16(sp)
-; RV32I-NEXT: sw a6, 12(sp)
-; RV32I-NEXT: sw a5, 8(sp)
-; RV32I-NEXT: sw a4, 4(sp)
+; RV32I-NEXT: sw t0, 20(sp)
+; RV32I-NEXT: sw t1, 24(sp)
+; RV32I-NEXT: sw a0, 28(sp)
; RV32I-NEXT: sw a3, 0(sp)
+; RV32I-NEXT: sw a4, 4(sp)
+; RV32I-NEXT: sw a5, 8(sp)
+; RV32I-NEXT: sw a6, 12(sp)
; RV32I-NEXT: slli a1, a1, 2
; RV32I-NEXT: andi a1, a1, 28
; RV32I-NEXT: mv a0, sp
-; RV32I-NEXT: add a1, a0, a1
-; RV32I-NEXT: lw a3, 16(a1)
-; RV32I-NEXT: lw a4, 20(a1)
-; RV32I-NEXT: lw a5, 24(a1)
-; RV32I-NEXT: lw a6, 28(a1)
-; RV32I-NEXT: lw a7, 0(a1)
-; RV32I-NEXT: lw a0, 4(a1)
-; RV32I-NEXT: lw t0, 8(a1)
-; RV32I-NEXT: lw a1, 12(a1)
-; RV32I-NEXT: sb a5, 24(a2)
-; RV32I-NEXT: sb a6, 28(a2)
-; RV32I-NEXT: sb a3, 16(a2)
-; RV32I-NEXT: sb a4, 20(a2)
-; RV32I-NEXT: sb t0, 8(a2)
-; RV32I-NEXT: sb a1, 12(a2)
-; RV32I-NEXT: sb a7, 0(a2)
-; RV32I-NEXT: sb a0, 4(a2)
-; RV32I-NEXT: srli t1, a5, 24
+; RV32I-NEXT: add a4, a0, a1
+; RV32I-NEXT: lw a5, 16(a4)
+; RV32I-NEXT: lw a6, 20(a4)
+; RV32I-NEXT: lw a7, 24(a4)
+; RV32I-NEXT: lw a1, 0(a4)
+; RV32I-NEXT: lw a0, 4(a4)
+; RV32I-NEXT: lw t0, 8(a4)
+; RV32I-NEXT: lw a3, 12(a4)
+; RV32I-NEXT: lw a4, 28(a4)
+; RV32I-NEXT: srli t1, a7, 24
+; RV32I-NEXT: srli t2, a7, 16
+; RV32I-NEXT: srli t3, a7, 8
+; RV32I-NEXT: sb a7, 24(a2)
+; RV32I-NEXT: sb t3, 25(a2)
+; RV32I-NEXT: sb t2, 26(a2)
; RV32I-NEXT: sb t1, 27(a2)
-; RV32I-NEXT: srli t1, a5, 16
-; RV32I-NEXT: sb t1, 26(a2)
-; RV32I-NEXT: srli a5, a5, 8
-; RV32I-NEXT: sb a5, 25(a2)
-; RV32I-NEXT: srli a5, a6, 24
-; RV32I-NEXT: sb a5, 31(a2)
+; RV32I-NEXT: srli a7, a4, 24
+; RV32I-NEXT: srli t1, a4, 16
+; RV32I-NEXT: srli t2, a4, 8
+; RV32I-NEXT: sb a4, 28(a2)
+; RV32I-NEXT: sb t2, 29(a2)
+; RV32I-NEXT: sb t1, 30(a2)
+; RV32I-NEXT: sb a7, 31(a2)
+; RV32I-NEXT: srli a4, a5, 24
+; RV32I-NEXT: srli a7, a5, 16
+; RV32I-NEXT: srli t1, a5, 8
+; RV32I-NEXT: sb a5, 16(a2)
+; RV32I-NEXT: sb t1, 17(a2)
+; RV32I-NEXT: sb a7, 18(a2)
+; RV32I-NEXT: sb a4, 19(a2)
+; RV32I-NEXT: srli a4, a6, 24
; RV32I-NEXT: srli a5, a6, 16
-; RV32I-NEXT: sb a5, 30(a2)
-; RV32I-NEXT: srli a5, a6, 8
-; RV32I-NEXT: sb a5, 29(a2)
-; RV32I-NEXT: srli a5, a3, 24
-; RV32I-NEXT: sb a5, 19(a2)
+; RV32I-NEXT: srli a7, a6, 8
+; RV32I-NEXT: sb a6, 20(a2)
+; RV32I-NEXT: sb a7, 21(a2)
+; RV32I-NEXT: sb a5, 22(a2)
+; RV32I-NEXT: sb a4, 23(a2)
+; RV32I-NEXT: srli a4, t0, 24
+; RV32I-NEXT: srli a5, t0, 16
+; RV32I-NEXT: srli a6, t0, 8
+; RV32I-NEXT: sb t0, 8(a2)
+; RV32I-NEXT: sb a6, 9(a2)
+; RV32I-NEXT: sb a5, 10(a2)
+; RV32I-NEXT: sb a4, 11(a2)
+; RV32I-NEXT: srli a4, a3, 24
; RV32I-NEXT: srli a5, a3, 16
-; RV32I-NEXT: sb a5, 18(a2)
-; RV32I-NEXT: srli a3, a3, 8
-; RV32I-NEXT: sb a3, 17(a2)
-; RV32I-NEXT: srli a3, a4, 24
-; RV32I-NEXT: sb a3, 23(a2)
-; RV32I-NEXT: srli a3, a4, 16
-; RV32I-NEXT: sb a3, 22(a2)
-; RV32I-NEXT: srli a4, a4, 8
-; RV32I-NEXT: sb a4, 21(a2)
-; RV32I-NEXT: srli a3, t0, 24
-; RV32I-NEXT: sb a3, 11(a2)
-; RV32I-NEXT: srli a3, t0, 16
-; RV32I-NEXT: sb a3, 10(a2)
-; RV32I-NEXT: srli a3, t0, 8
-; RV32I-NEXT: sb a3, 9(a2)
+; RV32I-NEXT: srli a6, a3, 8
+; RV32I-NEXT: sb a3, 12(a2)
+; RV32I-NEXT: sb a6, 13(a2)
+; RV32I-NEXT: sb a5, 14(a2)
+; RV32I-NEXT: sb a4, 15(a2)
; RV32I-NEXT: srli a3, a1, 24
-; RV32I-NEXT: sb a3, 15(a2)
-; RV32I-NEXT: srli a3, a1, 16
-; RV32I-NEXT: sb a3, 14(a2)
-; RV32I-NEXT: srli a1, a1, 8
-; RV32I-NEXT: sb a1, 13(a2)
-; RV32I-NEXT: srli a1, a7, 24
-; RV32I-NEXT: sb a1, 3(a2)
-; RV32I-NEXT: srli a1, a7, 16
-; RV32I-NEXT: sb a1, 2(a2)
-; RV32I-NEXT: srli a1, a7, 8
-; RV32I-NEXT: sb a1, 1(a2)
+; RV32I-NEXT: srli a4, a1, 16
+; RV32I-NEXT: srli a5, a1, 8
+; RV32I-NEXT: sb a1, 0(a2)
+; RV32I-NEXT: sb a5, 1(a2)
+; RV32I-NEXT: sb a4, 2(a2)
+; RV32I-NEXT: sb a3, 3(a2)
; RV32I-NEXT: srli a1, a0, 24
+; RV32I-NEXT: srli a3, a0, 16
+; RV32I-NEXT: srli a4, a0, 8
+; RV32I-NEXT: sb a0, 4(a2)
+; RV32I-NEXT: sb a4, 5(a2)
+; RV32I-NEXT: sb a3, 6(a2)
; RV32I-NEXT: sb a1, 7(a2)
-; RV32I-NEXT: srli a1, a0, 16
-; RV32I-NEXT: sb a1, 6(a2)
-; RV32I-NEXT: srli a0, a0, 8
-; RV32I-NEXT: sb a0, 5(a2)
; RV32I-NEXT: addi sp, sp, 64
; RV32I-NEXT: ret
%src = load i256, ptr %src.ptr, align 1
@@ -2881,82 +2883,82 @@ define void @lshr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: or a0, a0, a6
; RV64I-NEXT: lbu a1, 0(a1)
-; RV64I-NEXT: sd zero, 56(sp)
-; RV64I-NEXT: sd zero, 48(sp)
-; RV64I-NEXT: sd zero, 40(sp)
; RV64I-NEXT: sd zero, 32(sp)
-; RV64I-NEXT: sd a0, 24(sp)
-; RV64I-NEXT: sd a5, 16(sp)
-; RV64I-NEXT: sd a4, 8(sp)
+; RV64I-NEXT: sd zero, 40(sp)
+; RV64I-NEXT: sd zero, 48(sp)
+; RV64I-NEXT: sd zero, 56(sp)
; RV64I-NEXT: sd a3, 0(sp)
+; RV64I-NEXT: sd a4, 8(sp)
+; RV64I-NEXT: sd a5, 16(sp)
+; RV64I-NEXT: sd a0, 24(sp)
; RV64I-NEXT: slli a1, a1, 3
; RV64I-NEXT: andi a1, a1, 24
; RV64I-NEXT: mv a0, sp
-; RV64I-NEXT: add a0, a0, a1
-; RV64I-NEXT: ld a1, 16(a0)
-; RV64I-NEXT: ld a3, 24(a0)
-; RV64I-NEXT: ld a4, 0(a0)
-; RV64I-NEXT: ld a0, 8(a0)
-; RV64I-NEXT: sb a1, 16(a2)
-; RV64I-NEXT: sb a3, 24(a2)
-; RV64I-NEXT: sb a4, 0(a2)
-; RV64I-NEXT: sb a0, 8(a2)
-; RV64I-NEXT: srli a5, a1, 56
+; RV64I-NEXT: add a3, a0, a1
+; RV64I-NEXT: ld a4, 16(a3)
+; RV64I-NEXT: ld a0, 8(a3)
+; RV64I-NEXT: ld a1, 0(a3)
+; RV64I-NEXT: ld a3, 24(a3)
+; RV64I-NEXT: srli a5, a4, 56
+; RV64I-NEXT: srli a6, a4, 48
+; RV64I-NEXT: srli a7, a4, 40
+; RV64I-NEXT: srli t0, a4, 32
+; RV64I-NEXT: sb t0, 20(a2)
+; RV64I-NEXT: sb a7, 21(a2)
+; RV64I-NEXT: sb a6, 22(a2)
; RV64I-NEXT: sb a5, 23(a2)
-; RV64I-NEXT: srli a5, a1, 48
-; RV64I-NEXT: sb a5, 22(a2)
-; RV64I-NEXT: srli a5, a1, 40
-; RV64I-NEXT: sb a5, 21(a2)
-; RV64I-NEXT: srli a5, a1, 32
-; RV64I-NEXT: sb a5, 20(a2)
-; RV64I-NEXT: srli a5, a1, 24
+; RV64I-NEXT: srli a5, a4, 24
+; RV64I-NEXT: srli a6, a4, 16
+; RV64I-NEXT: srli a7, a4, 8
+; RV64I-NEXT: sb a4, 16(a2)
+; RV64I-NEXT: sb a7, 17(a2)
+; RV64I-NEXT: sb a6, 18(a2)
; RV64I-NEXT: sb a5, 19(a2)
-; RV64I-NEXT: srli a5, a1, 16
-; RV64I-NEXT: sb a5, 18(a2)
-; RV64I-NEXT: srli a1, a1, 8
-; RV64I-NEXT: sb a1, 17(a2)
-; RV64I-NEXT: srli a1, a3, 56
-; RV64I-NEXT: sb a1, 31(a2)
-; RV64I-NEXT: srli a1, a3, 48
-; RV64I-NEXT: sb a1, 30(a2)
-; RV64I-NEXT: srli a1, a3, 40
-; RV64I-NEXT: sb a1, 29(a2)
-; RV64I-NEXT: srli a1, a3, 32
-; RV64I-NEXT: sb a1, 28(a2)
-; RV64I-NEXT: srli a1, a3, 24
-; RV64I-NEXT: sb a1, 27(a2)
-; RV64I-NEXT: srli a1, a3, 16
-; RV64I-NEXT: sb a1, 26(a2)
-; RV64I-NEXT: srli a3, a3, 8
-; RV64I-NEXT: sb a3, 25(a2)
-; RV64I-NEXT: srli a1, a4, 56
-; RV64I-NEXT: sb a1, 7(a2)
-; RV64I-NEXT: srli a1, a4, 48
-; RV64I-NEXT: sb a1, 6(a2)
-; RV64I-NEXT: srli a1, a4, 40
-; RV64I-NEXT: sb a1, 5(a2)
-; RV64I-NEXT: srli a1, a4, 32
-; RV64I-NEXT: sb a1, 4(a2)
-; RV64I-NEXT: srli a1, a4, 24
-; RV64I-NEXT: sb a1, 3(a2)
-; RV64I-NEXT: srli a1, a4, 16
-; RV64I-NEXT: sb a1, 2(a2)
-; RV64I-NEXT: srli a4, a4, 8
-; RV64I-NEXT: sb a4, 1(a2)
+; RV64I-NEXT: srli a4, a3, 56
+; RV64I-NEXT: srli a5, a3, 48
+; RV64I-NEXT: srli a6, a3, 40
+; RV64I-NEXT: srli a7, a3, 32
+; RV64I-NEXT: sb a7, 28(a2)
+; RV64I-NEXT: sb a6, 29(a2)
+; RV64I-NEXT: sb a5, 30(a2)
+; RV64I-NEXT: sb a4, 31(a2)
+; RV64I-NEXT: srli a4, a3, 24
+; RV64I-NEXT: srli a5, a3, 16
+; RV64I-NEXT: srli a6, a3, 8
+; RV64I-NEXT: sb a3, 24(a2)
+; RV64I-NEXT: sb a6, 25(a2)
+; RV64I-NEXT: sb a5, 26(a2)
+; RV64I-NEXT: sb a4, 27(a2)
+; RV64I-NEXT: srli a3, a1, 56
+; RV64I-NEXT: srli a4, a1, 48
+; RV64I-NEXT: srli a5, a1, 40
+; RV64I-NEXT: srli a6, a1, 32
+; RV64I-NEXT: sb a6, 4(a2)
+; RV64I-NEXT: sb a5, 5(a2)
+; RV64I-NEXT: sb a4, 6(a2)
+; RV64I-NEXT: sb a3, 7(a2)
+; RV64I-NEXT: srli a3, a1, 24
+; RV64I-NEXT: srli a4, a1, 16
+; RV64I-NEXT: srli a5, a1, 8
+; RV64I-NEXT: sb a1, 0(a2)
+; RV64I-NEXT: sb a5, 1(a2)
+; RV64I-NEXT: sb a4, 2(a2)
+; RV64I-NEXT: sb a3, 3(a2)
; RV64I-NEXT: srli a1, a0, 56
+; RV64I-NEXT: srli a3, a0, 48
+; RV64I-NEXT: srli a4, a0, 40
+; RV64I-NEXT: srli a5, a0, 32
+; RV64I-NEXT: sb a5, 12(a2)
+; RV64I-NEXT: sb a4, 13(a2)
+; RV64I-NEXT: sb a3, 14(a2)
; RV64I-NEXT: sb a1, 15(a2)
-; RV64I-NEXT: srli a1, a0, 48
-; RV64I-NEXT: sb a1, 14(a2)
-; RV64I-NEXT: srli a1, a0, 40
-; RV64I-NEXT: sb a1, 13(a2)
-; RV64I-NEXT: srli a1, a0, 32
-; RV64I-NEXT: sb a1, 12(a2)
; RV64I-NEXT: srli a1, a0, 24
+; RV64I-NEXT: srli a3, a0, 16
+; RV64I-NEXT: srli a4, a0, 8
+; RV64I-NEXT: sb a0, 8(a2)
+; RV64I-NEXT: sb a4, 9(a2)
+; RV64I-NEXT: sb a3, 10(a2)
; RV64I-NEXT: sb a1, 11(a2)
-; RV64I-NEXT: srli a1, a0, 16
-; RV64I-NEXT: sb a1, 10(a2)
-; RV64I-NEXT: srli a0, a0, 8
-; RV64I-NEXT: sb a0, 9(a2)
; RV64I-NEXT: addi sp, sp, 64
; RV64I-NEXT: ret
;
@@ -3044,90 +3046,90 @@ define void @lshr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no
; RV32I-NEXT: or a0, a0, t2
; RV32I-NEXT: or a0, a0, t3
; RV32I-NEXT: lbu a1, 0(a1)
-; RV32I-NEXT: sw zero, 60(sp)
-; RV32I-NEXT: sw zero, 56(sp)
-; RV32I-NEXT: sw zero, 52(sp)
; RV32I-NEXT: sw zero, 48(sp)
-; RV32I-NEXT: sw zero, 44(sp)
-; RV32I-NEXT: sw zero, 40(sp)
-; RV32I-NEXT: sw zero, 36(sp)
+; RV32I-NEXT: sw zero, 52(sp)
+; RV32I-NEXT: sw zero, 56(sp)
+; RV32I-NEXT: sw zero, 60(sp)
; RV32I-NEXT: sw zero, 32(sp)
-; RV32I-NEXT: sw a0, 28(sp)
-; RV32I-NEXT: sw t1, 24(sp)
-; RV32I-NEXT: sw t0, 20(sp)
+; RV32I-NEXT: sw zero, 36(sp)
+; RV32I-NEXT: sw zero, 40(sp)
+; RV32I-NEXT: sw zero, 44(sp)
; RV32I-NEXT: sw a7, 16(sp)
-; RV32I-NEXT: sw a6, 12(sp)
-; RV32I-NEXT: sw a5, 8(sp)
-; RV32I-NEXT: sw a4, 4(sp)
+; RV32I-NEXT: sw t0, 20(sp)
+; RV32I-NEXT: sw t1, 24(sp)
+; RV32I-NEXT: sw a0, 28(sp)
; RV32I-NEXT: sw a3, 0(sp)
+; RV32I-NEXT: sw a4, 4(sp)
+; RV32I-NEXT: sw a5, 8(sp)
+; RV32I-NEXT: sw a6, 12(sp)
; RV32I-NEXT: slli a1, a1, 3
; RV32I-NEXT: andi a1, a1, 24
; RV32I-NEXT: mv a0, sp
-; RV32I-NEXT: add a1, a0, a1
-; RV32I-NEXT: lw a3, 16(a1)
-; RV32I-NEXT: lw a4, 20(a1)
-; RV32I-NEXT: lw a5, 24(a1)
-; RV32I-NEXT: lw a6, 28(a1)
-; RV32I-NEXT: lw a7, 0(a1)
-; RV32I-NEXT: lw a0, 4(a1)
-; RV32I-NEXT: lw t0, 8(a1)
-; RV32I-NEXT: lw a1, 12(a1)
-; RV32I-NEXT: sb a5, 24(a2)
-; RV32I-NEXT: sb a6, 28(a2)
-; RV32I-NEXT: sb a3, 16(a2)
-; RV32I-NEXT: sb a4, 20(a2)
-; RV32I-NEXT: sb t0, 8(a2)
-; RV32I-NEXT: sb a1, 12(a2)
-; RV32I-NEXT: sb a7, 0(a2)
-; RV32I-NEXT: sb a0, 4(a2)
-; RV32I-NEXT: srli t1, a5, 24
+; RV32I-NEXT: add a4, a0, a1
+; RV32I-NEXT: lw a5, 16(a4)
+; RV32I-NEXT: lw a6, 20(a4)
+; RV32I-NEXT: lw a7, 24(a4)
+; RV32I-NEXT: lw a1, 0(a4)
+; RV32I-NEXT: lw a0, 4(a4)
+; RV32I-NEXT: lw t0, 8(a4)
+; RV32I-NEXT: lw a3, 12(a4)
+; RV32I-NEXT: lw a4, 28(a4)
+; RV32I-NEXT: srli t1, a7, 24
+; RV32I-NEXT: srli t2, a7, 16
+; RV32I-NEXT: srli t3, a7, 8
+; RV32I-NEXT: sb a7, 24(a2)
+; RV32I-NEXT: sb t3, 25(a2)
+; RV32I-NEXT: sb t2, 26(a2)
; RV32I-NEXT: sb t1, 27(a2)
-; RV32I-NEXT: srli t1, a5, 16
-; RV32I-NEXT: sb t1, 26(a2)
-; RV32I-NEXT: srli a5, a5, 8
-; RV32I-NEXT: sb a5, 25(a2)
-; RV32I-NEXT: srli a5, a6, 24
-; RV32I-NEXT: sb a5, 31(a2)
+; RV32I-NEXT: srli a7, a4, 24
+; RV32I-NEXT: srli t1, a4, 16
+; RV32I-NEXT: srli t2, a4, 8
+; RV32I-NEXT: sb a4, 28(a2)
+; RV32I-NEXT: sb t2, 29(a2)
+; RV32I-NEXT: sb t1, 30(a2)
+; RV32I-NEXT: sb a7, 31(a2)
+; RV32I-NEXT: srli a4, a5, 24
+; RV32I-NEXT: srli a7, a5, 16
+; RV32I-NEXT: srli t1, a5, 8
+; RV32I-NEXT: sb a5, 16(a2)
+; RV32I-NEXT: sb t1, 17(a2)
+; RV32I-NEXT: sb a7, 18(a2)
+; RV32I-NEXT: sb a4, 19(a2)
+; RV32I-NEXT: srli a4, a6, 24
; RV32I-NEXT: srli a5, a6, 16
-; RV32I-NEXT: sb a5, 30(a2)
-; RV32I-NEXT: srli a5, a6, 8
-; RV32I-NEXT: sb a5, 29(a2)
-; RV32I-NEXT: srli a5, a3, 24
-; RV32I-NEXT: sb a5, 19(a2)
+; RV32I-NEXT: srli a7, a6, 8
+; RV32I-NEXT: sb a6, 20(a2)
+; RV32I-NEXT: sb a7, 21(a2)
+; RV32I-NEXT: sb a5, 22(a2)
+; RV32I-NEXT: sb a4, 23(a2)
+; RV32I-NEXT: srli a4, t0, 24
+; RV32I-NEXT: srli a5, t0, 16
+; RV32I-NEXT: srli a6, t0, 8
+; RV32I-NEXT: sb t0, 8(a2)
+; RV32I-NEXT: sb a6, 9(a2)
+; RV32I-NEXT: sb a5, 10(a2)
+; RV32I-NEXT: sb a4, 11(a2)
+; RV32I-NEXT: srli a4, a3, 24
; RV32I-NEXT: srli a5, a3, 16
-; RV32I-NEXT: sb a5, 18(a2)
-; RV32I-NEXT: srli a3, a3, 8
-; RV32I-NEXT: sb a3, 17(a2)
-; RV32I-NEXT: srli a3, a4, 24
-; RV32I-NEXT: sb a3, 23(a2)
-; RV32I-NEXT: srli a3, a4, 16
-; RV32I-NEXT: sb a3, 22(a2)
-; RV32I-NEXT: srli a4, a4, 8
-; RV32I-NEXT: sb a4, 21(a2)
-; RV32I-NEXT: srli a3, t0, 24
-; RV32I-NEXT: sb a3, 11(a2)
-; RV32I-NEXT: srli a3, t0, 16
-; RV32I-NEXT: sb a3, 10(a2)
-; RV32I-NEXT: srli a3, t0, 8
-; RV32I-NEXT: sb a3, 9(a2)
+; RV32I-NEXT: srli a6, a3, 8
+; RV32I-NEXT: sb a3, 12(a2)
+; RV32I-NEXT: sb a6, 13(a2)
+; RV32I-NEXT: sb a5, 14(a2)
+; RV32I-NEXT: sb a4, 15(a2)
; RV32I-NEXT: srli a3, a1, 24
-; RV32I-NEXT: sb a3, 15(a2)
-; RV32I-NEXT: srli a3, a1, 16
-; RV32I-NEXT: sb a3, 14(a2)
-; RV32I-NEXT: srli a1, a1, 8
-; RV32I-NEXT: sb a1, 13(a2)
-; RV32I-NEXT: srli a1, a7, 24
-; RV32I-NEXT: sb a1, 3(a2)
-; RV32I-NEXT: srli a1, a7, 16
-; RV32I-NEXT: sb a1, 2(a2)
-; RV32I-NEXT: srli a1, a7, 8
-; RV32I-NEXT: sb a1, 1(a2)
+; RV32I-NEXT: srli a4, a1, 16
+; RV32I-NEXT: srli a5, a1, 8
+; RV32I-NEXT: sb a1, 0(a2)
+; RV32I-NEXT: sb a5, 1(a2)
+; RV32I-NEXT: sb a4, 2(a2)
+; RV32I-NEXT: sb a3, 3(a2)
; RV32I-NEXT: srli a1, a0, 24
+; RV32I-NEXT: srli a3, a0, 16
+; RV32I-NEXT: srli a4, a0, 8
+; RV32I-NEXT: sb a0, 4(a2)
+; RV32I-NEXT: sb a4, 5(a2)
+; RV32I-NEXT: sb a3, 6(a2)
; RV32I-NEXT: sb a1, 7(a2)
-; RV32I-NEXT: srli a1, a0, 16
-; RV32I-NEXT: sb a1, 6(a2)
-; RV32I-NEXT: srli a0, a0, 8
-; RV32I-NEXT: sb a0, 5(a2)
; RV32I-NEXT: addi sp, sp, 64
; RV32I-NEXT: ret
%src = load i256, ptr %src.ptr, align 1
@@ -3252,97 +3254,97 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: or a1, a1, t0
; RV64I-NEXT: slli a1, a1, 32
; RV64I-NEXT: or a1, a1, a6
-; RV64I-NEXT: sd zero, 24(sp)
-; RV64I-NEXT: sd zero, 16(sp)
-; RV64I-NEXT: sd zero, 8(sp)
; RV64I-NEXT: sd zero, 0(sp)
-; RV64I-NEXT: sd a0, 56(sp)
-; RV64I-NEXT: sd a5, 48(sp)
-; RV64I-NEXT: sd a4, 40(sp)
+; RV64I-NEXT: sd zero, 8(sp)
+; RV64I-NEXT: sd zero, 16(sp)
+; RV64I-NEXT: sd zero, 24(sp)
; RV64I-NEXT: sd a3, 32(sp)
+; RV64I-NEXT: sd a4, 40(sp)
+; RV64I-NEXT: sd a5, 48(sp)
+; RV64I-NEXT: sd a0, 56(sp)
; RV64I-NEXT: andi a0, a1, 24
; RV64I-NEXT: addi a3, sp, 32
; RV64I-NEXT: sub a3, a3, a0
-; RV64I-NEXT: ld a4, 0(a3)
-; RV64I-NEXT: ld a5, 8(a3)
-; RV64I-NEXT: slli a1, a1, 3
-; RV64I-NEXT: ld a6, 16(a3)
-; RV64I-NEXT: ld a3, 24(a3)
-; RV64I-NEXT: sll a7, a5, a1
-; RV64I-NEXT: andi a0, a1, 56
-; RV64I-NEXT: xori t0, a0, 63
-; RV64I-NEXT: srli a0, a4, 1
-; RV64I-NEXT: srl a0, a0, t0
-; RV64I-NEXT: or a0, a7, a0
-; RV64I-NEXT: sll a3, a3, a1
-; RV64I-NEXT: srli t1, a6, 1
-; RV64I-NEXT: srl t1, t1, t0
-; RV64I-NEXT: or t1, a3, t1
-; RV64I-NEXT: sll a6, a6, a1
-; RV64I-NEXT: srli a5, a5, 1
-; RV64I-NEXT: srl a5, a5, t0
-; RV64I-NEXT: or a5, a6, a5
-; RV64I-NEXT: sll a1, a4, a1
-; RV64I-NEXT: sb a1, 0(a2)
-; RV64I-NEXT: srli a4, a6, 56
-; RV64I-NEXT: sb a4, 23(a2)
-; RV64I-NEXT: srli a3, a3, 56
-; RV64I-NEXT: sb a3, 31(a2)
-; RV64I-NEXT: srli a3, a1, 56
-; RV64I-NEXT: sb a3, 7(a2)
-; RV64I-NEXT: srli a3, a1, 48
-; RV64I-NEXT: sb a3, 6(a2)
-; RV64I-NEXT: srli a3, a1, 40
-; RV64I-NEXT: sb a3, 5(a2)
-; RV64I-NEXT: srli a3, a1, 32
-; RV64I-NEXT: sb a3, 4(a2)
+; RV64I-NEXT: ld a5, 0(a3)
+; RV64I-NEXT: ld a6, 8(a3)
+; RV64I-NEXT: slli a7, a1, 3
+; RV64I-NEXT: ld t0, 16(a3)
+; RV64I-NEXT: ld a1, 24(a3)
+; RV64I-NEXT: sll a4, a6, a7
+; RV64I-NEXT: andi a0, a7, 56
+; RV64I-NEXT: xori a3, a0, 63
+; RV64I-NEXT: srli a0, a5, 1
+; RV64I-NEXT: srl a0, a0, a3
+; RV64I-NEXT: or a0, a4, a0
+; RV64I-NEXT: sll t1, a1, a7
+; RV64I-NEXT: srli a1, t0, 1
+; RV64I-NEXT: srl a1, a1, a3
+; RV64I-NEXT: or a1, t1, a1
+; RV64I-NEXT: sll t0, t0, a7
+; RV64I-NEXT: srli a6, a6, 1
+; RV64I-NEXT: srl a3, a6, a3
+; RV64I-NEXT: or a3, t0, a3
+; RV64I-NEXT: sll a5, a5, a7
+; RV64I-NEXT: srli a6, t0, 56
+; RV64I-NEXT: srli a7, a3, 48
+; RV64I-NEXT: srli t0, a3, 40
+; RV64I-NEXT: srli t2, a3, 32
+; RV64I-NEXT: sb t2, 20(a2)
+; RV64I-NEXT: sb t0, 21(a2)
+; RV64I-NEXT: sb a7, 22(a2)
+; RV64I-NEXT: sb a6, 23(a2)
+; RV64I-NEXT: srli a6, t1, 56
+; RV64I-NEXT: srli a7, a1, 48
+; RV64I-NEXT: srli t0, a1, 40
+; RV64I-NEXT: srli t1, a1, 32
+; RV64I-NEXT: sb t1, 28(a2)
+; RV64I-NEXT: sb t0, 29(a2)
+; RV64I-NEXT: sb a7, 30(a2)
+; RV64I-NEXT: sb a6, 31(a2)
+; RV64I-NEXT: srli a6, a5, 56
+; RV64I-NEXT: srli a7, a5, 48
+; RV64I-NEXT: srli t0, a5, 40
+; RV64I-NEXT: srli t1, a5, 32
+; RV64I-NEXT: sb t1, 4(a2)
+; RV64I-NEXT: sb t0, 5(a2)
+; RV64I-NEXT: sb a7, 6(a2)
+; RV64I-NEXT: sb a6, 7(a2)
+; RV64I-NEXT: srli a6, a5, 24
+; RV64I-NEXT: srli a7, a5, 16
+; RV64I-NEXT: srli t0, a5, 8
+; RV64I-NEXT: sb a5, 0(a2)
+; RV64I-NEXT: sb t0, 1(a2)
+; RV64I-NEXT: sb a7, 2(a2)
+; RV64I-NEXT: sb a6, 3(a2)
+; RV64I-NEXT: srli a4, a4, 56
+; RV64I-NEXT: srli a5, a0, 48
+; RV64I-NEXT: srli a6, a0, 40
+; RV64I-NEXT: srli a7, a0, 32
+; RV64I-NEXT: sb a7, 12(a2)
+; RV64I-NEXT: sb a6, 13(a2)
+; RV64I-NEXT: sb a5, 14(a2)
+; RV64I-NEXT: sb a4, 15(a2)
+; RV64I-NEXT: srli a4, a3, 24
+; RV64I-NEXT: srli a5, a3, 16
+; RV64I-NEXT: srli a6, a3, 8
+; RV64I-NEXT: sb a3, 16(a2)
+; RV64I-NEXT: sb a6, 17(a2)
+; RV64I-NEXT: sb a5, 18(a2)
+; RV64I-NEXT: sb a4, 19(a2)
; RV64I-NEXT: srli a3, a1, 24
-; RV64I-NEXT: sb a3, 3(a2)
-; RV64I-NEXT: srli a3, a1, 16
-; RV64I-NEXT: sb a3, 2(a2)
-; RV64I-NEXT: srli a1, a1, 8
-; RV64I-NEXT: sb a1, 1(a2)
-; RV64I-NEXT: srli a1, a7, 56
-; RV64I-NEXT: sb a1, 15(a2)
-; RV64I-NEXT: sb a5, 16(a2)
-; RV64I-NEXT: sb t1, 24(a2)
-; RV64I-NEXT: sb a0, 8(a2)
-; RV64I-NEXT: srli a1, a5, 48
-; RV64I-NEXT: sb a1, 22(a2)
-; RV64I-NEXT: srli a1, a5, 40
-; RV64I-NEXT: sb a1, 21(a2)
-; RV64I-NEXT: srli a1, a5, 32
-; RV64I-NEXT: sb a1, 20(a2)
-; RV64I-NEXT: srli a1, a5, 24
-; RV64I-NEXT: sb a1, 19(a2)
-; RV64I-NEXT: srli a1, a5, 16
-; RV64I-NEXT: sb a1, 18(a2)
-; RV64I-NEXT: srli a5, a5, 8
-; RV64I-NEXT: sb a5, 17(a2)
-; RV64I-NEXT: srli a1, t1, 48
-; RV64I-NEXT: sb a1, 30(a2)
-; RV64I-NEXT: srli a1, t1, 40
-; RV64I-NEXT: sb a1, 29(a2)
-; RV64I-NEXT: srli a1, t1, 32
-; RV64I-NEXT: sb a1, 28(a2)
-; RV64I-NEXT: srli a1, t1, 24
-; RV64I-NEXT: sb a1, 27(a2)
-; RV64I-NEXT: srli a1, t1, 16
-; RV64I-NEXT: sb a1, 26(a2)
-; RV64I-NEXT: srli a1, t1, 8
-; RV64I-NEXT: sb a1, 25(a2)
-; RV64I-NEXT: srli a1, a0, 48
-; RV64I-NEXT: sb a1, 14(a2)
-; RV64I-NEXT: srli a1, a0, 40
-; RV64I-NEXT: sb a1, 13(a2)
-; RV64I-NEXT: srli a1, a0, 32
-; RV64I-NEXT: sb a1, 12(a2)
+; RV64I-NEXT: srli a4, a1, 16
+; RV64I-NEXT: srli a5, a1, 8
+; RV64I-NEXT: sb a1, 24(a2)
+; RV64I-NEXT: sb a5, 25(a2)
+; RV64I-NEXT: sb a4, 26(a2)
+; RV64I-NEXT: sb a3, 27(a2)
; RV64I-NEXT: srli a1, a0, 24
+; RV64I-NEXT: srli a3, a0, 16
+; RV64I-NEXT: srli a4, a0, 8
+; RV64I-NEXT: sb a0, 8(a2)
+; RV64I-NEXT: sb a4, 9(a2)
+; RV64I-NEXT: sb a3, 10(a2)
; RV64I-NEXT: sb a1, 11(a2)
-; RV64I-NEXT: srli a1, a0, 16
-; RV64I-NEXT: sb a1, 10(a2)
-; RV64I-NEXT: srli a0, a0, 8
-; RV64I-NEXT: sb a0, 9(a2)
; RV64I-NEXT: addi sp, sp, 64
; RV64I-NEXT: ret
;
@@ -3352,6 +3354,7 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: sw s0, 76(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 72(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 68(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 64(sp) # 4-byte Folded Spill
; RV32I-NEXT: lbu a3, 1(a0)
; RV32I-NEXT: lbu a4, 0(a0)
; RV32I-NEXT: lbu a5, 2(a0)
@@ -3442,124 +3445,125 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: slli a1, a1, 24
; RV32I-NEXT: or a1, a1, t3
; RV32I-NEXT: or a1, a1, t2
-; RV32I-NEXT: sw zero, 28(sp)
-; RV32I-NEXT: sw zero, 24(sp)
-; RV32I-NEXT: sw zero, 20(sp)
; RV32I-NEXT: sw zero, 16(sp)
-; RV32I-NEXT: sw zero, 12(sp)
-; RV32I-NEXT: sw zero, 8(sp)
-; RV32I-NEXT: sw zero, 4(sp)
+; RV32I-NEXT: sw zero, 20(sp)
+; RV32I-NEXT: sw zero, 24(sp)
+; RV32I-NEXT: sw zero, 28(sp)
; RV32I-NEXT: sw zero, 0(sp)
-; RV32I-NEXT: sw a0, 60(sp)
-; RV32I-NEXT: sw t1, 56(sp)
-; RV32I-NEXT: sw t0, 52(sp)
+; RV32I-NEXT: sw zero, 4(sp)
+; RV32I-NEXT: sw zero, 8(sp)
+; RV32I-NEXT: sw zero, 12(sp)
; RV32I-NEXT: sw a7, 48(sp)
-; RV32I-NEXT: sw a6, 44(sp)
-; RV32I-NEXT: sw a5, 40(sp)
-; RV32I-NEXT: sw a4, 36(sp)
+; RV32I-NEXT: sw t0, 52(sp)
+; RV32I-NEXT: sw t1, 56(sp)
+; RV32I-NEXT: sw a0, 60(sp)
; RV32I-NEXT: sw a3, 32(sp)
+; RV32I-NEXT: sw a4, 36(sp)
+; RV32I-NEXT: sw a5, 40(sp)
+; RV32I-NEXT: sw a6, 44(sp)
; RV32I-NEXT: andi a0, a1, 28
; RV32I-NEXT: addi a3, sp, 32
-; RV32I-NEXT: sub a5, a3, a0
-; RV32I-NEXT: lw a6, 0(a5)
-; RV32I-NEXT: lw a3, 4(a5)
-; RV32I-NEXT: slli a7, a1, 3
-; RV32I-NEXT: lw t0, 8(a5)
-; RV32I-NEXT: lw t1, 12(a5)
-; RV32I-NEXT: sll a4, a3, a7
-; RV32I-NEXT: andi a0, a7, 24
-; RV32I-NEXT: xori t2, a0, 31
-; RV32I-NEXT: srli a0, a6, 1
-; RV32I-NEXT: srl a0, a0, t2
-; RV32I-NEXT: or a0, a4, a0
-; RV32I-NEXT: sll t3, t1, a7
-; RV32I-NEXT: srli a1, t0, 1
-; RV32I-NEXT: srl a1, a1, t2
-; RV32I-NEXT: or a1, t3, a1
-; RV32I-NEXT: sll t0, t0, a7
-; RV32I-NEXT: srli a3, a3, 1
-; RV32I-NEXT: srl a3, a3, t2
-; RV32I-NEXT: lw t4, 16(a5)
-; RV32I-NEXT: lw t5, 20(a5)
-; RV32I-NEXT: or a3, t0, a3
-; RV32I-NEXT: lw t6, 24(a5)
-; RV32I-NEXT: lw a5, 28(a5)
-; RV32I-NEXT: sll s0, t5, a7
-; RV32I-NEXT: srli s1, t4, 1
-; RV32I-NEXT: srl s1, s1, t2
-; RV32I-NEXT: or s1, s0, s1
-; RV32I-NEXT: sll t4, t4, a7
-; RV32I-NEXT: srli t1, t1, 1
-; RV32I-NEXT: srl t1, t1, t2
-; RV32I-NEXT: or t1, t4, t1
-; RV32I-NEXT: sll a5, a5, a7
-; RV32I-NEXT: srli s2, t6, 1
-; RV32I-NEXT: srl s2, s2, t2
-; RV32I-NEXT: or s2, a5, s2
-; RV32I-NEXT: sll t6, t6, a7
-; RV32I-NEXT: srli t5, t5, 1
-; RV32I-NEXT: srl t2, t5, t2
+; RV32I-NEXT: sub a7, a3, a0
+; RV32I-NEXT: lw t3, 0(a7)
+; RV32I-NEXT: lw a6, 4(a7)
+; RV32I-NEXT: slli t4, a1, 3
+; RV32I-NEXT: lw a5, 8(a7)
+; RV32I-NEXT: lw t2, 12(a7)
+; RV32I-NEXT: sll a0, a6, t4
+; RV32I-NEXT: andi a1, t4, 24
+; RV32I-NEXT: xori t5, a1, 31
+; RV32I-NEXT: srli a1, t3, 1
+; RV32I-NEXT: srl a1, a1, t5
+; RV32I-NEXT: or a1, a0, a1
+; RV32I-NEXT: sll a4, t2, t4
+; RV32I-NEXT: srli a3, a5, 1
+; RV32I-NEXT: srl a3, a3, t5
+; RV32I-NEXT: or a3, a4, a3
+; RV32I-NEXT: sll a5, a5, t4
+; RV32I-NEXT: srli a6, a6, 1
+; RV32I-NEXT: srl a6, a6, t5
+; RV32I-NEXT: lw t6, 16(a7)
+; RV32I-NEXT: lw s0, 20(a7)
+; RV32I-NEXT: or a6, a5, a6
+; RV32I-NEXT: lw s1, 24(a7)
+; RV32I-NEXT: lw a7, 28(a7)
+; RV32I-NEXT: sll t1, s0, t4
+; RV32I-NEXT: srli t0, t6, 1
+; RV32I-NEXT: srl t0, t0, t5
+; RV32I-NEXT: or t0, t1, t0
+; RV32I-NEXT: sll t6, t6, t4
+; RV32I-NEXT: srli t2, t2, 1
+; RV32I-NEXT: srl t2, t2, t5
; RV32I-NEXT: or t2, t6, t2
-; RV32I-NEXT: sll a6, a6, a7
-; RV32I-NEXT: sb a6, 0(a2)
-; RV32I-NEXT: srli a7, t6, 24
-; RV32I-NEXT: sb a7, 27(a2)
+; RV32I-NEXT: sll s2, a7, t4
+; RV32I-NEXT: srli a7, s1, 1
+; RV32I-NEXT: srl a7, a7, t5
+; RV32I-NEXT: or s3, s2, a7
+; RV32I-NEXT: sll s1, s1, t4
+; RV32I-NEXT: srli s0, s0, 1
+; RV32I-NEXT: srl a7, s0, t5
+; RV32I-NEXT: or t5, s1, a7
+; RV32I-NEXT: sll a7, t3, t4
+; RV32I-NEXT: srli s1, s1, 24
+; RV32I-NEXT: srli t3, t5, 16
+; RV32I-NEXT: srli t4, t5, 8
+; RV32I-NEXT: sb t5, 24(a2)
+; RV32I-NEXT: sb t4, 25(a2)
+; RV32I-NEXT: sb t3, 26(a2)
+; RV32I-NEXT: sb s1, 27(a2)
+; RV32I-NEXT: srli t3, s2, 24
+; RV32I-NEXT: srli t4, s3, 16
+; RV32I-NEXT: srli t5, s3, 8
+; RV32I-NEXT: sb s3, 28(a2)
+; RV32I-NEXT: sb t5, 29(a2)
+; RV32I-NEXT: sb t4, 30(a2)
+; RV32I-NEXT: sb t3, 31(a2)
+; RV32I-NEXT: srli t3, t6, 24
+; RV32I-NEXT: srli t4, t2, 16
+; RV32I-NEXT: srli t5, t2, 8
+; RV32I-NEXT: sb t2, 16(a2)
+; RV32I-NEXT: sb t5, 17(a2)
+; RV32I-NEXT: sb t4, 18(a2)
+; RV32I-NEXT: sb t3, 19(a2)
+; RV32I-NEXT: srli t1, t1, 24
+; RV32I-NEXT: srli t2, t0, 16
+; RV32I-NEXT: srli t3, t0, 8
+; RV32I-NEXT: sb t0, 20(a2)
+; RV32I-NEXT: sb t3, 21(a2)
+; RV32I-NEXT: sb t2, 22(a2)
+; RV32I-NEXT: sb t1, 23(a2)
; RV32I-NEXT: srli a5, a5, 24
-; RV32I-NEXT: sb a5, 31(a2)
-; RV32I-NEXT: srli a5, t4, 24
-; RV32I-NEXT: sb a5, 19(a2)
-; RV32I-NEXT: srli s0, s0, 24
-; RV32I-NEXT: sb s0, 23(a2)
-; RV32I-NEXT: srli a5, t0, 24
+; RV32I-NEXT: srli t0, a6, 16
+; RV32I-NEXT: srli t1, a6, 8
+; RV32I-NEXT: sb a6, 8(a2)
+; RV32I-NEXT: sb t1, 9(a2)
+; RV32I-NEXT: sb t0, 10(a2)
; RV32I-NEXT: sb a5, 11(a2)
-; RV32I-NEXT: srli a5, t3, 24
-; RV32I-NEXT: sb a5, 15(a2)
-; RV32I-NEXT: srli a5, a6, 24
-; RV32I-NEXT: sb a5, 3(a2)
-; RV32I-NEXT: srli a5, a6, 16
-; RV32I-NEXT: sb a5, 2(a2)
-; RV32I-NEXT: srli a5, a6, 8
-; RV32I-NEXT: sb a5, 1(a2)
; RV32I-NEXT: srli a4, a4, 24
-; RV32I-NEXT: sb a4, 7(a2)
-; RV32I-NEXT: sb t2, 24(a2)
-; RV32I-NEXT: sb s2, 28(a2)
-; RV32I-NEXT: sb t1, 16(a2)
-; RV32I-NEXT: sb s1, 20(a2)
-; RV32I-NEXT: sb a3, 8(a2)
-; RV32I-NEXT: sb a1, 12(a2)
-; RV32I-NEXT: sb a0, 4(a2)
-; RV32I-NEXT: srli a4, t2, 16
-; RV32I-NEXT: sb a4, 26(a2)
-; RV32I-NEXT: srli a4, t2, 8
-; RV32I-NEXT: sb a4, 25(a2)
-; RV32I-NEXT: srli a4, s2, 16
-; RV32I-NEXT: sb a4, 30(a2)
-; RV32I-NEXT: srli a4, s2, 8
-; RV32I-NEXT: sb a4, 29(a2)
-; RV32I-NEXT: srli a4, t1, 16
-; RV32I-NEXT: sb a4, 18(a2)
-; RV32I-NEXT: srli a4, t1, 8
-; RV32I-NEXT: sb a4, 17(a2)
-; RV32I-NEXT: srli a4, s1, 16
-; RV32I-NEXT: sb a4, 22(a2)
-; RV32I-NEXT: srli s1, s1, 8
-; RV32I-NEXT: sb s1, 21(a2)
-; RV32I-NEXT: srli a4, a3, 16
-; RV32I-NEXT: sb a4, 10(a2)
-; RV32I-NEXT: srli a3, a3, 8
-; RV32I-NEXT: sb a3, 9(a2)
+; RV32I-NEXT: srli a5, a3, 16
+; RV32I-NEXT: srli a6, a3, 8
+; RV32I-NEXT: sb a3, 12(a2)
+; RV32I-NEXT: sb a6, 13(a2)
+; RV32I-NEXT: sb a5, 14(a2)
+; RV32I-NEXT: sb a4, 15(a2)
+; RV32I-NEXT: srli a3, a7, 24
+; RV32I-NEXT: srli a4, a7, 16
+; RV32I-NEXT: srli a5, a7, 8
+; RV32I-NEXT: sb a7, 0(a2)
+; RV32I-NEXT: sb a5, 1(a2)
+; RV32I-NEXT: sb a4, 2(a2)
+; RV32I-NEXT: sb a3, 3(a2)
+; RV32I-NEXT: srli a0, a0, 24
; RV32I-NEXT: srli a3, a1, 16
-; RV32I-NEXT: sb a3, 14(a2)
-; RV32I-NEXT: srli a1, a1, 8
-; RV32I-NEXT: sb a1, 13(a2)
-; RV32I-NEXT: srli a1, a0, 16
-; RV32I-NEXT: sb a1, 6(a2)
-; RV32I-NEXT: srli a0, a0, 8
-; RV32I-NEXT: sb a0, 5(a2)
+; RV32I-NEXT: srli a4, a1, 8
+; RV32I-NEXT: sb a1, 4(a2)
+; RV32I-NEXT: sb a4, 5(a2)
+; RV32I-NEXT: sb a3, 6(a2)
+; RV32I-NEXT: sb a0, 7(a2)
; RV32I-NEXT: lw s0, 76(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 72(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s2, 68(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 64(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 80
; RV32I-NEXT: ret
%src = load i256, ptr %src.ptr, align 1
@@ -3684,27 +3688,27 @@ define void @shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw
; RV64I-NEXT: or a1, a1, t0
; RV64I-NEXT: slli a1, a1, 32
; RV64I-NEXT: or a1, a1, a6
-; RV64I-NEXT: sd zero, 24(sp)
-; RV64I-NEXT: sd zero, 16(sp)
-; RV64I-NEXT: sd zero, 8(sp)
; RV64I-NEXT: sd zero, 0(sp)
-; RV64I-NEXT: sd a0, 56(sp)
-; RV64I-NEXT: sd a5, 48(sp)
-; RV64I-NEXT: sd a4, 40(sp)
+; RV64I-NEXT: sd zero, 8(sp)
+; RV64I-NEXT: sd zero, 16(sp)
+; RV64I-NEXT: sd zero, 24(sp)
; RV64I-NEXT: sd a3, 32(sp)
+; RV64I-NEXT: sd a4, 40(sp)
+; RV64I-NEXT: sd a5, 48(sp)
+; RV64I-NEXT: sd a0, 56(sp)
; RV64I-NEXT: slli a0, a1, 2
; RV64I-NEXT: andi a0, a0, 24
; RV64I-NEXT: addi a3, sp, 32
; RV64I-NEXT: sub a3, a3, a0
-; RV64I-NEXT: ld a4, 0(a3)
-; RV64I-NEXT: ld a5, 8(a3)
+; RV64I-NEXT: ld a5, 0(a3)
+; RV64I-NEXT: ld a4, 8(a3)
; RV64I-NEXT: slli a6, a1, 5
; RV64I-NEXT: ld a7, 16(a3)
; RV64I-NEXT: ld a1, 24(a3)
-; RV64I-NEXT: sll a3, a5, a6
+; RV64I-NEXT: sll a3, a4, a6
; RV64I-NEXT: andi a0, a6, 32
; RV64I-NEXT: xori t0, a0, 63
-; RV64I-NEXT: srli a0, a4, 1
+; RV64I-NEXT: srli a0, a5, 1
; RV64I-NEXT: srl a0, a0, t0
; RV64I-NEXT: or a0, a3, a0
; RV64I-NEXT: sll t1, a1, a6
@@ -3712,70 +3716,70 @@ define void @shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw
; RV64I-NEXT: srl a1, a1, t0
; RV64I-NEXT: or a1, t1, a1
; RV64I-NEXT: sll a7, a7, a6
-; RV64I-NEXT: srli a5, a5, 1
-; RV64I-NEXT: srl a5, a5, t0
-; RV64I-NEXT: or a5, a7, a5
-; RV64I-NEXT: sll a4, a4, a6
-; RV64I-NEXT: sb a4, 0(a2)
+; RV64I-NEXT: srli a4, a4, 1
+; RV64I-NEXT: srl a4, a4, t0
+; RV64I-NEXT: or a4, a7, a4
+; RV64I-NEXT: sll a5, a5, a6
; RV64I-NEXT: srli a6, a7, 56
+; RV64I-NEXT: srli t0, a7, 48
+; RV64I-NEXT: srli t2, a7, 40
+; RV64I-NEXT: srli a7, a7, 32
+; RV64I-NEXT: sb a7, 20(a2)
+; RV64I-NEXT: sb t2, 21(a2)
+; RV64I-NEXT: sb t0, 22(a2)
; RV64I-NEXT: sb a6, 23(a2)
-; RV64I-NEXT: srli a6, a7, 48
-; RV64I-NEXT: sb a6, 22(a2)
-; RV64I-NEXT: srli a6, a7, 40
-; RV64I-NEXT: sb a6, 21(a2)
-; RV64I-NEXT: srli a6, a7, 32
-; RV64I-NEXT: sb a6, 20(a2)
; RV64I-NEXT: srli a6, t1, 56
+; RV64I-NEXT: srli a7, t1, 48
+; RV64I-NEXT: srli t0, t1, 40
+; RV64I-NEXT: srli t1, t1, 32
+; RV64I-NEXT: sb t1, 28(a2)
+; RV64I-NEXT: sb t0, 29(a2)
+; RV64I-NEXT: sb a7, 30(a2)
; RV64I-NEXT: sb a6, 31(a2)
-; RV64I-NEXT: srli a6, t1, 48
-; RV64I-NEXT: sb a6, 30(a2)
-; RV64I-NEXT: srli a6, t1, 40
-; RV64I-NEXT: sb a6, 29(a2)
-; RV64I-NEXT: srli a6, t1, 32
-; RV64I-NEXT: sb a6, 28(a2)
-; RV64I-NEXT: srli a6, a4, 56
+; RV64I-NEXT: srli a6, a5, 56
+; RV64I-NEXT: srli a7, a5, 48
+; RV64I-NEXT: srli t0, a5, 40
+; RV64I-NEXT: srli t1, a5, 32
+; RV64I-NEXT: sb t1, 4(a2)
+; RV64I-NEXT: sb t0, 5(a2)
+; RV64I-NEXT: sb a7, 6(a2)
; RV64I-NEXT: sb a6, 7(a2)
-; RV64I-NEXT: srli a6, a4, 48
-; RV64I-NEXT: sb a6, 6(a2)
-; RV64I-NEXT: srli a6, a4, 40
-; RV64I-NEXT: sb a6, 5(a2)
-; RV64I-NEXT: srli a6, a4, 32
-; RV64I-NEXT: sb a6, 4(a2)
-; RV64I-NEXT: srli a6, a4, 24
+; RV64I-NEXT: srli a6, a5, 24
+; RV64I-NEXT: srli a7, a5, 16
+; RV64I-NEXT: srli t0, a5, 8
+; RV64I-NEXT: sb a5, 0(a2)
+; RV64I-NEXT: sb t0, 1(a2)
+; RV64I-NEXT: sb a7, 2(a2)
; RV64I-NEXT: sb a6, 3(a2)
-; RV64I-NEXT: srli a6, a4, 16
-; RV64I-NEXT: sb a6, 2(a2)
-; RV64I-NEXT: srli a4, a4, 8
-; RV64I-NEXT: sb a4, 1(a2)
-; RV64I-NEXT: srli a4, a3, 56
-; RV64I-NEXT: sb a4, 15(a2)
-; RV64I-NEXT: srli a4, a3, 48
-; RV64I-NEXT: sb a4, 14(a2)
-; RV64I-NEXT: srli a4, a3, 40
-; RV64I-NEXT: sb a4, 13(a2)
+; RV64I-NEXT: srli a5, a3, 56
+; RV64I-NEXT: srli a6, a3, 48
+; RV64I-NEXT: srli a7, a3, 40
; RV64I-NEXT: srli a3, a3, 32
; RV64I-NEXT: sb a3, 12(a2)
-; RV64I-NEXT: sb a5, 16(a2)
-; RV64I-NEXT: sb a1, 24(a2)
-; RV64I-NEXT: sb a0, 8(a2)
-; RV64I-NEXT: srli a3, a5, 24
+; RV64I-NEXT: sb a7, 13(a2)
+; RV64I-NEXT: sb a6, 14(a2)
+; RV64I-NEXT: sb a5, 15(a2)
+; RV64I-NEXT: srli a3, a4, 24
+; RV64I-NEXT: srli a5, a4, 16
+; RV64I-NEXT: srli a6, a4, 8
+; RV64I-NEXT: sb a4, 16(a2)
+; RV64I-NEXT: sb a6, 17(a2)
+; RV64I-NEXT: sb a5, 18(a2)
; RV64I-NEXT: sb a3, 19(a2)
-; RV64I-NEXT: srli a3, a5, 16
-; RV64I-NEXT: sb a3, 18(a2)
-; RV64I-NEXT: srli a5, a5, 8
-; RV64I-NEXT: sb a5, 17(a2)
; RV64I-NEXT: srli a3, a1, 24
+; RV64I-NEXT: srli a4, a1, 16
+; RV64I-NEXT: srli a5, a1, 8
+; RV64I-NEXT: sb a1, 24(a2)
+; RV64I-NEXT: sb a5, 25(a2)
+; RV64I-NEXT: sb a4, 26(a2)
; RV64I-NEXT: sb a3, 27(a2)
-; RV64I-NEXT: srli a3, a1, 16
-; RV64I-NEXT: sb a3, 26(a2)
-; RV64I-NEXT: srli a1, a1, 8
-; RV64I-NEXT: sb a1, 25(a2)
; RV64I-NEXT: srli a1, a0, 24
+; RV64I-NEXT: srli a3, a0, 16
+; RV64I-NEXT: srli a4, a0, 8
+; RV64I-NEXT: sb a0, 8(a2)
+; RV64I-NEXT: sb a4, 9(a2)
+; RV64I-NEXT: sb a3, 10(a2)
; RV64I-NEXT: sb a1, 11(a2)
-; RV64I-NEXT: srli a1, a0, 16
-; RV64I-NEXT: sb a1, 10(a2)
-; RV64I-NEXT: srli a0, a0, 8
-; RV64I-NEXT: sb a0, 9(a2)
; RV64I-NEXT: addi sp, sp, 64
; RV64I-NEXT: ret
;
@@ -3863,90 +3867,90 @@ define void @shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw
; RV32I-NEXT: or a0, a0, t2
; RV32I-NEXT: or a0, a0, t3
; RV32I-NEXT: lbu a1, 0(a1)
-; RV32I-NEXT: sw zero, 28(sp)
-; RV32I-NEXT: sw zero, 24(sp)
-; RV32I-NEXT: sw zero, 20(sp)
; RV32I-NEXT: sw zero, 16(sp)
-; RV32I-NEXT: sw zero, 12(sp)
-; RV32I-NEXT: sw zero, 8(sp)
-; RV32I-NEXT: sw zero, 4(sp)
+; RV32I-NEXT: sw zero, 20(sp)
+; RV32I-NEXT: sw zero, 24(sp)
+; RV32I-NEXT: sw zero, 28(sp)
; RV32I-NEXT: sw zero, 0(sp)
-; RV32I-NEXT: sw a0, 60(sp)
-; RV32I-NEXT: sw t1, 56(sp)
-; RV32I-NEXT: sw t0, 52(sp)
+; RV32I-NEXT: sw zero, 4(sp)
+; RV32I-NEXT: sw zero, 8(sp)
+; RV32I-NEXT: sw zero, 12(sp)
; RV32I-NEXT: sw a7, 48(sp)
-; RV32I-NEXT: sw a6, 44(sp)
-; RV32I-NEXT: sw a5, 40(sp)
-; RV32I-NEXT: sw a4, 36(sp)
+; RV32I-NEXT: sw t0, 52(sp)
+; RV32I-NEXT: sw t1, 56(sp)
+; RV32I-NEXT: sw a0, 60(sp)
; RV32I-NEXT: sw a3, 32(sp)
+; RV32I-NEXT: sw a4, 36(sp)
+; RV32I-NEXT: sw a5, 40(sp)
+; RV32I-NEXT: sw a6, 44(sp)
; RV32I-NEXT: slli a1, a1, 2
; RV32I-NEXT: andi a1, a1, 28
; RV32I-NEXT: addi a0, sp, 32
-; RV32I-NEXT: sub a1, a0, a1
-; RV32I-NEXT: lw a3, 16(a1)
-; RV32I-NEXT: lw a4, 20(a1)
-; RV32I-NEXT: lw a5, 24(a1)
-; RV32I-NEXT: lw a6, 28(a1)
-; RV32I-NEXT: lw a7, 0(a1)
-; RV32I-NEXT: lw a0, 4(a1)
-; RV32I-NEXT: lw t0, 8(a1)
-; RV32I-NEXT: lw a1, 12(a1)
-; RV32I-NEXT: sb a5, 24(a2)
-; RV32I-NEXT: sb a6, 28(a2)
-; RV32I-NEXT: sb a3, 16(a2)
-; RV32I-NEXT: sb a4, 20(a2)
-; RV32I-NEXT: sb t0, 8(a2)
-; RV32I-NEXT: sb a1, 12(a2)
-; RV32I-NEXT: sb a7, 0(a2)
-; RV32I-NEXT: sb a0, 4(a2)
-; RV32I-NEXT: srli t1, a5, 24
+; RV32I-NEXT: sub a4, a0, a1
+; RV32I-NEXT: lw a5, 16(a4)
+; RV32I-NEXT: lw a6, 20(a4)
+; RV32I-NEXT: lw a7, 24(a4)
+; RV32I-NEXT: lw a1, 0(a4)
+; RV32I-NEXT: lw a0, 4(a4)
+; RV32I-NEXT: lw t0, 8(a4)
+; RV32I-NEXT: lw a3, 12(a4)
+; RV32I-NEXT: lw a4, 28(a4)
+; RV32I-NEXT: srli t1, a7, 24
+; RV32I-NEXT: srli t2, a7, 16
+; RV32I-NEXT: srli t3, a7, 8
+; RV32I-NEXT: sb a7, 24(a2)
+; RV32I-NEXT: sb t3, 25(a2)
+; RV32I-NEXT: sb t2, 26(a2)
; RV32I-NEXT: sb t1, 27(a2)
-; RV32I-NEXT: srli t1, a5, 16
-; RV32I-NEXT: sb t1, 26(a2)
-; RV32I-NEXT: srli a5, a5, 8
-; RV32I-NEXT: sb a5, 25(a2)
-; RV32I-NEXT: srli a5, a6, 24
-; RV32I-NEXT: sb a5, 31(a2)
+; RV32I-NEXT: srli a7, a4, 24
+; RV32I-NEXT: srli t1, a4, 16
+; RV32I-NEXT: srli t2, a4, 8
+; RV32I-NEXT: sb a4, 28(a2)
+; RV32I-NEXT: sb t2, 29(a2)
+; RV32I-NEXT: sb t1, 30(a2)
+; RV32I-NEXT: sb a7, 31(a2)
+; RV32I-NEXT: srli a4, a5, 24
+; RV32I-NEXT: srli a7, a5, 16
+; RV32I-NEXT: srli t1, a5, 8
+; RV32I-NEXT: sb a5, 16(a2)
+; RV32I-NEXT: sb t1, 17(a2)
+; RV32I-NEXT: sb a7, 18(a2)
+; RV32I-NEXT: sb a4, 19(a2)
+; RV32I-NEXT: srli a4, a6, 24
; RV32I-NEXT: srli a5, a6, 16
-; RV32I-NEXT: sb a5, 30(a2)
-; RV32I-NEXT: srli a5, a6, 8
-; RV32I-NEXT: sb a5, 29(a2)
-; RV32I-NEXT: srli a5, a3, 24
-; RV32I-NEXT: sb a5, 19(a2)
+; RV32I-NEXT: srli a7, a6, 8
+; RV32I-NEXT: sb a6, 20(a2)
+; RV32I-NEXT: sb a7, 21(a2)
+; RV32I-NEXT: sb a5, 22(a2)
+; RV32I-NEXT: sb a4, 23(a2)
+; RV32I-NEXT: srli a4, t0, 24
+; RV32I-NEXT: srli a5, t0, 16
+; RV32I-NEXT: srli a6, t0, 8
+; RV32I-NEXT: sb t0, 8(a2)
+; RV32I-NEXT: sb a6, 9(a2)
+; RV32I-NEXT: sb a5, 10(a2)
+; RV32I-NEXT: sb a4, 11(a2)
+; RV32I-NEXT: srli a4, a3, 24
; RV32I-NEXT: srli a5, a3, 16
-; RV32I-NEXT: sb a5, 18(a2)
-; RV32I-NEXT: srli a3, a3, 8
-; RV32I-NEXT: sb a3, 17(a2)
-; RV32I-NEXT: srli a3, a4, 24
-; RV32I-NEXT: sb a3, 23(a2)
-; RV32I-NEXT: srli a3, a4, 16
-; RV32I-NEXT: sb a3, 22(a2)
-; RV32I-NEXT: srli a4, a4, 8
-; RV32I-NEXT: sb a4, 21(a2)
-; RV32I-NEXT: srli a3, t0, 24
-; RV32I-NEXT: sb a3, 11(a2)
-; RV32I-NEXT: srli a3, t0, 16
-; RV32I-NEXT: sb a3, 10(a2)
-; RV32I-NEXT: srli a3, t0, 8
-; RV32I-NEXT: sb a3, 9(a2)
+; RV32I-NEXT: srli a6, a3, 8
+; RV32I-NEXT: sb a3, 12(a2)
+; RV32I-NEXT: sb a6, 13(a2)
+; RV32I-NEXT: sb a5, 14(a2)
+; RV32I-NEXT: sb a4, 15(a2)
; RV32I-NEXT: srli a3, a1, 24
-; RV32I-NEXT: sb a3, 15(a2)
-; RV32I-NEXT: srli a3, a1, 16
-; RV32I-NEXT: sb a3, 14(a2)
-; RV32I-NEXT: srli a1, a1, 8
-; RV32I-NEXT: sb a1, 13(a2)
-; RV32I-NEXT: srli a1, a7, 24
-; RV32I-NEXT: sb a1, 3(a2)
-; RV32I-NEXT: srli a1, a7, 16
-; RV32I-NEXT: sb a1, 2(a2)
-; RV32I-NEXT: srli a1, a7, 8
-; RV32I-NEXT: sb a1, 1(a2)
+; RV32I-NEXT: srli a4, a1, 16
+; RV32I-NEXT: srli a5, a1, 8
+; RV32I-NEXT: sb a1, 0(a2)
+; RV32I-NEXT: sb a5, 1(a2)
+; RV32I-NEXT: sb a4, 2(a2)
+; RV32I-NEXT: sb a3, 3(a2)
; RV32I-NEXT: srli a1, a0, 24
+; RV32I-NEXT: srli a3, a0, 16
+; RV32I-NEXT: srli a4, a0, 8
+; RV32I-NEXT: sb a0, 4(a2)
+; RV32I-NEXT: sb a4, 5(a2)
+; RV32I-NEXT: sb a3, 6(a2)
; RV32I-NEXT: sb a1, 7(a2)
-; RV32I-NEXT: srli a1, a0, 16
-; RV32I-NEXT: sb a1, 6(a2)
-; RV32I-NEXT: srli a0, a0, 8
-; RV32I-NEXT: sb a0, 5(a2)
; RV32I-NEXT: addi sp, sp, 64
; RV32I-NEXT: ret
%src = load i256, ptr %src.ptr, align 1
@@ -4050,82 +4054,82 @@ define void @shl_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nou
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: or a0, a0, a6
; RV64I-NEXT: lbu a1, 0(a1)
-; RV64I-NEXT: sd zero, 24(sp)
-; RV64I-NEXT: sd zero, 16(sp)
-; RV64I-NEXT: sd zero, 8(sp)
; RV64I-NEXT: sd zero, 0(sp)
-; RV64I-NEXT: sd a0, 56(sp)
-; RV64I-NEXT: sd a5, 48(sp)
-; RV64I-NEXT: sd a4, 40(sp)
+; RV64I-NEXT: sd zero, 8(sp)
+; RV64I-NEXT: sd zero, 16(sp)
+; RV64I-NEXT: sd zero, 24(sp)
; RV64I-NEXT: sd a3, 32(sp)
+; RV64I-NEXT: sd a4, 40(sp)
+; RV64I-NEXT: sd a5, 48(sp)
+; RV64I-NEXT: sd a0, 56(sp)
; RV64I-NEXT: slli a1, a1, 3
; RV64I-NEXT: andi a1, a1, 24
; RV64I-NEXT: addi a0, sp, 32
-; RV64I-NEXT: sub a0, a0, a1
-; RV64I-NEXT: ld a1, 16(a0)
-; RV64I-NEXT: ld a3, 24(a0)
-; RV64I-NEXT: ld a4, 0(a0)
-; RV64I-NEXT: ld a0, 8(a0)
-; RV64I-NEXT: sb a1, 16(a2)
-; RV64I-NEXT: sb a3, 24(a2)
-; RV64I-NEXT: sb a4, 0(a2)
-; RV64I-NEXT: sb a0, 8(a2)
-; RV64I-NEXT: srli a5, a1, 56
+; RV64I-NEXT: sub a3, a0, a1
+; RV64I-NEXT: ld a4, 16(a3)
+; RV64I-NEXT: ld a0, 8(a3)
+; RV64I-NEXT: ld a1, 0(a3)
+; RV64I-NEXT: ld a3, 24(a3)
+; RV64I-NEXT: srli a5, a4, 56
+; RV64I-NEXT: srli a6, a4, 48
+; RV64I-NEXT: srli a7, a4, 40
+; RV64I-NEXT: srli t0, a4, 32
+; RV64I-NEXT: sb t0, 20(a2)
+; RV64I-NEXT: sb a7, 21(a2)
+; RV64I-NEXT: sb a6, 22(a2)
; RV64I-NEXT: sb a5, 23(a2)
-; RV64I-NEXT: srli a5, a1, 48
-; RV64I-NEXT: sb a5, 22(a2)
-; RV64I-NEXT: srli a5, a1, 40
-; RV64I-NEXT: sb a5, 21(a2)
-; RV64I-NEXT: srli a5, a1, 32
-; RV64I-NEXT: sb a5, 20(a2)
-; RV64I-NEXT: srli a5, a1, 24
+; RV64I-NEXT: srli a5, a4, 24
+; RV64I-NEXT: srli a6, a4, 16
+; RV64I-NEXT: srli a7, a4, 8
+; RV64I-NEXT: sb a4, 16(a2)
+; RV64I-NEXT: sb a7, 17(a2)
+; RV64I-NEXT: sb a6, 18(a2)
; RV64I-NEXT: sb a5, 19(a2)
-; RV64I-NEXT: srli a5, a1, 16
-; RV64I-NEXT: sb a5, 18(a2)
-; RV64I-NEXT: srli a1, a1, 8
-; RV64I-NEXT: sb a1, 17(a2)
-; RV64I-NEXT: srli a1, a3, 56
-; RV64I-NEXT: sb a1, 31(a2)
-; RV64I-NEXT: srli a1, a3, 48
-; RV64I-NEXT: sb a1, 30(a2)
-; RV64I-NEXT: srli a1, a3, 40
-; RV64I-NEXT: sb a1, 29(a2)
-; RV64I-NEXT: srli a1, a3, 32
-; RV64I-NEXT: sb a1, 28(a2)
-; RV64I-NEXT: srli a1, a3, 24
-; RV64I-NEXT: sb a1, 27(a2)
-; RV64I-NEXT: srli a1, a3, 16
-; RV64I-NEXT: sb a1, 26(a2)
-; RV64I-NEXT: srli a3, a3, 8
-; RV64I-NEXT: sb a3, 25(a2)
-; RV64I-NEXT: srli a1, a4, 56
-; RV64I-NEXT: sb a1, 7(a2)
-; RV64I-NEXT: srli a1, a4, 48
-; RV64I-NEXT: sb a1, 6(a2)
-; RV64I-NEXT: srli a1, a4, 40
-; RV64I-NEXT: sb a1, 5(a2)
-; RV64I-NEXT: srli a1, a4, 32
-; RV64I-NEXT: sb a1, 4(a2)
-; RV64I-NEXT: srli a1, a4, 24
-; RV64I-NEXT: sb a1, 3(a2)
-; RV64I-NEXT: srli a1, a4, 16
-; RV64I-NEXT: sb a1, 2(a2)
-; RV64I-NEXT: srli a4, a4, 8
-; RV64I-NEXT: sb a4, 1(a2)
+; RV64I-NEXT: srli a4, a3, 56
+; RV64I-NEXT: srli a5, a3, 48
+; RV64I-NEXT: srli a6, a3, 40
+; RV64I-NEXT: srli a7, a3, 32
+; RV64I-NEXT: sb a7, 28(a2)
+; RV64I-NEXT: sb a6, 29(a2)
+; RV64I-NEXT: sb a5, 30(a2)
+; RV64I-NEXT: sb a4, 31(a2)
+; RV64I-NEXT: srli a4, a3, 24
+; RV64I-NEXT: srli a5, a3, 16
+; RV64I-NEXT: srli a6, a3, 8
+; RV64I-NEXT: sb a3, 24(a2)
+; RV64I-NEXT: sb a6, 25(a2)
+; RV64I-NEXT: sb a5, 26(a2)
+; RV64I-NEXT: sb a4, 27(a2)
+; RV64I-NEXT: srli a3, a1, 56
+; RV64I-NEXT: srli a4, a1, 48
+; RV64I-NEXT: srli a5, a1, 40
+; RV64I-NEXT: srli a6, a1, 32
+; RV64I-NEXT: sb a6, 4(a2)
+; RV64I-NEXT: sb a5, 5(a2)
+; RV64I-NEXT: sb a4, 6(a2)
+; RV64I-NEXT: sb a3, 7(a2)
+; RV64I-NEXT: srli a3, a1, 24
+; RV64I-NEXT: srli a4, a1, 16
+; RV64I-NEXT: srli a5, a1, 8
+; RV64I-NEXT: sb a1, 0(a2)
+; RV64I-NEXT: sb a5, 1(a2)
+; RV64I-NEXT: sb a4, 2(a2)
+; RV64I-NEXT: sb a3, 3(a2)
; RV64I-NEXT: srli a1, a0, 56
+; RV64I-NEXT: srli a3, a0, 48
+; RV64I-NEXT: srli a4, a0, 40
+; RV64I-NEXT: srli a5, a0, 32
+; RV64I-NEXT: sb a5, 12(a2)
+; RV64I-NEXT: sb a4, 13(a2)
+; RV64I-NEXT: sb a3, 14(a2)
; RV64I-NEXT: sb a1, 15(a2)
-; RV64I-NEXT: srli a1, a0, 48
-; RV64I-NEXT: sb a1, 14(a2)
-; RV64I-NEXT: srli a1, a0, 40
-; RV64I-NEXT: sb a1, 13(a2)
-; RV64I-NEXT: srli a1, a0, 32
-; RV64I-NEXT: sb a1, 12(a2)
; RV64I-NEXT: srli a1, a0, 24
+; RV64I-NEXT: srli a3, a0, 16
+; RV64I-NEXT: srli a4, a0, 8
+; RV64I-NEXT: sb a0, 8(a2)
+; RV64I-NEXT: sb a4, 9(a2)
+; RV64I-NEXT: sb a3, 10(a2)
; RV64I-NEXT: sb a1, 11(a2)
-; RV64I-NEXT: srli a1, a0, 16
-; RV64I-NEXT: sb a1, 10(a2)
-; RV64I-NEXT: srli a0, a0, 8
-; RV64I-NEXT: sb a0, 9(a2)
; RV64I-NEXT: addi sp, sp, 64
; RV64I-NEXT: ret
;
@@ -4213,90 +4217,90 @@ define void @shl_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nou
; RV32I-NEXT: or a0, a0, t2
; RV32I-NEXT: or a0, a0, t3
; RV32I-NEXT: lbu a1, 0(a1)
-; RV32I-NEXT: sw zero, 28(sp)
-; RV32I-NEXT: sw zero, 24(sp)
-; RV32I-NEXT: sw zero, 20(sp)
; RV32I-NEXT: sw zero, 16(sp)
-; RV32I-NEXT: sw zero, 12(sp)
-; RV32I-NEXT: sw zero, 8(sp)
-; RV32I-NEXT: sw zero, 4(sp)
+; RV32I-NEXT: sw zero, 20(sp)
+; RV32I-NEXT: sw zero, 24(sp)
+; RV32I-NEXT: sw zero, 28(sp)
; RV32I-NEXT: sw zero, 0(sp)
-; RV32I-NEXT: sw a0, 60(sp)
-; RV32I-NEXT: sw t1, 56(sp)
-; RV32I-NEXT: sw t0, 52(sp)
+; RV32I-NEXT: sw zero, 4(sp)
+; RV32I-NEXT: sw zero, 8(sp)
+; RV32I-NEXT: sw zero, 12(sp)
; RV32I-NEXT: sw a7, 48(sp)
-; RV32I-NEXT: sw a6, 44(sp)
-; RV32I-NEXT: sw a5, 40(sp)
-; RV32I-NEXT: sw a4, 36(sp)
+; RV32I-NEXT: sw t0, 52(sp)
+; RV32I-NEXT: sw t1, 56(sp)
+; RV32I-NEXT: sw a0, 60(sp)
; RV32I-NEXT: sw a3, 32(sp)
+; RV32I-NEXT: sw a4, 36(sp)
+; RV32I-NEXT: sw a5, 40(sp)
+; RV32I-NEXT: sw a6, 44(sp)
; RV32I-NEXT: slli a1, a1, 3
; RV32I-NEXT: andi a1, a1, 24
; RV32I-NEXT: addi a0, sp, 32
-; RV32I-NEXT: sub a1, a0, a1
-; RV32I-NEXT: lw a3, 16(a1)
-; RV32I-NEXT: lw a4, 20(a1)
-; RV32I-NEXT: lw a5, 24(a1)
-; RV32I-NEXT: lw a6, 28(a1)
-; RV32I-NEXT: lw a7, 0(a1)
-; RV32I-NEXT: lw a0, 4(a1)
-; RV32I-NEXT: lw t0, 8(a1)
-; RV32I-NEXT: lw a1, 12(a1)
-; RV32I-NEXT: sb a5, 24(a2)
-; RV32I-NEXT: sb a6, 28(a2)
-; RV32I-NEXT: sb a3, 16(a2)
-; RV32I-NEXT: sb a4, 20(a2)
-; RV32I-NEXT: sb t0, 8(a2)
-; RV32I-NEXT: sb a1, 12(a2)
-; RV32I-NEXT: sb a7, 0(a2)
-; RV32I-NEXT: sb a0, 4(a2)
-; RV32I-NEXT: srli t1, a5, 24
+; RV32I-NEXT: sub a4, a0, a1
+; RV32I-NEXT: lw a5, 16(a4)
+; RV32I-NEXT: lw a6, 20(a4)
+; RV32I-NEXT: lw a7, 24(a4)
+; RV32I-NEXT: lw a1, 0(a4)
+; RV32I-NEXT: lw a0, 4(a4)
+; RV32I-NEXT: lw t0, 8(a4)
+; RV32I-NEXT: lw a3, 12(a4)
+; RV32I-NEXT: lw a4, 28(a4)
+; RV32I-NEXT: srli t1, a7, 24
+; RV32I-NEXT: srli t2, a7, 16
+; RV32I-NEXT: srli t3, a7, 8
+; RV32I-NEXT: sb a7, 24(a2)
+; RV32I-NEXT: sb t3, 25(a2)
+; RV32I-NEXT: sb t2, 26(a2)
; RV32I-NEXT: sb t1, 27(a2)
-; RV32I-NEXT: srli t1, a5, 16
-; RV32I-NEXT: sb t1, 26(a2)
-; RV32I-NEXT: srli a5, a5, 8
-; RV32I-NEXT: sb a5, 25(a2)
-; RV32I-NEXT: srli a5, a6, 24
-; RV32I-NEXT: sb a5, 31(a2)
+; RV32I-NEXT: srli a7, a4, 24
+; RV32I-NEXT: srli t1, a4, 16
+; RV32I-NEXT: srli t2, a4, 8
+; RV32I-NEXT: sb a4, 28(a2)
+; RV32I-NEXT: sb t2, 29(a2)
+; RV32I-NEXT: sb t1, 30(a2)
+; RV32I-NEXT: sb a7, 31(a2)
+; RV32I-NEXT: srli a4, a5, 24
+; RV32I-NEXT: srli a7, a5, 16
+; RV32I-NEXT: srli t1, a5, 8
+; RV32I-NEXT: sb a5, 16(a2)
+; RV32I-NEXT: sb t1, 17(a2)
+; RV32I-NEXT: sb a7, 18(a2)
+; RV32I-NEXT: sb a4, 19(a2)
+; RV32I-NEXT: srli a4, a6, 24
; RV32I-NEXT: srli a5, a6, 16
-; RV32I-NEXT: sb a5, 30(a2)
-; RV32I-NEXT: srli a5, a6, 8
-; RV32I-NEXT: sb a5, 29(a2)
-; RV32I-NEXT: srli a5, a3, 24
-; RV32I-NEXT: sb a5, 19(a2)
+; RV32I-NEXT: srli a7, a6, 8
+; RV32I-NEXT: sb a6, 20(a2)
+; RV32I-NEXT: sb a7, 21(a2)
+; RV32I-NEXT: sb a5, 22(a2)
+; RV32I-NEXT: sb a4, 23(a2)
+; RV32I-NEXT: srli a4, t0, 24
+; RV32I-NEXT: srli a5, t0, 16
+; RV32I-NEXT: srli a6, t0, 8
+; RV32I-NEXT: sb t0, 8(a2)
+; RV32I-NEXT: sb a6, 9(a2)
+; RV32I-NEXT: sb a5, 10(a2)
+; RV32I-NEXT: sb a4, 11(a2)
+; RV32I-NEXT: srli a4, a3, 24
; RV32I-NEXT: srli a5, a3, 16
-; RV32I-NEXT: sb a5, 18(a2)
-; RV32I-NEXT: srli a3, a3, 8
-; RV32I-NEXT: sb a3, 17(a2)
-; RV32I-NEXT: srli a3, a4, 24
-; RV32I-NEXT: sb a3, 23(a2)
-; RV32I-NEXT: srli a3, a4, 16
-; RV32I-NEXT: sb a3, 22(a2)
-; RV32I-NEXT: srli a4, a4, 8
-; RV32I-NEXT: sb a4, 21(a2)
-; RV32I-NEXT: srli a3, t0, 24
-; RV32I-NEXT: sb a3, 11(a2)
-; RV32I-NEXT: srli a3, t0, 16
-; RV32I-NEXT: sb a3, 10(a2)
-; RV32I-NEXT: srli a3, t0, 8
-; RV32I-NEXT: sb a3, 9(a2)
+; RV32I-NEXT: srli a6, a3, 8
+; RV32I-NEXT: sb a3, 12(a2)
+; RV32I-NEXT: sb a6, 13(a2)
+; RV32I-NEXT: sb a5, 14(a2)
+; RV32I-NEXT: sb a4, 15(a2)
; RV32I-NEXT: srli a3, a1, 24
-; RV32I-NEXT: sb a3, 15(a2)
-; RV32I-NEXT: srli a3, a1, 16
-; RV32I-NEXT: sb a3, 14(a2)
-; RV32I-NEXT: srli a1, a1, 8
-; RV32I-NEXT: sb a1, 13(a2)
-; RV32I-NEXT: srli a1, a7, 24
-; RV32I-NEXT: sb a1, 3(a2)
-; RV32I-NEXT: srli a1, a7, 16
-; RV32I-NEXT: sb a1, 2(a2)
-; RV32I-NEXT: srli a1, a7, 8
-; RV32I-NEXT: sb a1, 1(a2)
+; RV32I-NEXT: srli a4, a1, 16
+; RV32I-NEXT: srli a5, a1, 8
+; RV32I-NEXT: sb a1, 0(a2)
+; RV32I-NEXT: sb a5, 1(a2)
+; RV32I-NEXT: sb a4, 2(a2)
+; RV32I-NEXT: sb a3, 3(a2)
; RV32I-NEXT: srli a1, a0, 24
+; RV32I-NEXT: srli a3, a0, 16
+; RV32I-NEXT: srli a4, a0, 8
+; RV32I-NEXT: sb a0, 4(a2)
+; RV32I-NEXT: sb a4, 5(a2)
+; RV32I-NEXT: sb a3, 6(a2)
; RV32I-NEXT: sb a1, 7(a2)
-; RV32I-NEXT: srli a1, a0, 16
-; RV32I-NEXT: sb a1, 6(a2)
-; RV32I-NEXT: srli a0, a0, 8
-; RV32I-NEXT: sb a0, 5(a2)
; RV32I-NEXT: addi sp, sp, 64
; RV32I-NEXT: ret
%src = load i256, ptr %src.ptr, align 1
@@ -4422,97 +4426,97 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: slli a1, a1, 32
; RV64I-NEXT: or a1, a1, a7
; RV64I-NEXT: sraiw a0, a0, 31
-; RV64I-NEXT: sd a0, 56(sp)
-; RV64I-NEXT: sd a0, 48(sp)
-; RV64I-NEXT: sd a0, 40(sp)
; RV64I-NEXT: sd a0, 32(sp)
-; RV64I-NEXT: sd a6, 24(sp)
-; RV64I-NEXT: sd a5, 16(sp)
-; RV64I-NEXT: sd a4, 8(sp)
+; RV64I-NEXT: sd a0, 40(sp)
+; RV64I-NEXT: sd a0, 48(sp)
+; RV64I-NEXT: sd a0, 56(sp)
; RV64I-NEXT: sd a3, 0(sp)
+; RV64I-NEXT: sd a4, 8(sp)
+; RV64I-NEXT: sd a5, 16(sp)
+; RV64I-NEXT: sd a6, 24(sp)
; RV64I-NEXT: andi a0, a1, 24
; RV64I-NEXT: mv a3, sp
; RV64I-NEXT: add a0, a3, a0
; RV64I-NEXT: ld a3, 0(a0)
; RV64I-NEXT: ld a4, 8(a0)
-; RV64I-NEXT: slli a1, a1, 3
-; RV64I-NEXT: ld a5, 16(a0)
-; RV64I-NEXT: ld a6, 24(a0)
-; RV64I-NEXT: srl a7, a4, a1
-; RV64I-NEXT: andi a0, a1, 56
-; RV64I-NEXT: xori t0, a0, 63
-; RV64I-NEXT: slli a0, a5, 1
-; RV64I-NEXT: sll a0, a0, t0
-; RV64I-NEXT: or a0, a7, a0
-; RV64I-NEXT: srl a3, a3, a1
+; RV64I-NEXT: slli a5, a1, 3
+; RV64I-NEXT: ld a6, 16(a0)
+; RV64I-NEXT: ld a7, 24(a0)
+; RV64I-NEXT: srl a0, a4, a5
+; RV64I-NEXT: andi a1, a5, 56
+; RV64I-NEXT: xori t0, a1, 63
+; RV64I-NEXT: slli a1, a6, 1
+; RV64I-NEXT: sll a1, a1, t0
+; RV64I-NEXT: or a1, a0, a1
+; RV64I-NEXT: srl a3, a3, a5
; RV64I-NEXT: slli a4, a4, 1
; RV64I-NEXT: sll a4, a4, t0
; RV64I-NEXT: or a4, a3, a4
-; RV64I-NEXT: srl a5, a5, a1
-; RV64I-NEXT: slli t1, a6, 1
+; RV64I-NEXT: srl a6, a6, a5
+; RV64I-NEXT: slli t1, a7, 1
; RV64I-NEXT: sll t0, t1, t0
-; RV64I-NEXT: or t0, a5, t0
-; RV64I-NEXT: sra a1, a6, a1
-; RV64I-NEXT: sb a5, 16(a2)
-; RV64I-NEXT: sb a1, 24(a2)
+; RV64I-NEXT: or t0, a6, t0
+; RV64I-NEXT: sra a5, a7, a5
+; RV64I-NEXT: srli a7, a5, 56
+; RV64I-NEXT: srli t1, a5, 48
+; RV64I-NEXT: srli t2, a5, 40
+; RV64I-NEXT: srli t3, a5, 32
+; RV64I-NEXT: sb t3, 28(a2)
+; RV64I-NEXT: sb t2, 29(a2)
+; RV64I-NEXT: sb t1, 30(a2)
+; RV64I-NEXT: sb a7, 31(a2)
+; RV64I-NEXT: srli a7, a5, 24
+; RV64I-NEXT: srli t1, a5, 16
+; RV64I-NEXT: srli t2, a5, 8
+; RV64I-NEXT: sb a5, 24(a2)
+; RV64I-NEXT: sb t2, 25(a2)
+; RV64I-NEXT: sb t1, 26(a2)
+; RV64I-NEXT: sb a7, 27(a2)
+; RV64I-NEXT: srli a5, t0, 56
+; RV64I-NEXT: srli a7, t0, 48
+; RV64I-NEXT: srli t1, t0, 40
+; RV64I-NEXT: srli t2, t0, 32
+; RV64I-NEXT: sb t2, 20(a2)
+; RV64I-NEXT: sb t1, 21(a2)
+; RV64I-NEXT: sb a7, 22(a2)
+; RV64I-NEXT: sb a5, 23(a2)
+; RV64I-NEXT: srli a5, t0, 24
+; RV64I-NEXT: srli a7, t0, 16
+; RV64I-NEXT: srli t0, t0, 8
+; RV64I-NEXT: sb a6, 16(a2)
+; RV64I-NEXT: sb t0, 17(a2)
+; RV64I-NEXT: sb a7, 18(a2)
+; RV64I-NEXT: sb a5, 19(a2)
+; RV64I-NEXT: srli a5, a4, 56
+; RV64I-NEXT: srli a6, a4, 48
+; RV64I-NEXT: srli a7, a4, 40
+; RV64I-NEXT: srli t0, a4, 32
+; RV64I-NEXT: sb t0, 4(a2)
+; RV64I-NEXT: sb a7, 5(a2)
+; RV64I-NEXT: sb a6, 6(a2)
+; RV64I-NEXT: sb a5, 7(a2)
+; RV64I-NEXT: srli a5, a4, 24
+; RV64I-NEXT: srli a6, a4, 16
+; RV64I-NEXT: srli a4, a4, 8
; RV64I-NEXT: sb a3, 0(a2)
-; RV64I-NEXT: sb a7, 8(a2)
+; RV64I-NEXT: sb a4, 1(a2)
+; RV64I-NEXT: sb a6, 2(a2)
+; RV64I-NEXT: sb a5, 3(a2)
; RV64I-NEXT: srli a3, a1, 56
-; RV64I-NEXT: sb a3, 31(a2)
-; RV64I-NEXT: srli a3, a1, 48
-; RV64I-NEXT: sb a3, 30(a2)
-; RV64I-NEXT: srli a3, a1, 40
-; RV64I-NEXT: sb a3, 29(a2)
-; RV64I-NEXT: srli a3, a1, 32
-; RV64I-NEXT: sb a3, 28(a2)
+; RV64I-NEXT: srli a4, a1, 48
+; RV64I-NEXT: srli a5, a1, 40
+; RV64I-NEXT: srli a6, a1, 32
+; RV64I-NEXT: sb a6, 12(a2)
+; RV64I-NEXT: sb a5, 13(a2)
+; RV64I-NEXT: sb a4, 14(a2)
+; RV64I-NEXT: sb a3, 15(a2)
; RV64I-NEXT: srli a3, a1, 24
-; RV64I-NEXT: sb a3, 27(a2)
-; RV64I-NEXT: srli a3, a1, 16
-; RV64I-NEXT: sb a3, 26(a2)
+; RV64I-NEXT: srli a4, a1, 16
; RV64I-NEXT: srli a1, a1, 8
-; RV64I-NEXT: sb a1, 25(a2)
-; RV64I-NEXT: srli a1, t0, 56
-; RV64I-NEXT: sb a1, 23(a2)
-; RV64I-NEXT: srli a1, t0, 48
-; RV64I-NEXT: sb a1, 22(a2)
-; RV64I-NEXT: srli a1, t0, 40
-; RV64I-NEXT: sb a1, 21(a2)
-; RV64I-NEXT: srli a1, t0, 32
-; RV64I-NEXT: sb a1, 20(a2)
-; RV64I-NEXT: srli a1, t0, 24
-; RV64I-NEXT: sb a1, 19(a2)
-; RV64I-NEXT: srli a1, t0, 16
-; RV64I-NEXT: sb a1, 18(a2)
-; RV64I-NEXT: srli a1, t0, 8
-; RV64I-NEXT: sb a1, 17(a2)
-; RV64I-NEXT: srli a1, a4, 56
-; RV64I-NEXT: sb a1, 7(a2)
-; RV64I-NEXT: srli a1, a4, 48
-; RV64I-NEXT: sb a1, 6(a2)
-; RV64I-NEXT: srli a1, a4, 40
-; RV64I-NEXT: sb a1, 5(a2)
-; RV64I-NEXT: srli a1, a4, 32
-; RV64I-NEXT: sb a1, 4(a2)
-; RV64I-NEXT: srli a1, a4, 24
-; RV64I-NEXT: sb a1, 3(a2)
-; RV64I-NEXT: srli a1, a4, 16
-; RV64I-NEXT: sb a1, 2(a2)
-; RV64I-NEXT: srli a4, a4, 8
-; RV64I-NEXT: sb a4, 1(a2)
-; RV64I-NEXT: srli a1, a0, 56
-; RV64I-NEXT: sb a1, 15(a2)
-; RV64I-NEXT: srli a1, a0, 48
-; RV64I-NEXT: sb a1, 14(a2)
-; RV64I-NEXT: srli a1, a0, 40
-; RV64I-NEXT: sb a1, 13(a2)
-; RV64I-NEXT: srli a1, a0, 32
-; RV64I-NEXT: sb a1, 12(a2)
-; RV64I-NEXT: srli a1, a0, 24
-; RV64I-NEXT: sb a1, 11(a2)
-; RV64I-NEXT: srli a1, a0, 16
-; RV64I-NEXT: sb a1, 10(a2)
-; RV64I-NEXT: srli a0, a0, 8
-; RV64I-NEXT: sb a0, 9(a2)
+; RV64I-NEXT: sb a0, 8(a2)
+; RV64I-NEXT: sb a1, 9(a2)
+; RV64I-NEXT: sb a4, 10(a2)
+; RV64I-NEXT: sb a3, 11(a2)
; RV64I-NEXT: addi sp, sp, 64
; RV64I-NEXT: ret
;
@@ -4522,6 +4526,7 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: sw s0, 76(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 72(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 68(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 64(sp) # 4-byte Folded Spill
; RV32I-NEXT: lbu a3, 1(a0)
; RV32I-NEXT: lbu a4, 0(a0)
; RV32I-NEXT: lbu a5, 2(a0)
@@ -4613,124 +4618,125 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: or a1, a1, t3
; RV32I-NEXT: or a1, a1, t4
; RV32I-NEXT: srai a0, a0, 31
-; RV32I-NEXT: sw a0, 60(sp)
-; RV32I-NEXT: sw a0, 56(sp)
-; RV32I-NEXT: sw a0, 52(sp)
; RV32I-NEXT: sw a0, 48(sp)
-; RV32I-NEXT: sw a0, 44(sp)
-; RV32I-NEXT: sw a0, 40(sp)
-; RV32I-NEXT: sw a0, 36(sp)
+; RV32I-NEXT: sw a0, 52(sp)
+; RV32I-NEXT: sw a0, 56(sp)
+; RV32I-NEXT: sw a0, 60(sp)
; RV32I-NEXT: sw a0, 32(sp)
-; RV32I-NEXT: sw t2, 28(sp)
-; RV32I-NEXT: sw t1, 24(sp)
-; RV32I-NEXT: sw t0, 20(sp)
+; RV32I-NEXT: sw a0, 36(sp)
+; RV32I-NEXT: sw a0, 40(sp)
+; RV32I-NEXT: sw a0, 44(sp)
; RV32I-NEXT: sw a7, 16(sp)
-; RV32I-NEXT: sw a6, 12(sp)
-; RV32I-NEXT: sw a5, 8(sp)
-; RV32I-NEXT: sw a4, 4(sp)
+; RV32I-NEXT: sw t0, 20(sp)
+; RV32I-NEXT: sw t1, 24(sp)
+; RV32I-NEXT: sw t2, 28(sp)
; RV32I-NEXT: sw a3, 0(sp)
+; RV32I-NEXT: sw a4, 4(sp)
+; RV32I-NEXT: sw a5, 8(sp)
+; RV32I-NEXT: sw a6, 12(sp)
; RV32I-NEXT: andi a0, a1, 28
; RV32I-NEXT: mv a3, sp
-; RV32I-NEXT: add a3, a3, a0
-; RV32I-NEXT: lw a6, 0(a3)
-; RV32I-NEXT: lw a7, 4(a3)
-; RV32I-NEXT: slli a5, a1, 3
-; RV32I-NEXT: lw t0, 8(a3)
-; RV32I-NEXT: lw t1, 12(a3)
-; RV32I-NEXT: srl a4, a7, a5
-; RV32I-NEXT: andi a0, a5, 24
-; RV32I-NEXT: xori t2, a0, 31
-; RV32I-NEXT: slli a0, t0, 1
-; RV32I-NEXT: sll a0, a0, t2
-; RV32I-NEXT: or a0, a4, a0
-; RV32I-NEXT: srl a6, a6, a5
-; RV32I-NEXT: slli a7, a7, 1
-; RV32I-NEXT: sll a1, a7, t2
-; RV32I-NEXT: or a1, a6, a1
-; RV32I-NEXT: srl a7, t1, a5
-; RV32I-NEXT: lw t3, 16(a3)
-; RV32I-NEXT: lw t4, 20(a3)
-; RV32I-NEXT: lw t5, 24(a3)
-; RV32I-NEXT: lw t6, 28(a3)
-; RV32I-NEXT: slli a3, t3, 1
-; RV32I-NEXT: sll a3, a3, t2
-; RV32I-NEXT: or a3, a7, a3
-; RV32I-NEXT: srl t0, t0, a5
-; RV32I-NEXT: slli t1, t1, 1
-; RV32I-NEXT: sll t1, t1, t2
-; RV32I-NEXT: or t1, t0, t1
-; RV32I-NEXT: srl s0, t4, a5
+; RV32I-NEXT: add a6, a3, a0
+; RV32I-NEXT: lw a3, 0(a6)
+; RV32I-NEXT: lw a4, 4(a6)
+; RV32I-NEXT: slli t1, a1, 3
+; RV32I-NEXT: lw a7, 8(a6)
+; RV32I-NEXT: lw t0, 12(a6)
+; RV32I-NEXT: srl a0, a4, t1
+; RV32I-NEXT: andi a1, t1, 24
+; RV32I-NEXT: xori t2, a1, 31
+; RV32I-NEXT: slli a1, a7, 1
+; RV32I-NEXT: sll a1, a1, t2
+; RV32I-NEXT: or a1, a0, a1
+; RV32I-NEXT: srl a3, a3, t1
+; RV32I-NEXT: slli a4, a4, 1
+; RV32I-NEXT: sll a4, a4, t2
+; RV32I-NEXT: or a4, a3, a4
+; RV32I-NEXT: srl a5, t0, t1
+; RV32I-NEXT: lw t3, 16(a6)
+; RV32I-NEXT: lw t4, 20(a6)
+; RV32I-NEXT: lw t5, 24(a6)
+; RV32I-NEXT: lw t6, 28(a6)
+; RV32I-NEXT: slli a6, t3, 1
+; RV32I-NEXT: sll a6, a6, t2
+; RV32I-NEXT: or a6, a5, a6
+; RV32I-NEXT: srl a7, a7, t1
+; RV32I-NEXT: slli t0, t0, 1
+; RV32I-NEXT: sll t0, t0, t2
+; RV32I-NEXT: or t0, a7, t0
+; RV32I-NEXT: srl s0, t4, t1
; RV32I-NEXT: slli s1, t5, 1
; RV32I-NEXT: sll s1, s1, t2
; RV32I-NEXT: or s1, s0, s1
-; RV32I-NEXT: srl t3, t3, a5
+; RV32I-NEXT: srl t3, t3, t1
; RV32I-NEXT: slli t4, t4, 1
; RV32I-NEXT: sll t4, t4, t2
; RV32I-NEXT: or t4, t3, t4
-; RV32I-NEXT: srl t5, t5, a5
+; RV32I-NEXT: srl t5, t5, t1
; RV32I-NEXT: slli s2, t6, 1
; RV32I-NEXT: sll t2, s2, t2
; RV32I-NEXT: or t2, t5, t2
-; RV32I-NEXT: sra a5, t6, a5
+; RV32I-NEXT: sra t1, t6, t1
+; RV32I-NEXT: srli t6, t1, 24
+; RV32I-NEXT: srli s2, t1, 16
+; RV32I-NEXT: srli s3, t1, 8
+; RV32I-NEXT: sb t1, 28(a2)
+; RV32I-NEXT: sb s3, 29(a2)
+; RV32I-NEXT: sb s2, 30(a2)
+; RV32I-NEXT: sb t6, 31(a2)
+; RV32I-NEXT: srli t1, t2, 24
+; RV32I-NEXT: srli t6, t2, 16
+; RV32I-NEXT: srli t2, t2, 8
; RV32I-NEXT: sb t5, 24(a2)
-; RV32I-NEXT: sb a5, 28(a2)
+; RV32I-NEXT: sb t2, 25(a2)
+; RV32I-NEXT: sb t6, 26(a2)
+; RV32I-NEXT: sb t1, 27(a2)
+; RV32I-NEXT: srli t1, t4, 24
+; RV32I-NEXT: srli t2, t4, 16
+; RV32I-NEXT: srli t4, t4, 8
; RV32I-NEXT: sb t3, 16(a2)
-; RV32I-NEXT: sb s0, 20(a2)
-; RV32I-NEXT: sb t0, 8(a2)
-; RV32I-NEXT: sb a7, 12(a2)
-; RV32I-NEXT: sb a6, 0(a2)
-; RV32I-NEXT: sb a4, 4(a2)
-; RV32I-NEXT: srli a4, a5, 24
-; RV32I-NEXT: sb a4, 31(a2)
-; RV32I-NEXT: srli a4, a5, 16
-; RV32I-NEXT: sb a4, 30(a2)
-; RV32I-NEXT: srli a5, a5, 8
-; RV32I-NEXT: sb a5, 29(a2)
-; RV32I-NEXT: srli a4, t2, 24
-; RV32I-NEXT: sb a4, 27(a2)
-; RV32I-NEXT: srli a4, t2, 16
-; RV32I-NEXT: sb a4, 26(a2)
-; RV32I-NEXT: srli a4, t2, 8
-; RV32I-NEXT: sb a4, 25(a2)
-; RV32I-NEXT: srli a4, t4, 24
-; RV32I-NEXT: sb a4, 19(a2)
-; RV32I-NEXT: srli a4, t4, 16
-; RV32I-NEXT: sb a4, 18(a2)
-; RV32I-NEXT: srli a4, t4, 8
-; RV32I-NEXT: sb a4, 17(a2)
-; RV32I-NEXT: srli a4, s1, 24
-; RV32I-NEXT: sb a4, 23(a2)
-; RV32I-NEXT: srli a4, s1, 16
-; RV32I-NEXT: sb a4, 22(a2)
+; RV32I-NEXT: sb t4, 17(a2)
+; RV32I-NEXT: sb t2, 18(a2)
+; RV32I-NEXT: sb t1, 19(a2)
+; RV32I-NEXT: srli t1, s1, 24
+; RV32I-NEXT: srli t2, s1, 16
; RV32I-NEXT: srli s1, s1, 8
+; RV32I-NEXT: sb s0, 20(a2)
; RV32I-NEXT: sb s1, 21(a2)
-; RV32I-NEXT: srli a4, t1, 24
-; RV32I-NEXT: sb a4, 11(a2)
-; RV32I-NEXT: srli a4, t1, 16
-; RV32I-NEXT: sb a4, 10(a2)
-; RV32I-NEXT: srli a4, t1, 8
-; RV32I-NEXT: sb a4, 9(a2)
-; RV32I-NEXT: srli a4, a3, 24
-; RV32I-NEXT: sb a4, 15(a2)
-; RV32I-NEXT: srli a4, a3, 16
-; RV32I-NEXT: sb a4, 14(a2)
-; RV32I-NEXT: srli a3, a3, 8
-; RV32I-NEXT: sb a3, 13(a2)
+; RV32I-NEXT: sb t2, 22(a2)
+; RV32I-NEXT: sb t1, 23(a2)
+; RV32I-NEXT: srli t1, t0, 24
+; RV32I-NEXT: srli t2, t0, 16
+; RV32I-NEXT: srli t0, t0, 8
+; RV32I-NEXT: sb a7, 8(a2)
+; RV32I-NEXT: sb t0, 9(a2)
+; RV32I-NEXT: sb t2, 10(a2)
+; RV32I-NEXT: sb t1, 11(a2)
+; RV32I-NEXT: srli a7, a6, 24
+; RV32I-NEXT: srli t0, a6, 16
+; RV32I-NEXT: srli a6, a6, 8
+; RV32I-NEXT: sb a5, 12(a2)
+; RV32I-NEXT: sb a6, 13(a2)
+; RV32I-NEXT: sb t0, 14(a2)
+; RV32I-NEXT: sb a7, 15(a2)
+; RV32I-NEXT: srli a5, a4, 24
+; RV32I-NEXT: srli a6, a4, 16
+; RV32I-NEXT: srli a4, a4, 8
+; RV32I-NEXT: sb a3, 0(a2)
+; RV32I-NEXT: sb a4, 1(a2)
+; RV32I-NEXT: sb a6, 2(a2)
+; RV32I-NEXT: sb a5, 3(a2)
; RV32I-NEXT: srli a3, a1, 24
-; RV32I-NEXT: sb a3, 3(a2)
-; RV32I-NEXT: srli a3, a1, 16
-; RV32I-NEXT: sb a3, 2(a2)
+; RV32I-NEXT: srli a4, a1, 16
; RV32I-NEXT: srli a1, a1, 8
-; RV32I-NEXT: sb a1, 1(a2)
-; RV32I-NEXT: srli a1, a0, 24
-; RV32I-NEXT: sb a1, 7(a2)
-; RV32I-NEXT: srli a1, a0, 16
-; RV32I-NEXT: sb a1, 6(a2)
-; RV32I-NEXT: srli a0, a0, 8
-; RV32I-NEXT: sb a0, 5(a2)
+; RV32I-NEXT: sb a0, 4(a2)
+; RV32I-NEXT: sb a1, 5(a2)
+; RV32I-NEXT: sb a4, 6(a2)
+; RV32I-NEXT: sb a3, 7(a2)
; RV32I-NEXT: lw s0, 76(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 72(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s2, 68(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 64(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 80
; RV32I-NEXT: ret
%src = load i256, ptr %src.ptr, align 1
@@ -4856,98 +4862,98 @@ define void @ashr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun
; RV64I-NEXT: slli a1, a1, 32
; RV64I-NEXT: or a1, a1, a7
; RV64I-NEXT: sraiw a0, a0, 31
-; RV64I-NEXT: sd a0, 56(sp)
-; RV64I-NEXT: sd a0, 48(sp)
-; RV64I-NEXT: sd a0, 40(sp)
; RV64I-NEXT: sd a0, 32(sp)
-; RV64I-NEXT: sd a6, 24(sp)
-; RV64I-NEXT: sd a5, 16(sp)
-; RV64I-NEXT: sd a4, 8(sp)
+; RV64I-NEXT: sd a0, 40(sp)
+; RV64I-NEXT: sd a0, 48(sp)
+; RV64I-NEXT: sd a0, 56(sp)
; RV64I-NEXT: sd a3, 0(sp)
+; RV64I-NEXT: sd a4, 8(sp)
+; RV64I-NEXT: sd a5, 16(sp)
+; RV64I-NEXT: sd a6, 24(sp)
; RV64I-NEXT: slli a0, a1, 2
; RV64I-NEXT: andi a0, a0, 24
; RV64I-NEXT: mv a3, sp
; RV64I-NEXT: add a0, a3, a0
-; RV64I-NEXT: ld a3, 0(a0)
-; RV64I-NEXT: ld a4, 8(a0)
-; RV64I-NEXT: slli a1, a1, 5
-; RV64I-NEXT: ld a5, 16(a0)
-; RV64I-NEXT: ld a6, 24(a0)
-; RV64I-NEXT: srl a7, a4, a1
-; RV64I-NEXT: andi a0, a1, 32
-; RV64I-NEXT: xori t0, a0, 63
-; RV64I-NEXT: slli a0, a5, 1
-; RV64I-NEXT: sll a0, a0, t0
-; RV64I-NEXT: or a0, a7, a0
-; RV64I-NEXT: srl a3, a3, a1
-; RV64I-NEXT: slli a4, a4, 1
-; RV64I-NEXT: sll a4, a4, t0
-; RV64I-NEXT: or a4, a3, a4
-; RV64I-NEXT: srl a5, a5, a1
-; RV64I-NEXT: slli t1, a6, 1
-; RV64I-NEXT: sll t0, t1, t0
-; RV64I-NEXT: or t0, a5, t0
-; RV64I-NEXT: sra a1, a6, a1
+; RV64I-NEXT: ld a4, 0(a0)
+; RV64I-NEXT: ld a5, 8(a0)
+; RV64I-NEXT: slli a6, a1, 5
+; RV64I-NEXT: ld a7, 16(a0)
+; RV64I-NEXT: ld t0, 24(a0)
+; RV64I-NEXT: srl a3, a5, a6
+; RV64I-NEXT: andi a0, a6, 32
+; RV64I-NEXT: xori t1, a0, 63
+; RV64I-NEXT: slli a0, a7, 1
+; RV64I-NEXT: sll a0, a0, t1
+; RV64I-NEXT: or a0, a3, a0
+; RV64I-NEXT: srl t2, a4, a6
+; RV64I-NEXT: slli a5, a5, 1
+; RV64I-NEXT: sll a1, a5, t1
+; RV64I-NEXT: or a1, t2, a1
+; RV64I-NEXT: srl a5, a7, a6
+; RV64I-NEXT: slli a4, t0, 1
+; RV64I-NEXT: sll a4, a4, t1
+; RV64I-NEXT: or a4, a5, a4
+; RV64I-NEXT: sra a6, t0, a6
+; RV64I-NEXT: srli a7, a5, 24
+; RV64I-NEXT: srli t0, a5, 16
+; RV64I-NEXT: srli t1, a5, 8
; RV64I-NEXT: sb a5, 16(a2)
-; RV64I-NEXT: sb a1, 24(a2)
-; RV64I-NEXT: sb a3, 0(a2)
-; RV64I-NEXT: sb a7, 8(a2)
-; RV64I-NEXT: srli a6, a5, 24
-; RV64I-NEXT: sb a6, 19(a2)
-; RV64I-NEXT: srli a6, a5, 16
-; RV64I-NEXT: sb a6, 18(a2)
-; RV64I-NEXT: srli a5, a5, 8
-; RV64I-NEXT: sb a5, 17(a2)
-; RV64I-NEXT: srli a5, a1, 56
+; RV64I-NEXT: sb t1, 17(a2)
+; RV64I-NEXT: sb t0, 18(a2)
+; RV64I-NEXT: sb a7, 19(a2)
+; RV64I-NEXT: srli a5, a6, 56
+; RV64I-NEXT: srli a7, a6, 48
+; RV64I-NEXT: srli t0, a6, 40
+; RV64I-NEXT: srli t1, a6, 32
+; RV64I-NEXT: sb t1, 28(a2)
+; RV64I-NEXT: sb t0, 29(a2)
+; RV64I-NEXT: sb a7, 30(a2)
; RV64I-NEXT: sb a5, 31(a2)
-; RV64I-NEXT: srli a5, a1, 48
-; RV64I-NEXT: sb a5, 30(a2)
-; RV64I-NEXT: srli a5, a1, 40
-; RV64I-NEXT: sb a5, 29(a2)
-; RV64I-NEXT: srli a5, a1, 32
-; RV64I-NEXT: sb a5, 28(a2)
-; RV64I-NEXT: srli a5, a1, 24
+; RV64I-NEXT: srli a5, a6, 24
+; RV64I-NEXT: srli a7, a6, 16
+; RV64I-NEXT: srli t0, a6, 8
+; RV64I-NEXT: sb a6, 24(a2)
+; RV64I-NEXT: sb t0, 25(a2)
+; RV64I-NEXT: sb a7, 26(a2)
; RV64I-NEXT: sb a5, 27(a2)
-; RV64I-NEXT: srli a5, a1, 16
-; RV64I-NEXT: sb a5, 26(a2)
-; RV64I-NEXT: srli a1, a1, 8
-; RV64I-NEXT: sb a1, 25(a2)
-; RV64I-NEXT: srli a1, a3, 24
-; RV64I-NEXT: sb a1, 3(a2)
-; RV64I-NEXT: srli a1, a3, 16
-; RV64I-NEXT: sb a1, 2(a2)
-; RV64I-NEXT: srli a3, a3, 8
-; RV64I-NEXT: sb a3, 1(a2)
-; RV64I-NEXT: srli a1, a7, 24
-; RV64I-NEXT: sb a1, 11(a2)
-; RV64I-NEXT: srli a1, a7, 16
-; RV64I-NEXT: sb a1, 10(a2)
-; RV64I-NEXT: srli a1, a7, 8
-; RV64I-NEXT: sb a1, 9(a2)
-; RV64I-NEXT: srli a1, t0, 56
-; RV64I-NEXT: sb a1, 23(a2)
-; RV64I-NEXT: srli a1, t0, 48
-; RV64I-NEXT: sb a1, 22(a2)
-; RV64I-NEXT: srli a1, t0, 40
-; RV64I-NEXT: sb a1, 21(a2)
-; RV64I-NEXT: srli a1, t0, 32
-; RV64I-NEXT: sb a1, 20(a2)
-; RV64I-NEXT: srli a1, a4, 56
-; RV64I-NEXT: sb a1, 7(a2)
-; RV64I-NEXT: srli a1, a4, 48
-; RV64I-NEXT: sb a1, 6(a2)
-; RV64I-NEXT: srli a1, a4, 40
-; RV64I-NEXT: sb a1, 5(a2)
+; RV64I-NEXT: srli a5, t2, 24
+; RV64I-NEXT: srli a6, t2, 16
+; RV64I-NEXT: srli a7, t2, 8
+; RV64I-NEXT: sb t2, 0(a2)
+; RV64I-NEXT: sb a7, 1(a2)
+; RV64I-NEXT: sb a6, 2(a2)
+; RV64I-NEXT: sb a5, 3(a2)
+; RV64I-NEXT: srli a5, a3, 24
+; RV64I-NEXT: srli a6, a3, 16
+; RV64I-NEXT: srli a7, a3, 8
+; RV64I-NEXT: sb a3, 8(a2)
+; RV64I-NEXT: sb a7, 9(a2)
+; RV64I-NEXT: sb a6, 10(a2)
+; RV64I-NEXT: sb a5, 11(a2)
+; RV64I-NEXT: srli a3, a4, 56
+; RV64I-NEXT: srli a5, a4, 48
+; RV64I-NEXT: srli a6, a4, 40
; RV64I-NEXT: srli a4, a4, 32
-; RV64I-NEXT: sb a4, 4(a2)
+; RV64I-NEXT: sb a4, 20(a2)
+; RV64I-NEXT: sb a6, 21(a2)
+; RV64I-NEXT: sb a5, 22(a2)
+; RV64I-NEXT: sb a3, 23(a2)
+; RV64I-NEXT: srli a3, a1, 56
+; RV64I-NEXT: srli a4, a1, 48
+; RV64I-NEXT: srli a5, a1, 40
+; RV64I-NEXT: srli a1, a1, 32
+; RV64I-NEXT: sb a1, 4(a2)
+; RV64I-NEXT: sb a5, 5(a2)
+; RV64I-NEXT: sb a4, 6(a2)
+; RV64I-NEXT: sb a3, 7(a2)
; RV64I-NEXT: srli a1, a0, 56
-; RV64I-NEXT: sb a1, 15(a2)
-; RV64I-NEXT: srli a1, a0, 48
-; RV64I-NEXT: sb a1, 14(a2)
-; RV64I-NEXT: srli a1, a0, 40
-; RV64I-NEXT: sb a1, 13(a2)
+; RV64I-NEXT: srli a3, a0, 48
+; RV64I-NEXT: srli a4, a0, 40
; RV64I-NEXT: srli a0, a0, 32
; RV64I-NEXT: sb a0, 12(a2)
+; RV64I-NEXT: sb a4, 13(a2)
+; RV64I-NEXT: sb a3, 14(a2)
+; RV64I-NEXT: sb a1, 15(a2)
; RV64I-NEXT: addi sp, sp, 64
; RV64I-NEXT: ret
;
@@ -5036,90 +5042,90 @@ define void @ashr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun
; RV32I-NEXT: or t2, t2, t3
; RV32I-NEXT: lbu a1, 0(a1)
; RV32I-NEXT: srai a0, a0, 31
-; RV32I-NEXT: sw a0, 60(sp)
-; RV32I-NEXT: sw a0, 56(sp)
-; RV32I-NEXT: sw a0, 52(sp)
; RV32I-NEXT: sw a0, 48(sp)
-; RV32I-NEXT: sw a0, 44(sp)
-; RV32I-NEXT: sw a0, 40(sp)
-; RV32I-NEXT: sw a0, 36(sp)
+; RV32I-NEXT: sw a0, 52(sp)
+; RV32I-NEXT: sw a0, 56(sp)
+; RV32I-NEXT: sw a0, 60(sp)
; RV32I-NEXT: sw a0, 32(sp)
-; RV32I-NEXT: sw t2, 28(sp)
-; RV32I-NEXT: sw t1, 24(sp)
-; RV32I-NEXT: sw t0, 20(sp)
+; RV32I-NEXT: sw a0, 36(sp)
+; RV32I-NEXT: sw a0, 40(sp)
+; RV32I-NEXT: sw a0, 44(sp)
; RV32I-NEXT: sw a7, 16(sp)
-; RV32I-NEXT: sw a6, 12(sp)
-; RV32I-NEXT: sw a5, 8(sp)
-; RV32I-NEXT: sw a4, 4(sp)
+; RV32I-NEXT: sw t0, 20(sp)
+; RV32I-NEXT: sw t1, 24(sp)
+; RV32I-NEXT: sw t2, 28(sp)
; RV32I-NEXT: sw a3, 0(sp)
+; RV32I-NEXT: sw a4, 4(sp)
+; RV32I-NEXT: sw a5, 8(sp)
+; RV32I-NEXT: sw a6, 12(sp)
; RV32I-NEXT: slli a1, a1, 2
; RV32I-NEXT: andi a1, a1, 28
; RV32I-NEXT: mv a0, sp
-; RV32I-NEXT: add a1, a0, a1
-; RV32I-NEXT: lw a3, 16(a1)
-; RV32I-NEXT: lw a4, 20(a1)
-; RV32I-NEXT: lw a5, 24(a1)
-; RV32I-NEXT: lw a6, 28(a1)
-; RV32I-NEXT: lw a7, 0(a1)
-; RV32I-NEXT: lw a0, 4(a1)
-; RV32I-NEXT: lw t0, 8(a1)
-; RV32I-NEXT: lw a1, 12(a1)
-; RV32I-NEXT: sb a5, 24(a2)
-; RV32I-NEXT: sb a6, 28(a2)
-; RV32I-NEXT: sb a3, 16(a2)
-; RV32I-NEXT: sb a4, 20(a2)
-; RV32I-NEXT: sb t0, 8(a2)
-; RV32I-NEXT: sb a1, 12(a2)
-; RV32I-NEXT: sb a7, 0(a2)
-; RV32I-NEXT: sb a0, 4(a2)
-; RV32I-NEXT: srli t1, a5, 24
+; RV32I-NEXT: add a4, a0, a1
+; RV32I-NEXT: lw a5, 16(a4)
+; RV32I-NEXT: lw a6, 20(a4)
+; RV32I-NEXT: lw a7, 24(a4)
+; RV32I-NEXT: lw a1, 0(a4)
+; RV32I-NEXT: lw a0, 4(a4)
+; RV32I-NEXT: lw t0, 8(a4)
+; RV32I-NEXT: lw a3, 12(a4)
+; RV32I-NEXT: lw a4, 28(a4)
+; RV32I-NEXT: srli t1, a7, 24
+; RV32I-NEXT: srli t2, a7, 16
+; RV32I-NEXT: srli t3, a7, 8
+; RV32I-NEXT: sb a7, 24(a2)
+; RV32I-NEXT: sb t3, 25(a2)
+; RV32I-NEXT: sb t2, 26(a2)
; RV32I-NEXT: sb t1, 27(a2)
-; RV32I-NEXT: srli t1, a5, 16
-; RV32I-NEXT: sb t1, 26(a2)
-; RV32I-NEXT: srli a5, a5, 8
-; RV32I-NEXT: sb a5, 25(a2)
-; RV32I-NEXT: srli a5, a6, 24
-; RV32I-NEXT: sb a5, 31(a2)
+; RV32I-NEXT: srli a7, a4, 24
+; RV32I-NEXT: srli t1, a4, 16
+; RV32I-NEXT: srli t2, a4, 8
+; RV32I-NEXT: sb a4, 28(a2)
+; RV32I-NEXT: sb t2, 29(a2)
+; RV32I-NEXT: sb t1, 30(a2)
+; RV32I-NEXT: sb a7, 31(a2)
+; RV32I-NEXT: srli a4, a5, 24
+; RV32I-NEXT: srli a7, a5, 16
+; RV32I-NEXT: srli t1, a5, 8
+; RV32I-NEXT: sb a5, 16(a2)
+; RV32I-NEXT: sb t1, 17(a2)
+; RV32I-NEXT: sb a7, 18(a2)
+; RV32I-NEXT: sb a4, 19(a2)
+; RV32I-NEXT: srli a4, a6, 24
; RV32I-NEXT: srli a5, a6, 16
-; RV32I-NEXT: sb a5, 30(a2)
-; RV32I-NEXT: srli a5, a6, 8
-; RV32I-NEXT: sb a5, 29(a2)
-; RV32I-NEXT: srli a5, a3, 24
-; RV32I-NEXT: sb a5, 19(a2)
+; RV32I-NEXT: srli a7, a6, 8
+; RV32I-NEXT: sb a6, 20(a2)
+; RV32I-NEXT: sb a7, 21(a2)
+; RV32I-NEXT: sb a5, 22(a2)
+; RV32I-NEXT: sb a4, 23(a2)
+; RV32I-NEXT: srli a4, t0, 24
+; RV32I-NEXT: srli a5, t0, 16
+; RV32I-NEXT: srli a6, t0, 8
+; RV32I-NEXT: sb t0, 8(a2)
+; RV32I-NEXT: sb a6, 9(a2)
+; RV32I-NEXT: sb a5, 10(a2)
+; RV32I-NEXT: sb a4, 11(a2)
+; RV32I-NEXT: srli a4, a3, 24
; RV32I-NEXT: srli a5, a3, 16
-; RV32I-NEXT: sb a5, 18(a2)
-; RV32I-NEXT: srli a3, a3, 8
-; RV32I-NEXT: sb a3, 17(a2)
-; RV32I-NEXT: srli a3, a4, 24
-; RV32I-NEXT: sb a3, 23(a2)
-; RV32I-NEXT: srli a3, a4, 16
-; RV32I-NEXT: sb a3, 22(a2)
-; RV32I-NEXT: srli a4, a4, 8
-; RV32I-NEXT: sb a4, 21(a2)
-; RV32I-NEXT: srli a3, t0, 24
-; RV32I-NEXT: sb a3, 11(a2)
-; RV32I-NEXT: srli a3, t0, 16
-; RV32I-NEXT: sb a3, 10(a2)
-; RV32I-NEXT: srli a3, t0, 8
-; RV32I-NEXT: sb a3, 9(a2)
+; RV32I-NEXT: srli a6, a3, 8
+; RV32I-NEXT: sb a3, 12(a2)
+; RV32I-NEXT: sb a6, 13(a2)
+; RV32I-NEXT: sb a5, 14(a2)
+; RV32I-NEXT: sb a4, 15(a2)
; RV32I-NEXT: srli a3, a1, 24
-; RV32I-NEXT: sb a3, 15(a2)
-; RV32I-NEXT: srli a3, a1, 16
-; RV32I-NEXT: sb a3, 14(a2)
-; RV32I-NEXT: srli a1, a1, 8
-; RV32I-NEXT: sb a1, 13(a2)
-; RV32I-NEXT: srli a1, a7, 24
-; RV32I-NEXT: sb a1, 3(a2)
-; RV32I-NEXT: srli a1, a7, 16
-; RV32I-NEXT: sb a1, 2(a2)
-; RV32I-NEXT: srli a1, a7, 8
-; RV32I-NEXT: sb a1, 1(a2)
+; RV32I-NEXT: srli a4, a1, 16
+; RV32I-NEXT: srli a5, a1, 8
+; RV32I-NEXT: sb a1, 0(a2)
+; RV32I-NEXT: sb a5, 1(a2)
+; RV32I-NEXT: sb a4, 2(a2)
+; RV32I-NEXT: sb a3, 3(a2)
; RV32I-NEXT: srli a1, a0, 24
+; RV32I-NEXT: srli a3, a0, 16
+; RV32I-NEXT: srli a4, a0, 8
+; RV32I-NEXT: sb a0, 4(a2)
+; RV32I-NEXT: sb a4, 5(a2)
+; RV32I-NEXT: sb a3, 6(a2)
; RV32I-NEXT: sb a1, 7(a2)
-; RV32I-NEXT: srli a1, a0, 16
-; RV32I-NEXT: sb a1, 6(a2)
-; RV32I-NEXT: srli a0, a0, 8
-; RV32I-NEXT: sb a0, 5(a2)
; RV32I-NEXT: addi sp, sp, 64
; RV32I-NEXT: ret
%src = load i256, ptr %src.ptr, align 1
@@ -5224,82 +5230,82 @@ define void @ashr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no
; RV64I-NEXT: or a6, a7, a6
; RV64I-NEXT: lbu a1, 0(a1)
; RV64I-NEXT: sraiw a0, a0, 31
-; RV64I-NEXT: sd a0, 56(sp)
-; RV64I-NEXT: sd a0, 48(sp)
-; RV64I-NEXT: sd a0, 40(sp)
; RV64I-NEXT: sd a0, 32(sp)
-; RV64I-NEXT: sd a6, 24(sp)
-; RV64I-NEXT: sd a5, 16(sp)
-; RV64I-NEXT: sd a4, 8(sp)
+; RV64I-NEXT: sd a0, 40(sp)
+; RV64I-NEXT: sd a0, 48(sp)
+; RV64I-NEXT: sd a0, 56(sp)
; RV64I-NEXT: sd a3, 0(sp)
+; RV64I-NEXT: sd a4, 8(sp)
+; RV64I-NEXT: sd a5, 16(sp)
+; RV64I-NEXT: sd a6, 24(sp)
; RV64I-NEXT: slli a1, a1, 3
; RV64I-NEXT: andi a1, a1, 24
; RV64I-NEXT: mv a0, sp
-; RV64I-NEXT: add a0, a0, a1
-; RV64I-NEXT: ld a1, 16(a0)
-; RV64I-NEXT: ld a3, 24(a0)
-; RV64I-NEXT: ld a4, 0(a0)
-; RV64I-NEXT: ld a0, 8(a0)
-; RV64I-NEXT: sb a1, 16(a2)
-; RV64I-NEXT: sb a3, 24(a2)
-; RV64I-NEXT: sb a4, 0(a2)
-; RV64I-NEXT: sb a0, 8(a2)
-; RV64I-NEXT: srli a5, a1, 56
+; RV64I-NEXT: add a3, a0, a1
+; RV64I-NEXT: ld a4, 16(a3)
+; RV64I-NEXT: ld a0, 8(a3)
+; RV64I-NEXT: ld a1, 0(a3)
+; RV64I-NEXT: ld a3, 24(a3)
+; RV64I-NEXT: srli a5, a4, 56
+; RV64I-NEXT: srli a6, a4, 48
+; RV64I-NEXT: srli a7, a4, 40
+; RV64I-NEXT: srli t0, a4, 32
+; RV64I-NEXT: sb t0, 20(a2)
+; RV64I-NEXT: sb a7, 21(a2)
+; RV64I-NEXT: sb a6, 22(a2)
; RV64I-NEXT: sb a5, 23(a2)
-; RV64I-NEXT: srli a5, a1, 48
-; RV64I-NEXT: sb a5, 22(a2)
-; RV64I-NEXT: srli a5, a1, 40
-; RV64I-NEXT: sb a5, 21(a2)
-; RV64I-NEXT: srli a5, a1, 32
-; RV64I-NEXT: sb a5, 20(a2)
-; RV64I-NEXT: srli a5, a1, 24
+; RV64I-NEXT: srli a5, a4, 24
+; RV64I-NEXT: srli a6, a4, 16
+; RV64I-NEXT: srli a7, a4, 8
+; RV64I-NEXT: sb a4, 16(a2)
+; RV64I-NEXT: sb a7, 17(a2)
+; RV64I-NEXT: sb a6, 18(a2)
; RV64I-NEXT: sb a5, 19(a2)
-; RV64I-NEXT: srli a5, a1, 16
-; RV64I-NEXT: sb a5, 18(a2)
-; RV64I-NEXT: srli a1, a1, 8
-; RV64I-NEXT: sb a1, 17(a2)
-; RV64I-NEXT: srli a1, a3, 56
-; RV64I-NEXT: sb a1, 31(a2)
-; RV64I-NEXT: srli a1, a3, 48
-; RV64I-NEXT: sb a1, 30(a2)
-; RV64I-NEXT: srli a1, a3, 40
-; RV64I-NEXT: sb a1, 29(a2)
-; RV64I-NEXT: srli a1, a3, 32
-; RV64I-NEXT: sb a1, 28(a2)
-; RV64I-NEXT: srli a1, a3, 24
-; RV64I-NEXT: sb a1, 27(a2)
-; RV64I-NEXT: srli a1, a3, 16
-; RV64I-NEXT: sb a1, 26(a2)
-; RV64I-NEXT: srli a3, a3, 8
-; RV64I-NEXT: sb a3, 25(a2)
-; RV64I-NEXT: srli a1, a4, 56
-; RV64I-NEXT: sb a1, 7(a2)
-; RV64I-NEXT: srli a1, a4, 48
-; RV64I-NEXT: sb a1, 6(a2)
-; RV64I-NEXT: srli a1, a4, 40
-; RV64I-NEXT: sb a1, 5(a2)
-; RV64I-NEXT: srli a1, a4, 32
-; RV64I-NEXT: sb a1, 4(a2)
-; RV64I-NEXT: srli a1, a4, 24
-; RV64I-NEXT: sb a1, 3(a2)
-; RV64I-NEXT: srli a1, a4, 16
-; RV64I-NEXT: sb a1, 2(a2)
-; RV64I-NEXT: srli a4, a4, 8
-; RV64I-NEXT: sb a4, 1(a2)
+; RV64I-NEXT: srli a4, a3, 56
+; RV64I-NEXT: srli a5, a3, 48
+; RV64I-NEXT: srli a6, a3, 40
+; RV64I-NEXT: srli a7, a3, 32
+; RV64I-NEXT: sb a7, 28(a2)
+; RV64I-NEXT: sb a6, 29(a2)
+; RV64I-NEXT: sb a5, 30(a2)
+; RV64I-NEXT: sb a4, 31(a2)
+; RV64I-NEXT: srli a4, a3, 24
+; RV64I-NEXT: srli a5, a3, 16
+; RV64I-NEXT: srli a6, a3, 8
+; RV64I-NEXT: sb a3, 24(a2)
+; RV64I-NEXT: sb a6, 25(a2)
+; RV64I-NEXT: sb a5, 26(a2)
+; RV64I-NEXT: sb a4, 27(a2)
+; RV64I-NEXT: srli a3, a1, 56
+; RV64I-NEXT: srli a4, a1, 48
+; RV64I-NEXT: srli a5, a1, 40
+; RV64I-NEXT: srli a6, a1, 32
+; RV64I-NEXT: sb a6, 4(a2)
+; RV64I-NEXT: sb a5, 5(a2)
+; RV64I-NEXT: sb a4, 6(a2)
+; RV64I-NEXT: sb a3, 7(a2)
+; RV64I-NEXT: srli a3, a1, 24
+; RV64I-NEXT: srli a4, a1, 16
+; RV64I-NEXT: srli a5, a1, 8
+; RV64I-NEXT: sb a1, 0(a2)
+; RV64I-NEXT: sb a5, 1(a2)
+; RV64I-NEXT: sb a4, 2(a2)
+; RV64I-NEXT: sb a3, 3(a2)
; RV64I-NEXT: srli a1, a0, 56
+; RV64I-NEXT: srli a3, a0, 48
+; RV64I-NEXT: srli a4, a0, 40
+; RV64I-NEXT: srli a5, a0, 32
+; RV64I-NEXT: sb a5, 12(a2)
+; RV64I-NEXT: sb a4, 13(a2)
+; RV64I-NEXT: sb a3, 14(a2)
; RV64I-NEXT: sb a1, 15(a2)
-; RV64I-NEXT: srli a1, a0, 48
-; RV64I-NEXT: sb a1, 14(a2)
-; RV64I-NEXT: srli a1, a0, 40
-; RV64I-NEXT: sb a1, 13(a2)
-; RV64I-NEXT: srli a1, a0, 32
-; RV64I-NEXT: sb a1, 12(a2)
; RV64I-NEXT: srli a1, a0, 24
+; RV64I-NEXT: srli a3, a0, 16
+; RV64I-NEXT: srli a4, a0, 8
+; RV64I-NEXT: sb a0, 8(a2)
+; RV64I-NEXT: sb a4, 9(a2)
+; RV64I-NEXT: sb a3, 10(a2)
; RV64I-NEXT: sb a1, 11(a2)
-; RV64I-NEXT: srli a1, a0, 16
-; RV64I-NEXT: sb a1, 10(a2)
-; RV64I-NEXT: srli a0, a0, 8
-; RV64I-NEXT: sb a0, 9(a2)
; RV64I-NEXT: addi sp, sp, 64
; RV64I-NEXT: ret
;
@@ -5388,90 +5394,90 @@ define void @ashr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no
; RV32I-NEXT: or t2, t2, t3
; RV32I-NEXT: lbu a1, 0(a1)
; RV32I-NEXT: srai a0, a0, 31
-; RV32I-NEXT: sw a0, 60(sp)
-; RV32I-NEXT: sw a0, 56(sp)
-; RV32I-NEXT: sw a0, 52(sp)
; RV32I-NEXT: sw a0, 48(sp)
-; RV32I-NEXT: sw a0, 44(sp)
-; RV32I-NEXT: sw a0, 40(sp)
-; RV32I-NEXT: sw a0, 36(sp)
+; RV32I-NEXT: sw a0, 52(sp)
+; RV32I-NEXT: sw a0, 56(sp)
+; RV32I-NEXT: sw a0, 60(sp)
; RV32I-NEXT: sw a0, 32(sp)
-; RV32I-NEXT: sw t2, 28(sp)
-; RV32I-NEXT: sw t1, 24(sp)
-; RV32I-NEXT: sw t0, 20(sp)
+; RV32I-NEXT: sw a0, 36(sp)
+; RV32I-NEXT: sw a0, 40(sp)
+; RV32I-NEXT: sw a0, 44(sp)
; RV32I-NEXT: sw a7, 16(sp)
-; RV32I-NEXT: sw a6, 12(sp)
-; RV32I-NEXT: sw a5, 8(sp)
-; RV32I-NEXT: sw a4, 4(sp)
+; RV32I-NEXT: sw t0, 20(sp)
+; RV32I-NEXT: sw t1, 24(sp)
+; RV32I-NEXT: sw t2, 28(sp)
; RV32I-NEXT: sw a3, 0(sp)
+; RV32I-NEXT: sw a4, 4(sp)
+; RV32I-NEXT: sw a5, 8(sp)
+; RV32I-NEXT: sw a6, 12(sp)
; RV32I-NEXT: slli a1, a1, 3
; RV32I-NEXT: andi a1, a1, 24
; RV32I-NEXT: mv a0, sp
-; RV32I-NEXT: add a1, a0, a1
-; RV32I-NEXT: lw a3, 16(a1)
-; RV32I-NEXT: lw a4, 20(a1)
-; RV32I-NEXT: lw a5, 24(a1)
-; RV32I-NEXT: lw a6, 28(a1)
-; RV32I-NEXT: lw a7, 0(a1)
-; RV32I-NEXT: lw a0, 4(a1)
-; RV32I-NEXT: lw t0, 8(a1)
-; RV32I-NEXT: lw a1, 12(a1)
-; RV32I-NEXT: sb a5, 24(a2)
-; RV32I-NEXT: sb a6, 28(a2)
-; RV32I-NEXT: sb a3, 16(a2)
-; RV32I-NEXT: sb a4, 20(a2)
-; RV32I-NEXT: sb t0, 8(a2)
-; RV32I-NEXT: sb a1, 12(a2)
-; RV32I-NEXT: sb a7, 0(a2)
-; RV32I-NEXT: sb a0, 4(a2)
-; RV32I-NEXT: srli t1, a5, 24
+; RV32I-NEXT: add a4, a0, a1
+; RV32I-NEXT: lw a5, 16(a4)
+; RV32I-NEXT: lw a6, 20(a4)
+; RV32I-NEXT: lw a7, 24(a4)
+; RV32I-NEXT: lw a1, 0(a4)
+; RV32I-NEXT: lw a0, 4(a4)
+; RV32I-NEXT: lw t0, 8(a4)
+; RV32I-NEXT: lw a3, 12(a4)
+; RV32I-NEXT: lw a4, 28(a4)
+; RV32I-NEXT: srli t1, a7, 24
+; RV32I-NEXT: srli t2, a7, 16
+; RV32I-NEXT: srli t3, a7, 8
+; RV32I-NEXT: sb a7, 24(a2)
+; RV32I-NEXT: sb t3, 25(a2)
+; RV32I-NEXT: sb t2, 26(a2)
; RV32I-NEXT: sb t1, 27(a2)
-; RV32I-NEXT: srli t1, a5, 16
-; RV32I-NEXT: sb t1, 26(a2)
-; RV32I-NEXT: srli a5, a5, 8
-; RV32I-NEXT: sb a5, 25(a2)
-; RV32I-NEXT: srli a5, a6, 24
-; RV32I-NEXT: sb a5, 31(a2)
+; RV32I-NEXT: srli a7, a4, 24
+; RV32I-NEXT: srli t1, a4, 16
+; RV32I-NEXT: srli t2, a4, 8
+; RV32I-NEXT: sb a4, 28(a2)
+; RV32I-NEXT: sb t2, 29(a2)
+; RV32I-NEXT: sb t1, 30(a2)
+; RV32I-NEXT: sb a7, 31(a2)
+; RV32I-NEXT: srli a4, a5, 24
+; RV32I-NEXT: srli a7, a5, 16
+; RV32I-NEXT: srli t1, a5, 8
+; RV32I-NEXT: sb a5, 16(a2)
+; RV32I-NEXT: sb t1, 17(a2)
+; RV32I-NEXT: sb a7, 18(a2)
+; RV32I-NEXT: sb a4, 19(a2)
+; RV32I-NEXT: srli a4, a6, 24
; RV32I-NEXT: srli a5, a6, 16
-; RV32I-NEXT: sb a5, 30(a2)
-; RV32I-NEXT: srli a5, a6, 8
-; RV32I-NEXT: sb a5, 29(a2)
-; RV32I-NEXT: srli a5, a3, 24
-; RV32I-NEXT: sb a5, 19(a2)
+; RV32I-NEXT: srli a7, a6, 8
+; RV32I-NEXT: sb a6, 20(a2)
+; RV32I-NEXT: sb a7, 21(a2)
+; RV32I-NEXT: sb a5, 22(a2)
+; RV32I-NEXT: sb a4, 23(a2)
+; RV32I-NEXT: srli a4, t0, 24
+; RV32I-NEXT: srli a5, t0, 16
+; RV32I-NEXT: srli a6, t0, 8
+; RV32I-NEXT: sb t0, 8(a2)
+; RV32I-NEXT: sb a6, 9(a2)
+; RV32I-NEXT: sb a5, 10(a2)
+; RV32I-NEXT: sb a4, 11(a2)
+; RV32I-NEXT: srli a4, a3, 24
; RV32I-NEXT: srli a5, a3, 16
-; RV32I-NEXT: sb a5, 18(a2)
-; RV32I-NEXT: srli a3, a3, 8
-; RV32I-NEXT: sb a3, 17(a2)
-; RV32I-NEXT: srli a3, a4, 24
-; RV32I-NEXT: sb a3, 23(a2)
-; RV32I-NEXT: srli a3, a4, 16
-; RV32I-NEXT: sb a3, 22(a2)
-; RV32I-NEXT: srli a4, a4, 8
-; RV32I-NEXT: sb a4, 21(a2)
-; RV32I-NEXT: srli a3, t0, 24
-; RV32I-NEXT: sb a3, 11(a2)
-; RV32I-NEXT: srli a3, t0, 16
-; RV32I-NEXT: sb a3, 10(a2)
-; RV32I-NEXT: srli a3, t0, 8
-; RV32I-NEXT: sb a3, 9(a2)
+; RV32I-NEXT: srli a6, a3, 8
+; RV32I-NEXT: sb a3, 12(a2)
+; RV32I-NEXT: sb a6, 13(a2)
+; RV32I-NEXT: sb a5, 14(a2)
+; RV32I-NEXT: sb a4, 15(a2)
; RV32I-NEXT: srli a3, a1, 24
-; RV32I-NEXT: sb a3, 15(a2)
-; RV32I-NEXT: srli a3, a1, 16
-; RV32I-NEXT: sb a3, 14(a2)
-; RV32I-NEXT: srli a1, a1, 8
-; RV32I-NEXT: sb a1, 13(a2)
-; RV32I-NEXT: srli a1, a7, 24
-; RV32I-NEXT: sb a1, 3(a2)
-; RV32I-NEXT: srli a1, a7, 16
-; RV32I-NEXT: sb a1, 2(a2)
-; RV32I-NEXT: srli a1, a7, 8
-; RV32I-NEXT: sb a1, 1(a2)
+; RV32I-NEXT: srli a4, a1, 16
+; RV32I-NEXT: srli a5, a1, 8
+; RV32I-NEXT: sb a1, 0(a2)
+; RV32I-NEXT: sb a5, 1(a2)
+; RV32I-NEXT: sb a4, 2(a2)
+; RV32I-NEXT: sb a3, 3(a2)
; RV32I-NEXT: srli a1, a0, 24
+; RV32I-NEXT: srli a3, a0, 16
+; RV32I-NEXT: srli a4, a0, 8
+; RV32I-NEXT: sb a0, 4(a2)
+; RV32I-NEXT: sb a4, 5(a2)
+; RV32I-NEXT: sb a3, 6(a2)
; RV32I-NEXT: sb a1, 7(a2)
-; RV32I-NEXT: srli a1, a0, 16
-; RV32I-NEXT: sb a1, 6(a2)
-; RV32I-NEXT: srli a0, a0, 8
-; RV32I-NEXT: sb a0, 5(a2)
; RV32I-NEXT: addi sp, sp, 64
; RV32I-NEXT: ret
%src = load i256, ptr %src.ptr, align 1
diff --git a/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll b/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll
index 190d67a5d8c118..d36c660b3b1421 100644
--- a/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll
+++ b/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll
@@ -17,13 +17,13 @@ define void @lshr_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: or a0, a0, a5
; RV64I-NEXT: or a0, a0, a3
; RV64I-NEXT: srlw a0, a0, a1
-; RV64I-NEXT: sb a0, 0(a2)
; RV64I-NEXT: srli a1, a0, 16
+; RV64I-NEXT: srli a3, a0, 24
+; RV64I-NEXT: srli a4, a0, 8
+; RV64I-NEXT: sb a0, 0(a2)
+; RV64I-NEXT: sb a4, 1(a2)
; RV64I-NEXT: sb a1, 2(a2)
-; RV64I-NEXT: srli a1, a0, 24
-; RV64I-NEXT: sb a1, 3(a2)
-; RV64I-NEXT: srli a0, a0, 8
-; RV64I-NEXT: sb a0, 1(a2)
+; RV64I-NEXT: sb a3, 3(a2)
; RV64I-NEXT: ret
;
; RV32I-LABEL: lshr_4bytes:
@@ -49,13 +49,13 @@ define void @lshr_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: or a1, a1, a3
; RV32I-NEXT: or a1, a1, a4
; RV32I-NEXT: srl a0, a0, a1
-; RV32I-NEXT: sb a0, 0(a2)
; RV32I-NEXT: srli a1, a0, 16
+; RV32I-NEXT: srli a3, a0, 24
+; RV32I-NEXT: srli a4, a0, 8
+; RV32I-NEXT: sb a0, 0(a2)
+; RV32I-NEXT: sb a4, 1(a2)
; RV32I-NEXT: sb a1, 2(a2)
-; RV32I-NEXT: srli a1, a0, 24
-; RV32I-NEXT: sb a1, 3(a2)
-; RV32I-NEXT: srli a0, a0, 8
-; RV32I-NEXT: sb a0, 1(a2)
+; RV32I-NEXT: sb a3, 3(a2)
; RV32I-NEXT: ret
%src = load i32, ptr %src.ptr, align 1
%bitOff = load i32, ptr %bitOff.ptr, align 1
@@ -78,13 +78,13 @@ define void @shl_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: or a0, a0, a5
; RV64I-NEXT: or a0, a0, a3
; RV64I-NEXT: sllw a0, a0, a1
-; RV64I-NEXT: sb a0, 0(a2)
; RV64I-NEXT: srli a1, a0, 16
+; RV64I-NEXT: srli a3, a0, 24
+; RV64I-NEXT: srli a4, a0, 8
+; RV64I-NEXT: sb a0, 0(a2)
+; RV64I-NEXT: sb a4, 1(a2)
; RV64I-NEXT: sb a1, 2(a2)
-; RV64I-NEXT: srli a1, a0, 24
-; RV64I-NEXT: sb a1, 3(a2)
-; RV64I-NEXT: srli a0, a0, 8
-; RV64I-NEXT: sb a0, 1(a2)
+; RV64I-NEXT: sb a3, 3(a2)
; RV64I-NEXT: ret
;
; RV32I-LABEL: shl_4bytes:
@@ -110,13 +110,13 @@ define void @shl_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: or a1, a1, a3
; RV32I-NEXT: or a1, a1, a4
; RV32I-NEXT: sll a0, a0, a1
-; RV32I-NEXT: sb a0, 0(a2)
; RV32I-NEXT: srli a1, a0, 16
+; RV32I-NEXT: srli a3, a0, 24
+; RV32I-NEXT: srli a4, a0, 8
+; RV32I-NEXT: sb a0, 0(a2)
+; RV32I-NEXT: sb a4, 1(a2)
; RV32I-NEXT: sb a1, 2(a2)
-; RV32I-NEXT: srli a1, a0, 24
-; RV32I-NEXT: sb a1, 3(a2)
-; RV32I-NEXT: srli a0, a0, 8
-; RV32I-NEXT: sb a0, 1(a2)
+; RV32I-NEXT: sb a3, 3(a2)
; RV32I-NEXT: ret
%src = load i32, ptr %src.ptr, align 1
%bitOff = load i32, ptr %bitOff.ptr, align 1
@@ -139,13 +139,13 @@ define void @ashr_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: or a0, a0, a5
; RV64I-NEXT: or a0, a0, a3
; RV64I-NEXT: sraw a0, a0, a1
-; RV64I-NEXT: sb a0, 0(a2)
; RV64I-NEXT: srli a1, a0, 16
+; RV64I-NEXT: srli a3, a0, 24
+; RV64I-NEXT: srli a4, a0, 8
+; RV64I-NEXT: sb a0, 0(a2)
+; RV64I-NEXT: sb a4, 1(a2)
; RV64I-NEXT: sb a1, 2(a2)
-; RV64I-NEXT: srli a1, a0, 24
-; RV64I-NEXT: sb a1, 3(a2)
-; RV64I-NEXT: srli a0, a0, 8
-; RV64I-NEXT: sb a0, 1(a2)
+; RV64I-NEXT: sb a3, 3(a2)
; RV64I-NEXT: ret
;
; RV32I-LABEL: ashr_4bytes:
@@ -171,13 +171,13 @@ define void @ashr_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: or a1, a1, a3
; RV32I-NEXT: or a1, a1, a4
; RV32I-NEXT: sra a0, a0, a1
-; RV32I-NEXT: sb a0, 0(a2)
; RV32I-NEXT: srli a1, a0, 16
+; RV32I-NEXT: srli a3, a0, 24
+; RV32I-NEXT: srli a4, a0, 8
+; RV32I-NEXT: sb a0, 0(a2)
+; RV32I-NEXT: sb a4, 1(a2)
; RV32I-NEXT: sb a1, 2(a2)
-; RV32I-NEXT: srli a1, a0, 24
-; RV32I-NEXT: sb a1, 3(a2)
-; RV32I-NEXT: srli a0, a0, 8
-; RV32I-NEXT: sb a0, 1(a2)
+; RV32I-NEXT: sb a3, 3(a2)
; RV32I-NEXT: ret
%src = load i32, ptr %src.ptr, align 1
%bitOff = load i32, ptr %bitOff.ptr, align 1
@@ -234,21 +234,21 @@ define void @lshr_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: slli a1, a1, 32
; RV64I-NEXT: or a1, a1, a3
; RV64I-NEXT: srl a0, a0, a1
-; RV64I-NEXT: sb a0, 0(a2)
; RV64I-NEXT: srli a1, a0, 48
+; RV64I-NEXT: srli a3, a0, 56
+; RV64I-NEXT: srli a4, a0, 32
+; RV64I-NEXT: srli a5, a0, 40
+; RV64I-NEXT: sb a4, 4(a2)
+; RV64I-NEXT: sb a5, 5(a2)
; RV64I-NEXT: sb a1, 6(a2)
-; RV64I-NEXT: srli a1, a0, 56
-; RV64I-NEXT: sb a1, 7(a2)
-; RV64I-NEXT: srli a1, a0, 32
-; RV64I-NEXT: sb a1, 4(a2)
-; RV64I-NEXT: srli a1, a0, 40
-; RV64I-NEXT: sb a1, 5(a2)
+; RV64I-NEXT: sb a3, 7(a2)
; RV64I-NEXT: srli a1, a0, 16
+; RV64I-NEXT: srli a3, a0, 24
+; RV64I-NEXT: srli a4, a0, 8
+; RV64I-NEXT: sb a0, 0(a2)
+; RV64I-NEXT: sb a4, 1(a2)
; RV64I-NEXT: sb a1, 2(a2)
-; RV64I-NEXT: srli a1, a0, 24
-; RV64I-NEXT: sb a1, 3(a2)
-; RV64I-NEXT: srli a0, a0, 8
-; RV64I-NEXT: sb a0, 1(a2)
+; RV64I-NEXT: sb a3, 3(a2)
; RV64I-NEXT: ret
;
; RV32I-LABEL: lshr_8bytes:
@@ -298,20 +298,20 @@ define void @lshr_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: .LBB3_3:
; RV32I-NEXT: srai a4, a4, 31
; RV32I-NEXT: and a1, a4, a1
-; RV32I-NEXT: sb a1, 4(a2)
; RV32I-NEXT: srli a3, a1, 16
+; RV32I-NEXT: srli a4, a1, 24
+; RV32I-NEXT: srli a5, a1, 8
+; RV32I-NEXT: sb a1, 4(a2)
+; RV32I-NEXT: sb a5, 5(a2)
; RV32I-NEXT: sb a3, 6(a2)
-; RV32I-NEXT: srli a3, a1, 24
-; RV32I-NEXT: sb a3, 7(a2)
-; RV32I-NEXT: srli a1, a1, 8
-; RV32I-NEXT: sb a1, 5(a2)
-; RV32I-NEXT: sb a0, 0(a2)
+; RV32I-NEXT: sb a4, 7(a2)
; RV32I-NEXT: srli a1, a0, 16
+; RV32I-NEXT: srli a3, a0, 24
+; RV32I-NEXT: srli a4, a0, 8
+; RV32I-NEXT: sb a0, 0(a2)
+; RV32I-NEXT: sb a4, 1(a2)
; RV32I-NEXT: sb a1, 2(a2)
-; RV32I-NEXT: srli a1, a0, 24
-; RV32I-NEXT: sb a1, 3(a2)
-; RV32I-NEXT: srli a0, a0, 8
-; RV32I-NEXT: sb a0, 1(a2)
+; RV32I-NEXT: sb a3, 3(a2)
; RV32I-NEXT: ret
%src = load i64, ptr %src.ptr, align 1
%bitOff = load i64, ptr %bitOff.ptr, align 1
@@ -367,21 +367,21 @@ define void @shl_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: slli a1, a1, 32
; RV64I-NEXT: or a1, a1, a3
; RV64I-NEXT: sll a0, a0, a1
-; RV64I-NEXT: sb a0, 0(a2)
; RV64I-NEXT: srli a1, a0, 48
+; RV64I-NEXT: srli a3, a0, 56
+; RV64I-NEXT: srli a4, a0, 32
+; RV64I-NEXT: srli a5, a0, 40
+; RV64I-NEXT: sb a4, 4(a2)
+; RV64I-NEXT: sb a5, 5(a2)
; RV64I-NEXT: sb a1, 6(a2)
-; RV64I-NEXT: srli a1, a0, 56
-; RV64I-NEXT: sb a1, 7(a2)
-; RV64I-NEXT: srli a1, a0, 32
-; RV64I-NEXT: sb a1, 4(a2)
-; RV64I-NEXT: srli a1, a0, 40
-; RV64I-NEXT: sb a1, 5(a2)
+; RV64I-NEXT: sb a3, 7(a2)
; RV64I-NEXT: srli a1, a0, 16
+; RV64I-NEXT: srli a3, a0, 24
+; RV64I-NEXT: srli a4, a0, 8
+; RV64I-NEXT: sb a0, 0(a2)
+; RV64I-NEXT: sb a4, 1(a2)
; RV64I-NEXT: sb a1, 2(a2)
-; RV64I-NEXT: srli a1, a0, 24
-; RV64I-NEXT: sb a1, 3(a2)
-; RV64I-NEXT: srli a0, a0, 8
-; RV64I-NEXT: sb a0, 1(a2)
+; RV64I-NEXT: sb a3, 3(a2)
; RV64I-NEXT: ret
;
; RV32I-LABEL: shl_8bytes:
@@ -431,20 +431,20 @@ define void @shl_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: .LBB4_3:
; RV32I-NEXT: srai a4, a4, 31
; RV32I-NEXT: and a1, a4, a1
-; RV32I-NEXT: sb a1, 0(a2)
-; RV32I-NEXT: sb a0, 4(a2)
; RV32I-NEXT: srli a3, a1, 16
+; RV32I-NEXT: srli a4, a1, 24
+; RV32I-NEXT: srli a5, a1, 8
+; RV32I-NEXT: sb a1, 0(a2)
+; RV32I-NEXT: sb a5, 1(a2)
; RV32I-NEXT: sb a3, 2(a2)
-; RV32I-NEXT: srli a3, a1, 24
-; RV32I-NEXT: sb a3, 3(a2)
-; RV32I-NEXT: srli a1, a1, 8
-; RV32I-NEXT: sb a1, 1(a2)
+; RV32I-NEXT: sb a4, 3(a2)
; RV32I-NEXT: srli a1, a0, 16
+; RV32I-NEXT: srli a3, a0, 24
+; RV32I-NEXT: srli a4, a0, 8
+; RV32I-NEXT: sb a0, 4(a2)
+; RV32I-NEXT: sb a4, 5(a2)
; RV32I-NEXT: sb a1, 6(a2)
-; RV32I-NEXT: srli a1, a0, 24
-; RV32I-NEXT: sb a1, 7(a2)
-; RV32I-NEXT: srli a0, a0, 8
-; RV32I-NEXT: sb a0, 5(a2)
+; RV32I-NEXT: sb a3, 7(a2)
; RV32I-NEXT: ret
%src = load i64, ptr %src.ptr, align 1
%bitOff = load i64, ptr %bitOff.ptr, align 1
@@ -500,21 +500,21 @@ define void @ashr_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: slli a1, a1, 32
; RV64I-NEXT: or a1, a1, a3
; RV64I-NEXT: sra a0, a0, a1
-; RV64I-NEXT: sb a0, 0(a2)
; RV64I-NEXT: srli a1, a0, 48
+; RV64I-NEXT: srli a3, a0, 56
+; RV64I-NEXT: srli a4, a0, 32
+; RV64I-NEXT: srli a5, a0, 40
+; RV64I-NEXT: sb a4, 4(a2)
+; RV64I-NEXT: sb a5, 5(a2)
; RV64I-NEXT: sb a1, 6(a2)
-; RV64I-NEXT: srli a1, a0, 56
-; RV64I-NEXT: sb a1, 7(a2)
-; RV64I-NEXT: srli a1, a0, 32
-; RV64I-NEXT: sb a1, 4(a2)
-; RV64I-NEXT: srli a1, a0, 40
-; RV64I-NEXT: sb a1, 5(a2)
+; RV64I-NEXT: sb a3, 7(a2)
; RV64I-NEXT: srli a1, a0, 16
+; RV64I-NEXT: srli a3, a0, 24
+; RV64I-NEXT: srli a4, a0, 8
+; RV64I-NEXT: sb a0, 0(a2)
+; RV64I-NEXT: sb a4, 1(a2)
; RV64I-NEXT: sb a1, 2(a2)
-; RV64I-NEXT: srli a1, a0, 24
-; RV64I-NEXT: sb a1, 3(a2)
-; RV64I-NEXT: srli a0, a0, 8
-; RV64I-NEXT: sb a0, 1(a2)
+; RV64I-NEXT: sb a3, 3(a2)
; RV64I-NEXT: ret
;
; RV32I-LABEL: ashr_8bytes:
@@ -564,20 +564,20 @@ define void @ashr_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: sll a3, a3, a4
; RV32I-NEXT: or a0, a0, a3
; RV32I-NEXT: .LBB5_3:
-; RV32I-NEXT: sb a1, 4(a2)
; RV32I-NEXT: srli a3, a1, 16
+; RV32I-NEXT: srli a4, a1, 24
+; RV32I-NEXT: srli a5, a1, 8
+; RV32I-NEXT: sb a1, 4(a2)
+; RV32I-NEXT: sb a5, 5(a2)
; RV32I-NEXT: sb a3, 6(a2)
-; RV32I-NEXT: srli a3, a1, 24
-; RV32I-NEXT: sb a3, 7(a2)
-; RV32I-NEXT: srli a1, a1, 8
-; RV32I-NEXT: sb a1, 5(a2)
-; RV32I-NEXT: sb a0, 0(a2)
+; RV32I-NEXT: sb a4, 7(a2)
; RV32I-NEXT: srli a1, a0, 16
+; RV32I-NEXT: srli a3, a0, 24
+; RV32I-NEXT: srli a4, a0, 8
+; RV32I-NEXT: sb a0, 0(a2)
+; RV32I-NEXT: sb a4, 1(a2)
; RV32I-NEXT: sb a1, 2(a2)
-; RV32I-NEXT: srli a1, a0, 24
-; RV32I-NEXT: sb a1, 3(a2)
-; RV32I-NEXT: srli a0, a0, 8
-; RV32I-NEXT: sb a0, 1(a2)
+; RV32I-NEXT: sb a3, 3(a2)
; RV32I-NEXT: ret
%src = load i64, ptr %src.ptr, align 1
%bitOff = load i64, ptr %bitOff.ptr, align 1
@@ -670,36 +670,36 @@ define void @lshr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: .LBB6_3:
; RV64I-NEXT: srai a4, a4, 63
; RV64I-NEXT: and a1, a4, a1
-; RV64I-NEXT: sb a1, 8(a2)
; RV64I-NEXT: srli a3, a1, 56
+; RV64I-NEXT: srli a4, a1, 48
+; RV64I-NEXT: srli a5, a1, 40
+; RV64I-NEXT: srli a6, a1, 32
+; RV64I-NEXT: sb a6, 12(a2)
+; RV64I-NEXT: sb a5, 13(a2)
+; RV64I-NEXT: sb a4, 14(a2)
; RV64I-NEXT: sb a3, 15(a2)
-; RV64I-NEXT: srli a3, a1, 48
-; RV64I-NEXT: sb a3, 14(a2)
-; RV64I-NEXT: srli a3, a1, 40
-; RV64I-NEXT: sb a3, 13(a2)
-; RV64I-NEXT: srli a3, a1, 32
-; RV64I-NEXT: sb a3, 12(a2)
; RV64I-NEXT: srli a3, a1, 24
+; RV64I-NEXT: srli a4, a1, 16
+; RV64I-NEXT: srli a5, a1, 8
+; RV64I-NEXT: sb a1, 8(a2)
+; RV64I-NEXT: sb a5, 9(a2)
+; RV64I-NEXT: sb a4, 10(a2)
; RV64I-NEXT: sb a3, 11(a2)
-; RV64I-NEXT: srli a3, a1, 16
-; RV64I-NEXT: sb a3, 10(a2)
-; RV64I-NEXT: srli a1, a1, 8
-; RV64I-NEXT: sb a1, 9(a2)
-; RV64I-NEXT: sb a0, 0(a2)
; RV64I-NEXT: srli a1, a0, 56
+; RV64I-NEXT: srli a3, a0, 48
+; RV64I-NEXT: srli a4, a0, 40
+; RV64I-NEXT: srli a5, a0, 32
+; RV64I-NEXT: sb a5, 4(a2)
+; RV64I-NEXT: sb a4, 5(a2)
+; RV64I-NEXT: sb a3, 6(a2)
; RV64I-NEXT: sb a1, 7(a2)
-; RV64I-NEXT: srli a1, a0, 48
-; RV64I-NEXT: sb a1, 6(a2)
-; RV64I-NEXT: srli a1, a0, 40
-; RV64I-NEXT: sb a1, 5(a2)
-; RV64I-NEXT: srli a1, a0, 32
-; RV64I-NEXT: sb a1, 4(a2)
; RV64I-NEXT: srli a1, a0, 24
+; RV64I-NEXT: srli a3, a0, 16
+; RV64I-NEXT: srli a4, a0, 8
+; RV64I-NEXT: sb a0, 0(a2)
+; RV64I-NEXT: sb a4, 1(a2)
+; RV64I-NEXT: sb a3, 2(a2)
; RV64I-NEXT: sb a1, 3(a2)
-; RV64I-NEXT: srli a1, a0, 16
-; RV64I-NEXT: sb a1, 2(a2)
-; RV64I-NEXT: srli a0, a0, 8
-; RV64I-NEXT: sb a0, 1(a2)
; RV64I-NEXT: ret
;
; RV32I-LABEL: lshr_16bytes:
@@ -755,14 +755,14 @@ define void @lshr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: slli a1, a1, 24
; RV32I-NEXT: or a1, a1, a7
; RV32I-NEXT: or a1, a1, a6
-; RV32I-NEXT: sw zero, 28(sp)
-; RV32I-NEXT: sw zero, 24(sp)
-; RV32I-NEXT: sw zero, 20(sp)
; RV32I-NEXT: sw zero, 16(sp)
-; RV32I-NEXT: sw a0, 12(sp)
-; RV32I-NEXT: sw a5, 8(sp)
-; RV32I-NEXT: sw a4, 4(sp)
+; RV32I-NEXT: sw zero, 20(sp)
+; RV32I-NEXT: sw zero, 24(sp)
+; RV32I-NEXT: sw zero, 28(sp)
; RV32I-NEXT: sw a3, 0(sp)
+; RV32I-NEXT: sw a4, 4(sp)
+; RV32I-NEXT: sw a5, 8(sp)
+; RV32I-NEXT: sw a0, 12(sp)
; RV32I-NEXT: srli a0, a1, 3
; RV32I-NEXT: andi a0, a0, 12
; RV32I-NEXT: mv a3, sp
@@ -786,34 +786,34 @@ define void @lshr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: sll a5, a5, a7
; RV32I-NEXT: or a4, a4, a5
; RV32I-NEXT: srl a0, a0, a1
-; RV32I-NEXT: sb a0, 12(a2)
; RV32I-NEXT: srli a1, a0, 16
+; RV32I-NEXT: srli a5, a0, 24
+; RV32I-NEXT: srli a7, a0, 8
+; RV32I-NEXT: sb a0, 12(a2)
+; RV32I-NEXT: sb a7, 13(a2)
; RV32I-NEXT: sb a1, 14(a2)
-; RV32I-NEXT: srli a1, a0, 24
-; RV32I-NEXT: sb a1, 15(a2)
-; RV32I-NEXT: srli a0, a0, 8
-; RV32I-NEXT: sb a0, 13(a2)
-; RV32I-NEXT: sb a4, 8(a2)
-; RV32I-NEXT: sb a3, 0(a2)
-; RV32I-NEXT: sb a6, 4(a2)
+; RV32I-NEXT: sb a5, 15(a2)
; RV32I-NEXT: srli a0, a4, 16
+; RV32I-NEXT: srli a1, a4, 24
+; RV32I-NEXT: srli a5, a4, 8
+; RV32I-NEXT: sb a4, 8(a2)
+; RV32I-NEXT: sb a5, 9(a2)
; RV32I-NEXT: sb a0, 10(a2)
-; RV32I-NEXT: srli a0, a4, 24
-; RV32I-NEXT: sb a0, 11(a2)
-; RV32I-NEXT: srli a4, a4, 8
-; RV32I-NEXT: sb a4, 9(a2)
+; RV32I-NEXT: sb a1, 11(a2)
; RV32I-NEXT: srli a0, a3, 16
+; RV32I-NEXT: srli a1, a3, 24
+; RV32I-NEXT: srli a4, a3, 8
+; RV32I-NEXT: sb a3, 0(a2)
+; RV32I-NEXT: sb a4, 1(a2)
; RV32I-NEXT: sb a0, 2(a2)
-; RV32I-NEXT: srli a0, a3, 24
-; RV32I-NEXT: sb a0, 3(a2)
-; RV32I-NEXT: srli a3, a3, 8
-; RV32I-NEXT: sb a3, 1(a2)
+; RV32I-NEXT: sb a1, 3(a2)
; RV32I-NEXT: srli a0, a6, 16
+; RV32I-NEXT: srli a1, a6, 24
+; RV32I-NEXT: srli a3, a6, 8
+; RV32I-NEXT: sb a6, 4(a2)
+; RV32I-NEXT: sb a3, 5(a2)
; RV32I-NEXT: sb a0, 6(a2)
-; RV32I-NEXT: srli a0, a6, 24
-; RV32I-NEXT: sb a0, 7(a2)
-; RV32I-NEXT: srli a0, a6, 8
-; RV32I-NEXT: sb a0, 5(a2)
+; RV32I-NEXT: sb a1, 7(a2)
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
%src = load i128, ptr %src.ptr, align 1
@@ -906,36 +906,36 @@ define void @shl_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: .LBB7_3:
; RV64I-NEXT: srai a4, a4, 63
; RV64I-NEXT: and a1, a4, a1
-; RV64I-NEXT: sb a1, 0(a2)
-; RV64I-NEXT: sb a0, 8(a2)
; RV64I-NEXT: srli a3, a1, 56
+; RV64I-NEXT: srli a4, a1, 48
+; RV64I-NEXT: srli a5, a1, 40
+; RV64I-NEXT: srli a6, a1, 32
+; RV64I-NEXT: sb a6, 4(a2)
+; RV64I-NEXT: sb a5, 5(a2)
+; RV64I-NEXT: sb a4, 6(a2)
; RV64I-NEXT: sb a3, 7(a2)
-; RV64I-NEXT: srli a3, a1, 48
-; RV64I-NEXT: sb a3, 6(a2)
-; RV64I-NEXT: srli a3, a1, 40
-; RV64I-NEXT: sb a3, 5(a2)
-; RV64I-NEXT: srli a3, a1, 32
-; RV64I-NEXT: sb a3, 4(a2)
; RV64I-NEXT: srli a3, a1, 24
+; RV64I-NEXT: srli a4, a1, 16
+; RV64I-NEXT: srli a5, a1, 8
+; RV64I-NEXT: sb a1, 0(a2)
+; RV64I-NEXT: sb a5, 1(a2)
+; RV64I-NEXT: sb a4, 2(a2)
; RV64I-NEXT: sb a3, 3(a2)
-; RV64I-NEXT: srli a3, a1, 16
-; RV64I-NEXT: sb a3, 2(a2)
-; RV64I-NEXT: srli a1, a1, 8
-; RV64I-NEXT: sb a1, 1(a2)
; RV64I-NEXT: srli a1, a0, 56
+; RV64I-NEXT: srli a3, a0, 48
+; RV64I-NEXT: srli a4, a0, 40
+; RV64I-NEXT: srli a5, a0, 32
+; RV64I-NEXT: sb a5, 12(a2)
+; RV64I-NEXT: sb a4, 13(a2)
+; RV64I-NEXT: sb a3, 14(a2)
; RV64I-NEXT: sb a1, 15(a2)
-; RV64I-NEXT: srli a1, a0, 48
-; RV64I-NEXT: sb a1, 14(a2)
-; RV64I-NEXT: srli a1, a0, 40
-; RV64I-NEXT: sb a1, 13(a2)
-; RV64I-NEXT: srli a1, a0, 32
-; RV64I-NEXT: sb a1, 12(a2)
; RV64I-NEXT: srli a1, a0, 24
+; RV64I-NEXT: srli a3, a0, 16
+; RV64I-NEXT: srli a4, a0, 8
+; RV64I-NEXT: sb a0, 8(a2)
+; RV64I-NEXT: sb a4, 9(a2)
+; RV64I-NEXT: sb a3, 10(a2)
; RV64I-NEXT: sb a1, 11(a2)
-; RV64I-NEXT: srli a1, a0, 16
-; RV64I-NEXT: sb a1, 10(a2)
-; RV64I-NEXT: srli a0, a0, 8
-; RV64I-NEXT: sb a0, 9(a2)
; RV64I-NEXT: ret
;
; RV32I-LABEL: shl_16bytes:
@@ -991,14 +991,14 @@ define void @shl_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: slli a1, a1, 24
; RV32I-NEXT: or a1, a1, a7
; RV32I-NEXT: or a1, a1, a6
-; RV32I-NEXT: sw zero, 12(sp)
-; RV32I-NEXT: sw zero, 8(sp)
-; RV32I-NEXT: sw zero, 4(sp)
; RV32I-NEXT: sw zero, 0(sp)
-; RV32I-NEXT: sw a0, 28(sp)
-; RV32I-NEXT: sw a5, 24(sp)
-; RV32I-NEXT: sw a4, 20(sp)
+; RV32I-NEXT: sw zero, 4(sp)
+; RV32I-NEXT: sw zero, 8(sp)
+; RV32I-NEXT: sw zero, 12(sp)
; RV32I-NEXT: sw a3, 16(sp)
+; RV32I-NEXT: sw a4, 20(sp)
+; RV32I-NEXT: sw a5, 24(sp)
+; RV32I-NEXT: sw a0, 28(sp)
; RV32I-NEXT: srli a0, a1, 3
; RV32I-NEXT: andi a0, a0, 12
; RV32I-NEXT: addi a3, sp, 16
@@ -1022,34 +1022,34 @@ define void @shl_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: srl a0, a0, a7
; RV32I-NEXT: or a0, a5, a0
; RV32I-NEXT: sll a1, a4, a1
-; RV32I-NEXT: sb a1, 0(a2)
; RV32I-NEXT: srli a4, a1, 16
+; RV32I-NEXT: srli a5, a1, 24
+; RV32I-NEXT: srli a7, a1, 8
+; RV32I-NEXT: sb a1, 0(a2)
+; RV32I-NEXT: sb a7, 1(a2)
; RV32I-NEXT: sb a4, 2(a2)
-; RV32I-NEXT: srli a4, a1, 24
-; RV32I-NEXT: sb a4, 3(a2)
-; RV32I-NEXT: srli a1, a1, 8
-; RV32I-NEXT: sb a1, 1(a2)
-; RV32I-NEXT: sb a0, 8(a2)
-; RV32I-NEXT: sb a3, 12(a2)
-; RV32I-NEXT: sb a6, 4(a2)
+; RV32I-NEXT: sb a5, 3(a2)
; RV32I-NEXT: srli a1, a0, 16
+; RV32I-NEXT: srli a4, a0, 24
+; RV32I-NEXT: srli a5, a0, 8
+; RV32I-NEXT: sb a0, 8(a2)
+; RV32I-NEXT: sb a5, 9(a2)
; RV32I-NEXT: sb a1, 10(a2)
-; RV32I-NEXT: srli a1, a0, 24
-; RV32I-NEXT: sb a1, 11(a2)
-; RV32I-NEXT: srli a0, a0, 8
-; RV32I-NEXT: sb a0, 9(a2)
+; RV32I-NEXT: sb a4, 11(a2)
; RV32I-NEXT: srli a0, a3, 16
+; RV32I-NEXT: srli a1, a3, 24
+; RV32I-NEXT: srli a4, a3, 8
+; RV32I-NEXT: sb a3, 12(a2)
+; RV32I-NEXT: sb a4, 13(a2)
; RV32I-NEXT: sb a0, 14(a2)
-; RV32I-NEXT: srli a0, a3, 24
-; RV32I-NEXT: sb a0, 15(a2)
-; RV32I-NEXT: srli a3, a3, 8
-; RV32I-NEXT: sb a3, 13(a2)
+; RV32I-NEXT: sb a1, 15(a2)
; RV32I-NEXT: srli a0, a6, 16
+; RV32I-NEXT: srli a1, a6, 24
+; RV32I-NEXT: srli a3, a6, 8
+; RV32I-NEXT: sb a6, 4(a2)
+; RV32I-NEXT: sb a3, 5(a2)
; RV32I-NEXT: sb a0, 6(a2)
-; RV32I-NEXT: srli a0, a6, 24
-; RV32I-NEXT: sb a0, 7(a2)
-; RV32I-NEXT: srli a0, a6, 8
-; RV32I-NEXT: sb a0, 5(a2)
+; RV32I-NEXT: sb a1, 7(a2)
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
%src = load i128, ptr %src.ptr, align 1
@@ -1142,36 +1142,36 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: sll a3, a3, a4
; RV64I-NEXT: or a0, a0, a3
; RV64I-NEXT: .LBB8_3:
-; RV64I-NEXT: sb a1, 8(a2)
; RV64I-NEXT: srli a3, a1, 56
+; RV64I-NEXT: srli a4, a1, 48
+; RV64I-NEXT: srli a5, a1, 40
+; RV64I-NEXT: srli a6, a1, 32
+; RV64I-NEXT: sb a6, 12(a2)
+; RV64I-NEXT: sb a5, 13(a2)
+; RV64I-NEXT: sb a4, 14(a2)
; RV64I-NEXT: sb a3, 15(a2)
-; RV64I-NEXT: srli a3, a1, 48
-; RV64I-NEXT: sb a3, 14(a2)
-; RV64I-NEXT: srli a3, a1, 40
-; RV64I-NEXT: sb a3, 13(a2)
-; RV64I-NEXT: srli a3, a1, 32
-; RV64I-NEXT: sb a3, 12(a2)
; RV64I-NEXT: srli a3, a1, 24
+; RV64I-NEXT: srli a4, a1, 16
+; RV64I-NEXT: srli a5, a1, 8
+; RV64I-NEXT: sb a1, 8(a2)
+; RV64I-NEXT: sb a5, 9(a2)
+; RV64I-NEXT: sb a4, 10(a2)
; RV64I-NEXT: sb a3, 11(a2)
-; RV64I-NEXT: srli a3, a1, 16
-; RV64I-NEXT: sb a3, 10(a2)
-; RV64I-NEXT: srli a1, a1, 8
-; RV64I-NEXT: sb a1, 9(a2)
-; RV64I-NEXT: sb a0, 0(a2)
; RV64I-NEXT: srli a1, a0, 56
+; RV64I-NEXT: srli a3, a0, 48
+; RV64I-NEXT: srli a4, a0, 40
+; RV64I-NEXT: srli a5, a0, 32
+; RV64I-NEXT: sb a5, 4(a2)
+; RV64I-NEXT: sb a4, 5(a2)
+; RV64I-NEXT: sb a3, 6(a2)
; RV64I-NEXT: sb a1, 7(a2)
-; RV64I-NEXT: srli a1, a0, 48
-; RV64I-NEXT: sb a1, 6(a2)
-; RV64I-NEXT: srli a1, a0, 40
-; RV64I-NEXT: sb a1, 5(a2)
-; RV64I-NEXT: srli a1, a0, 32
-; RV64I-NEXT: sb a1, 4(a2)
; RV64I-NEXT: srli a1, a0, 24
+; RV64I-NEXT: srli a3, a0, 16
+; RV64I-NEXT: srli a4, a0, 8
+; RV64I-NEXT: sb a0, 0(a2)
+; RV64I-NEXT: sb a4, 1(a2)
+; RV64I-NEXT: sb a3, 2(a2)
; RV64I-NEXT: sb a1, 3(a2)
-; RV64I-NEXT: srli a1, a0, 16
-; RV64I-NEXT: sb a1, 2(a2)
-; RV64I-NEXT: srli a0, a0, 8
-; RV64I-NEXT: sb a0, 1(a2)
; RV64I-NEXT: ret
;
; RV32I-LABEL: ashr_16bytes:
@@ -1228,14 +1228,14 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: or a1, a1, a7
; RV32I-NEXT: or a1, a1, t0
; RV32I-NEXT: srai a0, a0, 31
-; RV32I-NEXT: sw a0, 28(sp)
-; RV32I-NEXT: sw a0, 24(sp)
-; RV32I-NEXT: sw a0, 20(sp)
; RV32I-NEXT: sw a0, 16(sp)
-; RV32I-NEXT: sw a6, 12(sp)
-; RV32I-NEXT: sw a5, 8(sp)
-; RV32I-NEXT: sw a4, 4(sp)
+; RV32I-NEXT: sw a0, 20(sp)
+; RV32I-NEXT: sw a0, 24(sp)
+; RV32I-NEXT: sw a0, 28(sp)
; RV32I-NEXT: sw a3, 0(sp)
+; RV32I-NEXT: sw a4, 4(sp)
+; RV32I-NEXT: sw a5, 8(sp)
+; RV32I-NEXT: sw a6, 12(sp)
; RV32I-NEXT: srli a0, a1, 3
; RV32I-NEXT: andi a0, a0, 12
; RV32I-NEXT: mv a3, sp
@@ -1259,34 +1259,34 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: sll a5, a5, a7
; RV32I-NEXT: or a4, a4, a5
; RV32I-NEXT: sra a0, a0, a1
-; RV32I-NEXT: sb a0, 12(a2)
; RV32I-NEXT: srli a1, a0, 16
+; RV32I-NEXT: srli a5, a0, 24
+; RV32I-NEXT: srli a7, a0, 8
+; RV32I-NEXT: sb a0, 12(a2)
+; RV32I-NEXT: sb a7, 13(a2)
; RV32I-NEXT: sb a1, 14(a2)
-; RV32I-NEXT: srli a1, a0, 24
-; RV32I-NEXT: sb a1, 15(a2)
-; RV32I-NEXT: srli a0, a0, 8
-; RV32I-NEXT: sb a0, 13(a2)
-; RV32I-NEXT: sb a4, 8(a2)
-; RV32I-NEXT: sb a3, 0(a2)
-; RV32I-NEXT: sb a6, 4(a2)
+; RV32I-NEXT: sb a5, 15(a2)
; RV32I-NEXT: srli a0, a4, 16
+; RV32I-NEXT: srli a1, a4, 24
+; RV32I-NEXT: srli a5, a4, 8
+; RV32I-NEXT: sb a4, 8(a2)
+; RV32I-NEXT: sb a5, 9(a2)
; RV32I-NEXT: sb a0, 10(a2)
-; RV32I-NEXT: srli a0, a4, 24
-; RV32I-NEXT: sb a0, 11(a2)
-; RV32I-NEXT: srli a4, a4, 8
-; RV32I-NEXT: sb a4, 9(a2)
+; RV32I-NEXT: sb a1, 11(a2)
; RV32I-NEXT: srli a0, a3, 16
+; RV32I-NEXT: srli a1, a3, 24
+; RV32I-NEXT: srli a4, a3, 8
+; RV32I-NEXT: sb a3, 0(a2)
+; RV32I-NEXT: sb a4, 1(a2)
; RV32I-NEXT: sb a0, 2(a2)
-; RV32I-NEXT: srli a0, a3, 24
-; RV32I-NEXT: sb a0, 3(a2)
-; RV32I-NEXT: srli a3, a3, 8
-; RV32I-NEXT: sb a3, 1(a2)
+; RV32I-NEXT: sb a1, 3(a2)
; RV32I-NEXT: srli a0, a6, 16
+; RV32I-NEXT: srli a1, a6, 24
+; RV32I-NEXT: srli a3, a6, 8
+; RV32I-NEXT: sb a6, 4(a2)
+; RV32I-NEXT: sb a3, 5(a2)
; RV32I-NEXT: sb a0, 6(a2)
-; RV32I-NEXT: srli a0, a6, 24
-; RV32I-NEXT: sb a0, 7(a2)
-; RV32I-NEXT: srli a0, a6, 8
-; RV32I-NEXT: sb a0, 5(a2)
+; RV32I-NEXT: sb a1, 7(a2)
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
%src = load i128, ptr %src.ptr, align 1
@@ -1409,98 +1409,98 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: or a1, a1, a7
; RV64I-NEXT: or a1, a1, t0
; RV64I-NEXT: slli a1, a1, 32
-; RV64I-NEXT: or a1, a1, a6
-; RV64I-NEXT: sd zero, 56(sp)
-; RV64I-NEXT: sd zero, 48(sp)
-; RV64I-NEXT: sd zero, 40(sp)
+; RV64I-NEXT: or a6, a1, a6
; RV64I-NEXT: sd zero, 32(sp)
-; RV64I-NEXT: sd a0, 24(sp)
-; RV64I-NEXT: sd a5, 16(sp)
-; RV64I-NEXT: sd a4, 8(sp)
+; RV64I-NEXT: sd zero, 40(sp)
+; RV64I-NEXT: sd zero, 48(sp)
+; RV64I-NEXT: sd zero, 56(sp)
; RV64I-NEXT: sd a3, 0(sp)
-; RV64I-NEXT: srli a0, a1, 3
+; RV64I-NEXT: sd a4, 8(sp)
+; RV64I-NEXT: sd a5, 16(sp)
+; RV64I-NEXT: sd a0, 24(sp)
+; RV64I-NEXT: srli a0, a6, 3
; RV64I-NEXT: andi a0, a0, 24
-; RV64I-NEXT: mv a3, sp
-; RV64I-NEXT: add a0, a3, a0
-; RV64I-NEXT: ld a3, 8(a0)
-; RV64I-NEXT: ld a4, 0(a0)
-; RV64I-NEXT: ld a5, 16(a0)
-; RV64I-NEXT: ld a6, 24(a0)
-; RV64I-NEXT: srl a0, a3, a1
-; RV64I-NEXT: andi a7, a1, 63
+; RV64I-NEXT: mv a1, sp
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: ld a1, 8(a0)
+; RV64I-NEXT: ld a3, 0(a0)
+; RV64I-NEXT: ld a4, 16(a0)
+; RV64I-NEXT: ld a5, 24(a0)
+; RV64I-NEXT: srl a0, a1, a6
+; RV64I-NEXT: andi a7, a6, 63
; RV64I-NEXT: xori a7, a7, 63
-; RV64I-NEXT: slli t0, a5, 1
+; RV64I-NEXT: slli t0, a4, 1
; RV64I-NEXT: sll t0, t0, a7
; RV64I-NEXT: or a0, a0, t0
-; RV64I-NEXT: srl a4, a4, a1
-; RV64I-NEXT: slli a3, a3, 1
-; RV64I-NEXT: sll a3, a3, a7
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: srl a4, a5, a1
-; RV64I-NEXT: slli a5, a6, 1
-; RV64I-NEXT: sll a5, a5, a7
-; RV64I-NEXT: or a4, a4, a5
-; RV64I-NEXT: srl a1, a6, a1
-; RV64I-NEXT: sb a1, 24(a2)
-; RV64I-NEXT: srli a5, a1, 56
+; RV64I-NEXT: srl a3, a3, a6
+; RV64I-NEXT: slli a1, a1, 1
+; RV64I-NEXT: sll a1, a1, a7
+; RV64I-NEXT: or a1, a3, a1
+; RV64I-NEXT: srl a3, a4, a6
+; RV64I-NEXT: slli a4, a5, 1
+; RV64I-NEXT: sll a4, a4, a7
+; RV64I-NEXT: or a3, a3, a4
+; RV64I-NEXT: srl a4, a5, a6
+; RV64I-NEXT: srli a5, a4, 56
+; RV64I-NEXT: srli a6, a4, 48
+; RV64I-NEXT: srli a7, a4, 40
+; RV64I-NEXT: srli t0, a4, 32
+; RV64I-NEXT: sb t0, 28(a2)
+; RV64I-NEXT: sb a7, 29(a2)
+; RV64I-NEXT: sb a6, 30(a2)
; RV64I-NEXT: sb a5, 31(a2)
-; RV64I-NEXT: srli a5, a1, 48
-; RV64I-NEXT: sb a5, 30(a2)
-; RV64I-NEXT: srli a5, a1, 40
-; RV64I-NEXT: sb a5, 29(a2)
-; RV64I-NEXT: srli a5, a1, 32
-; RV64I-NEXT: sb a5, 28(a2)
-; RV64I-NEXT: srli a5, a1, 24
+; RV64I-NEXT: srli a5, a4, 24
+; RV64I-NEXT: srli a6, a4, 16
+; RV64I-NEXT: srli a7, a4, 8
+; RV64I-NEXT: sb a4, 24(a2)
+; RV64I-NEXT: sb a7, 25(a2)
+; RV64I-NEXT: sb a6, 26(a2)
; RV64I-NEXT: sb a5, 27(a2)
-; RV64I-NEXT: srli a5, a1, 16
-; RV64I-NEXT: sb a5, 26(a2)
-; RV64I-NEXT: srli a1, a1, 8
-; RV64I-NEXT: sb a1, 25(a2)
-; RV64I-NEXT: sb a4, 16(a2)
-; RV64I-NEXT: sb a3, 0(a2)
-; RV64I-NEXT: sb a0, 8(a2)
-; RV64I-NEXT: srli a1, a4, 56
-; RV64I-NEXT: sb a1, 23(a2)
-; RV64I-NEXT: srli a1, a4, 48
-; RV64I-NEXT: sb a1, 22(a2)
-; RV64I-NEXT: srli a1, a4, 40
-; RV64I-NEXT: sb a1, 21(a2)
-; RV64I-NEXT: srli a1, a4, 32
-; RV64I-NEXT: sb a1, 20(a2)
-; RV64I-NEXT: srli a1, a4, 24
-; RV64I-NEXT: sb a1, 19(a2)
-; RV64I-NEXT: srli a1, a4, 16
-; RV64I-NEXT: sb a1, 18(a2)
-; RV64I-NEXT: srli a4, a4, 8
-; RV64I-NEXT: sb a4, 17(a2)
-; RV64I-NEXT: srli a1, a3, 56
-; RV64I-NEXT: sb a1, 7(a2)
-; RV64I-NEXT: srli a1, a3, 48
-; RV64I-NEXT: sb a1, 6(a2)
-; RV64I-NEXT: srli a1, a3, 40
-; RV64I-NEXT: sb a1, 5(a2)
-; RV64I-NEXT: srli a1, a3, 32
-; RV64I-NEXT: sb a1, 4(a2)
-; RV64I-NEXT: srli a1, a3, 24
-; RV64I-NEXT: sb a1, 3(a2)
-; RV64I-NEXT: srli a1, a3, 16
-; RV64I-NEXT: sb a1, 2(a2)
-; RV64I-NEXT: srli a3, a3, 8
-; RV64I-NEXT: sb a3, 1(a2)
+; RV64I-NEXT: srli a4, a3, 56
+; RV64I-NEXT: srli a5, a3, 48
+; RV64I-NEXT: srli a6, a3, 40
+; RV64I-NEXT: srli a7, a3, 32
+; RV64I-NEXT: sb a7, 20(a2)
+; RV64I-NEXT: sb a6, 21(a2)
+; RV64I-NEXT: sb a5, 22(a2)
+; RV64I-NEXT: sb a4, 23(a2)
+; RV64I-NEXT: srli a4, a3, 24
+; RV64I-NEXT: srli a5, a3, 16
+; RV64I-NEXT: srli a6, a3, 8
+; RV64I-NEXT: sb a3, 16(a2)
+; RV64I-NEXT: sb a6, 17(a2)
+; RV64I-NEXT: sb a5, 18(a2)
+; RV64I-NEXT: sb a4, 19(a2)
+; RV64I-NEXT: srli a3, a1, 56
+; RV64I-NEXT: srli a4, a1, 48
+; RV64I-NEXT: srli a5, a1, 40
+; RV64I-NEXT: srli a6, a1, 32
+; RV64I-NEXT: sb a6, 4(a2)
+; RV64I-NEXT: sb a5, 5(a2)
+; RV64I-NEXT: sb a4, 6(a2)
+; RV64I-NEXT: sb a3, 7(a2)
+; RV64I-NEXT: srli a3, a1, 24
+; RV64I-NEXT: srli a4, a1, 16
+; RV64I-NEXT: srli a5, a1, 8
+; RV64I-NEXT: sb a1, 0(a2)
+; RV64I-NEXT: sb a5, 1(a2)
+; RV64I-NEXT: sb a4, 2(a2)
+; RV64I-NEXT: sb a3, 3(a2)
; RV64I-NEXT: srli a1, a0, 56
+; RV64I-NEXT: srli a3, a0, 48
+; RV64I-NEXT: srli a4, a0, 40
+; RV64I-NEXT: srli a5, a0, 32
+; RV64I-NEXT: sb a5, 12(a2)
+; RV64I-NEXT: sb a4, 13(a2)
+; RV64I-NEXT: sb a3, 14(a2)
; RV64I-NEXT: sb a1, 15(a2)
-; RV64I-NEXT: srli a1, a0, 48
-; RV64I-NEXT: sb a1, 14(a2)
-; RV64I-NEXT: srli a1, a0, 40
-; RV64I-NEXT: sb a1, 13(a2)
-; RV64I-NEXT: srli a1, a0, 32
-; RV64I-NEXT: sb a1, 12(a2)
; RV64I-NEXT: srli a1, a0, 24
+; RV64I-NEXT: srli a3, a0, 16
+; RV64I-NEXT: srli a4, a0, 8
+; RV64I-NEXT: sb a0, 8(a2)
+; RV64I-NEXT: sb a4, 9(a2)
+; RV64I-NEXT: sb a3, 10(a2)
; RV64I-NEXT: sb a1, 11(a2)
-; RV64I-NEXT: srli a1, a0, 16
-; RV64I-NEXT: sb a1, 10(a2)
-; RV64I-NEXT: srli a0, a0, 8
-; RV64I-NEXT: sb a0, 9(a2)
; RV64I-NEXT: addi sp, sp, 64
; RV64I-NEXT: ret
;
@@ -1597,22 +1597,22 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: slli a1, a1, 24
; RV32I-NEXT: or a1, a1, t3
; RV32I-NEXT: or a6, a1, a6
-; RV32I-NEXT: sw zero, 60(sp)
-; RV32I-NEXT: sw zero, 56(sp)
-; RV32I-NEXT: sw zero, 52(sp)
; RV32I-NEXT: sw zero, 48(sp)
-; RV32I-NEXT: sw zero, 44(sp)
-; RV32I-NEXT: sw zero, 40(sp)
-; RV32I-NEXT: sw zero, 36(sp)
+; RV32I-NEXT: sw zero, 52(sp)
+; RV32I-NEXT: sw zero, 56(sp)
+; RV32I-NEXT: sw zero, 60(sp)
; RV32I-NEXT: sw zero, 32(sp)
-; RV32I-NEXT: sw a0, 28(sp)
-; RV32I-NEXT: sw t2, 24(sp)
-; RV32I-NEXT: sw t1, 20(sp)
+; RV32I-NEXT: sw zero, 36(sp)
+; RV32I-NEXT: sw zero, 40(sp)
+; RV32I-NEXT: sw zero, 44(sp)
; RV32I-NEXT: sw t0, 16(sp)
-; RV32I-NEXT: sw a7, 12(sp)
-; RV32I-NEXT: sw a5, 8(sp)
-; RV32I-NEXT: sw a4, 4(sp)
+; RV32I-NEXT: sw t1, 20(sp)
+; RV32I-NEXT: sw t2, 24(sp)
+; RV32I-NEXT: sw a0, 28(sp)
; RV32I-NEXT: sw a3, 0(sp)
+; RV32I-NEXT: sw a4, 4(sp)
+; RV32I-NEXT: sw a5, 8(sp)
+; RV32I-NEXT: sw a7, 12(sp)
; RV32I-NEXT: srli a0, a6, 3
; RV32I-NEXT: andi a0, a0, 28
; RV32I-NEXT: mv a1, sp
@@ -1656,62 +1656,62 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: sll t0, t2, t0
; RV32I-NEXT: or t0, t1, t0
; RV32I-NEXT: srl a6, t4, a6
-; RV32I-NEXT: sb a6, 28(a2)
; RV32I-NEXT: srli t1, a6, 24
+; RV32I-NEXT: srli t2, a6, 16
+; RV32I-NEXT: srli t3, a6, 8
+; RV32I-NEXT: sb a6, 28(a2)
+; RV32I-NEXT: sb t3, 29(a2)
+; RV32I-NEXT: sb t2, 30(a2)
; RV32I-NEXT: sb t1, 31(a2)
-; RV32I-NEXT: srli t1, a6, 16
-; RV32I-NEXT: sb t1, 30(a2)
-; RV32I-NEXT: srli a6, a6, 8
-; RV32I-NEXT: sb a6, 29(a2)
-; RV32I-NEXT: sb t0, 24(a2)
-; RV32I-NEXT: sb a7, 16(a2)
-; RV32I-NEXT: sb a5, 20(a2)
-; RV32I-NEXT: sb a4, 8(a2)
-; RV32I-NEXT: sb a3, 12(a2)
-; RV32I-NEXT: sb a1, 0(a2)
-; RV32I-NEXT: sb a0, 4(a2)
; RV32I-NEXT: srli a6, t0, 24
+; RV32I-NEXT: srli t1, t0, 16
+; RV32I-NEXT: srli t2, t0, 8
+; RV32I-NEXT: sb t0, 24(a2)
+; RV32I-NEXT: sb t2, 25(a2)
+; RV32I-NEXT: sb t1, 26(a2)
; RV32I-NEXT: sb a6, 27(a2)
-; RV32I-NEXT: srli a6, t0, 16
-; RV32I-NEXT: sb a6, 26(a2)
-; RV32I-NEXT: srli a6, t0, 8
-; RV32I-NEXT: sb a6, 25(a2)
; RV32I-NEXT: srli a6, a7, 24
+; RV32I-NEXT: srli t0, a7, 16
+; RV32I-NEXT: srli t1, a7, 8
+; RV32I-NEXT: sb a7, 16(a2)
+; RV32I-NEXT: sb t1, 17(a2)
+; RV32I-NEXT: sb t0, 18(a2)
; RV32I-NEXT: sb a6, 19(a2)
-; RV32I-NEXT: srli a6, a7, 16
-; RV32I-NEXT: sb a6, 18(a2)
-; RV32I-NEXT: srli a6, a7, 8
-; RV32I-NEXT: sb a6, 17(a2)
; RV32I-NEXT: srli a6, a5, 24
+; RV32I-NEXT: srli a7, a5, 16
+; RV32I-NEXT: srli t0, a5, 8
+; RV32I-NEXT: sb a5, 20(a2)
+; RV32I-NEXT: sb t0, 21(a2)
+; RV32I-NEXT: sb a7, 22(a2)
; RV32I-NEXT: sb a6, 23(a2)
-; RV32I-NEXT: srli a6, a5, 16
-; RV32I-NEXT: sb a6, 22(a2)
-; RV32I-NEXT: srli a5, a5, 8
-; RV32I-NEXT: sb a5, 21(a2)
; RV32I-NEXT: srli a5, a4, 24
+; RV32I-NEXT: srli a6, a4, 16
+; RV32I-NEXT: srli a7, a4, 8
+; RV32I-NEXT: sb a4, 8(a2)
+; RV32I-NEXT: sb a7, 9(a2)
+; RV32I-NEXT: sb a6, 10(a2)
; RV32I-NEXT: sb a5, 11(a2)
-; RV32I-NEXT: srli a5, a4, 16
-; RV32I-NEXT: sb a5, 10(a2)
-; RV32I-NEXT: srli a4, a4, 8
-; RV32I-NEXT: sb a4, 9(a2)
; RV32I-NEXT: srli a4, a3, 24
+; RV32I-NEXT: srli a5, a3, 16
+; RV32I-NEXT: srli a6, a3, 8
+; RV32I-NEXT: sb a3, 12(a2)
+; RV32I-NEXT: sb a6, 13(a2)
+; RV32I-NEXT: sb a5, 14(a2)
; RV32I-NEXT: sb a4, 15(a2)
-; RV32I-NEXT: srli a4, a3, 16
-; RV32I-NEXT: sb a4, 14(a2)
-; RV32I-NEXT: srli a3, a3, 8
-; RV32I-NEXT: sb a3, 13(a2)
; RV32I-NEXT: srli a3, a1, 24
+; RV32I-NEXT: srli a4, a1, 16
+; RV32I-NEXT: srli a5, a1, 8
+; RV32I-NEXT: sb a1, 0(a2)
+; RV32I-NEXT: sb a5, 1(a2)
+; RV32I-NEXT: sb a4, 2(a2)
; RV32I-NEXT: sb a3, 3(a2)
-; RV32I-NEXT: srli a3, a1, 16
-; RV32I-NEXT: sb a3, 2(a2)
-; RV32I-NEXT: srli a1, a1, 8
-; RV32I-NEXT: sb a1, 1(a2)
; RV32I-NEXT: srli a1, a0, 24
+; RV32I-NEXT: srli a3, a0, 16
+; RV32I-NEXT: srli a4, a0, 8
+; RV32I-NEXT: sb a0, 4(a2)
+; RV32I-NEXT: sb a4, 5(a2)
+; RV32I-NEXT: sb a3, 6(a2)
; RV32I-NEXT: sb a1, 7(a2)
-; RV32I-NEXT: srli a1, a0, 16
-; RV32I-NEXT: sb a1, 6(a2)
-; RV32I-NEXT: srli a0, a0, 8
-; RV32I-NEXT: sb a0, 5(a2)
; RV32I-NEXT: addi sp, sp, 64
; RV32I-NEXT: ret
%src = load i256, ptr %src.ptr, align 1
@@ -1833,98 +1833,98 @@ define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: or a1, a1, a7
; RV64I-NEXT: or a1, a1, t0
; RV64I-NEXT: slli a1, a1, 32
-; RV64I-NEXT: or a1, a1, a6
-; RV64I-NEXT: sd zero, 24(sp)
-; RV64I-NEXT: sd zero, 16(sp)
-; RV64I-NEXT: sd zero, 8(sp)
+; RV64I-NEXT: or a6, a1, a6
; RV64I-NEXT: sd zero, 0(sp)
-; RV64I-NEXT: sd a0, 56(sp)
-; RV64I-NEXT: sd a5, 48(sp)
-; RV64I-NEXT: sd a4, 40(sp)
+; RV64I-NEXT: sd zero, 8(sp)
+; RV64I-NEXT: sd zero, 16(sp)
+; RV64I-NEXT: sd zero, 24(sp)
; RV64I-NEXT: sd a3, 32(sp)
-; RV64I-NEXT: srli a0, a1, 3
+; RV64I-NEXT: sd a4, 40(sp)
+; RV64I-NEXT: sd a5, 48(sp)
+; RV64I-NEXT: sd a0, 56(sp)
+; RV64I-NEXT: srli a0, a6, 3
; RV64I-NEXT: andi a0, a0, 24
-; RV64I-NEXT: addi a3, sp, 32
-; RV64I-NEXT: sub a3, a3, a0
-; RV64I-NEXT: ld a4, 8(a3)
-; RV64I-NEXT: ld a5, 0(a3)
-; RV64I-NEXT: ld a6, 16(a3)
-; RV64I-NEXT: ld a3, 24(a3)
-; RV64I-NEXT: sll a0, a4, a1
-; RV64I-NEXT: andi a7, a1, 63
+; RV64I-NEXT: addi a1, sp, 32
+; RV64I-NEXT: sub a1, a1, a0
+; RV64I-NEXT: ld a3, 8(a1)
+; RV64I-NEXT: ld a4, 0(a1)
+; RV64I-NEXT: ld a5, 16(a1)
+; RV64I-NEXT: ld a1, 24(a1)
+; RV64I-NEXT: sll a0, a3, a6
+; RV64I-NEXT: andi a7, a6, 63
; RV64I-NEXT: xori a7, a7, 63
-; RV64I-NEXT: srli t0, a5, 1
+; RV64I-NEXT: srli t0, a4, 1
; RV64I-NEXT: srl t0, t0, a7
; RV64I-NEXT: or a0, a0, t0
-; RV64I-NEXT: sll a3, a3, a1
-; RV64I-NEXT: srli t0, a6, 1
+; RV64I-NEXT: sll a1, a1, a6
+; RV64I-NEXT: srli t0, a5, 1
; RV64I-NEXT: srl t0, t0, a7
-; RV64I-NEXT: or a3, a3, t0
-; RV64I-NEXT: sll a6, a6, a1
-; RV64I-NEXT: srli a4, a4, 1
-; RV64I-NEXT: srl a4, a4, a7
-; RV64I-NEXT: or a4, a6, a4
-; RV64I-NEXT: sll a1, a5, a1
-; RV64I-NEXT: sb a1, 0(a2)
-; RV64I-NEXT: srli a5, a1, 56
+; RV64I-NEXT: or a1, a1, t0
+; RV64I-NEXT: sll a5, a5, a6
+; RV64I-NEXT: srli a3, a3, 1
+; RV64I-NEXT: srl a3, a3, a7
+; RV64I-NEXT: or a3, a5, a3
+; RV64I-NEXT: sll a4, a4, a6
+; RV64I-NEXT: srli a5, a4, 56
+; RV64I-NEXT: srli a6, a4, 48
+; RV64I-NEXT: srli a7, a4, 40
+; RV64I-NEXT: srli t0, a4, 32
+; RV64I-NEXT: sb t0, 4(a2)
+; RV64I-NEXT: sb a7, 5(a2)
+; RV64I-NEXT: sb a6, 6(a2)
; RV64I-NEXT: sb a5, 7(a2)
-; RV64I-NEXT: srli a5, a1, 48
-; RV64I-NEXT: sb a5, 6(a2)
-; RV64I-NEXT: srli a5, a1, 40
-; RV64I-NEXT: sb a5, 5(a2)
-; RV64I-NEXT: srli a5, a1, 32
-; RV64I-NEXT: sb a5, 4(a2)
-; RV64I-NEXT: srli a5, a1, 24
+; RV64I-NEXT: srli a5, a4, 24
+; RV64I-NEXT: srli a6, a4, 16
+; RV64I-NEXT: srli a7, a4, 8
+; RV64I-NEXT: sb a4, 0(a2)
+; RV64I-NEXT: sb a7, 1(a2)
+; RV64I-NEXT: sb a6, 2(a2)
; RV64I-NEXT: sb a5, 3(a2)
-; RV64I-NEXT: srli a5, a1, 16
-; RV64I-NEXT: sb a5, 2(a2)
-; RV64I-NEXT: srli a1, a1, 8
-; RV64I-NEXT: sb a1, 1(a2)
-; RV64I-NEXT: sb a4, 16(a2)
-; RV64I-NEXT: sb a3, 24(a2)
-; RV64I-NEXT: sb a0, 8(a2)
-; RV64I-NEXT: srli a1, a4, 56
-; RV64I-NEXT: sb a1, 23(a2)
-; RV64I-NEXT: srli a1, a4, 48
-; RV64I-NEXT: sb a1, 22(a2)
-; RV64I-NEXT: srli a1, a4, 40
-; RV64I-NEXT: sb a1, 21(a2)
-; RV64I-NEXT: srli a1, a4, 32
-; RV64I-NEXT: sb a1, 20(a2)
-; RV64I-NEXT: srli a1, a4, 24
-; RV64I-NEXT: sb a1, 19(a2)
-; RV64I-NEXT: srli a1, a4, 16
-; RV64I-NEXT: sb a1, 18(a2)
-; RV64I-NEXT: srli a4, a4, 8
-; RV64I-NEXT: sb a4, 17(a2)
-; RV64I-NEXT: srli a1, a3, 56
-; RV64I-NEXT: sb a1, 31(a2)
-; RV64I-NEXT: srli a1, a3, 48
-; RV64I-NEXT: sb a1, 30(a2)
-; RV64I-NEXT: srli a1, a3, 40
-; RV64I-NEXT: sb a1, 29(a2)
-; RV64I-NEXT: srli a1, a3, 32
-; RV64I-NEXT: sb a1, 28(a2)
-; RV64I-NEXT: srli a1, a3, 24
-; RV64I-NEXT: sb a1, 27(a2)
-; RV64I-NEXT: srli a1, a3, 16
-; RV64I-NEXT: sb a1, 26(a2)
-; RV64I-NEXT: srli a3, a3, 8
-; RV64I-NEXT: sb a3, 25(a2)
+; RV64I-NEXT: srli a4, a3, 56
+; RV64I-NEXT: srli a5, a3, 48
+; RV64I-NEXT: srli a6, a3, 40
+; RV64I-NEXT: srli a7, a3, 32
+; RV64I-NEXT: sb a7, 20(a2)
+; RV64I-NEXT: sb a6, 21(a2)
+; RV64I-NEXT: sb a5, 22(a2)
+; RV64I-NEXT: sb a4, 23(a2)
+; RV64I-NEXT: srli a4, a3, 24
+; RV64I-NEXT: srli a5, a3, 16
+; RV64I-NEXT: srli a6, a3, 8
+; RV64I-NEXT: sb a3, 16(a2)
+; RV64I-NEXT: sb a6, 17(a2)
+; RV64I-NEXT: sb a5, 18(a2)
+; RV64I-NEXT: sb a4, 19(a2)
+; RV64I-NEXT: srli a3, a1, 56
+; RV64I-NEXT: srli a4, a1, 48
+; RV64I-NEXT: srli a5, a1, 40
+; RV64I-NEXT: srli a6, a1, 32
+; RV64I-NEXT: sb a6, 28(a2)
+; RV64I-NEXT: sb a5, 29(a2)
+; RV64I-NEXT: sb a4, 30(a2)
+; RV64I-NEXT: sb a3, 31(a2)
+; RV64I-NEXT: srli a3, a1, 24
+; RV64I-NEXT: srli a4, a1, 16
+; RV64I-NEXT: srli a5, a1, 8
+; RV64I-NEXT: sb a1, 24(a2)
+; RV64I-NEXT: sb a5, 25(a2)
+; RV64I-NEXT: sb a4, 26(a2)
+; RV64I-NEXT: sb a3, 27(a2)
; RV64I-NEXT: srli a1, a0, 56
+; RV64I-NEXT: srli a3, a0, 48
+; RV64I-NEXT: srli a4, a0, 40
+; RV64I-NEXT: srli a5, a0, 32
+; RV64I-NEXT: sb a5, 12(a2)
+; RV64I-NEXT: sb a4, 13(a2)
+; RV64I-NEXT: sb a3, 14(a2)
; RV64I-NEXT: sb a1, 15(a2)
-; RV64I-NEXT: srli a1, a0, 48
-; RV64I-NEXT: sb a1, 14(a2)
-; RV64I-NEXT: srli a1, a0, 40
-; RV64I-NEXT: sb a1, 13(a2)
-; RV64I-NEXT: srli a1, a0, 32
-; RV64I-NEXT: sb a1, 12(a2)
; RV64I-NEXT: srli a1, a0, 24
+; RV64I-NEXT: srli a3, a0, 16
+; RV64I-NEXT: srli a4, a0, 8
+; RV64I-NEXT: sb a0, 8(a2)
+; RV64I-NEXT: sb a4, 9(a2)
+; RV64I-NEXT: sb a3, 10(a2)
; RV64I-NEXT: sb a1, 11(a2)
-; RV64I-NEXT: srli a1, a0, 16
-; RV64I-NEXT: sb a1, 10(a2)
-; RV64I-NEXT: srli a0, a0, 8
-; RV64I-NEXT: sb a0, 9(a2)
; RV64I-NEXT: addi sp, sp, 64
; RV64I-NEXT: ret
;
@@ -2021,22 +2021,22 @@ define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: slli a1, a1, 24
; RV32I-NEXT: or a1, a1, t3
; RV32I-NEXT: or a7, a1, a7
-; RV32I-NEXT: sw zero, 28(sp)
-; RV32I-NEXT: sw zero, 24(sp)
-; RV32I-NEXT: sw zero, 20(sp)
; RV32I-NEXT: sw zero, 16(sp)
-; RV32I-NEXT: sw zero, 12(sp)
-; RV32I-NEXT: sw zero, 8(sp)
-; RV32I-NEXT: sw zero, 4(sp)
+; RV32I-NEXT: sw zero, 20(sp)
+; RV32I-NEXT: sw zero, 24(sp)
+; RV32I-NEXT: sw zero, 28(sp)
; RV32I-NEXT: sw zero, 0(sp)
-; RV32I-NEXT: sw a0, 60(sp)
-; RV32I-NEXT: sw t2, 56(sp)
-; RV32I-NEXT: sw t1, 52(sp)
+; RV32I-NEXT: sw zero, 4(sp)
+; RV32I-NEXT: sw zero, 8(sp)
+; RV32I-NEXT: sw zero, 12(sp)
; RV32I-NEXT: sw t0, 48(sp)
-; RV32I-NEXT: sw a6, 44(sp)
-; RV32I-NEXT: sw a5, 40(sp)
-; RV32I-NEXT: sw a4, 36(sp)
+; RV32I-NEXT: sw t1, 52(sp)
+; RV32I-NEXT: sw t2, 56(sp)
+; RV32I-NEXT: sw a0, 60(sp)
; RV32I-NEXT: sw a3, 32(sp)
+; RV32I-NEXT: sw a4, 36(sp)
+; RV32I-NEXT: sw a5, 40(sp)
+; RV32I-NEXT: sw a6, 44(sp)
; RV32I-NEXT: srli a0, a7, 3
; RV32I-NEXT: andi a0, a0, 28
; RV32I-NEXT: addi a1, sp, 32
@@ -2062,80 +2062,80 @@ define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: lw t3, 20(a4)
; RV32I-NEXT: or a3, a6, a3
; RV32I-NEXT: lw a6, 24(a4)
-; RV32I-NEXT: lw a4, 28(a4)
-; RV32I-NEXT: sll t4, t3, a7
+; RV32I-NEXT: lw t4, 28(a4)
+; RV32I-NEXT: sll a4, t3, a7
; RV32I-NEXT: srli t5, t2, 1
; RV32I-NEXT: srl t5, t5, t1
-; RV32I-NEXT: or t4, t4, t5
+; RV32I-NEXT: or a4, a4, t5
; RV32I-NEXT: sll t2, t2, a7
; RV32I-NEXT: srli t0, t0, 1
; RV32I-NEXT: srl t0, t0, t1
; RV32I-NEXT: or t0, t2, t0
-; RV32I-NEXT: sll a4, a4, a7
-; RV32I-NEXT: srli t2, a6, 1
-; RV32I-NEXT: srl t2, t2, t1
-; RV32I-NEXT: or a4, a4, t2
+; RV32I-NEXT: sll t2, t4, a7
+; RV32I-NEXT: srli t4, a6, 1
+; RV32I-NEXT: srl t4, t4, t1
+; RV32I-NEXT: or t2, t2, t4
; RV32I-NEXT: sll a6, a6, a7
-; RV32I-NEXT: srli t2, t3, 1
-; RV32I-NEXT: srl t1, t2, t1
+; RV32I-NEXT: srli t3, t3, 1
+; RV32I-NEXT: srl t1, t3, t1
; RV32I-NEXT: or a6, a6, t1
; RV32I-NEXT: sll a5, a5, a7
-; RV32I-NEXT: sb a5, 0(a2)
; RV32I-NEXT: srli a7, a5, 24
+; RV32I-NEXT: srli t1, a5, 16
+; RV32I-NEXT: srli t3, a5, 8
+; RV32I-NEXT: sb a5, 0(a2)
+; RV32I-NEXT: sb t3, 1(a2)
+; RV32I-NEXT: sb t1, 2(a2)
; RV32I-NEXT: sb a7, 3(a2)
-; RV32I-NEXT: srli a7, a5, 16
-; RV32I-NEXT: sb a7, 2(a2)
-; RV32I-NEXT: srli a5, a5, 8
-; RV32I-NEXT: sb a5, 1(a2)
-; RV32I-NEXT: sb a6, 24(a2)
-; RV32I-NEXT: sb a4, 28(a2)
-; RV32I-NEXT: sb t0, 16(a2)
-; RV32I-NEXT: sb t4, 20(a2)
-; RV32I-NEXT: sb a3, 8(a2)
-; RV32I-NEXT: sb a1, 12(a2)
-; RV32I-NEXT: sb a0, 4(a2)
; RV32I-NEXT: srli a5, a6, 24
+; RV32I-NEXT: srli a7, a6, 16
+; RV32I-NEXT: srli t1, a6, 8
+; RV32I-NEXT: sb a6, 24(a2)
+; RV32I-NEXT: sb t1, 25(a2)
+; RV32I-NEXT: sb a7, 26(a2)
; RV32I-NEXT: sb a5, 27(a2)
-; RV32I-NEXT: srli a5, a6, 16
-; RV32I-NEXT: sb a5, 26(a2)
-; RV32I-NEXT: srli a5, a6, 8
-; RV32I-NEXT: sb a5, 25(a2)
-; RV32I-NEXT: srli a5, a4, 24
+; RV32I-NEXT: srli a5, t2, 24
+; RV32I-NEXT: srli a6, t2, 16
+; RV32I-NEXT: srli a7, t2, 8
+; RV32I-NEXT: sb t2, 28(a2)
+; RV32I-NEXT: sb a7, 29(a2)
+; RV32I-NEXT: sb a6, 30(a2)
; RV32I-NEXT: sb a5, 31(a2)
-; RV32I-NEXT: srli a5, a4, 16
-; RV32I-NEXT: sb a5, 30(a2)
-; RV32I-NEXT: srli a4, a4, 8
-; RV32I-NEXT: sb a4, 29(a2)
-; RV32I-NEXT: srli a4, t0, 24
-; RV32I-NEXT: sb a4, 19(a2)
-; RV32I-NEXT: srli a4, t0, 16
-; RV32I-NEXT: sb a4, 18(a2)
-; RV32I-NEXT: srli a4, t0, 8
-; RV32I-NEXT: sb a4, 17(a2)
-; RV32I-NEXT: srli a4, t4, 24
-; RV32I-NEXT: sb a4, 23(a2)
-; RV32I-NEXT: srli a4, t4, 16
-; RV32I-NEXT: sb a4, 22(a2)
-; RV32I-NEXT: srli a4, t4, 8
-; RV32I-NEXT: sb a4, 21(a2)
+; RV32I-NEXT: srli a5, t0, 24
+; RV32I-NEXT: srli a6, t0, 16
+; RV32I-NEXT: srli a7, t0, 8
+; RV32I-NEXT: sb t0, 16(a2)
+; RV32I-NEXT: sb a7, 17(a2)
+; RV32I-NEXT: sb a6, 18(a2)
+; RV32I-NEXT: sb a5, 19(a2)
+; RV32I-NEXT: srli a5, a4, 24
+; RV32I-NEXT: srli a6, a4, 16
+; RV32I-NEXT: srli a7, a4, 8
+; RV32I-NEXT: sb a4, 20(a2)
+; RV32I-NEXT: sb a7, 21(a2)
+; RV32I-NEXT: sb a6, 22(a2)
+; RV32I-NEXT: sb a5, 23(a2)
; RV32I-NEXT: srli a4, a3, 24
+; RV32I-NEXT: srli a5, a3, 16
+; RV32I-NEXT: srli a6, a3, 8
+; RV32I-NEXT: sb a3, 8(a2)
+; RV32I-NEXT: sb a6, 9(a2)
+; RV32I-NEXT: sb a5, 10(a2)
; RV32I-NEXT: sb a4, 11(a2)
-; RV32I-NEXT: srli a4, a3, 16
-; RV32I-NEXT: sb a4, 10(a2)
-; RV32I-NEXT: srli a3, a3, 8
-; RV32I-NEXT: sb a3, 9(a2)
; RV32I-NEXT: srli a3, a1, 24
+; RV32I-NEXT: srli a4, a1, 16
+; RV32I-NEXT: srli a5, a1, 8
+; RV32I-NEXT: sb a1, 12(a2)
+; RV32I-NEXT: sb a5, 13(a2)
+; RV32I-NEXT: sb a4, 14(a2)
; RV32I-NEXT: sb a3, 15(a2)
-; RV32I-NEXT: srli a3, a1, 16
-; RV32I-NEXT: sb a3, 14(a2)
-; RV32I-NEXT: srli a1, a1, 8
-; RV32I-NEXT: sb a1, 13(a2)
; RV32I-NEXT: srli a1, a0, 24
+; RV32I-NEXT: srli a3, a0, 16
+; RV32I-NEXT: srli a4, a0, 8
+; RV32I-NEXT: sb a0, 4(a2)
+; RV32I-NEXT: sb a4, 5(a2)
+; RV32I-NEXT: sb a3, 6(a2)
; RV32I-NEXT: sb a1, 7(a2)
-; RV32I-NEXT: srli a1, a0, 16
-; RV32I-NEXT: sb a1, 6(a2)
-; RV32I-NEXT: srli a0, a0, 8
-; RV32I-NEXT: sb a0, 5(a2)
; RV32I-NEXT: addi sp, sp, 64
; RV32I-NEXT: ret
%src = load i256, ptr %src.ptr, align 1
@@ -2257,99 +2257,99 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: or a1, a1, t0
; RV64I-NEXT: or a1, a1, t1
; RV64I-NEXT: slli a1, a1, 32
-; RV64I-NEXT: or a1, a1, a7
+; RV64I-NEXT: or a7, a1, a7
; RV64I-NEXT: sraiw a0, a0, 31
-; RV64I-NEXT: sd a0, 56(sp)
-; RV64I-NEXT: sd a0, 48(sp)
-; RV64I-NEXT: sd a0, 40(sp)
; RV64I-NEXT: sd a0, 32(sp)
-; RV64I-NEXT: sd a6, 24(sp)
-; RV64I-NEXT: sd a5, 16(sp)
-; RV64I-NEXT: sd a4, 8(sp)
+; RV64I-NEXT: sd a0, 40(sp)
+; RV64I-NEXT: sd a0, 48(sp)
+; RV64I-NEXT: sd a0, 56(sp)
; RV64I-NEXT: sd a3, 0(sp)
-; RV64I-NEXT: srli a0, a1, 3
+; RV64I-NEXT: sd a4, 8(sp)
+; RV64I-NEXT: sd a5, 16(sp)
+; RV64I-NEXT: sd a6, 24(sp)
+; RV64I-NEXT: srli a0, a7, 3
; RV64I-NEXT: andi a0, a0, 24
-; RV64I-NEXT: mv a3, sp
-; RV64I-NEXT: add a0, a3, a0
-; RV64I-NEXT: ld a3, 8(a0)
-; RV64I-NEXT: ld a4, 0(a0)
-; RV64I-NEXT: ld a5, 16(a0)
-; RV64I-NEXT: ld a6, 24(a0)
-; RV64I-NEXT: srl a0, a3, a1
-; RV64I-NEXT: andi a7, a1, 63
-; RV64I-NEXT: xori a7, a7, 63
-; RV64I-NEXT: slli t0, a5, 1
-; RV64I-NEXT: sll t0, t0, a7
+; RV64I-NEXT: mv a1, sp
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: ld a1, 8(a0)
+; RV64I-NEXT: ld a3, 0(a0)
+; RV64I-NEXT: ld a4, 16(a0)
+; RV64I-NEXT: ld a5, 24(a0)
+; RV64I-NEXT: srl a0, a1, a7
+; RV64I-NEXT: andi a6, a7, 63
+; RV64I-NEXT: xori a6, a6, 63
+; RV64I-NEXT: slli t0, a4, 1
+; RV64I-NEXT: sll t0, t0, a6
; RV64I-NEXT: or a0, a0, t0
-; RV64I-NEXT: srl a4, a4, a1
-; RV64I-NEXT: slli a3, a3, 1
-; RV64I-NEXT: sll a3, a3, a7
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: srl a4, a5, a1
-; RV64I-NEXT: slli a5, a6, 1
-; RV64I-NEXT: sll a5, a5, a7
-; RV64I-NEXT: or a4, a4, a5
-; RV64I-NEXT: sra a1, a6, a1
-; RV64I-NEXT: sb a1, 24(a2)
-; RV64I-NEXT: srli a5, a1, 56
+; RV64I-NEXT: srl a3, a3, a7
+; RV64I-NEXT: slli a1, a1, 1
+; RV64I-NEXT: sll a1, a1, a6
+; RV64I-NEXT: or a1, a3, a1
+; RV64I-NEXT: srl a3, a4, a7
+; RV64I-NEXT: slli a4, a5, 1
+; RV64I-NEXT: sll a4, a4, a6
+; RV64I-NEXT: or a3, a3, a4
+; RV64I-NEXT: sra a4, a5, a7
+; RV64I-NEXT: srli a5, a4, 56
+; RV64I-NEXT: srli a6, a4, 48
+; RV64I-NEXT: srli a7, a4, 40
+; RV64I-NEXT: srli t0, a4, 32
+; RV64I-NEXT: sb t0, 28(a2)
+; RV64I-NEXT: sb a7, 29(a2)
+; RV64I-NEXT: sb a6, 30(a2)
; RV64I-NEXT: sb a5, 31(a2)
-; RV64I-NEXT: srli a5, a1, 48
-; RV64I-NEXT: sb a5, 30(a2)
-; RV64I-NEXT: srli a5, a1, 40
-; RV64I-NEXT: sb a5, 29(a2)
-; RV64I-NEXT: srli a5, a1, 32
-; RV64I-NEXT: sb a5, 28(a2)
-; RV64I-NEXT: srli a5, a1, 24
+; RV64I-NEXT: srli a5, a4, 24
+; RV64I-NEXT: srli a6, a4, 16
+; RV64I-NEXT: srli a7, a4, 8
+; RV64I-NEXT: sb a4, 24(a2)
+; RV64I-NEXT: sb a7, 25(a2)
+; RV64I-NEXT: sb a6, 26(a2)
; RV64I-NEXT: sb a5, 27(a2)
-; RV64I-NEXT: srli a5, a1, 16
-; RV64I-NEXT: sb a5, 26(a2)
-; RV64I-NEXT: srli a1, a1, 8
-; RV64I-NEXT: sb a1, 25(a2)
-; RV64I-NEXT: sb a4, 16(a2)
-; RV64I-NEXT: sb a3, 0(a2)
-; RV64I-NEXT: sb a0, 8(a2)
-; RV64I-NEXT: srli a1, a4, 56
-; RV64I-NEXT: sb a1, 23(a2)
-; RV64I-NEXT: srli a1, a4, 48
-; RV64I-NEXT: sb a1, 22(a2)
-; RV64I-NEXT: srli a1, a4, 40
-; RV64I-NEXT: sb a1, 21(a2)
-; RV64I-NEXT: srli a1, a4, 32
-; RV64I-NEXT: sb a1, 20(a2)
-; RV64I-NEXT: srli a1, a4, 24
-; RV64I-NEXT: sb a1, 19(a2)
-; RV64I-NEXT: srli a1, a4, 16
-; RV64I-NEXT: sb a1, 18(a2)
-; RV64I-NEXT: srli a4, a4, 8
-; RV64I-NEXT: sb a4, 17(a2)
-; RV64I-NEXT: srli a1, a3, 56
-; RV64I-NEXT: sb a1, 7(a2)
-; RV64I-NEXT: srli a1, a3, 48
-; RV64I-NEXT: sb a1, 6(a2)
-; RV64I-NEXT: srli a1, a3, 40
-; RV64I-NEXT: sb a1, 5(a2)
-; RV64I-NEXT: srli a1, a3, 32
-; RV64I-NEXT: sb a1, 4(a2)
-; RV64I-NEXT: srli a1, a3, 24
-; RV64I-NEXT: sb a1, 3(a2)
-; RV64I-NEXT: srli a1, a3, 16
-; RV64I-NEXT: sb a1, 2(a2)
-; RV64I-NEXT: srli a3, a3, 8
-; RV64I-NEXT: sb a3, 1(a2)
+; RV64I-NEXT: srli a4, a3, 56
+; RV64I-NEXT: srli a5, a3, 48
+; RV64I-NEXT: srli a6, a3, 40
+; RV64I-NEXT: srli a7, a3, 32
+; RV64I-NEXT: sb a7, 20(a2)
+; RV64I-NEXT: sb a6, 21(a2)
+; RV64I-NEXT: sb a5, 22(a2)
+; RV64I-NEXT: sb a4, 23(a2)
+; RV64I-NEXT: srli a4, a3, 24
+; RV64I-NEXT: srli a5, a3, 16
+; RV64I-NEXT: srli a6, a3, 8
+; RV64I-NEXT: sb a3, 16(a2)
+; RV64I-NEXT: sb a6, 17(a2)
+; RV64I-NEXT: sb a5, 18(a2)
+; RV64I-NEXT: sb a4, 19(a2)
+; RV64I-NEXT: srli a3, a1, 56
+; RV64I-NEXT: srli a4, a1, 48
+; RV64I-NEXT: srli a5, a1, 40
+; RV64I-NEXT: srli a6, a1, 32
+; RV64I-NEXT: sb a6, 4(a2)
+; RV64I-NEXT: sb a5, 5(a2)
+; RV64I-NEXT: sb a4, 6(a2)
+; RV64I-NEXT: sb a3, 7(a2)
+; RV64I-NEXT: srli a3, a1, 24
+; RV64I-NEXT: srli a4, a1, 16
+; RV64I-NEXT: srli a5, a1, 8
+; RV64I-NEXT: sb a1, 0(a2)
+; RV64I-NEXT: sb a5, 1(a2)
+; RV64I-NEXT: sb a4, 2(a2)
+; RV64I-NEXT: sb a3, 3(a2)
; RV64I-NEXT: srli a1, a0, 56
+; RV64I-NEXT: srli a3, a0, 48
+; RV64I-NEXT: srli a4, a0, 40
+; RV64I-NEXT: srli a5, a0, 32
+; RV64I-NEXT: sb a5, 12(a2)
+; RV64I-NEXT: sb a4, 13(a2)
+; RV64I-NEXT: sb a3, 14(a2)
; RV64I-NEXT: sb a1, 15(a2)
-; RV64I-NEXT: srli a1, a0, 48
-; RV64I-NEXT: sb a1, 14(a2)
-; RV64I-NEXT: srli a1, a0, 40
-; RV64I-NEXT: sb a1, 13(a2)
-; RV64I-NEXT: srli a1, a0, 32
-; RV64I-NEXT: sb a1, 12(a2)
; RV64I-NEXT: srli a1, a0, 24
+; RV64I-NEXT: srli a3, a0, 16
+; RV64I-NEXT: srli a4, a0, 8
+; RV64I-NEXT: sb a0, 8(a2)
+; RV64I-NEXT: sb a4, 9(a2)
+; RV64I-NEXT: sb a3, 10(a2)
; RV64I-NEXT: sb a1, 11(a2)
-; RV64I-NEXT: srli a1, a0, 16
-; RV64I-NEXT: sb a1, 10(a2)
-; RV64I-NEXT: srli a0, a0, 8
-; RV64I-NEXT: sb a0, 9(a2)
; RV64I-NEXT: addi sp, sp, 64
; RV64I-NEXT: ret
;
@@ -2447,22 +2447,22 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: or a1, a1, a6
; RV32I-NEXT: or a6, a1, t4
; RV32I-NEXT: srai a0, a0, 31
-; RV32I-NEXT: sw a0, 60(sp)
-; RV32I-NEXT: sw a0, 56(sp)
-; RV32I-NEXT: sw a0, 52(sp)
; RV32I-NEXT: sw a0, 48(sp)
-; RV32I-NEXT: sw a0, 44(sp)
-; RV32I-NEXT: sw a0, 40(sp)
-; RV32I-NEXT: sw a0, 36(sp)
+; RV32I-NEXT: sw a0, 52(sp)
+; RV32I-NEXT: sw a0, 56(sp)
+; RV32I-NEXT: sw a0, 60(sp)
; RV32I-NEXT: sw a0, 32(sp)
-; RV32I-NEXT: sw t3, 28(sp)
-; RV32I-NEXT: sw t2, 24(sp)
-; RV32I-NEXT: sw t1, 20(sp)
+; RV32I-NEXT: sw a0, 36(sp)
+; RV32I-NEXT: sw a0, 40(sp)
+; RV32I-NEXT: sw a0, 44(sp)
; RV32I-NEXT: sw t0, 16(sp)
-; RV32I-NEXT: sw a7, 12(sp)
-; RV32I-NEXT: sw a5, 8(sp)
-; RV32I-NEXT: sw a4, 4(sp)
+; RV32I-NEXT: sw t1, 20(sp)
+; RV32I-NEXT: sw t2, 24(sp)
+; RV32I-NEXT: sw t3, 28(sp)
; RV32I-NEXT: sw a3, 0(sp)
+; RV32I-NEXT: sw a4, 4(sp)
+; RV32I-NEXT: sw a5, 8(sp)
+; RV32I-NEXT: sw a7, 12(sp)
; RV32I-NEXT: srli a0, a6, 3
; RV32I-NEXT: andi a0, a0, 28
; RV32I-NEXT: mv a1, sp
@@ -2506,62 +2506,62 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: sll t0, t2, t0
; RV32I-NEXT: or t0, t1, t0
; RV32I-NEXT: sra a6, t4, a6
-; RV32I-NEXT: sb a6, 28(a2)
; RV32I-NEXT: srli t1, a6, 24
+; RV32I-NEXT: srli t2, a6, 16
+; RV32I-NEXT: srli t3, a6, 8
+; RV32I-NEXT: sb a6, 28(a2)
+; RV32I-NEXT: sb t3, 29(a2)
+; RV32I-NEXT: sb t2, 30(a2)
; RV32I-NEXT: sb t1, 31(a2)
-; RV32I-NEXT: srli t1, a6, 16
-; RV32I-NEXT: sb t1, 30(a2)
-; RV32I-NEXT: srli a6, a6, 8
-; RV32I-NEXT: sb a6, 29(a2)
-; RV32I-NEXT: sb t0, 24(a2)
-; RV32I-NEXT: sb a7, 16(a2)
-; RV32I-NEXT: sb a5, 20(a2)
-; RV32I-NEXT: sb a4, 8(a2)
-; RV32I-NEXT: sb a3, 12(a2)
-; RV32I-NEXT: sb a1, 0(a2)
-; RV32I-NEXT: sb a0, 4(a2)
; RV32I-NEXT: srli a6, t0, 24
+; RV32I-NEXT: srli t1, t0, 16
+; RV32I-NEXT: srli t2, t0, 8
+; RV32I-NEXT: sb t0, 24(a2)
+; RV32I-NEXT: sb t2, 25(a2)
+; RV32I-NEXT: sb t1, 26(a2)
; RV32I-NEXT: sb a6, 27(a2)
-; RV32I-NEXT: srli a6, t0, 16
-; RV32I-NEXT: sb a6, 26(a2)
-; RV32I-NEXT: srli a6, t0, 8
-; RV32I-NEXT: sb a6, 25(a2)
; RV32I-NEXT: srli a6, a7, 24
+; RV32I-NEXT: srli t0, a7, 16
+; RV32I-NEXT: srli t1, a7, 8
+; RV32I-NEXT: sb a7, 16(a2)
+; RV32I-NEXT: sb t1, 17(a2)
+; RV32I-NEXT: sb t0, 18(a2)
; RV32I-NEXT: sb a6, 19(a2)
-; RV32I-NEXT: srli a6, a7, 16
-; RV32I-NEXT: sb a6, 18(a2)
-; RV32I-NEXT: srli a6, a7, 8
-; RV32I-NEXT: sb a6, 17(a2)
; RV32I-NEXT: srli a6, a5, 24
+; RV32I-NEXT: srli a7, a5, 16
+; RV32I-NEXT: srli t0, a5, 8
+; RV32I-NEXT: sb a5, 20(a2)
+; RV32I-NEXT: sb t0, 21(a2)
+; RV32I-NEXT: sb a7, 22(a2)
; RV32I-NEXT: sb a6, 23(a2)
-; RV32I-NEXT: srli a6, a5, 16
-; RV32I-NEXT: sb a6, 22(a2)
-; RV32I-NEXT: srli a5, a5, 8
-; RV32I-NEXT: sb a5, 21(a2)
; RV32I-NEXT: srli a5, a4, 24
+; RV32I-NEXT: srli a6, a4, 16
+; RV32I-NEXT: srli a7, a4, 8
+; RV32I-NEXT: sb a4, 8(a2)
+; RV32I-NEXT: sb a7, 9(a2)
+; RV32I-NEXT: sb a6, 10(a2)
; RV32I-NEXT: sb a5, 11(a2)
-; RV32I-NEXT: srli a5, a4, 16
-; RV32I-NEXT: sb a5, 10(a2)
-; RV32I-NEXT: srli a4, a4, 8
-; RV32I-NEXT: sb a4, 9(a2)
; RV32I-NEXT: srli a4, a3, 24
+; RV32I-NEXT: srli a5, a3, 16
+; RV32I-NEXT: srli a6, a3, 8
+; RV32I-NEXT: sb a3, 12(a2)
+; RV32I-NEXT: sb a6, 13(a2)
+; RV32I-NEXT: sb a5, 14(a2)
; RV32I-NEXT: sb a4, 15(a2)
-; RV32I-NEXT: srli a4, a3, 16
-; RV32I-NEXT: sb a4, 14(a2)
-; RV32I-NEXT: srli a3, a3, 8
-; RV32I-NEXT: sb a3, 13(a2)
; RV32I-NEXT: srli a3, a1, 24
+; RV32I-NEXT: srli a4, a1, 16
+; RV32I-NEXT: srli a5, a1, 8
+; RV32I-NEXT: sb a1, 0(a2)
+; RV32I-NEXT: sb a5, 1(a2)
+; RV32I-NEXT: sb a4, 2(a2)
; RV32I-NEXT: sb a3, 3(a2)
-; RV32I-NEXT: srli a3, a1, 16
-; RV32I-NEXT: sb a3, 2(a2)
-; RV32I-NEXT: srli a1, a1, 8
-; RV32I-NEXT: sb a1, 1(a2)
; RV32I-NEXT: srli a1, a0, 24
+; RV32I-NEXT: srli a3, a0, 16
+; RV32I-NEXT: srli a4, a0, 8
+; RV32I-NEXT: sb a0, 4(a2)
+; RV32I-NEXT: sb a4, 5(a2)
+; RV32I-NEXT: sb a3, 6(a2)
; RV32I-NEXT: sb a1, 7(a2)
-; RV32I-NEXT: srli a1, a0, 16
-; RV32I-NEXT: sb a1, 6(a2)
-; RV32I-NEXT: srli a0, a0, 8
-; RV32I-NEXT: sb a0, 5(a2)
; RV32I-NEXT: addi sp, sp, 64
; RV32I-NEXT: ret
%src = load i256, ptr %src.ptr, align 1
diff --git a/llvm/test/CodeGen/RISCV/xtheadmempair.ll b/llvm/test/CodeGen/RISCV/xtheadmempair.ll
index 3a74bb66d9ec25..3525c40026064a 100644
--- a/llvm/test/CodeGen/RISCV/xtheadmempair.ll
+++ b/llvm/test/CodeGen/RISCV/xtheadmempair.ll
@@ -174,10 +174,10 @@ define void @swd(ptr %a, i32 %b, i32%c) {
define void @sdd(ptr %a, i64 %b, i64%c) {
; RV32XTHEADMEMPAIR-LABEL: sdd:
; RV32XTHEADMEMPAIR: # %bb.0:
-; RV32XTHEADMEMPAIR-NEXT: sw a2, 36(a0)
; RV32XTHEADMEMPAIR-NEXT: sw a1, 32(a0)
-; RV32XTHEADMEMPAIR-NEXT: sw a4, 44(a0)
+; RV32XTHEADMEMPAIR-NEXT: sw a2, 36(a0)
; RV32XTHEADMEMPAIR-NEXT: sw a3, 40(a0)
+; RV32XTHEADMEMPAIR-NEXT: sw a4, 44(a0)
; RV32XTHEADMEMPAIR-NEXT: ret
;
; RV64XTHEADMEMPAIR-LABEL: sdd: