[llvm] 14c4f28 - [RISCV] Enable load clustering by default (#73789)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 1 05:45:36 PDT 2024
Author: Alex Bradbury
Date: 2024-10-01T13:45:30+01:00
New Revision: 14c4f28ec109ec84158d60a74d3d1b7bfa411c77
URL: https://github.com/llvm/llvm-project/commit/14c4f28ec109ec84158d60a74d3d1b7bfa411c77
DIFF: https://github.com/llvm/llvm-project/commit/14c4f28ec109ec84158d60a74d3d1b7bfa411c77.diff
LOG: [RISCV] Enable load clustering by default (#73789)
We believe this is neutral or slightly better in the majority of cases.
Added:
Modified:
llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
llvm/test/CodeGen/RISCV/GlobalISel/vararg.ll
llvm/test/CodeGen/RISCV/abds-neg.ll
llvm/test/CodeGen/RISCV/abds.ll
llvm/test/CodeGen/RISCV/abdu-neg.ll
llvm/test/CodeGen/RISCV/abdu.ll
llvm/test/CodeGen/RISCV/add-before-shl.ll
llvm/test/CodeGen/RISCV/atomic-rmw-discard.ll
llvm/test/CodeGen/RISCV/atomic-rmw.ll
llvm/test/CodeGen/RISCV/atomic-signext.ll
llvm/test/CodeGen/RISCV/atomicrmw-cond-sub-clamp.ll
llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll
llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll
llvm/test/CodeGen/RISCV/callee-saved-fpr64s.ll
llvm/test/CodeGen/RISCV/callee-saved-gprs.ll
llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-ilp32d-common.ll
llvm/test/CodeGen/RISCV/calling-conv-ilp32e.ll
llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-lp64d-common.ll
llvm/test/CodeGen/RISCV/forced-atomics.ll
llvm/test/CodeGen/RISCV/fpclamptosat.ll
llvm/test/CodeGen/RISCV/legalize-fneg.ll
llvm/test/CodeGen/RISCV/llvm.exp10.ll
llvm/test/CodeGen/RISCV/llvm.frexp.ll
llvm/test/CodeGen/RISCV/memcpy.ll
llvm/test/CodeGen/RISCV/misched-load-clustering.ll
llvm/test/CodeGen/RISCV/mul.ll
llvm/test/CodeGen/RISCV/nontemporal.ll
llvm/test/CodeGen/RISCV/overflow-intrinsics.ll
llvm/test/CodeGen/RISCV/push-pop-popret.ll
llvm/test/CodeGen/RISCV/reduction-formation.ll
llvm/test/CodeGen/RISCV/rv32zbb.ll
llvm/test/CodeGen/RISCV/rv64i-shift-sext.ll
llvm/test/CodeGen/RISCV/rvv/expand-no-v.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vector-i8-index-cornercase.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-elen.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access-zve32x.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll
llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
llvm/test/CodeGen/RISCV/scmp.ll
llvm/test/CodeGen/RISCV/shifts.ll
llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
llvm/test/CodeGen/RISCV/srem-vector-lkk.ll
llvm/test/CodeGen/RISCV/stack-store-check.ll
llvm/test/CodeGen/RISCV/ucmp.ll
llvm/test/CodeGen/RISCV/umulo-128-legalisation-lowering.ll
llvm/test/CodeGen/RISCV/unaligned-load-store.ll
llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
llvm/test/CodeGen/RISCV/urem-vector-lkk.ll
llvm/test/CodeGen/RISCV/vararg.ll
llvm/test/CodeGen/RISCV/wide-scalar-shift-by-byte-multiple-legalization.ll
llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll
llvm/test/CodeGen/RISCV/xtheadmempair.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index 6a72857b93b6c7..b9d35a924669f1 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -96,7 +96,7 @@ static cl::opt<bool>
static cl::opt<bool> EnableMISchedLoadClustering(
"riscv-misched-load-clustering", cl::Hidden,
cl::desc("Enable load clustering in the machine scheduler"),
- cl::init(false));
+ cl::init(true));
static cl::opt<bool> EnableVSETVLIAfterRVVRegAlloc(
"riscv-vsetvl-after-rvv-regalloc", cl::Hidden,
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/vararg.ll b/llvm/test/CodeGen/RISCV/GlobalISel/vararg.ll
index a49d4de6e9cf0d..01cab0d0e157bd 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/vararg.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/vararg.ll
@@ -69,15 +69,15 @@ define i32 @va1(ptr %fmt, ...) {
; RV64-NEXT: sd a2, 32(sp)
; RV64-NEXT: sd a3, 40(sp)
; RV64-NEXT: sd a4, 48(sp)
-; RV64-NEXT: sd a5, 56(sp)
; RV64-NEXT: addi a0, sp, 24
; RV64-NEXT: sd a0, 8(sp)
-; RV64-NEXT: lw a0, 12(sp)
-; RV64-NEXT: lwu a1, 8(sp)
+; RV64-NEXT: lwu a0, 8(sp)
+; RV64-NEXT: lw a1, 12(sp)
+; RV64-NEXT: sd a5, 56(sp)
; RV64-NEXT: sd a6, 64(sp)
; RV64-NEXT: sd a7, 72(sp)
-; RV64-NEXT: slli a0, a0, 32
-; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: slli a1, a1, 32
+; RV64-NEXT: or a0, a1, a0
; RV64-NEXT: addi a1, a0, 4
; RV64-NEXT: srli a2, a1, 32
; RV64-NEXT: sw a1, 8(sp)
@@ -128,15 +128,15 @@ define i32 @va1(ptr %fmt, ...) {
; RV64-WITHFP-NEXT: sd a2, 16(s0)
; RV64-WITHFP-NEXT: sd a3, 24(s0)
; RV64-WITHFP-NEXT: sd a4, 32(s0)
-; RV64-WITHFP-NEXT: sd a5, 40(s0)
; RV64-WITHFP-NEXT: addi a0, s0, 8
; RV64-WITHFP-NEXT: sd a0, -24(s0)
-; RV64-WITHFP-NEXT: lw a0, -20(s0)
-; RV64-WITHFP-NEXT: lwu a1, -24(s0)
+; RV64-WITHFP-NEXT: lwu a0, -24(s0)
+; RV64-WITHFP-NEXT: lw a1, -20(s0)
+; RV64-WITHFP-NEXT: sd a5, 40(s0)
; RV64-WITHFP-NEXT: sd a6, 48(s0)
; RV64-WITHFP-NEXT: sd a7, 56(s0)
-; RV64-WITHFP-NEXT: slli a0, a0, 32
-; RV64-WITHFP-NEXT: or a0, a0, a1
+; RV64-WITHFP-NEXT: slli a1, a1, 32
+; RV64-WITHFP-NEXT: or a0, a1, a0
; RV64-WITHFP-NEXT: addi a1, a0, 4
; RV64-WITHFP-NEXT: srli a2, a1, 32
; RV64-WITHFP-NEXT: sw a1, -24(s0)
@@ -1609,22 +1609,22 @@ define i32 @va_large_stack(ptr %fmt, ...) {
; RV64-NEXT: add a0, sp, a0
; RV64-NEXT: sd a4, 304(a0)
; RV64-NEXT: lui a0, 24414
-; RV64-NEXT: add a0, sp, a0
-; RV64-NEXT: sd a5, 312(a0)
-; RV64-NEXT: lui a0, 24414
; RV64-NEXT: addiw a0, a0, 280
; RV64-NEXT: add a0, sp, a0
; RV64-NEXT: sd a0, 8(sp)
-; RV64-NEXT: lw a0, 12(sp)
-; RV64-NEXT: lwu a1, 8(sp)
+; RV64-NEXT: lwu a0, 8(sp)
+; RV64-NEXT: lw a1, 12(sp)
+; RV64-NEXT: lui a2, 24414
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: sd a5, 312(a2)
; RV64-NEXT: lui a2, 24414
; RV64-NEXT: add a2, sp, a2
; RV64-NEXT: sd a6, 320(a2)
; RV64-NEXT: lui a2, 24414
; RV64-NEXT: add a2, sp, a2
; RV64-NEXT: sd a7, 328(a2)
-; RV64-NEXT: slli a0, a0, 32
-; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: slli a1, a1, 32
+; RV64-NEXT: or a0, a1, a0
; RV64-NEXT: addi a1, a0, 4
; RV64-NEXT: srli a2, a1, 32
; RV64-NEXT: sw a1, 8(sp)
@@ -1692,15 +1692,15 @@ define i32 @va_large_stack(ptr %fmt, ...) {
; RV64-WITHFP-NEXT: sd a2, 16(s0)
; RV64-WITHFP-NEXT: sd a3, 24(s0)
; RV64-WITHFP-NEXT: sd a4, 32(s0)
-; RV64-WITHFP-NEXT: sd a5, 40(s0)
; RV64-WITHFP-NEXT: addi a1, s0, 8
; RV64-WITHFP-NEXT: sd a1, 0(a0)
-; RV64-WITHFP-NEXT: lw a1, 4(a0)
-; RV64-WITHFP-NEXT: lwu a2, 0(a0)
+; RV64-WITHFP-NEXT: lwu a1, 0(a0)
+; RV64-WITHFP-NEXT: lw a2, 4(a0)
+; RV64-WITHFP-NEXT: sd a5, 40(s0)
; RV64-WITHFP-NEXT: sd a6, 48(s0)
; RV64-WITHFP-NEXT: sd a7, 56(s0)
-; RV64-WITHFP-NEXT: slli a1, a1, 32
-; RV64-WITHFP-NEXT: or a1, a1, a2
+; RV64-WITHFP-NEXT: slli a2, a2, 32
+; RV64-WITHFP-NEXT: or a1, a2, a1
; RV64-WITHFP-NEXT: addi a2, a1, 4
; RV64-WITHFP-NEXT: srli a3, a2, 32
; RV64-WITHFP-NEXT: sw a2, 0(a0)
diff --git a/llvm/test/CodeGen/RISCV/abds-neg.ll b/llvm/test/CodeGen/RISCV/abds-neg.ll
index 168615983d9709..b6064198935a61 100644
--- a/llvm/test/CodeGen/RISCV/abds-neg.ll
+++ b/llvm/test/CodeGen/RISCV/abds-neg.ll
@@ -622,23 +622,23 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
; RV32I-LABEL: abd_ext_i128:
; RV32I: # %bb.0:
; RV32I-NEXT: lw a3, 0(a1)
-; RV32I-NEXT: lw a4, 0(a2)
-; RV32I-NEXT: lw a5, 4(a1)
+; RV32I-NEXT: lw a4, 4(a1)
; RV32I-NEXT: lw a6, 8(a1)
-; RV32I-NEXT: lw a7, 8(a2)
; RV32I-NEXT: lw t1, 12(a1)
+; RV32I-NEXT: lw a7, 8(a2)
; RV32I-NEXT: lw t2, 12(a2)
-; RV32I-NEXT: lw a1, 4(a2)
+; RV32I-NEXT: lw a1, 0(a2)
+; RV32I-NEXT: lw a2, 4(a2)
; RV32I-NEXT: sltu t3, a7, a6
; RV32I-NEXT: mv t4, t3
; RV32I-NEXT: beq t1, t2, .LBB11_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: slt t4, t2, t1
; RV32I-NEXT: .LBB11_2:
-; RV32I-NEXT: sltu a2, a4, a3
-; RV32I-NEXT: sltu t6, a1, a5
-; RV32I-NEXT: mv t0, a2
-; RV32I-NEXT: beq a5, a1, .LBB11_4
+; RV32I-NEXT: sltu a5, a1, a3
+; RV32I-NEXT: sltu t6, a2, a4
+; RV32I-NEXT: mv t0, a5
+; RV32I-NEXT: beq a4, a2, .LBB11_4
; RV32I-NEXT: # %bb.3:
; RV32I-NEXT: mv t0, t6
; RV32I-NEXT: .LBB11_4:
@@ -651,16 +651,16 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: # %bb.5:
; RV32I-NEXT: mv t0, t4
; RV32I-NEXT: .LBB11_6:
-; RV32I-NEXT: mv t5, a2
-; RV32I-NEXT: beq a1, a5, .LBB11_8
+; RV32I-NEXT: mv t5, a5
+; RV32I-NEXT: beq a2, a4, .LBB11_8
; RV32I-NEXT: # %bb.7:
; RV32I-NEXT: mv t5, t6
; RV32I-NEXT: .LBB11_8:
-; RV32I-NEXT: sltu t4, a3, a4
+; RV32I-NEXT: sltu t4, a3, a1
; RV32I-NEXT: mv t6, t4
-; RV32I-NEXT: beq a5, a1, .LBB11_10
+; RV32I-NEXT: beq a4, a2, .LBB11_10
; RV32I-NEXT: # %bb.9:
-; RV32I-NEXT: sltu t6, a5, a1
+; RV32I-NEXT: sltu t6, a4, a2
; RV32I-NEXT: .LBB11_10:
; RV32I-NEXT: bnez t0, .LBB11_12
; RV32I-NEXT: # %bb.11:
@@ -684,29 +684,29 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: add a7, a7, t1
; RV32I-NEXT: bnez t0, .LBB11_15
; RV32I-NEXT: # %bb.14:
-; RV32I-NEXT: sub a1, a1, a5
-; RV32I-NEXT: sub a1, a1, a2
-; RV32I-NEXT: sub a3, a4, a3
+; RV32I-NEXT: sub a2, a2, a4
+; RV32I-NEXT: sub a2, a2, a5
+; RV32I-NEXT: sub a1, a1, a3
; RV32I-NEXT: j .LBB11_16
; RV32I-NEXT: .LBB11_15:
-; RV32I-NEXT: sub a5, a5, a1
-; RV32I-NEXT: sub a1, a5, t4
-; RV32I-NEXT: sub a3, a3, a4
+; RV32I-NEXT: sub a4, a4, a2
+; RV32I-NEXT: sub a2, a4, t4
+; RV32I-NEXT: sub a1, a3, a1
; RV32I-NEXT: .LBB11_16:
-; RV32I-NEXT: or a2, a3, a1
-; RV32I-NEXT: snez a2, a2
+; RV32I-NEXT: or a3, a1, a2
+; RV32I-NEXT: snez a3, a3
; RV32I-NEXT: neg a4, a6
-; RV32I-NEXT: sltu a5, a4, a2
+; RV32I-NEXT: sltu a5, a4, a3
; RV32I-NEXT: neg a6, a7
; RV32I-NEXT: sub a5, a6, a5
-; RV32I-NEXT: snez a6, a3
-; RV32I-NEXT: add a1, a1, a6
+; RV32I-NEXT: snez a6, a1
+; RV32I-NEXT: add a2, a2, a6
+; RV32I-NEXT: neg a2, a2
+; RV32I-NEXT: sub a4, a4, a3
; RV32I-NEXT: neg a1, a1
-; RV32I-NEXT: sub a4, a4, a2
-; RV32I-NEXT: neg a2, a3
-; RV32I-NEXT: sw a2, 0(a0)
+; RV32I-NEXT: sw a1, 0(a0)
; RV32I-NEXT: sw a4, 8(a0)
-; RV32I-NEXT: sw a1, 4(a0)
+; RV32I-NEXT: sw a2, 4(a0)
; RV32I-NEXT: sw a5, 12(a0)
; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
@@ -741,23 +741,23 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
; RV32ZBB-LABEL: abd_ext_i128:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: lw a3, 0(a1)
-; RV32ZBB-NEXT: lw a4, 0(a2)
-; RV32ZBB-NEXT: lw a5, 4(a1)
+; RV32ZBB-NEXT: lw a4, 4(a1)
; RV32ZBB-NEXT: lw a6, 8(a1)
-; RV32ZBB-NEXT: lw a7, 8(a2)
; RV32ZBB-NEXT: lw t1, 12(a1)
+; RV32ZBB-NEXT: lw a7, 8(a2)
; RV32ZBB-NEXT: lw t2, 12(a2)
-; RV32ZBB-NEXT: lw a1, 4(a2)
+; RV32ZBB-NEXT: lw a1, 0(a2)
+; RV32ZBB-NEXT: lw a2, 4(a2)
; RV32ZBB-NEXT: sltu t3, a7, a6
; RV32ZBB-NEXT: mv t4, t3
; RV32ZBB-NEXT: beq t1, t2, .LBB11_2
; RV32ZBB-NEXT: # %bb.1:
; RV32ZBB-NEXT: slt t4, t2, t1
; RV32ZBB-NEXT: .LBB11_2:
-; RV32ZBB-NEXT: sltu a2, a4, a3
-; RV32ZBB-NEXT: sltu t6, a1, a5
-; RV32ZBB-NEXT: mv t0, a2
-; RV32ZBB-NEXT: beq a5, a1, .LBB11_4
+; RV32ZBB-NEXT: sltu a5, a1, a3
+; RV32ZBB-NEXT: sltu t6, a2, a4
+; RV32ZBB-NEXT: mv t0, a5
+; RV32ZBB-NEXT: beq a4, a2, .LBB11_4
; RV32ZBB-NEXT: # %bb.3:
; RV32ZBB-NEXT: mv t0, t6
; RV32ZBB-NEXT: .LBB11_4:
@@ -770,16 +770,16 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
; RV32ZBB-NEXT: # %bb.5:
; RV32ZBB-NEXT: mv t0, t4
; RV32ZBB-NEXT: .LBB11_6:
-; RV32ZBB-NEXT: mv t5, a2
-; RV32ZBB-NEXT: beq a1, a5, .LBB11_8
+; RV32ZBB-NEXT: mv t5, a5
+; RV32ZBB-NEXT: beq a2, a4, .LBB11_8
; RV32ZBB-NEXT: # %bb.7:
; RV32ZBB-NEXT: mv t5, t6
; RV32ZBB-NEXT: .LBB11_8:
-; RV32ZBB-NEXT: sltu t4, a3, a4
+; RV32ZBB-NEXT: sltu t4, a3, a1
; RV32ZBB-NEXT: mv t6, t4
-; RV32ZBB-NEXT: beq a5, a1, .LBB11_10
+; RV32ZBB-NEXT: beq a4, a2, .LBB11_10
; RV32ZBB-NEXT: # %bb.9:
-; RV32ZBB-NEXT: sltu t6, a5, a1
+; RV32ZBB-NEXT: sltu t6, a4, a2
; RV32ZBB-NEXT: .LBB11_10:
; RV32ZBB-NEXT: bnez t0, .LBB11_12
; RV32ZBB-NEXT: # %bb.11:
@@ -803,29 +803,29 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
; RV32ZBB-NEXT: add a7, a7, t1
; RV32ZBB-NEXT: bnez t0, .LBB11_15
; RV32ZBB-NEXT: # %bb.14:
-; RV32ZBB-NEXT: sub a1, a1, a5
-; RV32ZBB-NEXT: sub a1, a1, a2
-; RV32ZBB-NEXT: sub a3, a4, a3
+; RV32ZBB-NEXT: sub a2, a2, a4
+; RV32ZBB-NEXT: sub a2, a2, a5
+; RV32ZBB-NEXT: sub a1, a1, a3
; RV32ZBB-NEXT: j .LBB11_16
; RV32ZBB-NEXT: .LBB11_15:
-; RV32ZBB-NEXT: sub a5, a5, a1
-; RV32ZBB-NEXT: sub a1, a5, t4
-; RV32ZBB-NEXT: sub a3, a3, a4
+; RV32ZBB-NEXT: sub a4, a4, a2
+; RV32ZBB-NEXT: sub a2, a4, t4
+; RV32ZBB-NEXT: sub a1, a3, a1
; RV32ZBB-NEXT: .LBB11_16:
-; RV32ZBB-NEXT: or a2, a3, a1
-; RV32ZBB-NEXT: snez a2, a2
+; RV32ZBB-NEXT: or a3, a1, a2
+; RV32ZBB-NEXT: snez a3, a3
; RV32ZBB-NEXT: neg a4, a6
-; RV32ZBB-NEXT: sltu a5, a4, a2
+; RV32ZBB-NEXT: sltu a5, a4, a3
; RV32ZBB-NEXT: neg a6, a7
; RV32ZBB-NEXT: sub a5, a6, a5
-; RV32ZBB-NEXT: snez a6, a3
-; RV32ZBB-NEXT: add a1, a1, a6
+; RV32ZBB-NEXT: snez a6, a1
+; RV32ZBB-NEXT: add a2, a2, a6
+; RV32ZBB-NEXT: neg a2, a2
+; RV32ZBB-NEXT: sub a4, a4, a3
; RV32ZBB-NEXT: neg a1, a1
-; RV32ZBB-NEXT: sub a4, a4, a2
-; RV32ZBB-NEXT: neg a2, a3
-; RV32ZBB-NEXT: sw a2, 0(a0)
+; RV32ZBB-NEXT: sw a1, 0(a0)
; RV32ZBB-NEXT: sw a4, 8(a0)
-; RV32ZBB-NEXT: sw a1, 4(a0)
+; RV32ZBB-NEXT: sw a2, 4(a0)
; RV32ZBB-NEXT: sw a5, 12(a0)
; RV32ZBB-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZBB-NEXT: addi sp, sp, 16
@@ -869,23 +869,23 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
; RV32I-LABEL: abd_ext_i128_undef:
; RV32I: # %bb.0:
; RV32I-NEXT: lw a3, 0(a1)
-; RV32I-NEXT: lw a4, 0(a2)
-; RV32I-NEXT: lw a5, 4(a1)
+; RV32I-NEXT: lw a4, 4(a1)
; RV32I-NEXT: lw a6, 8(a1)
-; RV32I-NEXT: lw a7, 8(a2)
; RV32I-NEXT: lw t1, 12(a1)
+; RV32I-NEXT: lw a7, 8(a2)
; RV32I-NEXT: lw t2, 12(a2)
-; RV32I-NEXT: lw a1, 4(a2)
+; RV32I-NEXT: lw a1, 0(a2)
+; RV32I-NEXT: lw a2, 4(a2)
; RV32I-NEXT: sltu t3, a7, a6
; RV32I-NEXT: mv t4, t3
; RV32I-NEXT: beq t1, t2, .LBB12_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: slt t4, t2, t1
; RV32I-NEXT: .LBB12_2:
-; RV32I-NEXT: sltu a2, a4, a3
-; RV32I-NEXT: sltu t6, a1, a5
-; RV32I-NEXT: mv t0, a2
-; RV32I-NEXT: beq a5, a1, .LBB12_4
+; RV32I-NEXT: sltu a5, a1, a3
+; RV32I-NEXT: sltu t6, a2, a4
+; RV32I-NEXT: mv t0, a5
+; RV32I-NEXT: beq a4, a2, .LBB12_4
; RV32I-NEXT: # %bb.3:
; RV32I-NEXT: mv t0, t6
; RV32I-NEXT: .LBB12_4:
@@ -898,16 +898,16 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: # %bb.5:
; RV32I-NEXT: mv t0, t4
; RV32I-NEXT: .LBB12_6:
-; RV32I-NEXT: mv t5, a2
-; RV32I-NEXT: beq a1, a5, .LBB12_8
+; RV32I-NEXT: mv t5, a5
+; RV32I-NEXT: beq a2, a4, .LBB12_8
; RV32I-NEXT: # %bb.7:
; RV32I-NEXT: mv t5, t6
; RV32I-NEXT: .LBB12_8:
-; RV32I-NEXT: sltu t4, a3, a4
+; RV32I-NEXT: sltu t4, a3, a1
; RV32I-NEXT: mv t6, t4
-; RV32I-NEXT: beq a5, a1, .LBB12_10
+; RV32I-NEXT: beq a4, a2, .LBB12_10
; RV32I-NEXT: # %bb.9:
-; RV32I-NEXT: sltu t6, a5, a1
+; RV32I-NEXT: sltu t6, a4, a2
; RV32I-NEXT: .LBB12_10:
; RV32I-NEXT: bnez t0, .LBB12_12
; RV32I-NEXT: # %bb.11:
@@ -931,29 +931,29 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: add a7, a7, t1
; RV32I-NEXT: bnez t0, .LBB12_15
; RV32I-NEXT: # %bb.14:
-; RV32I-NEXT: sub a1, a1, a5
-; RV32I-NEXT: sub a1, a1, a2
-; RV32I-NEXT: sub a3, a4, a3
+; RV32I-NEXT: sub a2, a2, a4
+; RV32I-NEXT: sub a2, a2, a5
+; RV32I-NEXT: sub a1, a1, a3
; RV32I-NEXT: j .LBB12_16
; RV32I-NEXT: .LBB12_15:
-; RV32I-NEXT: sub a5, a5, a1
-; RV32I-NEXT: sub a1, a5, t4
-; RV32I-NEXT: sub a3, a3, a4
+; RV32I-NEXT: sub a4, a4, a2
+; RV32I-NEXT: sub a2, a4, t4
+; RV32I-NEXT: sub a1, a3, a1
; RV32I-NEXT: .LBB12_16:
-; RV32I-NEXT: or a2, a3, a1
-; RV32I-NEXT: snez a2, a2
+; RV32I-NEXT: or a3, a1, a2
+; RV32I-NEXT: snez a3, a3
; RV32I-NEXT: neg a4, a6
-; RV32I-NEXT: sltu a5, a4, a2
+; RV32I-NEXT: sltu a5, a4, a3
; RV32I-NEXT: neg a6, a7
; RV32I-NEXT: sub a5, a6, a5
-; RV32I-NEXT: snez a6, a3
-; RV32I-NEXT: add a1, a1, a6
+; RV32I-NEXT: snez a6, a1
+; RV32I-NEXT: add a2, a2, a6
+; RV32I-NEXT: neg a2, a2
+; RV32I-NEXT: sub a4, a4, a3
; RV32I-NEXT: neg a1, a1
-; RV32I-NEXT: sub a4, a4, a2
-; RV32I-NEXT: neg a2, a3
-; RV32I-NEXT: sw a2, 0(a0)
+; RV32I-NEXT: sw a1, 0(a0)
; RV32I-NEXT: sw a4, 8(a0)
-; RV32I-NEXT: sw a1, 4(a0)
+; RV32I-NEXT: sw a2, 4(a0)
; RV32I-NEXT: sw a5, 12(a0)
; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
@@ -988,23 +988,23 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
; RV32ZBB-LABEL: abd_ext_i128_undef:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: lw a3, 0(a1)
-; RV32ZBB-NEXT: lw a4, 0(a2)
-; RV32ZBB-NEXT: lw a5, 4(a1)
+; RV32ZBB-NEXT: lw a4, 4(a1)
; RV32ZBB-NEXT: lw a6, 8(a1)
-; RV32ZBB-NEXT: lw a7, 8(a2)
; RV32ZBB-NEXT: lw t1, 12(a1)
+; RV32ZBB-NEXT: lw a7, 8(a2)
; RV32ZBB-NEXT: lw t2, 12(a2)
-; RV32ZBB-NEXT: lw a1, 4(a2)
+; RV32ZBB-NEXT: lw a1, 0(a2)
+; RV32ZBB-NEXT: lw a2, 4(a2)
; RV32ZBB-NEXT: sltu t3, a7, a6
; RV32ZBB-NEXT: mv t4, t3
; RV32ZBB-NEXT: beq t1, t2, .LBB12_2
; RV32ZBB-NEXT: # %bb.1:
; RV32ZBB-NEXT: slt t4, t2, t1
; RV32ZBB-NEXT: .LBB12_2:
-; RV32ZBB-NEXT: sltu a2, a4, a3
-; RV32ZBB-NEXT: sltu t6, a1, a5
-; RV32ZBB-NEXT: mv t0, a2
-; RV32ZBB-NEXT: beq a5, a1, .LBB12_4
+; RV32ZBB-NEXT: sltu a5, a1, a3
+; RV32ZBB-NEXT: sltu t6, a2, a4
+; RV32ZBB-NEXT: mv t0, a5
+; RV32ZBB-NEXT: beq a4, a2, .LBB12_4
; RV32ZBB-NEXT: # %bb.3:
; RV32ZBB-NEXT: mv t0, t6
; RV32ZBB-NEXT: .LBB12_4:
@@ -1017,16 +1017,16 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
; RV32ZBB-NEXT: # %bb.5:
; RV32ZBB-NEXT: mv t0, t4
; RV32ZBB-NEXT: .LBB12_6:
-; RV32ZBB-NEXT: mv t5, a2
-; RV32ZBB-NEXT: beq a1, a5, .LBB12_8
+; RV32ZBB-NEXT: mv t5, a5
+; RV32ZBB-NEXT: beq a2, a4, .LBB12_8
; RV32ZBB-NEXT: # %bb.7:
; RV32ZBB-NEXT: mv t5, t6
; RV32ZBB-NEXT: .LBB12_8:
-; RV32ZBB-NEXT: sltu t4, a3, a4
+; RV32ZBB-NEXT: sltu t4, a3, a1
; RV32ZBB-NEXT: mv t6, t4
-; RV32ZBB-NEXT: beq a5, a1, .LBB12_10
+; RV32ZBB-NEXT: beq a4, a2, .LBB12_10
; RV32ZBB-NEXT: # %bb.9:
-; RV32ZBB-NEXT: sltu t6, a5, a1
+; RV32ZBB-NEXT: sltu t6, a4, a2
; RV32ZBB-NEXT: .LBB12_10:
; RV32ZBB-NEXT: bnez t0, .LBB12_12
; RV32ZBB-NEXT: # %bb.11:
@@ -1050,29 +1050,29 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
; RV32ZBB-NEXT: add a7, a7, t1
; RV32ZBB-NEXT: bnez t0, .LBB12_15
; RV32ZBB-NEXT: # %bb.14:
-; RV32ZBB-NEXT: sub a1, a1, a5
-; RV32ZBB-NEXT: sub a1, a1, a2
-; RV32ZBB-NEXT: sub a3, a4, a3
+; RV32ZBB-NEXT: sub a2, a2, a4
+; RV32ZBB-NEXT: sub a2, a2, a5
+; RV32ZBB-NEXT: sub a1, a1, a3
; RV32ZBB-NEXT: j .LBB12_16
; RV32ZBB-NEXT: .LBB12_15:
-; RV32ZBB-NEXT: sub a5, a5, a1
-; RV32ZBB-NEXT: sub a1, a5, t4
-; RV32ZBB-NEXT: sub a3, a3, a4
+; RV32ZBB-NEXT: sub a4, a4, a2
+; RV32ZBB-NEXT: sub a2, a4, t4
+; RV32ZBB-NEXT: sub a1, a3, a1
; RV32ZBB-NEXT: .LBB12_16:
-; RV32ZBB-NEXT: or a2, a3, a1
-; RV32ZBB-NEXT: snez a2, a2
+; RV32ZBB-NEXT: or a3, a1, a2
+; RV32ZBB-NEXT: snez a3, a3
; RV32ZBB-NEXT: neg a4, a6
-; RV32ZBB-NEXT: sltu a5, a4, a2
+; RV32ZBB-NEXT: sltu a5, a4, a3
; RV32ZBB-NEXT: neg a6, a7
; RV32ZBB-NEXT: sub a5, a6, a5
-; RV32ZBB-NEXT: snez a6, a3
-; RV32ZBB-NEXT: add a1, a1, a6
+; RV32ZBB-NEXT: snez a6, a1
+; RV32ZBB-NEXT: add a2, a2, a6
+; RV32ZBB-NEXT: neg a2, a2
+; RV32ZBB-NEXT: sub a4, a4, a3
; RV32ZBB-NEXT: neg a1, a1
-; RV32ZBB-NEXT: sub a4, a4, a2
-; RV32ZBB-NEXT: neg a2, a3
-; RV32ZBB-NEXT: sw a2, 0(a0)
+; RV32ZBB-NEXT: sw a1, 0(a0)
; RV32ZBB-NEXT: sw a4, 8(a0)
-; RV32ZBB-NEXT: sw a1, 4(a0)
+; RV32ZBB-NEXT: sw a2, 4(a0)
; RV32ZBB-NEXT: sw a5, 12(a0)
; RV32ZBB-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZBB-NEXT: addi sp, sp, 16
@@ -1383,10 +1383,10 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind {
; RV32I-LABEL: abd_minmax_i128:
; RV32I: # %bb.0:
; RV32I-NEXT: lw a6, 4(a2)
-; RV32I-NEXT: lw a3, 4(a1)
; RV32I-NEXT: lw a7, 8(a2)
; RV32I-NEXT: lw t0, 12(a2)
; RV32I-NEXT: lw a5, 12(a1)
+; RV32I-NEXT: lw a3, 4(a1)
; RV32I-NEXT: lw a4, 8(a1)
; RV32I-NEXT: beq a5, t0, .LBB17_2
; RV32I-NEXT: # %bb.1:
@@ -1510,10 +1510,10 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind {
; RV32ZBB-LABEL: abd_minmax_i128:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: lw a6, 4(a2)
-; RV32ZBB-NEXT: lw a3, 4(a1)
; RV32ZBB-NEXT: lw a7, 8(a2)
; RV32ZBB-NEXT: lw t0, 12(a2)
; RV32ZBB-NEXT: lw a5, 12(a1)
+; RV32ZBB-NEXT: lw a3, 4(a1)
; RV32ZBB-NEXT: lw a4, 8(a1)
; RV32ZBB-NEXT: beq a5, t0, .LBB17_2
; RV32ZBB-NEXT: # %bb.1:
@@ -1861,67 +1861,67 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
; RV32I-LABEL: abd_cmp_i128:
; RV32I: # %bb.0:
; RV32I-NEXT: lw a3, 0(a2)
-; RV32I-NEXT: lw a4, 0(a1)
-; RV32I-NEXT: lw a5, 4(a2)
-; RV32I-NEXT: lw a6, 8(a2)
-; RV32I-NEXT: lw a7, 8(a1)
-; RV32I-NEXT: lw a2, 12(a2)
+; RV32I-NEXT: lw a4, 4(a2)
+; RV32I-NEXT: lw a5, 8(a2)
+; RV32I-NEXT: lw a7, 12(a2)
+; RV32I-NEXT: lw a6, 8(a1)
; RV32I-NEXT: lw t0, 12(a1)
+; RV32I-NEXT: lw a2, 0(a1)
; RV32I-NEXT: lw a1, 4(a1)
-; RV32I-NEXT: sltu t1, a7, a6
+; RV32I-NEXT: sltu t1, a6, a5
; RV32I-NEXT: mv t4, t1
-; RV32I-NEXT: beq t0, a2, .LBB22_2
+; RV32I-NEXT: beq t0, a7, .LBB22_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: slt t4, t0, a2
+; RV32I-NEXT: slt t4, t0, a7
; RV32I-NEXT: .LBB22_2:
-; RV32I-NEXT: sltu t2, a4, a3
+; RV32I-NEXT: sltu t2, a2, a3
; RV32I-NEXT: mv t3, t2
-; RV32I-NEXT: beq a1, a5, .LBB22_4
+; RV32I-NEXT: beq a1, a4, .LBB22_4
; RV32I-NEXT: # %bb.3:
-; RV32I-NEXT: sltu t3, a1, a5
+; RV32I-NEXT: sltu t3, a1, a4
; RV32I-NEXT: .LBB22_4:
-; RV32I-NEXT: xor t5, t0, a2
-; RV32I-NEXT: xor t6, a7, a6
+; RV32I-NEXT: xor t5, t0, a7
+; RV32I-NEXT: xor t6, a6, a5
; RV32I-NEXT: or t5, t6, t5
; RV32I-NEXT: mv t6, t3
; RV32I-NEXT: beqz t5, .LBB22_6
; RV32I-NEXT: # %bb.5:
; RV32I-NEXT: mv t6, t4
; RV32I-NEXT: .LBB22_6:
-; RV32I-NEXT: sltu t4, a3, a4
+; RV32I-NEXT: sltu t4, a3, a2
; RV32I-NEXT: mv t5, t4
-; RV32I-NEXT: beq a1, a5, .LBB22_8
+; RV32I-NEXT: beq a1, a4, .LBB22_8
; RV32I-NEXT: # %bb.7:
-; RV32I-NEXT: sltu t5, a5, a1
+; RV32I-NEXT: sltu t5, a4, a1
; RV32I-NEXT: .LBB22_8:
; RV32I-NEXT: bnez t6, .LBB22_10
; RV32I-NEXT: # %bb.9:
-; RV32I-NEXT: sltu t1, a6, a7
-; RV32I-NEXT: sub a2, a2, t0
-; RV32I-NEXT: sub a2, a2, t1
-; RV32I-NEXT: sub a6, a6, a7
-; RV32I-NEXT: sltu a7, a6, t5
-; RV32I-NEXT: sub a2, a2, a7
+; RV32I-NEXT: sltu t1, a5, a6
+; RV32I-NEXT: sub a7, a7, t0
+; RV32I-NEXT: sub a7, a7, t1
+; RV32I-NEXT: sub a6, a5, a6
+; RV32I-NEXT: sltu a5, a6, t5
+; RV32I-NEXT: sub a5, a7, a5
; RV32I-NEXT: sub a6, a6, t5
-; RV32I-NEXT: sub a5, a5, a1
-; RV32I-NEXT: sub a1, a5, t4
-; RV32I-NEXT: sub a3, a3, a4
+; RV32I-NEXT: sub a4, a4, a1
+; RV32I-NEXT: sub a1, a4, t4
+; RV32I-NEXT: sub a2, a3, a2
; RV32I-NEXT: j .LBB22_11
; RV32I-NEXT: .LBB22_10:
-; RV32I-NEXT: sub a2, t0, a2
-; RV32I-NEXT: sub a6, a7, a6
-; RV32I-NEXT: sub a2, a2, t1
+; RV32I-NEXT: sub a7, t0, a7
+; RV32I-NEXT: sub a6, a6, a5
+; RV32I-NEXT: sub a5, a7, t1
; RV32I-NEXT: sltu a7, a6, t3
-; RV32I-NEXT: sub a1, a1, a5
-; RV32I-NEXT: sub a2, a2, a7
+; RV32I-NEXT: sub a1, a1, a4
+; RV32I-NEXT: sub a5, a5, a7
; RV32I-NEXT: sub a6, a6, t3
; RV32I-NEXT: sub a1, a1, t2
-; RV32I-NEXT: sub a3, a4, a3
+; RV32I-NEXT: sub a2, a2, a3
; RV32I-NEXT: .LBB22_11:
; RV32I-NEXT: sw a6, 8(a0)
; RV32I-NEXT: sw a1, 4(a0)
-; RV32I-NEXT: sw a3, 0(a0)
-; RV32I-NEXT: sw a2, 12(a0)
+; RV32I-NEXT: sw a2, 0(a0)
+; RV32I-NEXT: sw a5, 12(a0)
; RV32I-NEXT: ret
;
; RV64I-LABEL: abd_cmp_i128:
@@ -1948,67 +1948,67 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
; RV32ZBB-LABEL: abd_cmp_i128:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: lw a3, 0(a2)
-; RV32ZBB-NEXT: lw a4, 0(a1)
-; RV32ZBB-NEXT: lw a5, 4(a2)
-; RV32ZBB-NEXT: lw a6, 8(a2)
-; RV32ZBB-NEXT: lw a7, 8(a1)
-; RV32ZBB-NEXT: lw a2, 12(a2)
+; RV32ZBB-NEXT: lw a4, 4(a2)
+; RV32ZBB-NEXT: lw a5, 8(a2)
+; RV32ZBB-NEXT: lw a7, 12(a2)
+; RV32ZBB-NEXT: lw a6, 8(a1)
; RV32ZBB-NEXT: lw t0, 12(a1)
+; RV32ZBB-NEXT: lw a2, 0(a1)
; RV32ZBB-NEXT: lw a1, 4(a1)
-; RV32ZBB-NEXT: sltu t1, a7, a6
+; RV32ZBB-NEXT: sltu t1, a6, a5
; RV32ZBB-NEXT: mv t4, t1
-; RV32ZBB-NEXT: beq t0, a2, .LBB22_2
+; RV32ZBB-NEXT: beq t0, a7, .LBB22_2
; RV32ZBB-NEXT: # %bb.1:
-; RV32ZBB-NEXT: slt t4, t0, a2
+; RV32ZBB-NEXT: slt t4, t0, a7
; RV32ZBB-NEXT: .LBB22_2:
-; RV32ZBB-NEXT: sltu t2, a4, a3
+; RV32ZBB-NEXT: sltu t2, a2, a3
; RV32ZBB-NEXT: mv t3, t2
-; RV32ZBB-NEXT: beq a1, a5, .LBB22_4
+; RV32ZBB-NEXT: beq a1, a4, .LBB22_4
; RV32ZBB-NEXT: # %bb.3:
-; RV32ZBB-NEXT: sltu t3, a1, a5
+; RV32ZBB-NEXT: sltu t3, a1, a4
; RV32ZBB-NEXT: .LBB22_4:
-; RV32ZBB-NEXT: xor t5, t0, a2
-; RV32ZBB-NEXT: xor t6, a7, a6
+; RV32ZBB-NEXT: xor t5, t0, a7
+; RV32ZBB-NEXT: xor t6, a6, a5
; RV32ZBB-NEXT: or t5, t6, t5
; RV32ZBB-NEXT: mv t6, t3
; RV32ZBB-NEXT: beqz t5, .LBB22_6
; RV32ZBB-NEXT: # %bb.5:
; RV32ZBB-NEXT: mv t6, t4
; RV32ZBB-NEXT: .LBB22_6:
-; RV32ZBB-NEXT: sltu t4, a3, a4
+; RV32ZBB-NEXT: sltu t4, a3, a2
; RV32ZBB-NEXT: mv t5, t4
-; RV32ZBB-NEXT: beq a1, a5, .LBB22_8
+; RV32ZBB-NEXT: beq a1, a4, .LBB22_8
; RV32ZBB-NEXT: # %bb.7:
-; RV32ZBB-NEXT: sltu t5, a5, a1
+; RV32ZBB-NEXT: sltu t5, a4, a1
; RV32ZBB-NEXT: .LBB22_8:
; RV32ZBB-NEXT: bnez t6, .LBB22_10
; RV32ZBB-NEXT: # %bb.9:
-; RV32ZBB-NEXT: sltu t1, a6, a7
-; RV32ZBB-NEXT: sub a2, a2, t0
-; RV32ZBB-NEXT: sub a2, a2, t1
-; RV32ZBB-NEXT: sub a6, a6, a7
-; RV32ZBB-NEXT: sltu a7, a6, t5
-; RV32ZBB-NEXT: sub a2, a2, a7
+; RV32ZBB-NEXT: sltu t1, a5, a6
+; RV32ZBB-NEXT: sub a7, a7, t0
+; RV32ZBB-NEXT: sub a7, a7, t1
+; RV32ZBB-NEXT: sub a6, a5, a6
+; RV32ZBB-NEXT: sltu a5, a6, t5
+; RV32ZBB-NEXT: sub a5, a7, a5
; RV32ZBB-NEXT: sub a6, a6, t5
-; RV32ZBB-NEXT: sub a5, a5, a1
-; RV32ZBB-NEXT: sub a1, a5, t4
-; RV32ZBB-NEXT: sub a3, a3, a4
+; RV32ZBB-NEXT: sub a4, a4, a1
+; RV32ZBB-NEXT: sub a1, a4, t4
+; RV32ZBB-NEXT: sub a2, a3, a2
; RV32ZBB-NEXT: j .LBB22_11
; RV32ZBB-NEXT: .LBB22_10:
-; RV32ZBB-NEXT: sub a2, t0, a2
-; RV32ZBB-NEXT: sub a6, a7, a6
-; RV32ZBB-NEXT: sub a2, a2, t1
+; RV32ZBB-NEXT: sub a7, t0, a7
+; RV32ZBB-NEXT: sub a6, a6, a5
+; RV32ZBB-NEXT: sub a5, a7, t1
; RV32ZBB-NEXT: sltu a7, a6, t3
-; RV32ZBB-NEXT: sub a1, a1, a5
-; RV32ZBB-NEXT: sub a2, a2, a7
+; RV32ZBB-NEXT: sub a1, a1, a4
+; RV32ZBB-NEXT: sub a5, a5, a7
; RV32ZBB-NEXT: sub a6, a6, t3
; RV32ZBB-NEXT: sub a1, a1, t2
-; RV32ZBB-NEXT: sub a3, a4, a3
+; RV32ZBB-NEXT: sub a2, a2, a3
; RV32ZBB-NEXT: .LBB22_11:
; RV32ZBB-NEXT: sw a6, 8(a0)
; RV32ZBB-NEXT: sw a1, 4(a0)
-; RV32ZBB-NEXT: sw a3, 0(a0)
-; RV32ZBB-NEXT: sw a2, 12(a0)
+; RV32ZBB-NEXT: sw a2, 0(a0)
+; RV32ZBB-NEXT: sw a5, 12(a0)
; RV32ZBB-NEXT: ret
;
; RV64ZBB-LABEL: abd_cmp_i128:
@@ -2390,31 +2390,31 @@ define i128 @abd_subnsw_i128(i128 %a, i128 %b) nounwind {
; RV32I-LABEL: abd_subnsw_i128:
; RV32I: # %bb.0:
; RV32I-NEXT: lw a3, 0(a2)
-; RV32I-NEXT: lw a4, 0(a1)
-; RV32I-NEXT: lw a7, 12(a2)
+; RV32I-NEXT: lw a4, 4(a2)
; RV32I-NEXT: lw a5, 8(a2)
+; RV32I-NEXT: lw a7, 12(a2)
; RV32I-NEXT: lw a6, 8(a1)
; RV32I-NEXT: lw t0, 12(a1)
-; RV32I-NEXT: lw a2, 4(a2)
+; RV32I-NEXT: lw a2, 0(a1)
; RV32I-NEXT: lw a1, 4(a1)
; RV32I-NEXT: sltu t1, a6, a5
; RV32I-NEXT: sub t0, t0, a7
-; RV32I-NEXT: sltu a7, a4, a3
+; RV32I-NEXT: sltu a7, a2, a3
; RV32I-NEXT: sub t1, t0, t1
; RV32I-NEXT: mv t0, a7
-; RV32I-NEXT: beq a1, a2, .LBB31_2
+; RV32I-NEXT: beq a1, a4, .LBB31_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: sltu t0, a1, a2
+; RV32I-NEXT: sltu t0, a1, a4
; RV32I-NEXT: .LBB31_2:
; RV32I-NEXT: sub a5, a6, a5
; RV32I-NEXT: sltu a6, a5, t0
; RV32I-NEXT: sub a6, t1, a6
-; RV32I-NEXT: sub a1, a1, a2
+; RV32I-NEXT: sub a1, a1, a4
; RV32I-NEXT: sub t1, a1, a7
-; RV32I-NEXT: sub a2, a5, t0
-; RV32I-NEXT: sub a3, a4, a3
+; RV32I-NEXT: sub a4, a5, t0
+; RV32I-NEXT: sub a3, a2, a3
; RV32I-NEXT: srai a1, a6, 31
-; RV32I-NEXT: xor a2, a2, a1
+; RV32I-NEXT: xor a2, a4, a1
; RV32I-NEXT: sltu a4, a1, a2
; RV32I-NEXT: xor a5, a6, a1
; RV32I-NEXT: sub a5, a1, a5
@@ -2458,31 +2458,31 @@ define i128 @abd_subnsw_i128(i128 %a, i128 %b) nounwind {
; RV32ZBB-LABEL: abd_subnsw_i128:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: lw a3, 0(a2)
-; RV32ZBB-NEXT: lw a4, 0(a1)
-; RV32ZBB-NEXT: lw a7, 12(a2)
+; RV32ZBB-NEXT: lw a4, 4(a2)
; RV32ZBB-NEXT: lw a5, 8(a2)
+; RV32ZBB-NEXT: lw a7, 12(a2)
; RV32ZBB-NEXT: lw a6, 8(a1)
; RV32ZBB-NEXT: lw t0, 12(a1)
-; RV32ZBB-NEXT: lw a2, 4(a2)
+; RV32ZBB-NEXT: lw a2, 0(a1)
; RV32ZBB-NEXT: lw a1, 4(a1)
; RV32ZBB-NEXT: sltu t1, a6, a5
; RV32ZBB-NEXT: sub t0, t0, a7
-; RV32ZBB-NEXT: sltu a7, a4, a3
+; RV32ZBB-NEXT: sltu a7, a2, a3
; RV32ZBB-NEXT: sub t1, t0, t1
; RV32ZBB-NEXT: mv t0, a7
-; RV32ZBB-NEXT: beq a1, a2, .LBB31_2
+; RV32ZBB-NEXT: beq a1, a4, .LBB31_2
; RV32ZBB-NEXT: # %bb.1:
-; RV32ZBB-NEXT: sltu t0, a1, a2
+; RV32ZBB-NEXT: sltu t0, a1, a4
; RV32ZBB-NEXT: .LBB31_2:
; RV32ZBB-NEXT: sub a5, a6, a5
; RV32ZBB-NEXT: sltu a6, a5, t0
; RV32ZBB-NEXT: sub a6, t1, a6
-; RV32ZBB-NEXT: sub a1, a1, a2
+; RV32ZBB-NEXT: sub a1, a1, a4
; RV32ZBB-NEXT: sub t1, a1, a7
-; RV32ZBB-NEXT: sub a2, a5, t0
-; RV32ZBB-NEXT: sub a3, a4, a3
+; RV32ZBB-NEXT: sub a4, a5, t0
+; RV32ZBB-NEXT: sub a3, a2, a3
; RV32ZBB-NEXT: srai a1, a6, 31
-; RV32ZBB-NEXT: xor a2, a2, a1
+; RV32ZBB-NEXT: xor a2, a4, a1
; RV32ZBB-NEXT: sltu a4, a1, a2
; RV32ZBB-NEXT: xor a5, a6, a1
; RV32ZBB-NEXT: sub a5, a1, a5
@@ -2532,31 +2532,31 @@ define i128 @abd_subnsw_i128_undef(i128 %a, i128 %b) nounwind {
; RV32I-LABEL: abd_subnsw_i128_undef:
; RV32I: # %bb.0:
; RV32I-NEXT: lw a3, 0(a2)
-; RV32I-NEXT: lw a4, 0(a1)
-; RV32I-NEXT: lw a7, 12(a2)
+; RV32I-NEXT: lw a4, 4(a2)
; RV32I-NEXT: lw a5, 8(a2)
+; RV32I-NEXT: lw a7, 12(a2)
; RV32I-NEXT: lw a6, 8(a1)
; RV32I-NEXT: lw t0, 12(a1)
-; RV32I-NEXT: lw a2, 4(a2)
+; RV32I-NEXT: lw a2, 0(a1)
; RV32I-NEXT: lw a1, 4(a1)
; RV32I-NEXT: sltu t1, a6, a5
; RV32I-NEXT: sub t0, t0, a7
-; RV32I-NEXT: sltu a7, a4, a3
+; RV32I-NEXT: sltu a7, a2, a3
; RV32I-NEXT: sub t1, t0, t1
; RV32I-NEXT: mv t0, a7
-; RV32I-NEXT: beq a1, a2, .LBB32_2
+; RV32I-NEXT: beq a1, a4, .LBB32_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: sltu t0, a1, a2
+; RV32I-NEXT: sltu t0, a1, a4
; RV32I-NEXT: .LBB32_2:
; RV32I-NEXT: sub a5, a6, a5
; RV32I-NEXT: sltu a6, a5, t0
; RV32I-NEXT: sub a6, t1, a6
-; RV32I-NEXT: sub a1, a1, a2
+; RV32I-NEXT: sub a1, a1, a4
; RV32I-NEXT: sub t1, a1, a7
-; RV32I-NEXT: sub a2, a5, t0
-; RV32I-NEXT: sub a3, a4, a3
+; RV32I-NEXT: sub a4, a5, t0
+; RV32I-NEXT: sub a3, a2, a3
; RV32I-NEXT: srai a1, a6, 31
-; RV32I-NEXT: xor a2, a2, a1
+; RV32I-NEXT: xor a2, a4, a1
; RV32I-NEXT: sltu a4, a1, a2
; RV32I-NEXT: xor a5, a6, a1
; RV32I-NEXT: sub a5, a1, a5
@@ -2600,31 +2600,31 @@ define i128 @abd_subnsw_i128_undef(i128 %a, i128 %b) nounwind {
; RV32ZBB-LABEL: abd_subnsw_i128_undef:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: lw a3, 0(a2)
-; RV32ZBB-NEXT: lw a4, 0(a1)
-; RV32ZBB-NEXT: lw a7, 12(a2)
+; RV32ZBB-NEXT: lw a4, 4(a2)
; RV32ZBB-NEXT: lw a5, 8(a2)
+; RV32ZBB-NEXT: lw a7, 12(a2)
; RV32ZBB-NEXT: lw a6, 8(a1)
; RV32ZBB-NEXT: lw t0, 12(a1)
-; RV32ZBB-NEXT: lw a2, 4(a2)
+; RV32ZBB-NEXT: lw a2, 0(a1)
; RV32ZBB-NEXT: lw a1, 4(a1)
; RV32ZBB-NEXT: sltu t1, a6, a5
; RV32ZBB-NEXT: sub t0, t0, a7
-; RV32ZBB-NEXT: sltu a7, a4, a3
+; RV32ZBB-NEXT: sltu a7, a2, a3
; RV32ZBB-NEXT: sub t1, t0, t1
; RV32ZBB-NEXT: mv t0, a7
-; RV32ZBB-NEXT: beq a1, a2, .LBB32_2
+; RV32ZBB-NEXT: beq a1, a4, .LBB32_2
; RV32ZBB-NEXT: # %bb.1:
-; RV32ZBB-NEXT: sltu t0, a1, a2
+; RV32ZBB-NEXT: sltu t0, a1, a4
; RV32ZBB-NEXT: .LBB32_2:
; RV32ZBB-NEXT: sub a5, a6, a5
; RV32ZBB-NEXT: sltu a6, a5, t0
; RV32ZBB-NEXT: sub a6, t1, a6
-; RV32ZBB-NEXT: sub a1, a1, a2
+; RV32ZBB-NEXT: sub a1, a1, a4
; RV32ZBB-NEXT: sub t1, a1, a7
-; RV32ZBB-NEXT: sub a2, a5, t0
-; RV32ZBB-NEXT: sub a3, a4, a3
+; RV32ZBB-NEXT: sub a4, a5, t0
+; RV32ZBB-NEXT: sub a3, a2, a3
; RV32ZBB-NEXT: srai a1, a6, 31
-; RV32ZBB-NEXT: xor a2, a2, a1
+; RV32ZBB-NEXT: xor a2, a4, a1
; RV32ZBB-NEXT: sltu a4, a1, a2
; RV32ZBB-NEXT: xor a5, a6, a1
; RV32ZBB-NEXT: sub a5, a1, a5
diff --git a/llvm/test/CodeGen/RISCV/abds.ll b/llvm/test/CodeGen/RISCV/abds.ll
index 919214b0e9a8dd..91b044902a5201 100644
--- a/llvm/test/CodeGen/RISCV/abds.ll
+++ b/llvm/test/CodeGen/RISCV/abds.ll
@@ -535,12 +535,12 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
; RV32I-LABEL: abd_ext_i128:
; RV32I: # %bb.0:
; RV32I-NEXT: lw a3, 0(a1)
-; RV32I-NEXT: lw a5, 0(a2)
; RV32I-NEXT: lw a4, 4(a1)
; RV32I-NEXT: lw a6, 8(a1)
-; RV32I-NEXT: lw a7, 8(a2)
; RV32I-NEXT: lw t0, 12(a1)
+; RV32I-NEXT: lw a7, 8(a2)
; RV32I-NEXT: lw t1, 12(a2)
+; RV32I-NEXT: lw a5, 0(a2)
; RV32I-NEXT: lw a1, 4(a2)
; RV32I-NEXT: sltu a2, a7, a6
; RV32I-NEXT: mv t4, a2
@@ -631,12 +631,12 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
; RV32ZBB-LABEL: abd_ext_i128:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: lw a3, 0(a1)
-; RV32ZBB-NEXT: lw a5, 0(a2)
; RV32ZBB-NEXT: lw a4, 4(a1)
; RV32ZBB-NEXT: lw a6, 8(a1)
-; RV32ZBB-NEXT: lw a7, 8(a2)
; RV32ZBB-NEXT: lw t0, 12(a1)
+; RV32ZBB-NEXT: lw a7, 8(a2)
; RV32ZBB-NEXT: lw t1, 12(a2)
+; RV32ZBB-NEXT: lw a5, 0(a2)
; RV32ZBB-NEXT: lw a1, 4(a2)
; RV32ZBB-NEXT: sltu a2, a7, a6
; RV32ZBB-NEXT: mv t4, a2
@@ -735,12 +735,12 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
; RV32I-LABEL: abd_ext_i128_undef:
; RV32I: # %bb.0:
; RV32I-NEXT: lw a3, 0(a1)
-; RV32I-NEXT: lw a5, 0(a2)
; RV32I-NEXT: lw a4, 4(a1)
; RV32I-NEXT: lw a6, 8(a1)
-; RV32I-NEXT: lw a7, 8(a2)
; RV32I-NEXT: lw t0, 12(a1)
+; RV32I-NEXT: lw a7, 8(a2)
; RV32I-NEXT: lw t1, 12(a2)
+; RV32I-NEXT: lw a5, 0(a2)
; RV32I-NEXT: lw a1, 4(a2)
; RV32I-NEXT: sltu a2, a7, a6
; RV32I-NEXT: mv t4, a2
@@ -831,12 +831,12 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
; RV32ZBB-LABEL: abd_ext_i128_undef:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: lw a3, 0(a1)
-; RV32ZBB-NEXT: lw a5, 0(a2)
; RV32ZBB-NEXT: lw a4, 4(a1)
; RV32ZBB-NEXT: lw a6, 8(a1)
-; RV32ZBB-NEXT: lw a7, 8(a2)
; RV32ZBB-NEXT: lw t0, 12(a1)
+; RV32ZBB-NEXT: lw a7, 8(a2)
; RV32ZBB-NEXT: lw t1, 12(a2)
+; RV32ZBB-NEXT: lw a5, 0(a2)
; RV32ZBB-NEXT: lw a1, 4(a2)
; RV32ZBB-NEXT: sltu a2, a7, a6
; RV32ZBB-NEXT: mv t4, a2
@@ -1124,12 +1124,12 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind {
; RV32I-LABEL: abd_minmax_i128:
; RV32I: # %bb.0:
; RV32I-NEXT: lw a3, 0(a1)
-; RV32I-NEXT: lw a5, 0(a2)
; RV32I-NEXT: lw a4, 4(a1)
; RV32I-NEXT: lw a6, 8(a1)
-; RV32I-NEXT: lw a7, 8(a2)
; RV32I-NEXT: lw t0, 12(a1)
+; RV32I-NEXT: lw a7, 8(a2)
; RV32I-NEXT: lw t1, 12(a2)
+; RV32I-NEXT: lw a5, 0(a2)
; RV32I-NEXT: lw a1, 4(a2)
; RV32I-NEXT: sltu a2, a7, a6
; RV32I-NEXT: mv t4, a2
@@ -1220,12 +1220,12 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind {
; RV32ZBB-LABEL: abd_minmax_i128:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: lw a3, 0(a1)
-; RV32ZBB-NEXT: lw a5, 0(a2)
; RV32ZBB-NEXT: lw a4, 4(a1)
; RV32ZBB-NEXT: lw a6, 8(a1)
-; RV32ZBB-NEXT: lw a7, 8(a2)
; RV32ZBB-NEXT: lw t0, 12(a1)
+; RV32ZBB-NEXT: lw a7, 8(a2)
; RV32ZBB-NEXT: lw t1, 12(a2)
+; RV32ZBB-NEXT: lw a5, 0(a2)
; RV32ZBB-NEXT: lw a1, 4(a2)
; RV32ZBB-NEXT: sltu a2, a7, a6
; RV32ZBB-NEXT: mv t4, a2
@@ -1515,12 +1515,12 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
; RV32I-LABEL: abd_cmp_i128:
; RV32I: # %bb.0:
; RV32I-NEXT: lw a3, 0(a1)
-; RV32I-NEXT: lw a5, 0(a2)
; RV32I-NEXT: lw a4, 4(a1)
; RV32I-NEXT: lw a6, 8(a1)
-; RV32I-NEXT: lw a7, 8(a2)
; RV32I-NEXT: lw t0, 12(a1)
+; RV32I-NEXT: lw a7, 8(a2)
; RV32I-NEXT: lw t1, 12(a2)
+; RV32I-NEXT: lw a5, 0(a2)
; RV32I-NEXT: lw a1, 4(a2)
; RV32I-NEXT: sltu a2, a7, a6
; RV32I-NEXT: mv t4, a2
@@ -1611,12 +1611,12 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
; RV32ZBB-LABEL: abd_cmp_i128:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: lw a3, 0(a1)
-; RV32ZBB-NEXT: lw a5, 0(a2)
; RV32ZBB-NEXT: lw a4, 4(a1)
; RV32ZBB-NEXT: lw a6, 8(a1)
-; RV32ZBB-NEXT: lw a7, 8(a2)
; RV32ZBB-NEXT: lw t0, 12(a1)
+; RV32ZBB-NEXT: lw a7, 8(a2)
; RV32ZBB-NEXT: lw t1, 12(a2)
+; RV32ZBB-NEXT: lw a5, 0(a2)
; RV32ZBB-NEXT: lw a1, 4(a2)
; RV32ZBB-NEXT: sltu a2, a7, a6
; RV32ZBB-NEXT: mv t4, a2
@@ -2044,28 +2044,28 @@ define i128 @abd_subnsw_i128(i128 %a, i128 %b) nounwind {
; RV32I-LABEL: abd_subnsw_i128:
; RV32I: # %bb.0:
; RV32I-NEXT: lw a3, 0(a2)
-; RV32I-NEXT: lw a5, 0(a1)
-; RV32I-NEXT: lw t1, 12(a2)
-; RV32I-NEXT: lw a7, 8(a2)
-; RV32I-NEXT: lw t0, 8(a1)
-; RV32I-NEXT: lw t2, 12(a1)
; RV32I-NEXT: lw a4, 4(a2)
-; RV32I-NEXT: lw a6, 4(a1)
-; RV32I-NEXT: sltu a1, t0, a7
-; RV32I-NEXT: sub a2, t2, t1
-; RV32I-NEXT: sltu t1, a5, a3
-; RV32I-NEXT: sub a1, a2, a1
-; RV32I-NEXT: mv a2, t1
-; RV32I-NEXT: beq a6, a4, .LBB31_2
+; RV32I-NEXT: lw a6, 8(a2)
+; RV32I-NEXT: lw t0, 12(a2)
+; RV32I-NEXT: lw a2, 8(a1)
+; RV32I-NEXT: lw t1, 12(a1)
+; RV32I-NEXT: lw a5, 0(a1)
+; RV32I-NEXT: lw a7, 4(a1)
+; RV32I-NEXT: sltu a1, a2, a6
+; RV32I-NEXT: sub t1, t1, t0
+; RV32I-NEXT: sltu t0, a5, a3
+; RV32I-NEXT: sub a1, t1, a1
+; RV32I-NEXT: mv t1, t0
+; RV32I-NEXT: beq a7, a4, .LBB31_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: sltu a2, a6, a4
+; RV32I-NEXT: sltu t1, a7, a4
; RV32I-NEXT: .LBB31_2:
-; RV32I-NEXT: sub a7, t0, a7
-; RV32I-NEXT: sltu t0, a7, a2
-; RV32I-NEXT: sub a1, a1, t0
-; RV32I-NEXT: sub a2, a7, a2
-; RV32I-NEXT: sub a4, a6, a4
-; RV32I-NEXT: sub a4, a4, t1
+; RV32I-NEXT: sub a2, a2, a6
+; RV32I-NEXT: sltu a6, a2, t1
+; RV32I-NEXT: sub a1, a1, a6
+; RV32I-NEXT: sub a2, a2, t1
+; RV32I-NEXT: sub a4, a7, a4
+; RV32I-NEXT: sub a4, a4, t0
; RV32I-NEXT: sub a3, a5, a3
; RV32I-NEXT: bgez a1, .LBB31_4
; RV32I-NEXT: # %bb.3:
@@ -2107,28 +2107,28 @@ define i128 @abd_subnsw_i128(i128 %a, i128 %b) nounwind {
; RV32ZBB-LABEL: abd_subnsw_i128:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: lw a3, 0(a2)
-; RV32ZBB-NEXT: lw a5, 0(a1)
-; RV32ZBB-NEXT: lw t1, 12(a2)
-; RV32ZBB-NEXT: lw a7, 8(a2)
-; RV32ZBB-NEXT: lw t0, 8(a1)
-; RV32ZBB-NEXT: lw t2, 12(a1)
; RV32ZBB-NEXT: lw a4, 4(a2)
-; RV32ZBB-NEXT: lw a6, 4(a1)
-; RV32ZBB-NEXT: sltu a1, t0, a7
-; RV32ZBB-NEXT: sub a2, t2, t1
-; RV32ZBB-NEXT: sltu t1, a5, a3
-; RV32ZBB-NEXT: sub a1, a2, a1
-; RV32ZBB-NEXT: mv a2, t1
-; RV32ZBB-NEXT: beq a6, a4, .LBB31_2
+; RV32ZBB-NEXT: lw a6, 8(a2)
+; RV32ZBB-NEXT: lw t0, 12(a2)
+; RV32ZBB-NEXT: lw a2, 8(a1)
+; RV32ZBB-NEXT: lw t1, 12(a1)
+; RV32ZBB-NEXT: lw a5, 0(a1)
+; RV32ZBB-NEXT: lw a7, 4(a1)
+; RV32ZBB-NEXT: sltu a1, a2, a6
+; RV32ZBB-NEXT: sub t1, t1, t0
+; RV32ZBB-NEXT: sltu t0, a5, a3
+; RV32ZBB-NEXT: sub a1, t1, a1
+; RV32ZBB-NEXT: mv t1, t0
+; RV32ZBB-NEXT: beq a7, a4, .LBB31_2
; RV32ZBB-NEXT: # %bb.1:
-; RV32ZBB-NEXT: sltu a2, a6, a4
+; RV32ZBB-NEXT: sltu t1, a7, a4
; RV32ZBB-NEXT: .LBB31_2:
-; RV32ZBB-NEXT: sub a7, t0, a7
-; RV32ZBB-NEXT: sltu t0, a7, a2
-; RV32ZBB-NEXT: sub a1, a1, t0
-; RV32ZBB-NEXT: sub a2, a7, a2
-; RV32ZBB-NEXT: sub a4, a6, a4
-; RV32ZBB-NEXT: sub a4, a4, t1
+; RV32ZBB-NEXT: sub a2, a2, a6
+; RV32ZBB-NEXT: sltu a6, a2, t1
+; RV32ZBB-NEXT: sub a1, a1, a6
+; RV32ZBB-NEXT: sub a2, a2, t1
+; RV32ZBB-NEXT: sub a4, a7, a4
+; RV32ZBB-NEXT: sub a4, a4, t0
; RV32ZBB-NEXT: sub a3, a5, a3
; RV32ZBB-NEXT: bgez a1, .LBB31_4
; RV32ZBB-NEXT: # %bb.3:
@@ -2175,28 +2175,28 @@ define i128 @abd_subnsw_i128_undef(i128 %a, i128 %b) nounwind {
; RV32I-LABEL: abd_subnsw_i128_undef:
; RV32I: # %bb.0:
; RV32I-NEXT: lw a3, 0(a2)
-; RV32I-NEXT: lw a5, 0(a1)
-; RV32I-NEXT: lw t1, 12(a2)
-; RV32I-NEXT: lw a7, 8(a2)
-; RV32I-NEXT: lw t0, 8(a1)
-; RV32I-NEXT: lw t2, 12(a1)
; RV32I-NEXT: lw a4, 4(a2)
-; RV32I-NEXT: lw a6, 4(a1)
-; RV32I-NEXT: sltu a1, t0, a7
-; RV32I-NEXT: sub a2, t2, t1
-; RV32I-NEXT: sltu t1, a5, a3
-; RV32I-NEXT: sub a1, a2, a1
-; RV32I-NEXT: mv a2, t1
-; RV32I-NEXT: beq a6, a4, .LBB32_2
+; RV32I-NEXT: lw a6, 8(a2)
+; RV32I-NEXT: lw t0, 12(a2)
+; RV32I-NEXT: lw a2, 8(a1)
+; RV32I-NEXT: lw t1, 12(a1)
+; RV32I-NEXT: lw a5, 0(a1)
+; RV32I-NEXT: lw a7, 4(a1)
+; RV32I-NEXT: sltu a1, a2, a6
+; RV32I-NEXT: sub t1, t1, t0
+; RV32I-NEXT: sltu t0, a5, a3
+; RV32I-NEXT: sub a1, t1, a1
+; RV32I-NEXT: mv t1, t0
+; RV32I-NEXT: beq a7, a4, .LBB32_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: sltu a2, a6, a4
+; RV32I-NEXT: sltu t1, a7, a4
; RV32I-NEXT: .LBB32_2:
-; RV32I-NEXT: sub a7, t0, a7
-; RV32I-NEXT: sltu t0, a7, a2
-; RV32I-NEXT: sub a1, a1, t0
-; RV32I-NEXT: sub a2, a7, a2
-; RV32I-NEXT: sub a4, a6, a4
-; RV32I-NEXT: sub a4, a4, t1
+; RV32I-NEXT: sub a2, a2, a6
+; RV32I-NEXT: sltu a6, a2, t1
+; RV32I-NEXT: sub a1, a1, a6
+; RV32I-NEXT: sub a2, a2, t1
+; RV32I-NEXT: sub a4, a7, a4
+; RV32I-NEXT: sub a4, a4, t0
; RV32I-NEXT: sub a3, a5, a3
; RV32I-NEXT: bgez a1, .LBB32_4
; RV32I-NEXT: # %bb.3:
@@ -2238,28 +2238,28 @@ define i128 @abd_subnsw_i128_undef(i128 %a, i128 %b) nounwind {
; RV32ZBB-LABEL: abd_subnsw_i128_undef:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: lw a3, 0(a2)
-; RV32ZBB-NEXT: lw a5, 0(a1)
-; RV32ZBB-NEXT: lw t1, 12(a2)
-; RV32ZBB-NEXT: lw a7, 8(a2)
-; RV32ZBB-NEXT: lw t0, 8(a1)
-; RV32ZBB-NEXT: lw t2, 12(a1)
; RV32ZBB-NEXT: lw a4, 4(a2)
-; RV32ZBB-NEXT: lw a6, 4(a1)
-; RV32ZBB-NEXT: sltu a1, t0, a7
-; RV32ZBB-NEXT: sub a2, t2, t1
-; RV32ZBB-NEXT: sltu t1, a5, a3
-; RV32ZBB-NEXT: sub a1, a2, a1
-; RV32ZBB-NEXT: mv a2, t1
-; RV32ZBB-NEXT: beq a6, a4, .LBB32_2
+; RV32ZBB-NEXT: lw a6, 8(a2)
+; RV32ZBB-NEXT: lw t0, 12(a2)
+; RV32ZBB-NEXT: lw a2, 8(a1)
+; RV32ZBB-NEXT: lw t1, 12(a1)
+; RV32ZBB-NEXT: lw a5, 0(a1)
+; RV32ZBB-NEXT: lw a7, 4(a1)
+; RV32ZBB-NEXT: sltu a1, a2, a6
+; RV32ZBB-NEXT: sub t1, t1, t0
+; RV32ZBB-NEXT: sltu t0, a5, a3
+; RV32ZBB-NEXT: sub a1, t1, a1
+; RV32ZBB-NEXT: mv t1, t0
+; RV32ZBB-NEXT: beq a7, a4, .LBB32_2
; RV32ZBB-NEXT: # %bb.1:
-; RV32ZBB-NEXT: sltu a2, a6, a4
+; RV32ZBB-NEXT: sltu t1, a7, a4
; RV32ZBB-NEXT: .LBB32_2:
-; RV32ZBB-NEXT: sub a7, t0, a7
-; RV32ZBB-NEXT: sltu t0, a7, a2
-; RV32ZBB-NEXT: sub a1, a1, t0
-; RV32ZBB-NEXT: sub a2, a7, a2
-; RV32ZBB-NEXT: sub a4, a6, a4
-; RV32ZBB-NEXT: sub a4, a4, t1
+; RV32ZBB-NEXT: sub a2, a2, a6
+; RV32ZBB-NEXT: sltu a6, a2, t1
+; RV32ZBB-NEXT: sub a1, a1, a6
+; RV32ZBB-NEXT: sub a2, a2, t1
+; RV32ZBB-NEXT: sub a4, a7, a4
+; RV32ZBB-NEXT: sub a4, a4, t0
; RV32ZBB-NEXT: sub a3, a5, a3
; RV32ZBB-NEXT: bgez a1, .LBB32_4
; RV32ZBB-NEXT: # %bb.3:
@@ -2552,10 +2552,10 @@ define i128 @abd_select_i128(i128 %a, i128 %b) nounwind {
; RV32I-LABEL: abd_select_i128:
; RV32I: # %bb.0:
; RV32I-NEXT: lw a7, 4(a2)
-; RV32I-NEXT: lw a3, 4(a1)
; RV32I-NEXT: lw a6, 8(a2)
; RV32I-NEXT: lw t0, 12(a2)
; RV32I-NEXT: lw a5, 12(a1)
+; RV32I-NEXT: lw a3, 4(a1)
; RV32I-NEXT: lw a4, 8(a1)
; RV32I-NEXT: beq a5, t0, .LBB38_2
; RV32I-NEXT: # %bb.1:
@@ -2647,12 +2647,12 @@ define i128 @abd_select_i128(i128 %a, i128 %b) nounwind {
; RV32ZBB-LABEL: abd_select_i128:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: lw a3, 0(a1)
-; RV32ZBB-NEXT: lw a5, 0(a2)
; RV32ZBB-NEXT: lw a4, 4(a1)
; RV32ZBB-NEXT: lw a6, 8(a1)
-; RV32ZBB-NEXT: lw a7, 8(a2)
; RV32ZBB-NEXT: lw t0, 12(a1)
+; RV32ZBB-NEXT: lw a7, 8(a2)
; RV32ZBB-NEXT: lw t1, 12(a2)
+; RV32ZBB-NEXT: lw a5, 0(a2)
; RV32ZBB-NEXT: lw a1, 4(a2)
; RV32ZBB-NEXT: sltu a2, a7, a6
; RV32ZBB-NEXT: mv t4, a2
diff --git a/llvm/test/CodeGen/RISCV/abdu-neg.ll b/llvm/test/CodeGen/RISCV/abdu-neg.ll
index 87a06fc4403eb9..54075f41694392 100644
--- a/llvm/test/CodeGen/RISCV/abdu-neg.ll
+++ b/llvm/test/CodeGen/RISCV/abdu-neg.ll
@@ -624,83 +624,83 @@ define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind {
define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
; RV32I-LABEL: abd_ext_i128:
; RV32I: # %bb.0:
-; RV32I-NEXT: lw a5, 0(a2)
-; RV32I-NEXT: lw a3, 0(a1)
-; RV32I-NEXT: lw t2, 12(a2)
+; RV32I-NEXT: lw a4, 0(a2)
+; RV32I-NEXT: lw a6, 4(a2)
; RV32I-NEXT: lw t1, 8(a2)
-; RV32I-NEXT: lw a4, 8(a1)
-; RV32I-NEXT: lw a6, 12(a1)
-; RV32I-NEXT: lw a7, 4(a2)
+; RV32I-NEXT: lw a2, 12(a2)
+; RV32I-NEXT: lw a3, 8(a1)
+; RV32I-NEXT: lw a5, 12(a1)
+; RV32I-NEXT: lw a7, 0(a1)
; RV32I-NEXT: lw t0, 4(a1)
-; RV32I-NEXT: sltu a1, a4, t1
-; RV32I-NEXT: sub a2, a6, t2
-; RV32I-NEXT: sltu t2, a3, a5
+; RV32I-NEXT: sltu a1, a3, t1
+; RV32I-NEXT: sub a2, a5, a2
+; RV32I-NEXT: sltu t2, a7, a4
; RV32I-NEXT: sub a1, a2, a1
; RV32I-NEXT: mv a2, t2
-; RV32I-NEXT: beq t0, a7, .LBB11_2
+; RV32I-NEXT: beq t0, a6, .LBB11_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: sltu a2, t0, a7
+; RV32I-NEXT: sltu a2, t0, a6
; RV32I-NEXT: .LBB11_2:
-; RV32I-NEXT: sub t1, a4, t1
+; RV32I-NEXT: sub t1, a3, t1
; RV32I-NEXT: sltu t3, t1, a2
; RV32I-NEXT: sub a1, a1, t3
; RV32I-NEXT: sub a2, t1, a2
-; RV32I-NEXT: beq a1, a6, .LBB11_4
+; RV32I-NEXT: beq a1, a5, .LBB11_4
; RV32I-NEXT: # %bb.3:
-; RV32I-NEXT: sltu t1, a6, a1
+; RV32I-NEXT: sltu t1, a5, a1
; RV32I-NEXT: j .LBB11_5
; RV32I-NEXT: .LBB11_4:
-; RV32I-NEXT: sltu t1, a4, a2
+; RV32I-NEXT: sltu t1, a3, a2
; RV32I-NEXT: .LBB11_5:
-; RV32I-NEXT: sub a7, t0, a7
-; RV32I-NEXT: sub a7, a7, t2
-; RV32I-NEXT: sub a5, a3, a5
-; RV32I-NEXT: beq a7, t0, .LBB11_7
+; RV32I-NEXT: sub a6, t0, a6
+; RV32I-NEXT: sub a6, a6, t2
+; RV32I-NEXT: sub t2, a7, a4
+; RV32I-NEXT: beq a6, t0, .LBB11_7
; RV32I-NEXT: # %bb.6:
-; RV32I-NEXT: sltu a3, t0, a7
+; RV32I-NEXT: sltu a4, t0, a6
; RV32I-NEXT: j .LBB11_8
; RV32I-NEXT: .LBB11_7:
-; RV32I-NEXT: sltu a3, a3, a5
+; RV32I-NEXT: sltu a4, a7, t2
; RV32I-NEXT: .LBB11_8:
-; RV32I-NEXT: xor a6, a1, a6
-; RV32I-NEXT: xor a4, a2, a4
-; RV32I-NEXT: or a4, a4, a6
-; RV32I-NEXT: beqz a4, .LBB11_10
+; RV32I-NEXT: xor a5, a1, a5
+; RV32I-NEXT: xor a3, a2, a3
+; RV32I-NEXT: or a3, a3, a5
+; RV32I-NEXT: beqz a3, .LBB11_10
; RV32I-NEXT: # %bb.9:
-; RV32I-NEXT: mv a3, t1
+; RV32I-NEXT: mv a4, t1
; RV32I-NEXT: .LBB11_10:
-; RV32I-NEXT: neg t0, a3
-; RV32I-NEXT: xor a5, a5, t0
+; RV32I-NEXT: neg t0, a4
+; RV32I-NEXT: xor a5, t2, t0
; RV32I-NEXT: sltu t2, a5, t0
-; RV32I-NEXT: xor t3, a7, t0
-; RV32I-NEXT: add a4, t3, a3
-; RV32I-NEXT: sub a4, a4, t2
-; RV32I-NEXT: snez t1, a4
-; RV32I-NEXT: add a5, a5, a3
-; RV32I-NEXT: snez a6, a5
-; RV32I-NEXT: or t1, a6, t1
-; RV32I-NEXT: beqz a7, .LBB11_12
+; RV32I-NEXT: xor t3, a6, t0
+; RV32I-NEXT: add a3, t3, a4
+; RV32I-NEXT: sub a3, a3, t2
+; RV32I-NEXT: snez t1, a3
+; RV32I-NEXT: add a5, a5, a4
+; RV32I-NEXT: snez a7, a5
+; RV32I-NEXT: or t1, a7, t1
+; RV32I-NEXT: beqz a6, .LBB11_12
; RV32I-NEXT: # %bb.11:
; RV32I-NEXT: sltu t2, t3, t0
; RV32I-NEXT: .LBB11_12:
; RV32I-NEXT: xor a2, a2, t0
-; RV32I-NEXT: add a7, a2, a3
-; RV32I-NEXT: sub t3, a7, t2
+; RV32I-NEXT: add a6, a2, a4
+; RV32I-NEXT: sub t3, a6, t2
; RV32I-NEXT: neg t4, t3
; RV32I-NEXT: sltu t5, t4, t1
; RV32I-NEXT: sltu a2, a2, t0
; RV32I-NEXT: xor a1, a1, t0
-; RV32I-NEXT: add a1, a1, a3
+; RV32I-NEXT: add a1, a1, a4
; RV32I-NEXT: sub a1, a1, a2
-; RV32I-NEXT: sltu a2, a7, t2
+; RV32I-NEXT: sltu a2, a6, t2
; RV32I-NEXT: sub a1, a1, a2
; RV32I-NEXT: snez a2, t3
; RV32I-NEXT: add a1, a1, a2
; RV32I-NEXT: neg a1, a1
; RV32I-NEXT: sub a1, a1, t5
; RV32I-NEXT: sub a2, t4, t1
-; RV32I-NEXT: add a4, a4, a6
-; RV32I-NEXT: neg a3, a4
+; RV32I-NEXT: add a3, a3, a7
+; RV32I-NEXT: neg a3, a3
; RV32I-NEXT: neg a4, a5
; RV32I-NEXT: sw a4, 0(a0)
; RV32I-NEXT: sw a3, 4(a0)
@@ -736,83 +736,83 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
;
; RV32ZBB-LABEL: abd_ext_i128:
; RV32ZBB: # %bb.0:
-; RV32ZBB-NEXT: lw a5, 0(a2)
-; RV32ZBB-NEXT: lw a3, 0(a1)
-; RV32ZBB-NEXT: lw t2, 12(a2)
+; RV32ZBB-NEXT: lw a4, 0(a2)
+; RV32ZBB-NEXT: lw a6, 4(a2)
; RV32ZBB-NEXT: lw t1, 8(a2)
-; RV32ZBB-NEXT: lw a4, 8(a1)
-; RV32ZBB-NEXT: lw a6, 12(a1)
-; RV32ZBB-NEXT: lw a7, 4(a2)
+; RV32ZBB-NEXT: lw a2, 12(a2)
+; RV32ZBB-NEXT: lw a3, 8(a1)
+; RV32ZBB-NEXT: lw a5, 12(a1)
+; RV32ZBB-NEXT: lw a7, 0(a1)
; RV32ZBB-NEXT: lw t0, 4(a1)
-; RV32ZBB-NEXT: sltu a1, a4, t1
-; RV32ZBB-NEXT: sub a2, a6, t2
-; RV32ZBB-NEXT: sltu t2, a3, a5
+; RV32ZBB-NEXT: sltu a1, a3, t1
+; RV32ZBB-NEXT: sub a2, a5, a2
+; RV32ZBB-NEXT: sltu t2, a7, a4
; RV32ZBB-NEXT: sub a1, a2, a1
; RV32ZBB-NEXT: mv a2, t2
-; RV32ZBB-NEXT: beq t0, a7, .LBB11_2
+; RV32ZBB-NEXT: beq t0, a6, .LBB11_2
; RV32ZBB-NEXT: # %bb.1:
-; RV32ZBB-NEXT: sltu a2, t0, a7
+; RV32ZBB-NEXT: sltu a2, t0, a6
; RV32ZBB-NEXT: .LBB11_2:
-; RV32ZBB-NEXT: sub t1, a4, t1
+; RV32ZBB-NEXT: sub t1, a3, t1
; RV32ZBB-NEXT: sltu t3, t1, a2
; RV32ZBB-NEXT: sub a1, a1, t3
; RV32ZBB-NEXT: sub a2, t1, a2
-; RV32ZBB-NEXT: beq a1, a6, .LBB11_4
+; RV32ZBB-NEXT: beq a1, a5, .LBB11_4
; RV32ZBB-NEXT: # %bb.3:
-; RV32ZBB-NEXT: sltu t1, a6, a1
+; RV32ZBB-NEXT: sltu t1, a5, a1
; RV32ZBB-NEXT: j .LBB11_5
; RV32ZBB-NEXT: .LBB11_4:
-; RV32ZBB-NEXT: sltu t1, a4, a2
+; RV32ZBB-NEXT: sltu t1, a3, a2
; RV32ZBB-NEXT: .LBB11_5:
-; RV32ZBB-NEXT: sub a7, t0, a7
-; RV32ZBB-NEXT: sub a7, a7, t2
-; RV32ZBB-NEXT: sub a5, a3, a5
-; RV32ZBB-NEXT: beq a7, t0, .LBB11_7
+; RV32ZBB-NEXT: sub a6, t0, a6
+; RV32ZBB-NEXT: sub a6, a6, t2
+; RV32ZBB-NEXT: sub t2, a7, a4
+; RV32ZBB-NEXT: beq a6, t0, .LBB11_7
; RV32ZBB-NEXT: # %bb.6:
-; RV32ZBB-NEXT: sltu a3, t0, a7
+; RV32ZBB-NEXT: sltu a4, t0, a6
; RV32ZBB-NEXT: j .LBB11_8
; RV32ZBB-NEXT: .LBB11_7:
-; RV32ZBB-NEXT: sltu a3, a3, a5
+; RV32ZBB-NEXT: sltu a4, a7, t2
; RV32ZBB-NEXT: .LBB11_8:
-; RV32ZBB-NEXT: xor a6, a1, a6
-; RV32ZBB-NEXT: xor a4, a2, a4
-; RV32ZBB-NEXT: or a4, a4, a6
-; RV32ZBB-NEXT: beqz a4, .LBB11_10
+; RV32ZBB-NEXT: xor a5, a1, a5
+; RV32ZBB-NEXT: xor a3, a2, a3
+; RV32ZBB-NEXT: or a3, a3, a5
+; RV32ZBB-NEXT: beqz a3, .LBB11_10
; RV32ZBB-NEXT: # %bb.9:
-; RV32ZBB-NEXT: mv a3, t1
+; RV32ZBB-NEXT: mv a4, t1
; RV32ZBB-NEXT: .LBB11_10:
-; RV32ZBB-NEXT: neg t0, a3
-; RV32ZBB-NEXT: xor a5, a5, t0
+; RV32ZBB-NEXT: neg t0, a4
+; RV32ZBB-NEXT: xor a5, t2, t0
; RV32ZBB-NEXT: sltu t2, a5, t0
-; RV32ZBB-NEXT: xor t3, a7, t0
-; RV32ZBB-NEXT: add a4, t3, a3
-; RV32ZBB-NEXT: sub a4, a4, t2
-; RV32ZBB-NEXT: snez t1, a4
-; RV32ZBB-NEXT: add a5, a5, a3
-; RV32ZBB-NEXT: snez a6, a5
-; RV32ZBB-NEXT: or t1, a6, t1
-; RV32ZBB-NEXT: beqz a7, .LBB11_12
+; RV32ZBB-NEXT: xor t3, a6, t0
+; RV32ZBB-NEXT: add a3, t3, a4
+; RV32ZBB-NEXT: sub a3, a3, t2
+; RV32ZBB-NEXT: snez t1, a3
+; RV32ZBB-NEXT: add a5, a5, a4
+; RV32ZBB-NEXT: snez a7, a5
+; RV32ZBB-NEXT: or t1, a7, t1
+; RV32ZBB-NEXT: beqz a6, .LBB11_12
; RV32ZBB-NEXT: # %bb.11:
; RV32ZBB-NEXT: sltu t2, t3, t0
; RV32ZBB-NEXT: .LBB11_12:
; RV32ZBB-NEXT: xor a2, a2, t0
-; RV32ZBB-NEXT: add a7, a2, a3
-; RV32ZBB-NEXT: sub t3, a7, t2
+; RV32ZBB-NEXT: add a6, a2, a4
+; RV32ZBB-NEXT: sub t3, a6, t2
; RV32ZBB-NEXT: neg t4, t3
; RV32ZBB-NEXT: sltu t5, t4, t1
; RV32ZBB-NEXT: sltu a2, a2, t0
; RV32ZBB-NEXT: xor a1, a1, t0
-; RV32ZBB-NEXT: add a1, a1, a3
+; RV32ZBB-NEXT: add a1, a1, a4
; RV32ZBB-NEXT: sub a1, a1, a2
-; RV32ZBB-NEXT: sltu a2, a7, t2
+; RV32ZBB-NEXT: sltu a2, a6, t2
; RV32ZBB-NEXT: sub a1, a1, a2
; RV32ZBB-NEXT: snez a2, t3
; RV32ZBB-NEXT: add a1, a1, a2
; RV32ZBB-NEXT: neg a1, a1
; RV32ZBB-NEXT: sub a1, a1, t5
; RV32ZBB-NEXT: sub a2, t4, t1
-; RV32ZBB-NEXT: add a4, a4, a6
-; RV32ZBB-NEXT: neg a3, a4
+; RV32ZBB-NEXT: add a3, a3, a7
+; RV32ZBB-NEXT: neg a3, a3
; RV32ZBB-NEXT: neg a4, a5
; RV32ZBB-NEXT: sw a4, 0(a0)
; RV32ZBB-NEXT: sw a3, 4(a0)
@@ -857,83 +857,83 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
; RV32I-LABEL: abd_ext_i128_undef:
; RV32I: # %bb.0:
-; RV32I-NEXT: lw a5, 0(a2)
-; RV32I-NEXT: lw a3, 0(a1)
-; RV32I-NEXT: lw t2, 12(a2)
+; RV32I-NEXT: lw a4, 0(a2)
+; RV32I-NEXT: lw a6, 4(a2)
; RV32I-NEXT: lw t1, 8(a2)
-; RV32I-NEXT: lw a4, 8(a1)
-; RV32I-NEXT: lw a6, 12(a1)
-; RV32I-NEXT: lw a7, 4(a2)
+; RV32I-NEXT: lw a2, 12(a2)
+; RV32I-NEXT: lw a3, 8(a1)
+; RV32I-NEXT: lw a5, 12(a1)
+; RV32I-NEXT: lw a7, 0(a1)
; RV32I-NEXT: lw t0, 4(a1)
-; RV32I-NEXT: sltu a1, a4, t1
-; RV32I-NEXT: sub a2, a6, t2
-; RV32I-NEXT: sltu t2, a3, a5
+; RV32I-NEXT: sltu a1, a3, t1
+; RV32I-NEXT: sub a2, a5, a2
+; RV32I-NEXT: sltu t2, a7, a4
; RV32I-NEXT: sub a1, a2, a1
; RV32I-NEXT: mv a2, t2
-; RV32I-NEXT: beq t0, a7, .LBB12_2
+; RV32I-NEXT: beq t0, a6, .LBB12_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: sltu a2, t0, a7
+; RV32I-NEXT: sltu a2, t0, a6
; RV32I-NEXT: .LBB12_2:
-; RV32I-NEXT: sub t1, a4, t1
+; RV32I-NEXT: sub t1, a3, t1
; RV32I-NEXT: sltu t3, t1, a2
; RV32I-NEXT: sub a1, a1, t3
; RV32I-NEXT: sub a2, t1, a2
-; RV32I-NEXT: beq a1, a6, .LBB12_4
+; RV32I-NEXT: beq a1, a5, .LBB12_4
; RV32I-NEXT: # %bb.3:
-; RV32I-NEXT: sltu t1, a6, a1
+; RV32I-NEXT: sltu t1, a5, a1
; RV32I-NEXT: j .LBB12_5
; RV32I-NEXT: .LBB12_4:
-; RV32I-NEXT: sltu t1, a4, a2
+; RV32I-NEXT: sltu t1, a3, a2
; RV32I-NEXT: .LBB12_5:
-; RV32I-NEXT: sub a7, t0, a7
-; RV32I-NEXT: sub a7, a7, t2
-; RV32I-NEXT: sub a5, a3, a5
-; RV32I-NEXT: beq a7, t0, .LBB12_7
+; RV32I-NEXT: sub a6, t0, a6
+; RV32I-NEXT: sub a6, a6, t2
+; RV32I-NEXT: sub t2, a7, a4
+; RV32I-NEXT: beq a6, t0, .LBB12_7
; RV32I-NEXT: # %bb.6:
-; RV32I-NEXT: sltu a3, t0, a7
+; RV32I-NEXT: sltu a4, t0, a6
; RV32I-NEXT: j .LBB12_8
; RV32I-NEXT: .LBB12_7:
-; RV32I-NEXT: sltu a3, a3, a5
+; RV32I-NEXT: sltu a4, a7, t2
; RV32I-NEXT: .LBB12_8:
-; RV32I-NEXT: xor a6, a1, a6
-; RV32I-NEXT: xor a4, a2, a4
-; RV32I-NEXT: or a4, a4, a6
-; RV32I-NEXT: beqz a4, .LBB12_10
+; RV32I-NEXT: xor a5, a1, a5
+; RV32I-NEXT: xor a3, a2, a3
+; RV32I-NEXT: or a3, a3, a5
+; RV32I-NEXT: beqz a3, .LBB12_10
; RV32I-NEXT: # %bb.9:
-; RV32I-NEXT: mv a3, t1
+; RV32I-NEXT: mv a4, t1
; RV32I-NEXT: .LBB12_10:
-; RV32I-NEXT: neg t0, a3
-; RV32I-NEXT: xor a5, a5, t0
+; RV32I-NEXT: neg t0, a4
+; RV32I-NEXT: xor a5, t2, t0
; RV32I-NEXT: sltu t2, a5, t0
-; RV32I-NEXT: xor t3, a7, t0
-; RV32I-NEXT: add a4, t3, a3
-; RV32I-NEXT: sub a4, a4, t2
-; RV32I-NEXT: snez t1, a4
-; RV32I-NEXT: add a5, a5, a3
-; RV32I-NEXT: snez a6, a5
-; RV32I-NEXT: or t1, a6, t1
-; RV32I-NEXT: beqz a7, .LBB12_12
+; RV32I-NEXT: xor t3, a6, t0
+; RV32I-NEXT: add a3, t3, a4
+; RV32I-NEXT: sub a3, a3, t2
+; RV32I-NEXT: snez t1, a3
+; RV32I-NEXT: add a5, a5, a4
+; RV32I-NEXT: snez a7, a5
+; RV32I-NEXT: or t1, a7, t1
+; RV32I-NEXT: beqz a6, .LBB12_12
; RV32I-NEXT: # %bb.11:
; RV32I-NEXT: sltu t2, t3, t0
; RV32I-NEXT: .LBB12_12:
; RV32I-NEXT: xor a2, a2, t0
-; RV32I-NEXT: add a7, a2, a3
-; RV32I-NEXT: sub t3, a7, t2
+; RV32I-NEXT: add a6, a2, a4
+; RV32I-NEXT: sub t3, a6, t2
; RV32I-NEXT: neg t4, t3
; RV32I-NEXT: sltu t5, t4, t1
; RV32I-NEXT: sltu a2, a2, t0
; RV32I-NEXT: xor a1, a1, t0
-; RV32I-NEXT: add a1, a1, a3
+; RV32I-NEXT: add a1, a1, a4
; RV32I-NEXT: sub a1, a1, a2
-; RV32I-NEXT: sltu a2, a7, t2
+; RV32I-NEXT: sltu a2, a6, t2
; RV32I-NEXT: sub a1, a1, a2
; RV32I-NEXT: snez a2, t3
; RV32I-NEXT: add a1, a1, a2
; RV32I-NEXT: neg a1, a1
; RV32I-NEXT: sub a1, a1, t5
; RV32I-NEXT: sub a2, t4, t1
-; RV32I-NEXT: add a4, a4, a6
-; RV32I-NEXT: neg a3, a4
+; RV32I-NEXT: add a3, a3, a7
+; RV32I-NEXT: neg a3, a3
; RV32I-NEXT: neg a4, a5
; RV32I-NEXT: sw a4, 0(a0)
; RV32I-NEXT: sw a3, 4(a0)
@@ -969,83 +969,83 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
;
; RV32ZBB-LABEL: abd_ext_i128_undef:
; RV32ZBB: # %bb.0:
-; RV32ZBB-NEXT: lw a5, 0(a2)
-; RV32ZBB-NEXT: lw a3, 0(a1)
-; RV32ZBB-NEXT: lw t2, 12(a2)
+; RV32ZBB-NEXT: lw a4, 0(a2)
+; RV32ZBB-NEXT: lw a6, 4(a2)
; RV32ZBB-NEXT: lw t1, 8(a2)
-; RV32ZBB-NEXT: lw a4, 8(a1)
-; RV32ZBB-NEXT: lw a6, 12(a1)
-; RV32ZBB-NEXT: lw a7, 4(a2)
+; RV32ZBB-NEXT: lw a2, 12(a2)
+; RV32ZBB-NEXT: lw a3, 8(a1)
+; RV32ZBB-NEXT: lw a5, 12(a1)
+; RV32ZBB-NEXT: lw a7, 0(a1)
; RV32ZBB-NEXT: lw t0, 4(a1)
-; RV32ZBB-NEXT: sltu a1, a4, t1
-; RV32ZBB-NEXT: sub a2, a6, t2
-; RV32ZBB-NEXT: sltu t2, a3, a5
+; RV32ZBB-NEXT: sltu a1, a3, t1
+; RV32ZBB-NEXT: sub a2, a5, a2
+; RV32ZBB-NEXT: sltu t2, a7, a4
; RV32ZBB-NEXT: sub a1, a2, a1
; RV32ZBB-NEXT: mv a2, t2
-; RV32ZBB-NEXT: beq t0, a7, .LBB12_2
+; RV32ZBB-NEXT: beq t0, a6, .LBB12_2
; RV32ZBB-NEXT: # %bb.1:
-; RV32ZBB-NEXT: sltu a2, t0, a7
+; RV32ZBB-NEXT: sltu a2, t0, a6
; RV32ZBB-NEXT: .LBB12_2:
-; RV32ZBB-NEXT: sub t1, a4, t1
+; RV32ZBB-NEXT: sub t1, a3, t1
; RV32ZBB-NEXT: sltu t3, t1, a2
; RV32ZBB-NEXT: sub a1, a1, t3
; RV32ZBB-NEXT: sub a2, t1, a2
-; RV32ZBB-NEXT: beq a1, a6, .LBB12_4
+; RV32ZBB-NEXT: beq a1, a5, .LBB12_4
; RV32ZBB-NEXT: # %bb.3:
-; RV32ZBB-NEXT: sltu t1, a6, a1
+; RV32ZBB-NEXT: sltu t1, a5, a1
; RV32ZBB-NEXT: j .LBB12_5
; RV32ZBB-NEXT: .LBB12_4:
-; RV32ZBB-NEXT: sltu t1, a4, a2
+; RV32ZBB-NEXT: sltu t1, a3, a2
; RV32ZBB-NEXT: .LBB12_5:
-; RV32ZBB-NEXT: sub a7, t0, a7
-; RV32ZBB-NEXT: sub a7, a7, t2
-; RV32ZBB-NEXT: sub a5, a3, a5
-; RV32ZBB-NEXT: beq a7, t0, .LBB12_7
+; RV32ZBB-NEXT: sub a6, t0, a6
+; RV32ZBB-NEXT: sub a6, a6, t2
+; RV32ZBB-NEXT: sub t2, a7, a4
+; RV32ZBB-NEXT: beq a6, t0, .LBB12_7
; RV32ZBB-NEXT: # %bb.6:
-; RV32ZBB-NEXT: sltu a3, t0, a7
+; RV32ZBB-NEXT: sltu a4, t0, a6
; RV32ZBB-NEXT: j .LBB12_8
; RV32ZBB-NEXT: .LBB12_7:
-; RV32ZBB-NEXT: sltu a3, a3, a5
+; RV32ZBB-NEXT: sltu a4, a7, t2
; RV32ZBB-NEXT: .LBB12_8:
-; RV32ZBB-NEXT: xor a6, a1, a6
-; RV32ZBB-NEXT: xor a4, a2, a4
-; RV32ZBB-NEXT: or a4, a4, a6
-; RV32ZBB-NEXT: beqz a4, .LBB12_10
+; RV32ZBB-NEXT: xor a5, a1, a5
+; RV32ZBB-NEXT: xor a3, a2, a3
+; RV32ZBB-NEXT: or a3, a3, a5
+; RV32ZBB-NEXT: beqz a3, .LBB12_10
; RV32ZBB-NEXT: # %bb.9:
-; RV32ZBB-NEXT: mv a3, t1
+; RV32ZBB-NEXT: mv a4, t1
; RV32ZBB-NEXT: .LBB12_10:
-; RV32ZBB-NEXT: neg t0, a3
-; RV32ZBB-NEXT: xor a5, a5, t0
+; RV32ZBB-NEXT: neg t0, a4
+; RV32ZBB-NEXT: xor a5, t2, t0
; RV32ZBB-NEXT: sltu t2, a5, t0
-; RV32ZBB-NEXT: xor t3, a7, t0
-; RV32ZBB-NEXT: add a4, t3, a3
-; RV32ZBB-NEXT: sub a4, a4, t2
-; RV32ZBB-NEXT: snez t1, a4
-; RV32ZBB-NEXT: add a5, a5, a3
-; RV32ZBB-NEXT: snez a6, a5
-; RV32ZBB-NEXT: or t1, a6, t1
-; RV32ZBB-NEXT: beqz a7, .LBB12_12
+; RV32ZBB-NEXT: xor t3, a6, t0
+; RV32ZBB-NEXT: add a3, t3, a4
+; RV32ZBB-NEXT: sub a3, a3, t2
+; RV32ZBB-NEXT: snez t1, a3
+; RV32ZBB-NEXT: add a5, a5, a4
+; RV32ZBB-NEXT: snez a7, a5
+; RV32ZBB-NEXT: or t1, a7, t1
+; RV32ZBB-NEXT: beqz a6, .LBB12_12
; RV32ZBB-NEXT: # %bb.11:
; RV32ZBB-NEXT: sltu t2, t3, t0
; RV32ZBB-NEXT: .LBB12_12:
; RV32ZBB-NEXT: xor a2, a2, t0
-; RV32ZBB-NEXT: add a7, a2, a3
-; RV32ZBB-NEXT: sub t3, a7, t2
+; RV32ZBB-NEXT: add a6, a2, a4
+; RV32ZBB-NEXT: sub t3, a6, t2
; RV32ZBB-NEXT: neg t4, t3
; RV32ZBB-NEXT: sltu t5, t4, t1
; RV32ZBB-NEXT: sltu a2, a2, t0
; RV32ZBB-NEXT: xor a1, a1, t0
-; RV32ZBB-NEXT: add a1, a1, a3
+; RV32ZBB-NEXT: add a1, a1, a4
; RV32ZBB-NEXT: sub a1, a1, a2
-; RV32ZBB-NEXT: sltu a2, a7, t2
+; RV32ZBB-NEXT: sltu a2, a6, t2
; RV32ZBB-NEXT: sub a1, a1, a2
; RV32ZBB-NEXT: snez a2, t3
; RV32ZBB-NEXT: add a1, a1, a2
; RV32ZBB-NEXT: neg a1, a1
; RV32ZBB-NEXT: sub a1, a1, t5
; RV32ZBB-NEXT: sub a2, t4, t1
-; RV32ZBB-NEXT: add a4, a4, a6
-; RV32ZBB-NEXT: neg a3, a4
+; RV32ZBB-NEXT: add a3, a3, a7
+; RV32ZBB-NEXT: neg a3, a3
; RV32ZBB-NEXT: neg a4, a5
; RV32ZBB-NEXT: sw a4, 0(a0)
; RV32ZBB-NEXT: sw a3, 4(a0)
@@ -1336,10 +1336,10 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind {
; RV32I-LABEL: abd_minmax_i128:
; RV32I: # %bb.0:
; RV32I-NEXT: lw a6, 4(a2)
-; RV32I-NEXT: lw a3, 4(a1)
; RV32I-NEXT: lw a7, 8(a2)
; RV32I-NEXT: lw t0, 12(a2)
; RV32I-NEXT: lw a5, 12(a1)
+; RV32I-NEXT: lw a3, 4(a1)
; RV32I-NEXT: lw a4, 8(a1)
; RV32I-NEXT: beq a5, t0, .LBB17_2
; RV32I-NEXT: # %bb.1:
@@ -1463,10 +1463,10 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind {
; RV32ZBB-LABEL: abd_minmax_i128:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: lw a6, 4(a2)
-; RV32ZBB-NEXT: lw a3, 4(a1)
; RV32ZBB-NEXT: lw a7, 8(a2)
; RV32ZBB-NEXT: lw t0, 12(a2)
; RV32ZBB-NEXT: lw a5, 12(a1)
+; RV32ZBB-NEXT: lw a3, 4(a1)
; RV32ZBB-NEXT: lw a4, 8(a1)
; RV32ZBB-NEXT: beq a5, t0, .LBB17_2
; RV32ZBB-NEXT: # %bb.1:
@@ -1798,67 +1798,67 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
; RV32I-LABEL: abd_cmp_i128:
; RV32I: # %bb.0:
; RV32I-NEXT: lw a3, 0(a2)
-; RV32I-NEXT: lw a4, 0(a1)
-; RV32I-NEXT: lw a5, 4(a2)
-; RV32I-NEXT: lw a6, 8(a2)
-; RV32I-NEXT: lw a7, 8(a1)
-; RV32I-NEXT: lw a2, 12(a2)
+; RV32I-NEXT: lw a4, 4(a2)
+; RV32I-NEXT: lw a5, 8(a2)
+; RV32I-NEXT: lw a7, 12(a2)
+; RV32I-NEXT: lw a6, 8(a1)
; RV32I-NEXT: lw t0, 12(a1)
+; RV32I-NEXT: lw a2, 0(a1)
; RV32I-NEXT: lw a1, 4(a1)
-; RV32I-NEXT: sltu t1, a7, a6
+; RV32I-NEXT: sltu t1, a6, a5
; RV32I-NEXT: mv t4, t1
-; RV32I-NEXT: beq t0, a2, .LBB22_2
+; RV32I-NEXT: beq t0, a7, .LBB22_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: sltu t4, t0, a2
+; RV32I-NEXT: sltu t4, t0, a7
; RV32I-NEXT: .LBB22_2:
-; RV32I-NEXT: sltu t2, a4, a3
+; RV32I-NEXT: sltu t2, a2, a3
; RV32I-NEXT: mv t3, t2
-; RV32I-NEXT: beq a1, a5, .LBB22_4
+; RV32I-NEXT: beq a1, a4, .LBB22_4
; RV32I-NEXT: # %bb.3:
-; RV32I-NEXT: sltu t3, a1, a5
+; RV32I-NEXT: sltu t3, a1, a4
; RV32I-NEXT: .LBB22_4:
-; RV32I-NEXT: xor t5, t0, a2
-; RV32I-NEXT: xor t6, a7, a6
+; RV32I-NEXT: xor t5, t0, a7
+; RV32I-NEXT: xor t6, a6, a5
; RV32I-NEXT: or t5, t6, t5
; RV32I-NEXT: mv t6, t3
; RV32I-NEXT: beqz t5, .LBB22_6
; RV32I-NEXT: # %bb.5:
; RV32I-NEXT: mv t6, t4
; RV32I-NEXT: .LBB22_6:
-; RV32I-NEXT: sltu t4, a3, a4
+; RV32I-NEXT: sltu t4, a3, a2
; RV32I-NEXT: mv t5, t4
-; RV32I-NEXT: beq a1, a5, .LBB22_8
+; RV32I-NEXT: beq a1, a4, .LBB22_8
; RV32I-NEXT: # %bb.7:
-; RV32I-NEXT: sltu t5, a5, a1
+; RV32I-NEXT: sltu t5, a4, a1
; RV32I-NEXT: .LBB22_8:
; RV32I-NEXT: bnez t6, .LBB22_10
; RV32I-NEXT: # %bb.9:
-; RV32I-NEXT: sltu t1, a6, a7
-; RV32I-NEXT: sub a2, a2, t0
-; RV32I-NEXT: sub a2, a2, t1
-; RV32I-NEXT: sub a6, a6, a7
-; RV32I-NEXT: sltu a7, a6, t5
-; RV32I-NEXT: sub a2, a2, a7
+; RV32I-NEXT: sltu t1, a5, a6
+; RV32I-NEXT: sub a7, a7, t0
+; RV32I-NEXT: sub a7, a7, t1
+; RV32I-NEXT: sub a6, a5, a6
+; RV32I-NEXT: sltu a5, a6, t5
+; RV32I-NEXT: sub a5, a7, a5
; RV32I-NEXT: sub a6, a6, t5
-; RV32I-NEXT: sub a5, a5, a1
-; RV32I-NEXT: sub a1, a5, t4
-; RV32I-NEXT: sub a3, a3, a4
+; RV32I-NEXT: sub a4, a4, a1
+; RV32I-NEXT: sub a1, a4, t4
+; RV32I-NEXT: sub a2, a3, a2
; RV32I-NEXT: j .LBB22_11
; RV32I-NEXT: .LBB22_10:
-; RV32I-NEXT: sub a2, t0, a2
-; RV32I-NEXT: sub a6, a7, a6
-; RV32I-NEXT: sub a2, a2, t1
+; RV32I-NEXT: sub a7, t0, a7
+; RV32I-NEXT: sub a6, a6, a5
+; RV32I-NEXT: sub a5, a7, t1
; RV32I-NEXT: sltu a7, a6, t3
-; RV32I-NEXT: sub a1, a1, a5
-; RV32I-NEXT: sub a2, a2, a7
+; RV32I-NEXT: sub a1, a1, a4
+; RV32I-NEXT: sub a5, a5, a7
; RV32I-NEXT: sub a6, a6, t3
; RV32I-NEXT: sub a1, a1, t2
-; RV32I-NEXT: sub a3, a4, a3
+; RV32I-NEXT: sub a2, a2, a3
; RV32I-NEXT: .LBB22_11:
; RV32I-NEXT: sw a6, 8(a0)
; RV32I-NEXT: sw a1, 4(a0)
-; RV32I-NEXT: sw a3, 0(a0)
-; RV32I-NEXT: sw a2, 12(a0)
+; RV32I-NEXT: sw a2, 0(a0)
+; RV32I-NEXT: sw a5, 12(a0)
; RV32I-NEXT: ret
;
; RV64I-LABEL: abd_cmp_i128:
@@ -1885,67 +1885,67 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
; RV32ZBB-LABEL: abd_cmp_i128:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: lw a3, 0(a2)
-; RV32ZBB-NEXT: lw a4, 0(a1)
-; RV32ZBB-NEXT: lw a5, 4(a2)
-; RV32ZBB-NEXT: lw a6, 8(a2)
-; RV32ZBB-NEXT: lw a7, 8(a1)
-; RV32ZBB-NEXT: lw a2, 12(a2)
+; RV32ZBB-NEXT: lw a4, 4(a2)
+; RV32ZBB-NEXT: lw a5, 8(a2)
+; RV32ZBB-NEXT: lw a7, 12(a2)
+; RV32ZBB-NEXT: lw a6, 8(a1)
; RV32ZBB-NEXT: lw t0, 12(a1)
+; RV32ZBB-NEXT: lw a2, 0(a1)
; RV32ZBB-NEXT: lw a1, 4(a1)
-; RV32ZBB-NEXT: sltu t1, a7, a6
+; RV32ZBB-NEXT: sltu t1, a6, a5
; RV32ZBB-NEXT: mv t4, t1
-; RV32ZBB-NEXT: beq t0, a2, .LBB22_2
+; RV32ZBB-NEXT: beq t0, a7, .LBB22_2
; RV32ZBB-NEXT: # %bb.1:
-; RV32ZBB-NEXT: sltu t4, t0, a2
+; RV32ZBB-NEXT: sltu t4, t0, a7
; RV32ZBB-NEXT: .LBB22_2:
-; RV32ZBB-NEXT: sltu t2, a4, a3
+; RV32ZBB-NEXT: sltu t2, a2, a3
; RV32ZBB-NEXT: mv t3, t2
-; RV32ZBB-NEXT: beq a1, a5, .LBB22_4
+; RV32ZBB-NEXT: beq a1, a4, .LBB22_4
; RV32ZBB-NEXT: # %bb.3:
-; RV32ZBB-NEXT: sltu t3, a1, a5
+; RV32ZBB-NEXT: sltu t3, a1, a4
; RV32ZBB-NEXT: .LBB22_4:
-; RV32ZBB-NEXT: xor t5, t0, a2
-; RV32ZBB-NEXT: xor t6, a7, a6
+; RV32ZBB-NEXT: xor t5, t0, a7
+; RV32ZBB-NEXT: xor t6, a6, a5
; RV32ZBB-NEXT: or t5, t6, t5
; RV32ZBB-NEXT: mv t6, t3
; RV32ZBB-NEXT: beqz t5, .LBB22_6
; RV32ZBB-NEXT: # %bb.5:
; RV32ZBB-NEXT: mv t6, t4
; RV32ZBB-NEXT: .LBB22_6:
-; RV32ZBB-NEXT: sltu t4, a3, a4
+; RV32ZBB-NEXT: sltu t4, a3, a2
; RV32ZBB-NEXT: mv t5, t4
-; RV32ZBB-NEXT: beq a1, a5, .LBB22_8
+; RV32ZBB-NEXT: beq a1, a4, .LBB22_8
; RV32ZBB-NEXT: # %bb.7:
-; RV32ZBB-NEXT: sltu t5, a5, a1
+; RV32ZBB-NEXT: sltu t5, a4, a1
; RV32ZBB-NEXT: .LBB22_8:
; RV32ZBB-NEXT: bnez t6, .LBB22_10
; RV32ZBB-NEXT: # %bb.9:
-; RV32ZBB-NEXT: sltu t1, a6, a7
-; RV32ZBB-NEXT: sub a2, a2, t0
-; RV32ZBB-NEXT: sub a2, a2, t1
-; RV32ZBB-NEXT: sub a6, a6, a7
-; RV32ZBB-NEXT: sltu a7, a6, t5
-; RV32ZBB-NEXT: sub a2, a2, a7
+; RV32ZBB-NEXT: sltu t1, a5, a6
+; RV32ZBB-NEXT: sub a7, a7, t0
+; RV32ZBB-NEXT: sub a7, a7, t1
+; RV32ZBB-NEXT: sub a6, a5, a6
+; RV32ZBB-NEXT: sltu a5, a6, t5
+; RV32ZBB-NEXT: sub a5, a7, a5
; RV32ZBB-NEXT: sub a6, a6, t5
-; RV32ZBB-NEXT: sub a5, a5, a1
-; RV32ZBB-NEXT: sub a1, a5, t4
-; RV32ZBB-NEXT: sub a3, a3, a4
+; RV32ZBB-NEXT: sub a4, a4, a1
+; RV32ZBB-NEXT: sub a1, a4, t4
+; RV32ZBB-NEXT: sub a2, a3, a2
; RV32ZBB-NEXT: j .LBB22_11
; RV32ZBB-NEXT: .LBB22_10:
-; RV32ZBB-NEXT: sub a2, t0, a2
-; RV32ZBB-NEXT: sub a6, a7, a6
-; RV32ZBB-NEXT: sub a2, a2, t1
+; RV32ZBB-NEXT: sub a7, t0, a7
+; RV32ZBB-NEXT: sub a6, a6, a5
+; RV32ZBB-NEXT: sub a5, a7, t1
; RV32ZBB-NEXT: sltu a7, a6, t3
-; RV32ZBB-NEXT: sub a1, a1, a5
-; RV32ZBB-NEXT: sub a2, a2, a7
+; RV32ZBB-NEXT: sub a1, a1, a4
+; RV32ZBB-NEXT: sub a5, a5, a7
; RV32ZBB-NEXT: sub a6, a6, t3
; RV32ZBB-NEXT: sub a1, a1, t2
-; RV32ZBB-NEXT: sub a3, a4, a3
+; RV32ZBB-NEXT: sub a2, a2, a3
; RV32ZBB-NEXT: .LBB22_11:
; RV32ZBB-NEXT: sw a6, 8(a0)
; RV32ZBB-NEXT: sw a1, 4(a0)
-; RV32ZBB-NEXT: sw a3, 0(a0)
-; RV32ZBB-NEXT: sw a2, 12(a0)
+; RV32ZBB-NEXT: sw a2, 0(a0)
+; RV32ZBB-NEXT: sw a5, 12(a0)
; RV32ZBB-NEXT: ret
;
; RV64ZBB-LABEL: abd_cmp_i128:
diff --git a/llvm/test/CodeGen/RISCV/abdu.ll b/llvm/test/CodeGen/RISCV/abdu.ll
index a9f933243f679a..a04a800157dbb1 100644
--- a/llvm/test/CodeGen/RISCV/abdu.ll
+++ b/llvm/test/CodeGen/RISCV/abdu.ll
@@ -540,75 +540,75 @@ define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind {
define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
; RV32I-LABEL: abd_ext_i128:
; RV32I: # %bb.0:
-; RV32I-NEXT: lw a5, 0(a2)
-; RV32I-NEXT: lw a3, 0(a1)
-; RV32I-NEXT: lw t1, 12(a2)
-; RV32I-NEXT: lw a7, 8(a2)
-; RV32I-NEXT: lw a4, 8(a1)
-; RV32I-NEXT: lw a6, 12(a1)
-; RV32I-NEXT: lw t0, 4(a2)
+; RV32I-NEXT: lw a3, 0(a2)
+; RV32I-NEXT: lw a5, 4(a2)
+; RV32I-NEXT: lw a6, 8(a2)
+; RV32I-NEXT: lw a7, 12(a2)
+; RV32I-NEXT: lw a2, 8(a1)
+; RV32I-NEXT: lw a4, 12(a1)
+; RV32I-NEXT: lw t0, 0(a1)
; RV32I-NEXT: lw a1, 4(a1)
-; RV32I-NEXT: sltu a2, a4, a7
-; RV32I-NEXT: sub t1, a6, t1
-; RV32I-NEXT: sltu t2, a3, a5
-; RV32I-NEXT: sub a2, t1, a2
+; RV32I-NEXT: sltu t1, a2, a6
+; RV32I-NEXT: sub a7, a4, a7
+; RV32I-NEXT: sltu t2, t0, a3
+; RV32I-NEXT: sub a7, a7, t1
; RV32I-NEXT: mv t1, t2
-; RV32I-NEXT: beq a1, t0, .LBB11_2
+; RV32I-NEXT: beq a1, a5, .LBB11_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: sltu t1, a1, t0
+; RV32I-NEXT: sltu t1, a1, a5
; RV32I-NEXT: .LBB11_2:
-; RV32I-NEXT: sub a7, a4, a7
-; RV32I-NEXT: sltu t3, a7, t1
-; RV32I-NEXT: sub a2, a2, t3
-; RV32I-NEXT: sub a7, a7, t1
-; RV32I-NEXT: beq a2, a6, .LBB11_4
+; RV32I-NEXT: sub t3, a2, a6
+; RV32I-NEXT: sltu a6, t3, t1
+; RV32I-NEXT: sub a6, a7, a6
+; RV32I-NEXT: sub a7, t3, t1
+; RV32I-NEXT: beq a6, a4, .LBB11_4
; RV32I-NEXT: # %bb.3:
-; RV32I-NEXT: sltu t1, a6, a2
+; RV32I-NEXT: sltu t1, a4, a6
; RV32I-NEXT: j .LBB11_5
; RV32I-NEXT: .LBB11_4:
-; RV32I-NEXT: sltu t1, a4, a7
+; RV32I-NEXT: sltu t1, a2, a7
; RV32I-NEXT: .LBB11_5:
-; RV32I-NEXT: sub t0, a1, t0
-; RV32I-NEXT: sub t0, t0, t2
-; RV32I-NEXT: sub a5, a3, a5
-; RV32I-NEXT: beq t0, a1, .LBB11_7
+; RV32I-NEXT: sub a5, a1, a5
+; RV32I-NEXT: sub a5, a5, t2
+; RV32I-NEXT: sub a3, t0, a3
+; RV32I-NEXT: beq a5, a1, .LBB11_7
; RV32I-NEXT: # %bb.6:
-; RV32I-NEXT: sltu a1, a1, t0
+; RV32I-NEXT: sltu a1, a1, a5
; RV32I-NEXT: j .LBB11_8
; RV32I-NEXT: .LBB11_7:
-; RV32I-NEXT: sltu a1, a3, a5
+; RV32I-NEXT: sltu a1, t0, a3
; RV32I-NEXT: .LBB11_8:
-; RV32I-NEXT: xor a3, a2, a6
-; RV32I-NEXT: xor a4, a7, a4
-; RV32I-NEXT: or a3, a4, a3
-; RV32I-NEXT: beqz a3, .LBB11_10
+; RV32I-NEXT: xor a4, a6, a4
+; RV32I-NEXT: xor a2, a7, a2
+; RV32I-NEXT: or a2, a2, a4
+; RV32I-NEXT: beqz a2, .LBB11_10
; RV32I-NEXT: # %bb.9:
; RV32I-NEXT: mv a1, t1
; RV32I-NEXT: .LBB11_10:
-; RV32I-NEXT: neg a6, a1
-; RV32I-NEXT: xor a3, a7, a6
-; RV32I-NEXT: sltu a4, a3, a6
-; RV32I-NEXT: xor a2, a2, a6
-; RV32I-NEXT: add a2, a2, a1
-; RV32I-NEXT: sub a4, a2, a4
-; RV32I-NEXT: xor a2, a5, a6
-; RV32I-NEXT: sltu a5, a2, a6
-; RV32I-NEXT: xor a7, t0, a6
-; RV32I-NEXT: mv t1, a5
-; RV32I-NEXT: beqz t0, .LBB11_12
+; RV32I-NEXT: neg t0, a1
+; RV32I-NEXT: xor a2, a7, t0
+; RV32I-NEXT: sltu a4, a2, t0
+; RV32I-NEXT: xor a6, a6, t0
+; RV32I-NEXT: add a6, a6, a1
+; RV32I-NEXT: sub a4, a6, a4
+; RV32I-NEXT: xor a3, a3, t0
+; RV32I-NEXT: sltu a6, a3, t0
+; RV32I-NEXT: xor a7, a5, t0
+; RV32I-NEXT: mv t1, a6
+; RV32I-NEXT: beqz a5, .LBB11_12
; RV32I-NEXT: # %bb.11:
-; RV32I-NEXT: sltu t1, a7, a6
+; RV32I-NEXT: sltu t1, a7, t0
; RV32I-NEXT: .LBB11_12:
-; RV32I-NEXT: add a3, a3, a1
-; RV32I-NEXT: sltu a6, a3, t1
-; RV32I-NEXT: sub a4, a4, a6
-; RV32I-NEXT: sub a3, a3, t1
+; RV32I-NEXT: add a2, a2, a1
+; RV32I-NEXT: sltu a5, a2, t1
+; RV32I-NEXT: sub a4, a4, a5
+; RV32I-NEXT: sub a2, a2, t1
; RV32I-NEXT: add a7, a7, a1
-; RV32I-NEXT: sub a5, a7, a5
-; RV32I-NEXT: add a1, a2, a1
+; RV32I-NEXT: sub a5, a7, a6
+; RV32I-NEXT: add a1, a3, a1
; RV32I-NEXT: sw a1, 0(a0)
; RV32I-NEXT: sw a5, 4(a0)
-; RV32I-NEXT: sw a3, 8(a0)
+; RV32I-NEXT: sw a2, 8(a0)
; RV32I-NEXT: sw a4, 12(a0)
; RV32I-NEXT: ret
;
@@ -636,75 +636,75 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
;
; RV32ZBB-LABEL: abd_ext_i128:
; RV32ZBB: # %bb.0:
-; RV32ZBB-NEXT: lw a5, 0(a2)
-; RV32ZBB-NEXT: lw a3, 0(a1)
-; RV32ZBB-NEXT: lw t1, 12(a2)
-; RV32ZBB-NEXT: lw a7, 8(a2)
-; RV32ZBB-NEXT: lw a4, 8(a1)
-; RV32ZBB-NEXT: lw a6, 12(a1)
-; RV32ZBB-NEXT: lw t0, 4(a2)
+; RV32ZBB-NEXT: lw a3, 0(a2)
+; RV32ZBB-NEXT: lw a5, 4(a2)
+; RV32ZBB-NEXT: lw a6, 8(a2)
+; RV32ZBB-NEXT: lw a7, 12(a2)
+; RV32ZBB-NEXT: lw a2, 8(a1)
+; RV32ZBB-NEXT: lw a4, 12(a1)
+; RV32ZBB-NEXT: lw t0, 0(a1)
; RV32ZBB-NEXT: lw a1, 4(a1)
-; RV32ZBB-NEXT: sltu a2, a4, a7
-; RV32ZBB-NEXT: sub t1, a6, t1
-; RV32ZBB-NEXT: sltu t2, a3, a5
-; RV32ZBB-NEXT: sub a2, t1, a2
+; RV32ZBB-NEXT: sltu t1, a2, a6
+; RV32ZBB-NEXT: sub a7, a4, a7
+; RV32ZBB-NEXT: sltu t2, t0, a3
+; RV32ZBB-NEXT: sub a7, a7, t1
; RV32ZBB-NEXT: mv t1, t2
-; RV32ZBB-NEXT: beq a1, t0, .LBB11_2
+; RV32ZBB-NEXT: beq a1, a5, .LBB11_2
; RV32ZBB-NEXT: # %bb.1:
-; RV32ZBB-NEXT: sltu t1, a1, t0
+; RV32ZBB-NEXT: sltu t1, a1, a5
; RV32ZBB-NEXT: .LBB11_2:
-; RV32ZBB-NEXT: sub a7, a4, a7
-; RV32ZBB-NEXT: sltu t3, a7, t1
-; RV32ZBB-NEXT: sub a2, a2, t3
-; RV32ZBB-NEXT: sub a7, a7, t1
-; RV32ZBB-NEXT: beq a2, a6, .LBB11_4
+; RV32ZBB-NEXT: sub t3, a2, a6
+; RV32ZBB-NEXT: sltu a6, t3, t1
+; RV32ZBB-NEXT: sub a6, a7, a6
+; RV32ZBB-NEXT: sub a7, t3, t1
+; RV32ZBB-NEXT: beq a6, a4, .LBB11_4
; RV32ZBB-NEXT: # %bb.3:
-; RV32ZBB-NEXT: sltu t1, a6, a2
+; RV32ZBB-NEXT: sltu t1, a4, a6
; RV32ZBB-NEXT: j .LBB11_5
; RV32ZBB-NEXT: .LBB11_4:
-; RV32ZBB-NEXT: sltu t1, a4, a7
+; RV32ZBB-NEXT: sltu t1, a2, a7
; RV32ZBB-NEXT: .LBB11_5:
-; RV32ZBB-NEXT: sub t0, a1, t0
-; RV32ZBB-NEXT: sub t0, t0, t2
-; RV32ZBB-NEXT: sub a5, a3, a5
-; RV32ZBB-NEXT: beq t0, a1, .LBB11_7
+; RV32ZBB-NEXT: sub a5, a1, a5
+; RV32ZBB-NEXT: sub a5, a5, t2
+; RV32ZBB-NEXT: sub a3, t0, a3
+; RV32ZBB-NEXT: beq a5, a1, .LBB11_7
; RV32ZBB-NEXT: # %bb.6:
-; RV32ZBB-NEXT: sltu a1, a1, t0
+; RV32ZBB-NEXT: sltu a1, a1, a5
; RV32ZBB-NEXT: j .LBB11_8
; RV32ZBB-NEXT: .LBB11_7:
-; RV32ZBB-NEXT: sltu a1, a3, a5
+; RV32ZBB-NEXT: sltu a1, t0, a3
; RV32ZBB-NEXT: .LBB11_8:
-; RV32ZBB-NEXT: xor a3, a2, a6
-; RV32ZBB-NEXT: xor a4, a7, a4
-; RV32ZBB-NEXT: or a3, a4, a3
-; RV32ZBB-NEXT: beqz a3, .LBB11_10
+; RV32ZBB-NEXT: xor a4, a6, a4
+; RV32ZBB-NEXT: xor a2, a7, a2
+; RV32ZBB-NEXT: or a2, a2, a4
+; RV32ZBB-NEXT: beqz a2, .LBB11_10
; RV32ZBB-NEXT: # %bb.9:
; RV32ZBB-NEXT: mv a1, t1
; RV32ZBB-NEXT: .LBB11_10:
-; RV32ZBB-NEXT: neg a6, a1
-; RV32ZBB-NEXT: xor a3, a7, a6
-; RV32ZBB-NEXT: sltu a4, a3, a6
-; RV32ZBB-NEXT: xor a2, a2, a6
-; RV32ZBB-NEXT: add a2, a2, a1
-; RV32ZBB-NEXT: sub a4, a2, a4
-; RV32ZBB-NEXT: xor a2, a5, a6
-; RV32ZBB-NEXT: sltu a5, a2, a6
-; RV32ZBB-NEXT: xor a7, t0, a6
-; RV32ZBB-NEXT: mv t1, a5
-; RV32ZBB-NEXT: beqz t0, .LBB11_12
+; RV32ZBB-NEXT: neg t0, a1
+; RV32ZBB-NEXT: xor a2, a7, t0
+; RV32ZBB-NEXT: sltu a4, a2, t0
+; RV32ZBB-NEXT: xor a6, a6, t0
+; RV32ZBB-NEXT: add a6, a6, a1
+; RV32ZBB-NEXT: sub a4, a6, a4
+; RV32ZBB-NEXT: xor a3, a3, t0
+; RV32ZBB-NEXT: sltu a6, a3, t0
+; RV32ZBB-NEXT: xor a7, a5, t0
+; RV32ZBB-NEXT: mv t1, a6
+; RV32ZBB-NEXT: beqz a5, .LBB11_12
; RV32ZBB-NEXT: # %bb.11:
-; RV32ZBB-NEXT: sltu t1, a7, a6
+; RV32ZBB-NEXT: sltu t1, a7, t0
; RV32ZBB-NEXT: .LBB11_12:
-; RV32ZBB-NEXT: add a3, a3, a1
-; RV32ZBB-NEXT: sltu a6, a3, t1
-; RV32ZBB-NEXT: sub a4, a4, a6
-; RV32ZBB-NEXT: sub a3, a3, t1
+; RV32ZBB-NEXT: add a2, a2, a1
+; RV32ZBB-NEXT: sltu a5, a2, t1
+; RV32ZBB-NEXT: sub a4, a4, a5
+; RV32ZBB-NEXT: sub a2, a2, t1
; RV32ZBB-NEXT: add a7, a7, a1
-; RV32ZBB-NEXT: sub a5, a7, a5
-; RV32ZBB-NEXT: add a1, a2, a1
+; RV32ZBB-NEXT: sub a5, a7, a6
+; RV32ZBB-NEXT: add a1, a3, a1
; RV32ZBB-NEXT: sw a1, 0(a0)
; RV32ZBB-NEXT: sw a5, 4(a0)
-; RV32ZBB-NEXT: sw a3, 8(a0)
+; RV32ZBB-NEXT: sw a2, 8(a0)
; RV32ZBB-NEXT: sw a4, 12(a0)
; RV32ZBB-NEXT: ret
;
@@ -740,75 +740,75 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind {
define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
; RV32I-LABEL: abd_ext_i128_undef:
; RV32I: # %bb.0:
-; RV32I-NEXT: lw a5, 0(a2)
-; RV32I-NEXT: lw a3, 0(a1)
-; RV32I-NEXT: lw t1, 12(a2)
-; RV32I-NEXT: lw a7, 8(a2)
-; RV32I-NEXT: lw a4, 8(a1)
-; RV32I-NEXT: lw a6, 12(a1)
-; RV32I-NEXT: lw t0, 4(a2)
+; RV32I-NEXT: lw a3, 0(a2)
+; RV32I-NEXT: lw a5, 4(a2)
+; RV32I-NEXT: lw a6, 8(a2)
+; RV32I-NEXT: lw a7, 12(a2)
+; RV32I-NEXT: lw a2, 8(a1)
+; RV32I-NEXT: lw a4, 12(a1)
+; RV32I-NEXT: lw t0, 0(a1)
; RV32I-NEXT: lw a1, 4(a1)
-; RV32I-NEXT: sltu a2, a4, a7
-; RV32I-NEXT: sub t1, a6, t1
-; RV32I-NEXT: sltu t2, a3, a5
-; RV32I-NEXT: sub a2, t1, a2
+; RV32I-NEXT: sltu t1, a2, a6
+; RV32I-NEXT: sub a7, a4, a7
+; RV32I-NEXT: sltu t2, t0, a3
+; RV32I-NEXT: sub a7, a7, t1
; RV32I-NEXT: mv t1, t2
-; RV32I-NEXT: beq a1, t0, .LBB12_2
+; RV32I-NEXT: beq a1, a5, .LBB12_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: sltu t1, a1, t0
+; RV32I-NEXT: sltu t1, a1, a5
; RV32I-NEXT: .LBB12_2:
-; RV32I-NEXT: sub a7, a4, a7
-; RV32I-NEXT: sltu t3, a7, t1
-; RV32I-NEXT: sub a2, a2, t3
-; RV32I-NEXT: sub a7, a7, t1
-; RV32I-NEXT: beq a2, a6, .LBB12_4
+; RV32I-NEXT: sub t3, a2, a6
+; RV32I-NEXT: sltu a6, t3, t1
+; RV32I-NEXT: sub a6, a7, a6
+; RV32I-NEXT: sub a7, t3, t1
+; RV32I-NEXT: beq a6, a4, .LBB12_4
; RV32I-NEXT: # %bb.3:
-; RV32I-NEXT: sltu t1, a6, a2
+; RV32I-NEXT: sltu t1, a4, a6
; RV32I-NEXT: j .LBB12_5
; RV32I-NEXT: .LBB12_4:
-; RV32I-NEXT: sltu t1, a4, a7
+; RV32I-NEXT: sltu t1, a2, a7
; RV32I-NEXT: .LBB12_5:
-; RV32I-NEXT: sub t0, a1, t0
-; RV32I-NEXT: sub t0, t0, t2
-; RV32I-NEXT: sub a5, a3, a5
-; RV32I-NEXT: beq t0, a1, .LBB12_7
+; RV32I-NEXT: sub a5, a1, a5
+; RV32I-NEXT: sub a5, a5, t2
+; RV32I-NEXT: sub a3, t0, a3
+; RV32I-NEXT: beq a5, a1, .LBB12_7
; RV32I-NEXT: # %bb.6:
-; RV32I-NEXT: sltu a1, a1, t0
+; RV32I-NEXT: sltu a1, a1, a5
; RV32I-NEXT: j .LBB12_8
; RV32I-NEXT: .LBB12_7:
-; RV32I-NEXT: sltu a1, a3, a5
+; RV32I-NEXT: sltu a1, t0, a3
; RV32I-NEXT: .LBB12_8:
-; RV32I-NEXT: xor a3, a2, a6
-; RV32I-NEXT: xor a4, a7, a4
-; RV32I-NEXT: or a3, a4, a3
-; RV32I-NEXT: beqz a3, .LBB12_10
+; RV32I-NEXT: xor a4, a6, a4
+; RV32I-NEXT: xor a2, a7, a2
+; RV32I-NEXT: or a2, a2, a4
+; RV32I-NEXT: beqz a2, .LBB12_10
; RV32I-NEXT: # %bb.9:
; RV32I-NEXT: mv a1, t1
; RV32I-NEXT: .LBB12_10:
-; RV32I-NEXT: neg a6, a1
-; RV32I-NEXT: xor a3, a7, a6
-; RV32I-NEXT: sltu a4, a3, a6
-; RV32I-NEXT: xor a2, a2, a6
-; RV32I-NEXT: add a2, a2, a1
-; RV32I-NEXT: sub a4, a2, a4
-; RV32I-NEXT: xor a2, a5, a6
-; RV32I-NEXT: sltu a5, a2, a6
-; RV32I-NEXT: xor a7, t0, a6
-; RV32I-NEXT: mv t1, a5
-; RV32I-NEXT: beqz t0, .LBB12_12
+; RV32I-NEXT: neg t0, a1
+; RV32I-NEXT: xor a2, a7, t0
+; RV32I-NEXT: sltu a4, a2, t0
+; RV32I-NEXT: xor a6, a6, t0
+; RV32I-NEXT: add a6, a6, a1
+; RV32I-NEXT: sub a4, a6, a4
+; RV32I-NEXT: xor a3, a3, t0
+; RV32I-NEXT: sltu a6, a3, t0
+; RV32I-NEXT: xor a7, a5, t0
+; RV32I-NEXT: mv t1, a6
+; RV32I-NEXT: beqz a5, .LBB12_12
; RV32I-NEXT: # %bb.11:
-; RV32I-NEXT: sltu t1, a7, a6
+; RV32I-NEXT: sltu t1, a7, t0
; RV32I-NEXT: .LBB12_12:
-; RV32I-NEXT: add a3, a3, a1
-; RV32I-NEXT: sltu a6, a3, t1
-; RV32I-NEXT: sub a4, a4, a6
-; RV32I-NEXT: sub a3, a3, t1
+; RV32I-NEXT: add a2, a2, a1
+; RV32I-NEXT: sltu a5, a2, t1
+; RV32I-NEXT: sub a4, a4, a5
+; RV32I-NEXT: sub a2, a2, t1
; RV32I-NEXT: add a7, a7, a1
-; RV32I-NEXT: sub a5, a7, a5
-; RV32I-NEXT: add a1, a2, a1
+; RV32I-NEXT: sub a5, a7, a6
+; RV32I-NEXT: add a1, a3, a1
; RV32I-NEXT: sw a1, 0(a0)
; RV32I-NEXT: sw a5, 4(a0)
-; RV32I-NEXT: sw a3, 8(a0)
+; RV32I-NEXT: sw a2, 8(a0)
; RV32I-NEXT: sw a4, 12(a0)
; RV32I-NEXT: ret
;
@@ -836,75 +836,75 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
;
; RV32ZBB-LABEL: abd_ext_i128_undef:
; RV32ZBB: # %bb.0:
-; RV32ZBB-NEXT: lw a5, 0(a2)
-; RV32ZBB-NEXT: lw a3, 0(a1)
-; RV32ZBB-NEXT: lw t1, 12(a2)
-; RV32ZBB-NEXT: lw a7, 8(a2)
-; RV32ZBB-NEXT: lw a4, 8(a1)
-; RV32ZBB-NEXT: lw a6, 12(a1)
-; RV32ZBB-NEXT: lw t0, 4(a2)
+; RV32ZBB-NEXT: lw a3, 0(a2)
+; RV32ZBB-NEXT: lw a5, 4(a2)
+; RV32ZBB-NEXT: lw a6, 8(a2)
+; RV32ZBB-NEXT: lw a7, 12(a2)
+; RV32ZBB-NEXT: lw a2, 8(a1)
+; RV32ZBB-NEXT: lw a4, 12(a1)
+; RV32ZBB-NEXT: lw t0, 0(a1)
; RV32ZBB-NEXT: lw a1, 4(a1)
-; RV32ZBB-NEXT: sltu a2, a4, a7
-; RV32ZBB-NEXT: sub t1, a6, t1
-; RV32ZBB-NEXT: sltu t2, a3, a5
-; RV32ZBB-NEXT: sub a2, t1, a2
+; RV32ZBB-NEXT: sltu t1, a2, a6
+; RV32ZBB-NEXT: sub a7, a4, a7
+; RV32ZBB-NEXT: sltu t2, t0, a3
+; RV32ZBB-NEXT: sub a7, a7, t1
; RV32ZBB-NEXT: mv t1, t2
-; RV32ZBB-NEXT: beq a1, t0, .LBB12_2
+; RV32ZBB-NEXT: beq a1, a5, .LBB12_2
; RV32ZBB-NEXT: # %bb.1:
-; RV32ZBB-NEXT: sltu t1, a1, t0
+; RV32ZBB-NEXT: sltu t1, a1, a5
; RV32ZBB-NEXT: .LBB12_2:
-; RV32ZBB-NEXT: sub a7, a4, a7
-; RV32ZBB-NEXT: sltu t3, a7, t1
-; RV32ZBB-NEXT: sub a2, a2, t3
-; RV32ZBB-NEXT: sub a7, a7, t1
-; RV32ZBB-NEXT: beq a2, a6, .LBB12_4
+; RV32ZBB-NEXT: sub t3, a2, a6
+; RV32ZBB-NEXT: sltu a6, t3, t1
+; RV32ZBB-NEXT: sub a6, a7, a6
+; RV32ZBB-NEXT: sub a7, t3, t1
+; RV32ZBB-NEXT: beq a6, a4, .LBB12_4
; RV32ZBB-NEXT: # %bb.3:
-; RV32ZBB-NEXT: sltu t1, a6, a2
+; RV32ZBB-NEXT: sltu t1, a4, a6
; RV32ZBB-NEXT: j .LBB12_5
; RV32ZBB-NEXT: .LBB12_4:
-; RV32ZBB-NEXT: sltu t1, a4, a7
+; RV32ZBB-NEXT: sltu t1, a2, a7
; RV32ZBB-NEXT: .LBB12_5:
-; RV32ZBB-NEXT: sub t0, a1, t0
-; RV32ZBB-NEXT: sub t0, t0, t2
-; RV32ZBB-NEXT: sub a5, a3, a5
-; RV32ZBB-NEXT: beq t0, a1, .LBB12_7
+; RV32ZBB-NEXT: sub a5, a1, a5
+; RV32ZBB-NEXT: sub a5, a5, t2
+; RV32ZBB-NEXT: sub a3, t0, a3
+; RV32ZBB-NEXT: beq a5, a1, .LBB12_7
; RV32ZBB-NEXT: # %bb.6:
-; RV32ZBB-NEXT: sltu a1, a1, t0
+; RV32ZBB-NEXT: sltu a1, a1, a5
; RV32ZBB-NEXT: j .LBB12_8
; RV32ZBB-NEXT: .LBB12_7:
-; RV32ZBB-NEXT: sltu a1, a3, a5
+; RV32ZBB-NEXT: sltu a1, t0, a3
; RV32ZBB-NEXT: .LBB12_8:
-; RV32ZBB-NEXT: xor a3, a2, a6
-; RV32ZBB-NEXT: xor a4, a7, a4
-; RV32ZBB-NEXT: or a3, a4, a3
-; RV32ZBB-NEXT: beqz a3, .LBB12_10
+; RV32ZBB-NEXT: xor a4, a6, a4
+; RV32ZBB-NEXT: xor a2, a7, a2
+; RV32ZBB-NEXT: or a2, a2, a4
+; RV32ZBB-NEXT: beqz a2, .LBB12_10
; RV32ZBB-NEXT: # %bb.9:
; RV32ZBB-NEXT: mv a1, t1
; RV32ZBB-NEXT: .LBB12_10:
-; RV32ZBB-NEXT: neg a6, a1
-; RV32ZBB-NEXT: xor a3, a7, a6
-; RV32ZBB-NEXT: sltu a4, a3, a6
-; RV32ZBB-NEXT: xor a2, a2, a6
-; RV32ZBB-NEXT: add a2, a2, a1
-; RV32ZBB-NEXT: sub a4, a2, a4
-; RV32ZBB-NEXT: xor a2, a5, a6
-; RV32ZBB-NEXT: sltu a5, a2, a6
-; RV32ZBB-NEXT: xor a7, t0, a6
-; RV32ZBB-NEXT: mv t1, a5
-; RV32ZBB-NEXT: beqz t0, .LBB12_12
+; RV32ZBB-NEXT: neg t0, a1
+; RV32ZBB-NEXT: xor a2, a7, t0
+; RV32ZBB-NEXT: sltu a4, a2, t0
+; RV32ZBB-NEXT: xor a6, a6, t0
+; RV32ZBB-NEXT: add a6, a6, a1
+; RV32ZBB-NEXT: sub a4, a6, a4
+; RV32ZBB-NEXT: xor a3, a3, t0
+; RV32ZBB-NEXT: sltu a6, a3, t0
+; RV32ZBB-NEXT: xor a7, a5, t0
+; RV32ZBB-NEXT: mv t1, a6
+; RV32ZBB-NEXT: beqz a5, .LBB12_12
; RV32ZBB-NEXT: # %bb.11:
-; RV32ZBB-NEXT: sltu t1, a7, a6
+; RV32ZBB-NEXT: sltu t1, a7, t0
; RV32ZBB-NEXT: .LBB12_12:
-; RV32ZBB-NEXT: add a3, a3, a1
-; RV32ZBB-NEXT: sltu a6, a3, t1
-; RV32ZBB-NEXT: sub a4, a4, a6
-; RV32ZBB-NEXT: sub a3, a3, t1
+; RV32ZBB-NEXT: add a2, a2, a1
+; RV32ZBB-NEXT: sltu a5, a2, t1
+; RV32ZBB-NEXT: sub a4, a4, a5
+; RV32ZBB-NEXT: sub a2, a2, t1
; RV32ZBB-NEXT: add a7, a7, a1
-; RV32ZBB-NEXT: sub a5, a7, a5
-; RV32ZBB-NEXT: add a1, a2, a1
+; RV32ZBB-NEXT: sub a5, a7, a6
+; RV32ZBB-NEXT: add a1, a3, a1
; RV32ZBB-NEXT: sw a1, 0(a0)
; RV32ZBB-NEXT: sw a5, 4(a0)
-; RV32ZBB-NEXT: sw a3, 8(a0)
+; RV32ZBB-NEXT: sw a2, 8(a0)
; RV32ZBB-NEXT: sw a4, 12(a0)
; RV32ZBB-NEXT: ret
;
@@ -1131,75 +1131,75 @@ define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind {
define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind {
; RV32I-LABEL: abd_minmax_i128:
; RV32I: # %bb.0:
-; RV32I-NEXT: lw a5, 0(a2)
-; RV32I-NEXT: lw a3, 0(a1)
-; RV32I-NEXT: lw t1, 12(a2)
-; RV32I-NEXT: lw a7, 8(a2)
-; RV32I-NEXT: lw a4, 8(a1)
-; RV32I-NEXT: lw a6, 12(a1)
-; RV32I-NEXT: lw t0, 4(a2)
+; RV32I-NEXT: lw a3, 0(a2)
+; RV32I-NEXT: lw a5, 4(a2)
+; RV32I-NEXT: lw a6, 8(a2)
+; RV32I-NEXT: lw a7, 12(a2)
+; RV32I-NEXT: lw a2, 8(a1)
+; RV32I-NEXT: lw a4, 12(a1)
+; RV32I-NEXT: lw t0, 0(a1)
; RV32I-NEXT: lw a1, 4(a1)
-; RV32I-NEXT: sltu a2, a4, a7
-; RV32I-NEXT: sub t1, a6, t1
-; RV32I-NEXT: sltu t2, a3, a5
-; RV32I-NEXT: sub a2, t1, a2
+; RV32I-NEXT: sltu t1, a2, a6
+; RV32I-NEXT: sub a7, a4, a7
+; RV32I-NEXT: sltu t2, t0, a3
+; RV32I-NEXT: sub a7, a7, t1
; RV32I-NEXT: mv t1, t2
-; RV32I-NEXT: beq a1, t0, .LBB17_2
+; RV32I-NEXT: beq a1, a5, .LBB17_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: sltu t1, a1, t0
+; RV32I-NEXT: sltu t1, a1, a5
; RV32I-NEXT: .LBB17_2:
-; RV32I-NEXT: sub a7, a4, a7
-; RV32I-NEXT: sltu t3, a7, t1
-; RV32I-NEXT: sub a2, a2, t3
-; RV32I-NEXT: sub a7, a7, t1
-; RV32I-NEXT: beq a2, a6, .LBB17_4
+; RV32I-NEXT: sub t3, a2, a6
+; RV32I-NEXT: sltu a6, t3, t1
+; RV32I-NEXT: sub a6, a7, a6
+; RV32I-NEXT: sub a7, t3, t1
+; RV32I-NEXT: beq a6, a4, .LBB17_4
; RV32I-NEXT: # %bb.3:
-; RV32I-NEXT: sltu t1, a6, a2
+; RV32I-NEXT: sltu t1, a4, a6
; RV32I-NEXT: j .LBB17_5
; RV32I-NEXT: .LBB17_4:
-; RV32I-NEXT: sltu t1, a4, a7
+; RV32I-NEXT: sltu t1, a2, a7
; RV32I-NEXT: .LBB17_5:
-; RV32I-NEXT: sub t0, a1, t0
-; RV32I-NEXT: sub t0, t0, t2
-; RV32I-NEXT: sub a5, a3, a5
-; RV32I-NEXT: beq t0, a1, .LBB17_7
+; RV32I-NEXT: sub a5, a1, a5
+; RV32I-NEXT: sub a5, a5, t2
+; RV32I-NEXT: sub a3, t0, a3
+; RV32I-NEXT: beq a5, a1, .LBB17_7
; RV32I-NEXT: # %bb.6:
-; RV32I-NEXT: sltu a1, a1, t0
+; RV32I-NEXT: sltu a1, a1, a5
; RV32I-NEXT: j .LBB17_8
; RV32I-NEXT: .LBB17_7:
-; RV32I-NEXT: sltu a1, a3, a5
+; RV32I-NEXT: sltu a1, t0, a3
; RV32I-NEXT: .LBB17_8:
-; RV32I-NEXT: xor a3, a2, a6
-; RV32I-NEXT: xor a4, a7, a4
-; RV32I-NEXT: or a3, a4, a3
-; RV32I-NEXT: beqz a3, .LBB17_10
+; RV32I-NEXT: xor a4, a6, a4
+; RV32I-NEXT: xor a2, a7, a2
+; RV32I-NEXT: or a2, a2, a4
+; RV32I-NEXT: beqz a2, .LBB17_10
; RV32I-NEXT: # %bb.9:
; RV32I-NEXT: mv a1, t1
; RV32I-NEXT: .LBB17_10:
-; RV32I-NEXT: neg a6, a1
-; RV32I-NEXT: xor a3, a7, a6
-; RV32I-NEXT: sltu a4, a3, a6
-; RV32I-NEXT: xor a2, a2, a6
-; RV32I-NEXT: add a2, a2, a1
-; RV32I-NEXT: sub a4, a2, a4
-; RV32I-NEXT: xor a2, a5, a6
-; RV32I-NEXT: sltu a5, a2, a6
-; RV32I-NEXT: xor a7, t0, a6
-; RV32I-NEXT: mv t1, a5
-; RV32I-NEXT: beqz t0, .LBB17_12
+; RV32I-NEXT: neg t0, a1
+; RV32I-NEXT: xor a2, a7, t0
+; RV32I-NEXT: sltu a4, a2, t0
+; RV32I-NEXT: xor a6, a6, t0
+; RV32I-NEXT: add a6, a6, a1
+; RV32I-NEXT: sub a4, a6, a4
+; RV32I-NEXT: xor a3, a3, t0
+; RV32I-NEXT: sltu a6, a3, t0
+; RV32I-NEXT: xor a7, a5, t0
+; RV32I-NEXT: mv t1, a6
+; RV32I-NEXT: beqz a5, .LBB17_12
; RV32I-NEXT: # %bb.11:
-; RV32I-NEXT: sltu t1, a7, a6
+; RV32I-NEXT: sltu t1, a7, t0
; RV32I-NEXT: .LBB17_12:
-; RV32I-NEXT: add a3, a3, a1
-; RV32I-NEXT: sltu a6, a3, t1
-; RV32I-NEXT: sub a4, a4, a6
-; RV32I-NEXT: sub a3, a3, t1
+; RV32I-NEXT: add a2, a2, a1
+; RV32I-NEXT: sltu a5, a2, t1
+; RV32I-NEXT: sub a4, a4, a5
+; RV32I-NEXT: sub a2, a2, t1
; RV32I-NEXT: add a7, a7, a1
-; RV32I-NEXT: sub a5, a7, a5
-; RV32I-NEXT: add a1, a2, a1
+; RV32I-NEXT: sub a5, a7, a6
+; RV32I-NEXT: add a1, a3, a1
; RV32I-NEXT: sw a1, 0(a0)
; RV32I-NEXT: sw a5, 4(a0)
-; RV32I-NEXT: sw a3, 8(a0)
+; RV32I-NEXT: sw a2, 8(a0)
; RV32I-NEXT: sw a4, 12(a0)
; RV32I-NEXT: ret
;
@@ -1227,75 +1227,75 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind {
;
; RV32ZBB-LABEL: abd_minmax_i128:
; RV32ZBB: # %bb.0:
-; RV32ZBB-NEXT: lw a5, 0(a2)
-; RV32ZBB-NEXT: lw a3, 0(a1)
-; RV32ZBB-NEXT: lw t1, 12(a2)
-; RV32ZBB-NEXT: lw a7, 8(a2)
-; RV32ZBB-NEXT: lw a4, 8(a1)
-; RV32ZBB-NEXT: lw a6, 12(a1)
-; RV32ZBB-NEXT: lw t0, 4(a2)
+; RV32ZBB-NEXT: lw a3, 0(a2)
+; RV32ZBB-NEXT: lw a5, 4(a2)
+; RV32ZBB-NEXT: lw a6, 8(a2)
+; RV32ZBB-NEXT: lw a7, 12(a2)
+; RV32ZBB-NEXT: lw a2, 8(a1)
+; RV32ZBB-NEXT: lw a4, 12(a1)
+; RV32ZBB-NEXT: lw t0, 0(a1)
; RV32ZBB-NEXT: lw a1, 4(a1)
-; RV32ZBB-NEXT: sltu a2, a4, a7
-; RV32ZBB-NEXT: sub t1, a6, t1
-; RV32ZBB-NEXT: sltu t2, a3, a5
-; RV32ZBB-NEXT: sub a2, t1, a2
+; RV32ZBB-NEXT: sltu t1, a2, a6
+; RV32ZBB-NEXT: sub a7, a4, a7
+; RV32ZBB-NEXT: sltu t2, t0, a3
+; RV32ZBB-NEXT: sub a7, a7, t1
; RV32ZBB-NEXT: mv t1, t2
-; RV32ZBB-NEXT: beq a1, t0, .LBB17_2
+; RV32ZBB-NEXT: beq a1, a5, .LBB17_2
; RV32ZBB-NEXT: # %bb.1:
-; RV32ZBB-NEXT: sltu t1, a1, t0
+; RV32ZBB-NEXT: sltu t1, a1, a5
; RV32ZBB-NEXT: .LBB17_2:
-; RV32ZBB-NEXT: sub a7, a4, a7
-; RV32ZBB-NEXT: sltu t3, a7, t1
-; RV32ZBB-NEXT: sub a2, a2, t3
-; RV32ZBB-NEXT: sub a7, a7, t1
-; RV32ZBB-NEXT: beq a2, a6, .LBB17_4
+; RV32ZBB-NEXT: sub t3, a2, a6
+; RV32ZBB-NEXT: sltu a6, t3, t1
+; RV32ZBB-NEXT: sub a6, a7, a6
+; RV32ZBB-NEXT: sub a7, t3, t1
+; RV32ZBB-NEXT: beq a6, a4, .LBB17_4
; RV32ZBB-NEXT: # %bb.3:
-; RV32ZBB-NEXT: sltu t1, a6, a2
+; RV32ZBB-NEXT: sltu t1, a4, a6
; RV32ZBB-NEXT: j .LBB17_5
; RV32ZBB-NEXT: .LBB17_4:
-; RV32ZBB-NEXT: sltu t1, a4, a7
+; RV32ZBB-NEXT: sltu t1, a2, a7
; RV32ZBB-NEXT: .LBB17_5:
-; RV32ZBB-NEXT: sub t0, a1, t0
-; RV32ZBB-NEXT: sub t0, t0, t2
-; RV32ZBB-NEXT: sub a5, a3, a5
-; RV32ZBB-NEXT: beq t0, a1, .LBB17_7
+; RV32ZBB-NEXT: sub a5, a1, a5
+; RV32ZBB-NEXT: sub a5, a5, t2
+; RV32ZBB-NEXT: sub a3, t0, a3
+; RV32ZBB-NEXT: beq a5, a1, .LBB17_7
; RV32ZBB-NEXT: # %bb.6:
-; RV32ZBB-NEXT: sltu a1, a1, t0
+; RV32ZBB-NEXT: sltu a1, a1, a5
; RV32ZBB-NEXT: j .LBB17_8
; RV32ZBB-NEXT: .LBB17_7:
-; RV32ZBB-NEXT: sltu a1, a3, a5
+; RV32ZBB-NEXT: sltu a1, t0, a3
; RV32ZBB-NEXT: .LBB17_8:
-; RV32ZBB-NEXT: xor a3, a2, a6
-; RV32ZBB-NEXT: xor a4, a7, a4
-; RV32ZBB-NEXT: or a3, a4, a3
-; RV32ZBB-NEXT: beqz a3, .LBB17_10
+; RV32ZBB-NEXT: xor a4, a6, a4
+; RV32ZBB-NEXT: xor a2, a7, a2
+; RV32ZBB-NEXT: or a2, a2, a4
+; RV32ZBB-NEXT: beqz a2, .LBB17_10
; RV32ZBB-NEXT: # %bb.9:
; RV32ZBB-NEXT: mv a1, t1
; RV32ZBB-NEXT: .LBB17_10:
-; RV32ZBB-NEXT: neg a6, a1
-; RV32ZBB-NEXT: xor a3, a7, a6
-; RV32ZBB-NEXT: sltu a4, a3, a6
-; RV32ZBB-NEXT: xor a2, a2, a6
-; RV32ZBB-NEXT: add a2, a2, a1
-; RV32ZBB-NEXT: sub a4, a2, a4
-; RV32ZBB-NEXT: xor a2, a5, a6
-; RV32ZBB-NEXT: sltu a5, a2, a6
-; RV32ZBB-NEXT: xor a7, t0, a6
-; RV32ZBB-NEXT: mv t1, a5
-; RV32ZBB-NEXT: beqz t0, .LBB17_12
+; RV32ZBB-NEXT: neg t0, a1
+; RV32ZBB-NEXT: xor a2, a7, t0
+; RV32ZBB-NEXT: sltu a4, a2, t0
+; RV32ZBB-NEXT: xor a6, a6, t0
+; RV32ZBB-NEXT: add a6, a6, a1
+; RV32ZBB-NEXT: sub a4, a6, a4
+; RV32ZBB-NEXT: xor a3, a3, t0
+; RV32ZBB-NEXT: sltu a6, a3, t0
+; RV32ZBB-NEXT: xor a7, a5, t0
+; RV32ZBB-NEXT: mv t1, a6
+; RV32ZBB-NEXT: beqz a5, .LBB17_12
; RV32ZBB-NEXT: # %bb.11:
-; RV32ZBB-NEXT: sltu t1, a7, a6
+; RV32ZBB-NEXT: sltu t1, a7, t0
; RV32ZBB-NEXT: .LBB17_12:
-; RV32ZBB-NEXT: add a3, a3, a1
-; RV32ZBB-NEXT: sltu a6, a3, t1
-; RV32ZBB-NEXT: sub a4, a4, a6
-; RV32ZBB-NEXT: sub a3, a3, t1
+; RV32ZBB-NEXT: add a2, a2, a1
+; RV32ZBB-NEXT: sltu a5, a2, t1
+; RV32ZBB-NEXT: sub a4, a4, a5
+; RV32ZBB-NEXT: sub a2, a2, t1
; RV32ZBB-NEXT: add a7, a7, a1
-; RV32ZBB-NEXT: sub a5, a7, a5
-; RV32ZBB-NEXT: add a1, a2, a1
+; RV32ZBB-NEXT: sub a5, a7, a6
+; RV32ZBB-NEXT: add a1, a3, a1
; RV32ZBB-NEXT: sw a1, 0(a0)
; RV32ZBB-NEXT: sw a5, 4(a0)
-; RV32ZBB-NEXT: sw a3, 8(a0)
+; RV32ZBB-NEXT: sw a2, 8(a0)
; RV32ZBB-NEXT: sw a4, 12(a0)
; RV32ZBB-NEXT: ret
;
@@ -1524,75 +1524,75 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind {
define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
; RV32I-LABEL: abd_cmp_i128:
; RV32I: # %bb.0:
-; RV32I-NEXT: lw a5, 0(a2)
-; RV32I-NEXT: lw a3, 0(a1)
-; RV32I-NEXT: lw t1, 12(a2)
-; RV32I-NEXT: lw a7, 8(a2)
-; RV32I-NEXT: lw a4, 8(a1)
-; RV32I-NEXT: lw a6, 12(a1)
-; RV32I-NEXT: lw t0, 4(a2)
+; RV32I-NEXT: lw a3, 0(a2)
+; RV32I-NEXT: lw a5, 4(a2)
+; RV32I-NEXT: lw a6, 8(a2)
+; RV32I-NEXT: lw a7, 12(a2)
+; RV32I-NEXT: lw a2, 8(a1)
+; RV32I-NEXT: lw a4, 12(a1)
+; RV32I-NEXT: lw t0, 0(a1)
; RV32I-NEXT: lw a1, 4(a1)
-; RV32I-NEXT: sltu a2, a4, a7
-; RV32I-NEXT: sub t1, a6, t1
-; RV32I-NEXT: sltu t2, a3, a5
-; RV32I-NEXT: sub a2, t1, a2
+; RV32I-NEXT: sltu t1, a2, a6
+; RV32I-NEXT: sub a7, a4, a7
+; RV32I-NEXT: sltu t2, t0, a3
+; RV32I-NEXT: sub a7, a7, t1
; RV32I-NEXT: mv t1, t2
-; RV32I-NEXT: beq a1, t0, .LBB22_2
+; RV32I-NEXT: beq a1, a5, .LBB22_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: sltu t1, a1, t0
+; RV32I-NEXT: sltu t1, a1, a5
; RV32I-NEXT: .LBB22_2:
-; RV32I-NEXT: sub a7, a4, a7
-; RV32I-NEXT: sltu t3, a7, t1
-; RV32I-NEXT: sub a2, a2, t3
-; RV32I-NEXT: sub a7, a7, t1
-; RV32I-NEXT: beq a2, a6, .LBB22_4
+; RV32I-NEXT: sub t3, a2, a6
+; RV32I-NEXT: sltu a6, t3, t1
+; RV32I-NEXT: sub a6, a7, a6
+; RV32I-NEXT: sub a7, t3, t1
+; RV32I-NEXT: beq a6, a4, .LBB22_4
; RV32I-NEXT: # %bb.3:
-; RV32I-NEXT: sltu t1, a6, a2
+; RV32I-NEXT: sltu t1, a4, a6
; RV32I-NEXT: j .LBB22_5
; RV32I-NEXT: .LBB22_4:
-; RV32I-NEXT: sltu t1, a4, a7
+; RV32I-NEXT: sltu t1, a2, a7
; RV32I-NEXT: .LBB22_5:
-; RV32I-NEXT: sub t0, a1, t0
-; RV32I-NEXT: sub t0, t0, t2
-; RV32I-NEXT: sub a5, a3, a5
-; RV32I-NEXT: beq t0, a1, .LBB22_7
+; RV32I-NEXT: sub a5, a1, a5
+; RV32I-NEXT: sub a5, a5, t2
+; RV32I-NEXT: sub a3, t0, a3
+; RV32I-NEXT: beq a5, a1, .LBB22_7
; RV32I-NEXT: # %bb.6:
-; RV32I-NEXT: sltu a1, a1, t0
+; RV32I-NEXT: sltu a1, a1, a5
; RV32I-NEXT: j .LBB22_8
; RV32I-NEXT: .LBB22_7:
-; RV32I-NEXT: sltu a1, a3, a5
+; RV32I-NEXT: sltu a1, t0, a3
; RV32I-NEXT: .LBB22_8:
-; RV32I-NEXT: xor a3, a2, a6
-; RV32I-NEXT: xor a4, a7, a4
-; RV32I-NEXT: or a3, a4, a3
-; RV32I-NEXT: beqz a3, .LBB22_10
+; RV32I-NEXT: xor a4, a6, a4
+; RV32I-NEXT: xor a2, a7, a2
+; RV32I-NEXT: or a2, a2, a4
+; RV32I-NEXT: beqz a2, .LBB22_10
; RV32I-NEXT: # %bb.9:
; RV32I-NEXT: mv a1, t1
; RV32I-NEXT: .LBB22_10:
-; RV32I-NEXT: neg a6, a1
-; RV32I-NEXT: xor a3, a7, a6
-; RV32I-NEXT: sltu a4, a3, a6
-; RV32I-NEXT: xor a2, a2, a6
-; RV32I-NEXT: add a2, a2, a1
-; RV32I-NEXT: sub a4, a2, a4
-; RV32I-NEXT: xor a2, a5, a6
-; RV32I-NEXT: sltu a5, a2, a6
-; RV32I-NEXT: xor a7, t0, a6
-; RV32I-NEXT: mv t1, a5
-; RV32I-NEXT: beqz t0, .LBB22_12
+; RV32I-NEXT: neg t0, a1
+; RV32I-NEXT: xor a2, a7, t0
+; RV32I-NEXT: sltu a4, a2, t0
+; RV32I-NEXT: xor a6, a6, t0
+; RV32I-NEXT: add a6, a6, a1
+; RV32I-NEXT: sub a4, a6, a4
+; RV32I-NEXT: xor a3, a3, t0
+; RV32I-NEXT: sltu a6, a3, t0
+; RV32I-NEXT: xor a7, a5, t0
+; RV32I-NEXT: mv t1, a6
+; RV32I-NEXT: beqz a5, .LBB22_12
; RV32I-NEXT: # %bb.11:
-; RV32I-NEXT: sltu t1, a7, a6
+; RV32I-NEXT: sltu t1, a7, t0
; RV32I-NEXT: .LBB22_12:
-; RV32I-NEXT: add a3, a3, a1
-; RV32I-NEXT: sltu a6, a3, t1
-; RV32I-NEXT: sub a4, a4, a6
-; RV32I-NEXT: sub a3, a3, t1
+; RV32I-NEXT: add a2, a2, a1
+; RV32I-NEXT: sltu a5, a2, t1
+; RV32I-NEXT: sub a4, a4, a5
+; RV32I-NEXT: sub a2, a2, t1
; RV32I-NEXT: add a7, a7, a1
-; RV32I-NEXT: sub a5, a7, a5
-; RV32I-NEXT: add a1, a2, a1
+; RV32I-NEXT: sub a5, a7, a6
+; RV32I-NEXT: add a1, a3, a1
; RV32I-NEXT: sw a1, 0(a0)
; RV32I-NEXT: sw a5, 4(a0)
-; RV32I-NEXT: sw a3, 8(a0)
+; RV32I-NEXT: sw a2, 8(a0)
; RV32I-NEXT: sw a4, 12(a0)
; RV32I-NEXT: ret
;
@@ -1620,75 +1620,75 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
;
; RV32ZBB-LABEL: abd_cmp_i128:
; RV32ZBB: # %bb.0:
-; RV32ZBB-NEXT: lw a5, 0(a2)
-; RV32ZBB-NEXT: lw a3, 0(a1)
-; RV32ZBB-NEXT: lw t1, 12(a2)
-; RV32ZBB-NEXT: lw a7, 8(a2)
-; RV32ZBB-NEXT: lw a4, 8(a1)
-; RV32ZBB-NEXT: lw a6, 12(a1)
-; RV32ZBB-NEXT: lw t0, 4(a2)
+; RV32ZBB-NEXT: lw a3, 0(a2)
+; RV32ZBB-NEXT: lw a5, 4(a2)
+; RV32ZBB-NEXT: lw a6, 8(a2)
+; RV32ZBB-NEXT: lw a7, 12(a2)
+; RV32ZBB-NEXT: lw a2, 8(a1)
+; RV32ZBB-NEXT: lw a4, 12(a1)
+; RV32ZBB-NEXT: lw t0, 0(a1)
; RV32ZBB-NEXT: lw a1, 4(a1)
-; RV32ZBB-NEXT: sltu a2, a4, a7
-; RV32ZBB-NEXT: sub t1, a6, t1
-; RV32ZBB-NEXT: sltu t2, a3, a5
-; RV32ZBB-NEXT: sub a2, t1, a2
+; RV32ZBB-NEXT: sltu t1, a2, a6
+; RV32ZBB-NEXT: sub a7, a4, a7
+; RV32ZBB-NEXT: sltu t2, t0, a3
+; RV32ZBB-NEXT: sub a7, a7, t1
; RV32ZBB-NEXT: mv t1, t2
-; RV32ZBB-NEXT: beq a1, t0, .LBB22_2
+; RV32ZBB-NEXT: beq a1, a5, .LBB22_2
; RV32ZBB-NEXT: # %bb.1:
-; RV32ZBB-NEXT: sltu t1, a1, t0
+; RV32ZBB-NEXT: sltu t1, a1, a5
; RV32ZBB-NEXT: .LBB22_2:
-; RV32ZBB-NEXT: sub a7, a4, a7
-; RV32ZBB-NEXT: sltu t3, a7, t1
-; RV32ZBB-NEXT: sub a2, a2, t3
-; RV32ZBB-NEXT: sub a7, a7, t1
-; RV32ZBB-NEXT: beq a2, a6, .LBB22_4
+; RV32ZBB-NEXT: sub t3, a2, a6
+; RV32ZBB-NEXT: sltu a6, t3, t1
+; RV32ZBB-NEXT: sub a6, a7, a6
+; RV32ZBB-NEXT: sub a7, t3, t1
+; RV32ZBB-NEXT: beq a6, a4, .LBB22_4
; RV32ZBB-NEXT: # %bb.3:
-; RV32ZBB-NEXT: sltu t1, a6, a2
+; RV32ZBB-NEXT: sltu t1, a4, a6
; RV32ZBB-NEXT: j .LBB22_5
; RV32ZBB-NEXT: .LBB22_4:
-; RV32ZBB-NEXT: sltu t1, a4, a7
+; RV32ZBB-NEXT: sltu t1, a2, a7
; RV32ZBB-NEXT: .LBB22_5:
-; RV32ZBB-NEXT: sub t0, a1, t0
-; RV32ZBB-NEXT: sub t0, t0, t2
-; RV32ZBB-NEXT: sub a5, a3, a5
-; RV32ZBB-NEXT: beq t0, a1, .LBB22_7
+; RV32ZBB-NEXT: sub a5, a1, a5
+; RV32ZBB-NEXT: sub a5, a5, t2
+; RV32ZBB-NEXT: sub a3, t0, a3
+; RV32ZBB-NEXT: beq a5, a1, .LBB22_7
; RV32ZBB-NEXT: # %bb.6:
-; RV32ZBB-NEXT: sltu a1, a1, t0
+; RV32ZBB-NEXT: sltu a1, a1, a5
; RV32ZBB-NEXT: j .LBB22_8
; RV32ZBB-NEXT: .LBB22_7:
-; RV32ZBB-NEXT: sltu a1, a3, a5
+; RV32ZBB-NEXT: sltu a1, t0, a3
; RV32ZBB-NEXT: .LBB22_8:
-; RV32ZBB-NEXT: xor a3, a2, a6
-; RV32ZBB-NEXT: xor a4, a7, a4
-; RV32ZBB-NEXT: or a3, a4, a3
-; RV32ZBB-NEXT: beqz a3, .LBB22_10
+; RV32ZBB-NEXT: xor a4, a6, a4
+; RV32ZBB-NEXT: xor a2, a7, a2
+; RV32ZBB-NEXT: or a2, a2, a4
+; RV32ZBB-NEXT: beqz a2, .LBB22_10
; RV32ZBB-NEXT: # %bb.9:
; RV32ZBB-NEXT: mv a1, t1
; RV32ZBB-NEXT: .LBB22_10:
-; RV32ZBB-NEXT: neg a6, a1
-; RV32ZBB-NEXT: xor a3, a7, a6
-; RV32ZBB-NEXT: sltu a4, a3, a6
-; RV32ZBB-NEXT: xor a2, a2, a6
-; RV32ZBB-NEXT: add a2, a2, a1
-; RV32ZBB-NEXT: sub a4, a2, a4
-; RV32ZBB-NEXT: xor a2, a5, a6
-; RV32ZBB-NEXT: sltu a5, a2, a6
-; RV32ZBB-NEXT: xor a7, t0, a6
-; RV32ZBB-NEXT: mv t1, a5
-; RV32ZBB-NEXT: beqz t0, .LBB22_12
+; RV32ZBB-NEXT: neg t0, a1
+; RV32ZBB-NEXT: xor a2, a7, t0
+; RV32ZBB-NEXT: sltu a4, a2, t0
+; RV32ZBB-NEXT: xor a6, a6, t0
+; RV32ZBB-NEXT: add a6, a6, a1
+; RV32ZBB-NEXT: sub a4, a6, a4
+; RV32ZBB-NEXT: xor a3, a3, t0
+; RV32ZBB-NEXT: sltu a6, a3, t0
+; RV32ZBB-NEXT: xor a7, a5, t0
+; RV32ZBB-NEXT: mv t1, a6
+; RV32ZBB-NEXT: beqz a5, .LBB22_12
; RV32ZBB-NEXT: # %bb.11:
-; RV32ZBB-NEXT: sltu t1, a7, a6
+; RV32ZBB-NEXT: sltu t1, a7, t0
; RV32ZBB-NEXT: .LBB22_12:
-; RV32ZBB-NEXT: add a3, a3, a1
-; RV32ZBB-NEXT: sltu a6, a3, t1
-; RV32ZBB-NEXT: sub a4, a4, a6
-; RV32ZBB-NEXT: sub a3, a3, t1
+; RV32ZBB-NEXT: add a2, a2, a1
+; RV32ZBB-NEXT: sltu a5, a2, t1
+; RV32ZBB-NEXT: sub a4, a4, a5
+; RV32ZBB-NEXT: sub a2, a2, t1
; RV32ZBB-NEXT: add a7, a7, a1
-; RV32ZBB-NEXT: sub a5, a7, a5
-; RV32ZBB-NEXT: add a1, a2, a1
+; RV32ZBB-NEXT: sub a5, a7, a6
+; RV32ZBB-NEXT: add a1, a3, a1
; RV32ZBB-NEXT: sw a1, 0(a0)
; RV32ZBB-NEXT: sw a5, 4(a0)
-; RV32ZBB-NEXT: sw a3, 8(a0)
+; RV32ZBB-NEXT: sw a2, 8(a0)
; RV32ZBB-NEXT: sw a4, 12(a0)
; RV32ZBB-NEXT: ret
;
@@ -1918,10 +1918,10 @@ define i128 @abd_select_i128(i128 %a, i128 %b) nounwind {
; RV32I-LABEL: abd_select_i128:
; RV32I: # %bb.0:
; RV32I-NEXT: lw a7, 4(a2)
-; RV32I-NEXT: lw a3, 4(a1)
; RV32I-NEXT: lw a6, 8(a2)
; RV32I-NEXT: lw t0, 12(a2)
; RV32I-NEXT: lw a5, 12(a1)
+; RV32I-NEXT: lw a3, 4(a1)
; RV32I-NEXT: lw a4, 8(a1)
; RV32I-NEXT: beq a5, t0, .LBB27_2
; RV32I-NEXT: # %bb.1:
@@ -2012,75 +2012,75 @@ define i128 @abd_select_i128(i128 %a, i128 %b) nounwind {
;
; RV32ZBB-LABEL: abd_select_i128:
; RV32ZBB: # %bb.0:
-; RV32ZBB-NEXT: lw a5, 0(a2)
-; RV32ZBB-NEXT: lw a3, 0(a1)
-; RV32ZBB-NEXT: lw t1, 12(a2)
-; RV32ZBB-NEXT: lw a7, 8(a2)
-; RV32ZBB-NEXT: lw a4, 8(a1)
-; RV32ZBB-NEXT: lw a6, 12(a1)
-; RV32ZBB-NEXT: lw t0, 4(a2)
+; RV32ZBB-NEXT: lw a3, 0(a2)
+; RV32ZBB-NEXT: lw a5, 4(a2)
+; RV32ZBB-NEXT: lw a6, 8(a2)
+; RV32ZBB-NEXT: lw a7, 12(a2)
+; RV32ZBB-NEXT: lw a2, 8(a1)
+; RV32ZBB-NEXT: lw a4, 12(a1)
+; RV32ZBB-NEXT: lw t0, 0(a1)
; RV32ZBB-NEXT: lw a1, 4(a1)
-; RV32ZBB-NEXT: sltu a2, a4, a7
-; RV32ZBB-NEXT: sub t1, a6, t1
-; RV32ZBB-NEXT: sltu t2, a3, a5
-; RV32ZBB-NEXT: sub a2, t1, a2
+; RV32ZBB-NEXT: sltu t1, a2, a6
+; RV32ZBB-NEXT: sub a7, a4, a7
+; RV32ZBB-NEXT: sltu t2, t0, a3
+; RV32ZBB-NEXT: sub a7, a7, t1
; RV32ZBB-NEXT: mv t1, t2
-; RV32ZBB-NEXT: beq a1, t0, .LBB27_2
+; RV32ZBB-NEXT: beq a1, a5, .LBB27_2
; RV32ZBB-NEXT: # %bb.1:
-; RV32ZBB-NEXT: sltu t1, a1, t0
+; RV32ZBB-NEXT: sltu t1, a1, a5
; RV32ZBB-NEXT: .LBB27_2:
-; RV32ZBB-NEXT: sub a7, a4, a7
-; RV32ZBB-NEXT: sltu t3, a7, t1
-; RV32ZBB-NEXT: sub a2, a2, t3
-; RV32ZBB-NEXT: sub a7, a7, t1
-; RV32ZBB-NEXT: beq a2, a6, .LBB27_4
+; RV32ZBB-NEXT: sub t3, a2, a6
+; RV32ZBB-NEXT: sltu a6, t3, t1
+; RV32ZBB-NEXT: sub a6, a7, a6
+; RV32ZBB-NEXT: sub a7, t3, t1
+; RV32ZBB-NEXT: beq a6, a4, .LBB27_4
; RV32ZBB-NEXT: # %bb.3:
-; RV32ZBB-NEXT: sltu t1, a6, a2
+; RV32ZBB-NEXT: sltu t1, a4, a6
; RV32ZBB-NEXT: j .LBB27_5
; RV32ZBB-NEXT: .LBB27_4:
-; RV32ZBB-NEXT: sltu t1, a4, a7
+; RV32ZBB-NEXT: sltu t1, a2, a7
; RV32ZBB-NEXT: .LBB27_5:
-; RV32ZBB-NEXT: sub t0, a1, t0
-; RV32ZBB-NEXT: sub t0, t0, t2
-; RV32ZBB-NEXT: sub a5, a3, a5
-; RV32ZBB-NEXT: beq t0, a1, .LBB27_7
+; RV32ZBB-NEXT: sub a5, a1, a5
+; RV32ZBB-NEXT: sub a5, a5, t2
+; RV32ZBB-NEXT: sub a3, t0, a3
+; RV32ZBB-NEXT: beq a5, a1, .LBB27_7
; RV32ZBB-NEXT: # %bb.6:
-; RV32ZBB-NEXT: sltu a1, a1, t0
+; RV32ZBB-NEXT: sltu a1, a1, a5
; RV32ZBB-NEXT: j .LBB27_8
; RV32ZBB-NEXT: .LBB27_7:
-; RV32ZBB-NEXT: sltu a1, a3, a5
+; RV32ZBB-NEXT: sltu a1, t0, a3
; RV32ZBB-NEXT: .LBB27_8:
-; RV32ZBB-NEXT: xor a3, a2, a6
-; RV32ZBB-NEXT: xor a4, a7, a4
-; RV32ZBB-NEXT: or a3, a4, a3
-; RV32ZBB-NEXT: beqz a3, .LBB27_10
+; RV32ZBB-NEXT: xor a4, a6, a4
+; RV32ZBB-NEXT: xor a2, a7, a2
+; RV32ZBB-NEXT: or a2, a2, a4
+; RV32ZBB-NEXT: beqz a2, .LBB27_10
; RV32ZBB-NEXT: # %bb.9:
; RV32ZBB-NEXT: mv a1, t1
; RV32ZBB-NEXT: .LBB27_10:
-; RV32ZBB-NEXT: neg a6, a1
-; RV32ZBB-NEXT: xor a3, a7, a6
-; RV32ZBB-NEXT: sltu a4, a3, a6
-; RV32ZBB-NEXT: xor a2, a2, a6
-; RV32ZBB-NEXT: add a2, a2, a1
-; RV32ZBB-NEXT: sub a4, a2, a4
-; RV32ZBB-NEXT: xor a2, a5, a6
-; RV32ZBB-NEXT: sltu a5, a2, a6
-; RV32ZBB-NEXT: xor a7, t0, a6
-; RV32ZBB-NEXT: mv t1, a5
-; RV32ZBB-NEXT: beqz t0, .LBB27_12
+; RV32ZBB-NEXT: neg t0, a1
+; RV32ZBB-NEXT: xor a2, a7, t0
+; RV32ZBB-NEXT: sltu a4, a2, t0
+; RV32ZBB-NEXT: xor a6, a6, t0
+; RV32ZBB-NEXT: add a6, a6, a1
+; RV32ZBB-NEXT: sub a4, a6, a4
+; RV32ZBB-NEXT: xor a3, a3, t0
+; RV32ZBB-NEXT: sltu a6, a3, t0
+; RV32ZBB-NEXT: xor a7, a5, t0
+; RV32ZBB-NEXT: mv t1, a6
+; RV32ZBB-NEXT: beqz a5, .LBB27_12
; RV32ZBB-NEXT: # %bb.11:
-; RV32ZBB-NEXT: sltu t1, a7, a6
+; RV32ZBB-NEXT: sltu t1, a7, t0
; RV32ZBB-NEXT: .LBB27_12:
-; RV32ZBB-NEXT: add a3, a3, a1
-; RV32ZBB-NEXT: sltu a6, a3, t1
-; RV32ZBB-NEXT: sub a4, a4, a6
-; RV32ZBB-NEXT: sub a3, a3, t1
+; RV32ZBB-NEXT: add a2, a2, a1
+; RV32ZBB-NEXT: sltu a5, a2, t1
+; RV32ZBB-NEXT: sub a4, a4, a5
+; RV32ZBB-NEXT: sub a2, a2, t1
; RV32ZBB-NEXT: add a7, a7, a1
-; RV32ZBB-NEXT: sub a5, a7, a5
-; RV32ZBB-NEXT: add a1, a2, a1
+; RV32ZBB-NEXT: sub a5, a7, a6
+; RV32ZBB-NEXT: add a1, a3, a1
; RV32ZBB-NEXT: sw a1, 0(a0)
; RV32ZBB-NEXT: sw a5, 4(a0)
-; RV32ZBB-NEXT: sw a3, 8(a0)
+; RV32ZBB-NEXT: sw a2, 8(a0)
; RV32ZBB-NEXT: sw a4, 12(a0)
; RV32ZBB-NEXT: ret
;
diff --git a/llvm/test/CodeGen/RISCV/add-before-shl.ll b/llvm/test/CodeGen/RISCV/add-before-shl.ll
index 274f1cef49aa95..823918f1c42e7a 100644
--- a/llvm/test/CodeGen/RISCV/add-before-shl.ll
+++ b/llvm/test/CodeGen/RISCV/add-before-shl.ll
@@ -167,17 +167,17 @@ define i128 @add_wide_operand(i128 %a) nounwind {
; RV32I: # %bb.0:
; RV32I-NEXT: lw a2, 0(a1)
; RV32I-NEXT: lw a3, 4(a1)
-; RV32I-NEXT: lw a4, 12(a1)
-; RV32I-NEXT: lw a1, 8(a1)
+; RV32I-NEXT: lw a4, 8(a1)
+; RV32I-NEXT: lw a1, 12(a1)
; RV32I-NEXT: srli a5, a2, 29
; RV32I-NEXT: slli a6, a3, 3
; RV32I-NEXT: or a5, a6, a5
; RV32I-NEXT: srli a3, a3, 29
-; RV32I-NEXT: slli a6, a1, 3
+; RV32I-NEXT: slli a6, a4, 3
; RV32I-NEXT: or a3, a6, a3
-; RV32I-NEXT: srli a1, a1, 29
-; RV32I-NEXT: slli a4, a4, 3
-; RV32I-NEXT: or a1, a4, a1
+; RV32I-NEXT: srli a4, a4, 29
+; RV32I-NEXT: slli a1, a1, 3
+; RV32I-NEXT: or a1, a1, a4
; RV32I-NEXT: slli a2, a2, 3
; RV32I-NEXT: lui a4, 128
; RV32I-NEXT: add a1, a1, a4
@@ -200,26 +200,26 @@ define i128 @add_wide_operand(i128 %a) nounwind {
;
; RV32C-LABEL: add_wide_operand:
; RV32C: # %bb.0:
-; RV32C-NEXT: lw a6, 4(a1)
-; RV32C-NEXT: c.lw a3, 12(a1)
-; RV32C-NEXT: c.lw a4, 0(a1)
+; RV32C-NEXT: c.lw a2, 12(a1)
+; RV32C-NEXT: lw a6, 0(a1)
+; RV32C-NEXT: c.lw a3, 4(a1)
; RV32C-NEXT: c.lw a1, 8(a1)
; RV32C-NEXT: c.lui a5, 16
-; RV32C-NEXT: c.add a3, a5
-; RV32C-NEXT: c.slli a3, 3
+; RV32C-NEXT: c.add a2, a5
+; RV32C-NEXT: c.slli a2, 3
; RV32C-NEXT: srli a5, a1, 29
-; RV32C-NEXT: c.or a3, a5
-; RV32C-NEXT: srli a5, a4, 29
-; RV32C-NEXT: slli a2, a6, 3
; RV32C-NEXT: c.or a2, a5
; RV32C-NEXT: srli a5, a6, 29
+; RV32C-NEXT: slli a4, a3, 3
+; RV32C-NEXT: c.or a4, a5
+; RV32C-NEXT: c.srli a3, 29
; RV32C-NEXT: c.slli a1, 3
-; RV32C-NEXT: c.or a1, a5
-; RV32C-NEXT: c.slli a4, 3
-; RV32C-NEXT: c.sw a4, 0(a0)
+; RV32C-NEXT: c.or a1, a3
+; RV32C-NEXT: c.slli a6, 3
+; RV32C-NEXT: sw a6, 0(a0)
; RV32C-NEXT: c.sw a1, 8(a0)
-; RV32C-NEXT: c.sw a2, 4(a0)
-; RV32C-NEXT: c.sw a3, 12(a0)
+; RV32C-NEXT: c.sw a4, 4(a0)
+; RV32C-NEXT: c.sw a2, 12(a0)
; RV32C-NEXT: c.jr ra
;
; RV64C-LABEL: add_wide_operand:
diff --git a/llvm/test/CodeGen/RISCV/atomic-rmw-discard.ll b/llvm/test/CodeGen/RISCV/atomic-rmw-discard.ll
index 8d3fc96109262e..35a1227b86b3a6 100644
--- a/llvm/test/CodeGen/RISCV/atomic-rmw-discard.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-rmw-discard.ll
@@ -192,37 +192,37 @@ define void @amomax_d_discard(ptr %a, i64 %b) nounwind {
; RV32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32-NEXT: mv s0, a0
-; RV32-NEXT: lw a4, 4(a0)
-; RV32-NEXT: lw a5, 0(a0)
+; RV32-NEXT: lw a4, 0(a0)
+; RV32-NEXT: lw a5, 4(a0)
; RV32-NEXT: mv s1, a2
; RV32-NEXT: mv s2, a1
; RV32-NEXT: j .LBB11_2
; RV32-NEXT: .LBB11_1: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB11_2 Depth=1
-; RV32-NEXT: sw a5, 8(sp)
-; RV32-NEXT: sw a4, 12(sp)
+; RV32-NEXT: sw a4, 8(sp)
+; RV32-NEXT: sw a5, 12(sp)
; RV32-NEXT: addi a1, sp, 8
; RV32-NEXT: li a4, 5
; RV32-NEXT: li a5, 5
; RV32-NEXT: mv a0, s0
; RV32-NEXT: call __atomic_compare_exchange_8
-; RV32-NEXT: lw a4, 12(sp)
-; RV32-NEXT: lw a5, 8(sp)
+; RV32-NEXT: lw a4, 8(sp)
+; RV32-NEXT: lw a5, 12(sp)
; RV32-NEXT: bnez a0, .LBB11_6
; RV32-NEXT: .LBB11_2: # %atomicrmw.start
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32-NEXT: beq a4, s1, .LBB11_4
+; RV32-NEXT: beq a5, s1, .LBB11_4
; RV32-NEXT: # %bb.3: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB11_2 Depth=1
-; RV32-NEXT: slt a0, s1, a4
-; RV32-NEXT: mv a2, a5
-; RV32-NEXT: mv a3, a4
+; RV32-NEXT: slt a0, s1, a5
+; RV32-NEXT: mv a2, a4
+; RV32-NEXT: mv a3, a5
; RV32-NEXT: bnez a0, .LBB11_1
; RV32-NEXT: j .LBB11_5
; RV32-NEXT: .LBB11_4: # in Loop: Header=BB11_2 Depth=1
-; RV32-NEXT: sltu a0, s2, a5
-; RV32-NEXT: mv a2, a5
-; RV32-NEXT: mv a3, a4
+; RV32-NEXT: sltu a0, s2, a4
+; RV32-NEXT: mv a2, a4
+; RV32-NEXT: mv a3, a5
; RV32-NEXT: bnez a0, .LBB11_1
; RV32-NEXT: .LBB11_5: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB11_2 Depth=1
@@ -268,37 +268,37 @@ define void @amomaxu_d_discard(ptr %a, i64 %b) nounwind {
; RV32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32-NEXT: mv s0, a0
-; RV32-NEXT: lw a4, 4(a0)
-; RV32-NEXT: lw a5, 0(a0)
+; RV32-NEXT: lw a4, 0(a0)
+; RV32-NEXT: lw a5, 4(a0)
; RV32-NEXT: mv s1, a2
; RV32-NEXT: mv s2, a1
; RV32-NEXT: j .LBB13_2
; RV32-NEXT: .LBB13_1: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB13_2 Depth=1
-; RV32-NEXT: sw a5, 8(sp)
-; RV32-NEXT: sw a4, 12(sp)
+; RV32-NEXT: sw a4, 8(sp)
+; RV32-NEXT: sw a5, 12(sp)
; RV32-NEXT: addi a1, sp, 8
; RV32-NEXT: li a4, 5
; RV32-NEXT: li a5, 5
; RV32-NEXT: mv a0, s0
; RV32-NEXT: call __atomic_compare_exchange_8
-; RV32-NEXT: lw a4, 12(sp)
-; RV32-NEXT: lw a5, 8(sp)
+; RV32-NEXT: lw a4, 8(sp)
+; RV32-NEXT: lw a5, 12(sp)
; RV32-NEXT: bnez a0, .LBB13_6
; RV32-NEXT: .LBB13_2: # %atomicrmw.start
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32-NEXT: beq a4, s1, .LBB13_4
+; RV32-NEXT: beq a5, s1, .LBB13_4
; RV32-NEXT: # %bb.3: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB13_2 Depth=1
-; RV32-NEXT: sltu a0, s1, a4
-; RV32-NEXT: mv a2, a5
-; RV32-NEXT: mv a3, a4
+; RV32-NEXT: sltu a0, s1, a5
+; RV32-NEXT: mv a2, a4
+; RV32-NEXT: mv a3, a5
; RV32-NEXT: bnez a0, .LBB13_1
; RV32-NEXT: j .LBB13_5
; RV32-NEXT: .LBB13_4: # in Loop: Header=BB13_2 Depth=1
-; RV32-NEXT: sltu a0, s2, a5
-; RV32-NEXT: mv a2, a5
-; RV32-NEXT: mv a3, a4
+; RV32-NEXT: sltu a0, s2, a4
+; RV32-NEXT: mv a2, a4
+; RV32-NEXT: mv a3, a5
; RV32-NEXT: bnez a0, .LBB13_1
; RV32-NEXT: .LBB13_5: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB13_2 Depth=1
@@ -344,37 +344,37 @@ define void @amomin_d_discard(ptr %a, i64 %b) nounwind {
; RV32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32-NEXT: mv s0, a0
-; RV32-NEXT: lw a4, 4(a0)
-; RV32-NEXT: lw a5, 0(a0)
+; RV32-NEXT: lw a4, 0(a0)
+; RV32-NEXT: lw a5, 4(a0)
; RV32-NEXT: mv s1, a2
; RV32-NEXT: mv s2, a1
; RV32-NEXT: j .LBB15_2
; RV32-NEXT: .LBB15_1: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB15_2 Depth=1
-; RV32-NEXT: sw a5, 8(sp)
-; RV32-NEXT: sw a4, 12(sp)
+; RV32-NEXT: sw a4, 8(sp)
+; RV32-NEXT: sw a5, 12(sp)
; RV32-NEXT: addi a1, sp, 8
; RV32-NEXT: li a4, 5
; RV32-NEXT: li a5, 5
; RV32-NEXT: mv a0, s0
; RV32-NEXT: call __atomic_compare_exchange_8
-; RV32-NEXT: lw a4, 12(sp)
-; RV32-NEXT: lw a5, 8(sp)
+; RV32-NEXT: lw a4, 8(sp)
+; RV32-NEXT: lw a5, 12(sp)
; RV32-NEXT: bnez a0, .LBB15_6
; RV32-NEXT: .LBB15_2: # %atomicrmw.start
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32-NEXT: beq a4, s1, .LBB15_4
+; RV32-NEXT: beq a5, s1, .LBB15_4
; RV32-NEXT: # %bb.3: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB15_2 Depth=1
-; RV32-NEXT: slt a0, s1, a4
-; RV32-NEXT: mv a2, a5
-; RV32-NEXT: mv a3, a4
+; RV32-NEXT: slt a0, s1, a5
+; RV32-NEXT: mv a2, a4
+; RV32-NEXT: mv a3, a5
; RV32-NEXT: beqz a0, .LBB15_1
; RV32-NEXT: j .LBB15_5
; RV32-NEXT: .LBB15_4: # in Loop: Header=BB15_2 Depth=1
-; RV32-NEXT: sltu a0, s2, a5
-; RV32-NEXT: mv a2, a5
-; RV32-NEXT: mv a3, a4
+; RV32-NEXT: sltu a0, s2, a4
+; RV32-NEXT: mv a2, a4
+; RV32-NEXT: mv a3, a5
; RV32-NEXT: beqz a0, .LBB15_1
; RV32-NEXT: .LBB15_5: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB15_2 Depth=1
@@ -420,37 +420,37 @@ define void @amominu_d_discard(ptr %a, i64 %b) nounwind {
; RV32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32-NEXT: mv s0, a0
-; RV32-NEXT: lw a4, 4(a0)
-; RV32-NEXT: lw a5, 0(a0)
+; RV32-NEXT: lw a4, 0(a0)
+; RV32-NEXT: lw a5, 4(a0)
; RV32-NEXT: mv s1, a2
; RV32-NEXT: mv s2, a1
; RV32-NEXT: j .LBB17_2
; RV32-NEXT: .LBB17_1: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB17_2 Depth=1
-; RV32-NEXT: sw a5, 8(sp)
-; RV32-NEXT: sw a4, 12(sp)
+; RV32-NEXT: sw a4, 8(sp)
+; RV32-NEXT: sw a5, 12(sp)
; RV32-NEXT: addi a1, sp, 8
; RV32-NEXT: li a4, 5
; RV32-NEXT: li a5, 5
; RV32-NEXT: mv a0, s0
; RV32-NEXT: call __atomic_compare_exchange_8
-; RV32-NEXT: lw a4, 12(sp)
-; RV32-NEXT: lw a5, 8(sp)
+; RV32-NEXT: lw a4, 8(sp)
+; RV32-NEXT: lw a5, 12(sp)
; RV32-NEXT: bnez a0, .LBB17_6
; RV32-NEXT: .LBB17_2: # %atomicrmw.start
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32-NEXT: beq a4, s1, .LBB17_4
+; RV32-NEXT: beq a5, s1, .LBB17_4
; RV32-NEXT: # %bb.3: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB17_2 Depth=1
-; RV32-NEXT: sltu a0, s1, a4
-; RV32-NEXT: mv a2, a5
-; RV32-NEXT: mv a3, a4
+; RV32-NEXT: sltu a0, s1, a5
+; RV32-NEXT: mv a2, a4
+; RV32-NEXT: mv a3, a5
; RV32-NEXT: beqz a0, .LBB17_1
; RV32-NEXT: j .LBB17_5
; RV32-NEXT: .LBB17_4: # in Loop: Header=BB17_2 Depth=1
-; RV32-NEXT: sltu a0, s2, a5
-; RV32-NEXT: mv a2, a5
-; RV32-NEXT: mv a3, a4
+; RV32-NEXT: sltu a0, s2, a4
+; RV32-NEXT: mv a2, a4
+; RV32-NEXT: mv a3, a5
; RV32-NEXT: beqz a0, .LBB17_1
; RV32-NEXT: .LBB17_5: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB17_2 Depth=1
diff --git a/llvm/test/CodeGen/RISCV/atomic-rmw.ll b/llvm/test/CodeGen/RISCV/atomic-rmw.ll
index f50744fc3c1f32..469edacb391df6 100644
--- a/llvm/test/CodeGen/RISCV/atomic-rmw.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-rmw.ll
@@ -26073,36 +26073,36 @@ define i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lw a5, 4(a0)
-; RV32I-NEXT: lw a4, 0(a0)
+; RV32I-NEXT: lw a5, 0(a0)
+; RV32I-NEXT: lw a4, 4(a0)
; RV32I-NEXT: mv s1, a2
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: j .LBB220_2
; RV32I-NEXT: .LBB220_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB220_2 Depth=1
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: sw a5, 12(sp)
+; RV32I-NEXT: sw a5, 8(sp)
+; RV32I-NEXT: sw a4, 12(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: li a5, 0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 12(sp)
-; RV32I-NEXT: lw a4, 8(sp)
+; RV32I-NEXT: lw a5, 8(sp)
+; RV32I-NEXT: lw a4, 12(sp)
; RV32I-NEXT: bnez a0, .LBB220_7
; RV32I-NEXT: .LBB220_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT: beq a5, s1, .LBB220_4
+; RV32I-NEXT: beq a4, s1, .LBB220_4
; RV32I-NEXT: # %bb.3: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB220_2 Depth=1
-; RV32I-NEXT: slt a0, s1, a5
+; RV32I-NEXT: slt a0, s1, a4
; RV32I-NEXT: j .LBB220_5
; RV32I-NEXT: .LBB220_4: # in Loop: Header=BB220_2 Depth=1
-; RV32I-NEXT: sltu a0, s2, a4
+; RV32I-NEXT: sltu a0, s2, a5
; RV32I-NEXT: .LBB220_5: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB220_2 Depth=1
-; RV32I-NEXT: mv a2, a4
-; RV32I-NEXT: mv a3, a5
+; RV32I-NEXT: mv a2, a5
+; RV32I-NEXT: mv a3, a4
; RV32I-NEXT: bnez a0, .LBB220_1
; RV32I-NEXT: # %bb.6: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB220_2 Depth=1
@@ -26110,8 +26110,8 @@ define i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: mv a3, s1
; RV32I-NEXT: j .LBB220_1
; RV32I-NEXT: .LBB220_7: # %atomicrmw.end
-; RV32I-NEXT: mv a0, a4
-; RV32I-NEXT: mv a1, a5
+; RV32I-NEXT: mv a0, a5
+; RV32I-NEXT: mv a1, a4
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -26127,36 +26127,36 @@ define i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv s0, a0
-; RV32IA-NEXT: lw a5, 4(a0)
-; RV32IA-NEXT: lw a4, 0(a0)
+; RV32IA-NEXT: lw a5, 0(a0)
+; RV32IA-NEXT: lw a4, 4(a0)
; RV32IA-NEXT: mv s1, a2
; RV32IA-NEXT: mv s2, a1
; RV32IA-NEXT: j .LBB220_2
; RV32IA-NEXT: .LBB220_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB220_2 Depth=1
-; RV32IA-NEXT: sw a4, 8(sp)
-; RV32IA-NEXT: sw a5, 12(sp)
+; RV32IA-NEXT: sw a5, 8(sp)
+; RV32IA-NEXT: sw a4, 12(sp)
; RV32IA-NEXT: addi a1, sp, 8
; RV32IA-NEXT: mv a0, s0
; RV32IA-NEXT: li a4, 0
; RV32IA-NEXT: li a5, 0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 12(sp)
-; RV32IA-NEXT: lw a4, 8(sp)
+; RV32IA-NEXT: lw a5, 8(sp)
+; RV32IA-NEXT: lw a4, 12(sp)
; RV32IA-NEXT: bnez a0, .LBB220_7
; RV32IA-NEXT: .LBB220_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: beq a5, s1, .LBB220_4
+; RV32IA-NEXT: beq a4, s1, .LBB220_4
; RV32IA-NEXT: # %bb.3: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB220_2 Depth=1
-; RV32IA-NEXT: slt a0, s1, a5
+; RV32IA-NEXT: slt a0, s1, a4
; RV32IA-NEXT: j .LBB220_5
; RV32IA-NEXT: .LBB220_4: # in Loop: Header=BB220_2 Depth=1
-; RV32IA-NEXT: sltu a0, s2, a4
+; RV32IA-NEXT: sltu a0, s2, a5
; RV32IA-NEXT: .LBB220_5: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB220_2 Depth=1
-; RV32IA-NEXT: mv a2, a4
-; RV32IA-NEXT: mv a3, a5
+; RV32IA-NEXT: mv a2, a5
+; RV32IA-NEXT: mv a3, a4
; RV32IA-NEXT: bnez a0, .LBB220_1
; RV32IA-NEXT: # %bb.6: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB220_2 Depth=1
@@ -26164,8 +26164,8 @@ define i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: mv a3, s1
; RV32IA-NEXT: j .LBB220_1
; RV32IA-NEXT: .LBB220_7: # %atomicrmw.end
-; RV32IA-NEXT: mv a0, a4
-; RV32IA-NEXT: mv a1, a5
+; RV32IA-NEXT: mv a0, a5
+; RV32IA-NEXT: mv a1, a4
; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -26226,36 +26226,36 @@ define i64 @atomicrmw_max_i64_acquire(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lw a5, 4(a0)
-; RV32I-NEXT: lw a4, 0(a0)
+; RV32I-NEXT: lw a5, 0(a0)
+; RV32I-NEXT: lw a4, 4(a0)
; RV32I-NEXT: mv s1, a2
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: j .LBB221_2
; RV32I-NEXT: .LBB221_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB221_2 Depth=1
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: sw a5, 12(sp)
+; RV32I-NEXT: sw a5, 8(sp)
+; RV32I-NEXT: sw a4, 12(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: li a5, 2
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 12(sp)
-; RV32I-NEXT: lw a4, 8(sp)
+; RV32I-NEXT: lw a5, 8(sp)
+; RV32I-NEXT: lw a4, 12(sp)
; RV32I-NEXT: bnez a0, .LBB221_7
; RV32I-NEXT: .LBB221_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT: beq a5, s1, .LBB221_4
+; RV32I-NEXT: beq a4, s1, .LBB221_4
; RV32I-NEXT: # %bb.3: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB221_2 Depth=1
-; RV32I-NEXT: slt a0, s1, a5
+; RV32I-NEXT: slt a0, s1, a4
; RV32I-NEXT: j .LBB221_5
; RV32I-NEXT: .LBB221_4: # in Loop: Header=BB221_2 Depth=1
-; RV32I-NEXT: sltu a0, s2, a4
+; RV32I-NEXT: sltu a0, s2, a5
; RV32I-NEXT: .LBB221_5: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB221_2 Depth=1
-; RV32I-NEXT: mv a2, a4
-; RV32I-NEXT: mv a3, a5
+; RV32I-NEXT: mv a2, a5
+; RV32I-NEXT: mv a3, a4
; RV32I-NEXT: bnez a0, .LBB221_1
; RV32I-NEXT: # %bb.6: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB221_2 Depth=1
@@ -26263,8 +26263,8 @@ define i64 @atomicrmw_max_i64_acquire(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: mv a3, s1
; RV32I-NEXT: j .LBB221_1
; RV32I-NEXT: .LBB221_7: # %atomicrmw.end
-; RV32I-NEXT: mv a0, a4
-; RV32I-NEXT: mv a1, a5
+; RV32I-NEXT: mv a0, a5
+; RV32I-NEXT: mv a1, a4
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -26280,36 +26280,36 @@ define i64 @atomicrmw_max_i64_acquire(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv s0, a0
-; RV32IA-NEXT: lw a5, 4(a0)
-; RV32IA-NEXT: lw a4, 0(a0)
+; RV32IA-NEXT: lw a5, 0(a0)
+; RV32IA-NEXT: lw a4, 4(a0)
; RV32IA-NEXT: mv s1, a2
; RV32IA-NEXT: mv s2, a1
; RV32IA-NEXT: j .LBB221_2
; RV32IA-NEXT: .LBB221_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB221_2 Depth=1
-; RV32IA-NEXT: sw a4, 8(sp)
-; RV32IA-NEXT: sw a5, 12(sp)
+; RV32IA-NEXT: sw a5, 8(sp)
+; RV32IA-NEXT: sw a4, 12(sp)
; RV32IA-NEXT: addi a1, sp, 8
; RV32IA-NEXT: li a4, 2
; RV32IA-NEXT: li a5, 2
; RV32IA-NEXT: mv a0, s0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 12(sp)
-; RV32IA-NEXT: lw a4, 8(sp)
+; RV32IA-NEXT: lw a5, 8(sp)
+; RV32IA-NEXT: lw a4, 12(sp)
; RV32IA-NEXT: bnez a0, .LBB221_7
; RV32IA-NEXT: .LBB221_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: beq a5, s1, .LBB221_4
+; RV32IA-NEXT: beq a4, s1, .LBB221_4
; RV32IA-NEXT: # %bb.3: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB221_2 Depth=1
-; RV32IA-NEXT: slt a0, s1, a5
+; RV32IA-NEXT: slt a0, s1, a4
; RV32IA-NEXT: j .LBB221_5
; RV32IA-NEXT: .LBB221_4: # in Loop: Header=BB221_2 Depth=1
-; RV32IA-NEXT: sltu a0, s2, a4
+; RV32IA-NEXT: sltu a0, s2, a5
; RV32IA-NEXT: .LBB221_5: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB221_2 Depth=1
-; RV32IA-NEXT: mv a2, a4
-; RV32IA-NEXT: mv a3, a5
+; RV32IA-NEXT: mv a2, a5
+; RV32IA-NEXT: mv a3, a4
; RV32IA-NEXT: bnez a0, .LBB221_1
; RV32IA-NEXT: # %bb.6: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB221_2 Depth=1
@@ -26317,8 +26317,8 @@ define i64 @atomicrmw_max_i64_acquire(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: mv a3, s1
; RV32IA-NEXT: j .LBB221_1
; RV32IA-NEXT: .LBB221_7: # %atomicrmw.end
-; RV32IA-NEXT: mv a0, a4
-; RV32IA-NEXT: mv a1, a5
+; RV32IA-NEXT: mv a0, a5
+; RV32IA-NEXT: mv a1, a4
; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -26384,36 +26384,36 @@ define i64 @atomicrmw_max_i64_release(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lw a5, 4(a0)
-; RV32I-NEXT: lw a4, 0(a0)
+; RV32I-NEXT: lw a5, 0(a0)
+; RV32I-NEXT: lw a4, 4(a0)
; RV32I-NEXT: mv s1, a2
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: j .LBB222_2
; RV32I-NEXT: .LBB222_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB222_2 Depth=1
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: sw a5, 12(sp)
+; RV32I-NEXT: sw a5, 8(sp)
+; RV32I-NEXT: sw a4, 12(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a4, 3
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a5, 0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 12(sp)
-; RV32I-NEXT: lw a4, 8(sp)
+; RV32I-NEXT: lw a5, 8(sp)
+; RV32I-NEXT: lw a4, 12(sp)
; RV32I-NEXT: bnez a0, .LBB222_7
; RV32I-NEXT: .LBB222_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT: beq a5, s1, .LBB222_4
+; RV32I-NEXT: beq a4, s1, .LBB222_4
; RV32I-NEXT: # %bb.3: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB222_2 Depth=1
-; RV32I-NEXT: slt a0, s1, a5
+; RV32I-NEXT: slt a0, s1, a4
; RV32I-NEXT: j .LBB222_5
; RV32I-NEXT: .LBB222_4: # in Loop: Header=BB222_2 Depth=1
-; RV32I-NEXT: sltu a0, s2, a4
+; RV32I-NEXT: sltu a0, s2, a5
; RV32I-NEXT: .LBB222_5: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB222_2 Depth=1
-; RV32I-NEXT: mv a2, a4
-; RV32I-NEXT: mv a3, a5
+; RV32I-NEXT: mv a2, a5
+; RV32I-NEXT: mv a3, a4
; RV32I-NEXT: bnez a0, .LBB222_1
; RV32I-NEXT: # %bb.6: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB222_2 Depth=1
@@ -26421,8 +26421,8 @@ define i64 @atomicrmw_max_i64_release(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: mv a3, s1
; RV32I-NEXT: j .LBB222_1
; RV32I-NEXT: .LBB222_7: # %atomicrmw.end
-; RV32I-NEXT: mv a0, a4
-; RV32I-NEXT: mv a1, a5
+; RV32I-NEXT: mv a0, a5
+; RV32I-NEXT: mv a1, a4
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -26438,36 +26438,36 @@ define i64 @atomicrmw_max_i64_release(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv s0, a0
-; RV32IA-NEXT: lw a5, 4(a0)
-; RV32IA-NEXT: lw a4, 0(a0)
+; RV32IA-NEXT: lw a5, 0(a0)
+; RV32IA-NEXT: lw a4, 4(a0)
; RV32IA-NEXT: mv s1, a2
; RV32IA-NEXT: mv s2, a1
; RV32IA-NEXT: j .LBB222_2
; RV32IA-NEXT: .LBB222_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB222_2 Depth=1
-; RV32IA-NEXT: sw a4, 8(sp)
-; RV32IA-NEXT: sw a5, 12(sp)
+; RV32IA-NEXT: sw a5, 8(sp)
+; RV32IA-NEXT: sw a4, 12(sp)
; RV32IA-NEXT: addi a1, sp, 8
; RV32IA-NEXT: li a4, 3
; RV32IA-NEXT: mv a0, s0
; RV32IA-NEXT: li a5, 0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 12(sp)
-; RV32IA-NEXT: lw a4, 8(sp)
+; RV32IA-NEXT: lw a5, 8(sp)
+; RV32IA-NEXT: lw a4, 12(sp)
; RV32IA-NEXT: bnez a0, .LBB222_7
; RV32IA-NEXT: .LBB222_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: beq a5, s1, .LBB222_4
+; RV32IA-NEXT: beq a4, s1, .LBB222_4
; RV32IA-NEXT: # %bb.3: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB222_2 Depth=1
-; RV32IA-NEXT: slt a0, s1, a5
+; RV32IA-NEXT: slt a0, s1, a4
; RV32IA-NEXT: j .LBB222_5
; RV32IA-NEXT: .LBB222_4: # in Loop: Header=BB222_2 Depth=1
-; RV32IA-NEXT: sltu a0, s2, a4
+; RV32IA-NEXT: sltu a0, s2, a5
; RV32IA-NEXT: .LBB222_5: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB222_2 Depth=1
-; RV32IA-NEXT: mv a2, a4
-; RV32IA-NEXT: mv a3, a5
+; RV32IA-NEXT: mv a2, a5
+; RV32IA-NEXT: mv a3, a4
; RV32IA-NEXT: bnez a0, .LBB222_1
; RV32IA-NEXT: # %bb.6: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB222_2 Depth=1
@@ -26475,8 +26475,8 @@ define i64 @atomicrmw_max_i64_release(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: mv a3, s1
; RV32IA-NEXT: j .LBB222_1
; RV32IA-NEXT: .LBB222_7: # %atomicrmw.end
-; RV32IA-NEXT: mv a0, a4
-; RV32IA-NEXT: mv a1, a5
+; RV32IA-NEXT: mv a0, a5
+; RV32IA-NEXT: mv a1, a4
; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -26542,36 +26542,36 @@ define i64 @atomicrmw_max_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lw a5, 4(a0)
-; RV32I-NEXT: lw a4, 0(a0)
+; RV32I-NEXT: lw a5, 0(a0)
+; RV32I-NEXT: lw a4, 4(a0)
; RV32I-NEXT: mv s1, a2
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: j .LBB223_2
; RV32I-NEXT: .LBB223_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB223_2 Depth=1
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: sw a5, 12(sp)
+; RV32I-NEXT: sw a5, 8(sp)
+; RV32I-NEXT: sw a4, 12(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a4, 4
; RV32I-NEXT: li a5, 2
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 12(sp)
-; RV32I-NEXT: lw a4, 8(sp)
+; RV32I-NEXT: lw a5, 8(sp)
+; RV32I-NEXT: lw a4, 12(sp)
; RV32I-NEXT: bnez a0, .LBB223_7
; RV32I-NEXT: .LBB223_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT: beq a5, s1, .LBB223_4
+; RV32I-NEXT: beq a4, s1, .LBB223_4
; RV32I-NEXT: # %bb.3: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB223_2 Depth=1
-; RV32I-NEXT: slt a0, s1, a5
+; RV32I-NEXT: slt a0, s1, a4
; RV32I-NEXT: j .LBB223_5
; RV32I-NEXT: .LBB223_4: # in Loop: Header=BB223_2 Depth=1
-; RV32I-NEXT: sltu a0, s2, a4
+; RV32I-NEXT: sltu a0, s2, a5
; RV32I-NEXT: .LBB223_5: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB223_2 Depth=1
-; RV32I-NEXT: mv a2, a4
-; RV32I-NEXT: mv a3, a5
+; RV32I-NEXT: mv a2, a5
+; RV32I-NEXT: mv a3, a4
; RV32I-NEXT: bnez a0, .LBB223_1
; RV32I-NEXT: # %bb.6: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB223_2 Depth=1
@@ -26579,8 +26579,8 @@ define i64 @atomicrmw_max_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: mv a3, s1
; RV32I-NEXT: j .LBB223_1
; RV32I-NEXT: .LBB223_7: # %atomicrmw.end
-; RV32I-NEXT: mv a0, a4
-; RV32I-NEXT: mv a1, a5
+; RV32I-NEXT: mv a0, a5
+; RV32I-NEXT: mv a1, a4
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -26596,36 +26596,36 @@ define i64 @atomicrmw_max_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv s0, a0
-; RV32IA-NEXT: lw a5, 4(a0)
-; RV32IA-NEXT: lw a4, 0(a0)
+; RV32IA-NEXT: lw a5, 0(a0)
+; RV32IA-NEXT: lw a4, 4(a0)
; RV32IA-NEXT: mv s1, a2
; RV32IA-NEXT: mv s2, a1
; RV32IA-NEXT: j .LBB223_2
; RV32IA-NEXT: .LBB223_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB223_2 Depth=1
-; RV32IA-NEXT: sw a4, 8(sp)
-; RV32IA-NEXT: sw a5, 12(sp)
+; RV32IA-NEXT: sw a5, 8(sp)
+; RV32IA-NEXT: sw a4, 12(sp)
; RV32IA-NEXT: addi a1, sp, 8
; RV32IA-NEXT: li a4, 4
; RV32IA-NEXT: li a5, 2
; RV32IA-NEXT: mv a0, s0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 12(sp)
-; RV32IA-NEXT: lw a4, 8(sp)
+; RV32IA-NEXT: lw a5, 8(sp)
+; RV32IA-NEXT: lw a4, 12(sp)
; RV32IA-NEXT: bnez a0, .LBB223_7
; RV32IA-NEXT: .LBB223_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: beq a5, s1, .LBB223_4
+; RV32IA-NEXT: beq a4, s1, .LBB223_4
; RV32IA-NEXT: # %bb.3: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB223_2 Depth=1
-; RV32IA-NEXT: slt a0, s1, a5
+; RV32IA-NEXT: slt a0, s1, a4
; RV32IA-NEXT: j .LBB223_5
; RV32IA-NEXT: .LBB223_4: # in Loop: Header=BB223_2 Depth=1
-; RV32IA-NEXT: sltu a0, s2, a4
+; RV32IA-NEXT: sltu a0, s2, a5
; RV32IA-NEXT: .LBB223_5: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB223_2 Depth=1
-; RV32IA-NEXT: mv a2, a4
-; RV32IA-NEXT: mv a3, a5
+; RV32IA-NEXT: mv a2, a5
+; RV32IA-NEXT: mv a3, a4
; RV32IA-NEXT: bnez a0, .LBB223_1
; RV32IA-NEXT: # %bb.6: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB223_2 Depth=1
@@ -26633,8 +26633,8 @@ define i64 @atomicrmw_max_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: mv a3, s1
; RV32IA-NEXT: j .LBB223_1
; RV32IA-NEXT: .LBB223_7: # %atomicrmw.end
-; RV32IA-NEXT: mv a0, a4
-; RV32IA-NEXT: mv a1, a5
+; RV32IA-NEXT: mv a0, a5
+; RV32IA-NEXT: mv a1, a4
; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -26700,36 +26700,36 @@ define i64 @atomicrmw_max_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lw a5, 4(a0)
-; RV32I-NEXT: lw a4, 0(a0)
+; RV32I-NEXT: lw a5, 0(a0)
+; RV32I-NEXT: lw a4, 4(a0)
; RV32I-NEXT: mv s1, a2
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: j .LBB224_2
; RV32I-NEXT: .LBB224_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB224_2 Depth=1
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: sw a5, 12(sp)
+; RV32I-NEXT: sw a5, 8(sp)
+; RV32I-NEXT: sw a4, 12(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: li a5, 5
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 12(sp)
-; RV32I-NEXT: lw a4, 8(sp)
+; RV32I-NEXT: lw a5, 8(sp)
+; RV32I-NEXT: lw a4, 12(sp)
; RV32I-NEXT: bnez a0, .LBB224_7
; RV32I-NEXT: .LBB224_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT: beq a5, s1, .LBB224_4
+; RV32I-NEXT: beq a4, s1, .LBB224_4
; RV32I-NEXT: # %bb.3: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB224_2 Depth=1
-; RV32I-NEXT: slt a0, s1, a5
+; RV32I-NEXT: slt a0, s1, a4
; RV32I-NEXT: j .LBB224_5
; RV32I-NEXT: .LBB224_4: # in Loop: Header=BB224_2 Depth=1
-; RV32I-NEXT: sltu a0, s2, a4
+; RV32I-NEXT: sltu a0, s2, a5
; RV32I-NEXT: .LBB224_5: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB224_2 Depth=1
-; RV32I-NEXT: mv a2, a4
-; RV32I-NEXT: mv a3, a5
+; RV32I-NEXT: mv a2, a5
+; RV32I-NEXT: mv a3, a4
; RV32I-NEXT: bnez a0, .LBB224_1
; RV32I-NEXT: # %bb.6: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB224_2 Depth=1
@@ -26737,8 +26737,8 @@ define i64 @atomicrmw_max_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: mv a3, s1
; RV32I-NEXT: j .LBB224_1
; RV32I-NEXT: .LBB224_7: # %atomicrmw.end
-; RV32I-NEXT: mv a0, a4
-; RV32I-NEXT: mv a1, a5
+; RV32I-NEXT: mv a0, a5
+; RV32I-NEXT: mv a1, a4
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -26754,36 +26754,36 @@ define i64 @atomicrmw_max_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv s0, a0
-; RV32IA-NEXT: lw a5, 4(a0)
-; RV32IA-NEXT: lw a4, 0(a0)
+; RV32IA-NEXT: lw a5, 0(a0)
+; RV32IA-NEXT: lw a4, 4(a0)
; RV32IA-NEXT: mv s1, a2
; RV32IA-NEXT: mv s2, a1
; RV32IA-NEXT: j .LBB224_2
; RV32IA-NEXT: .LBB224_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB224_2 Depth=1
-; RV32IA-NEXT: sw a4, 8(sp)
-; RV32IA-NEXT: sw a5, 12(sp)
+; RV32IA-NEXT: sw a5, 8(sp)
+; RV32IA-NEXT: sw a4, 12(sp)
; RV32IA-NEXT: addi a1, sp, 8
; RV32IA-NEXT: li a4, 5
; RV32IA-NEXT: li a5, 5
; RV32IA-NEXT: mv a0, s0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 12(sp)
-; RV32IA-NEXT: lw a4, 8(sp)
+; RV32IA-NEXT: lw a5, 8(sp)
+; RV32IA-NEXT: lw a4, 12(sp)
; RV32IA-NEXT: bnez a0, .LBB224_7
; RV32IA-NEXT: .LBB224_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: beq a5, s1, .LBB224_4
+; RV32IA-NEXT: beq a4, s1, .LBB224_4
; RV32IA-NEXT: # %bb.3: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB224_2 Depth=1
-; RV32IA-NEXT: slt a0, s1, a5
+; RV32IA-NEXT: slt a0, s1, a4
; RV32IA-NEXT: j .LBB224_5
; RV32IA-NEXT: .LBB224_4: # in Loop: Header=BB224_2 Depth=1
-; RV32IA-NEXT: sltu a0, s2, a4
+; RV32IA-NEXT: sltu a0, s2, a5
; RV32IA-NEXT: .LBB224_5: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB224_2 Depth=1
-; RV32IA-NEXT: mv a2, a4
-; RV32IA-NEXT: mv a3, a5
+; RV32IA-NEXT: mv a2, a5
+; RV32IA-NEXT: mv a3, a4
; RV32IA-NEXT: bnez a0, .LBB224_1
; RV32IA-NEXT: # %bb.6: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB224_2 Depth=1
@@ -26791,8 +26791,8 @@ define i64 @atomicrmw_max_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: mv a3, s1
; RV32IA-NEXT: j .LBB224_1
; RV32IA-NEXT: .LBB224_7: # %atomicrmw.end
-; RV32IA-NEXT: mv a0, a4
-; RV32IA-NEXT: mv a1, a5
+; RV32IA-NEXT: mv a0, a5
+; RV32IA-NEXT: mv a1, a4
; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -26858,36 +26858,36 @@ define i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lw a5, 4(a0)
-; RV32I-NEXT: lw a4, 0(a0)
+; RV32I-NEXT: lw a5, 0(a0)
+; RV32I-NEXT: lw a4, 4(a0)
; RV32I-NEXT: mv s1, a2
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: j .LBB225_2
; RV32I-NEXT: .LBB225_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB225_2 Depth=1
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: sw a5, 12(sp)
+; RV32I-NEXT: sw a5, 8(sp)
+; RV32I-NEXT: sw a4, 12(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: li a5, 0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 12(sp)
-; RV32I-NEXT: lw a4, 8(sp)
+; RV32I-NEXT: lw a5, 8(sp)
+; RV32I-NEXT: lw a4, 12(sp)
; RV32I-NEXT: bnez a0, .LBB225_7
; RV32I-NEXT: .LBB225_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT: beq a5, s1, .LBB225_4
+; RV32I-NEXT: beq a4, s1, .LBB225_4
; RV32I-NEXT: # %bb.3: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB225_2 Depth=1
-; RV32I-NEXT: slt a0, s1, a5
+; RV32I-NEXT: slt a0, s1, a4
; RV32I-NEXT: j .LBB225_5
; RV32I-NEXT: .LBB225_4: # in Loop: Header=BB225_2 Depth=1
-; RV32I-NEXT: sltu a0, s2, a4
+; RV32I-NEXT: sltu a0, s2, a5
; RV32I-NEXT: .LBB225_5: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB225_2 Depth=1
-; RV32I-NEXT: mv a2, a4
-; RV32I-NEXT: mv a3, a5
+; RV32I-NEXT: mv a2, a5
+; RV32I-NEXT: mv a3, a4
; RV32I-NEXT: beqz a0, .LBB225_1
; RV32I-NEXT: # %bb.6: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB225_2 Depth=1
@@ -26895,8 +26895,8 @@ define i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: mv a3, s1
; RV32I-NEXT: j .LBB225_1
; RV32I-NEXT: .LBB225_7: # %atomicrmw.end
-; RV32I-NEXT: mv a0, a4
-; RV32I-NEXT: mv a1, a5
+; RV32I-NEXT: mv a0, a5
+; RV32I-NEXT: mv a1, a4
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -26912,36 +26912,36 @@ define i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv s0, a0
-; RV32IA-NEXT: lw a5, 4(a0)
-; RV32IA-NEXT: lw a4, 0(a0)
+; RV32IA-NEXT: lw a5, 0(a0)
+; RV32IA-NEXT: lw a4, 4(a0)
; RV32IA-NEXT: mv s1, a2
; RV32IA-NEXT: mv s2, a1
; RV32IA-NEXT: j .LBB225_2
; RV32IA-NEXT: .LBB225_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB225_2 Depth=1
-; RV32IA-NEXT: sw a4, 8(sp)
-; RV32IA-NEXT: sw a5, 12(sp)
+; RV32IA-NEXT: sw a5, 8(sp)
+; RV32IA-NEXT: sw a4, 12(sp)
; RV32IA-NEXT: addi a1, sp, 8
; RV32IA-NEXT: mv a0, s0
; RV32IA-NEXT: li a4, 0
; RV32IA-NEXT: li a5, 0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 12(sp)
-; RV32IA-NEXT: lw a4, 8(sp)
+; RV32IA-NEXT: lw a5, 8(sp)
+; RV32IA-NEXT: lw a4, 12(sp)
; RV32IA-NEXT: bnez a0, .LBB225_7
; RV32IA-NEXT: .LBB225_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: beq a5, s1, .LBB225_4
+; RV32IA-NEXT: beq a4, s1, .LBB225_4
; RV32IA-NEXT: # %bb.3: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB225_2 Depth=1
-; RV32IA-NEXT: slt a0, s1, a5
+; RV32IA-NEXT: slt a0, s1, a4
; RV32IA-NEXT: j .LBB225_5
; RV32IA-NEXT: .LBB225_4: # in Loop: Header=BB225_2 Depth=1
-; RV32IA-NEXT: sltu a0, s2, a4
+; RV32IA-NEXT: sltu a0, s2, a5
; RV32IA-NEXT: .LBB225_5: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB225_2 Depth=1
-; RV32IA-NEXT: mv a2, a4
-; RV32IA-NEXT: mv a3, a5
+; RV32IA-NEXT: mv a2, a5
+; RV32IA-NEXT: mv a3, a4
; RV32IA-NEXT: beqz a0, .LBB225_1
; RV32IA-NEXT: # %bb.6: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB225_2 Depth=1
@@ -26949,8 +26949,8 @@ define i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: mv a3, s1
; RV32IA-NEXT: j .LBB225_1
; RV32IA-NEXT: .LBB225_7: # %atomicrmw.end
-; RV32IA-NEXT: mv a0, a4
-; RV32IA-NEXT: mv a1, a5
+; RV32IA-NEXT: mv a0, a5
+; RV32IA-NEXT: mv a1, a4
; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -27011,36 +27011,36 @@ define i64 @atomicrmw_min_i64_acquire(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lw a5, 4(a0)
-; RV32I-NEXT: lw a4, 0(a0)
+; RV32I-NEXT: lw a5, 0(a0)
+; RV32I-NEXT: lw a4, 4(a0)
; RV32I-NEXT: mv s1, a2
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: j .LBB226_2
; RV32I-NEXT: .LBB226_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB226_2 Depth=1
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: sw a5, 12(sp)
+; RV32I-NEXT: sw a5, 8(sp)
+; RV32I-NEXT: sw a4, 12(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: li a5, 2
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 12(sp)
-; RV32I-NEXT: lw a4, 8(sp)
+; RV32I-NEXT: lw a5, 8(sp)
+; RV32I-NEXT: lw a4, 12(sp)
; RV32I-NEXT: bnez a0, .LBB226_7
; RV32I-NEXT: .LBB226_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT: beq a5, s1, .LBB226_4
+; RV32I-NEXT: beq a4, s1, .LBB226_4
; RV32I-NEXT: # %bb.3: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB226_2 Depth=1
-; RV32I-NEXT: slt a0, s1, a5
+; RV32I-NEXT: slt a0, s1, a4
; RV32I-NEXT: j .LBB226_5
; RV32I-NEXT: .LBB226_4: # in Loop: Header=BB226_2 Depth=1
-; RV32I-NEXT: sltu a0, s2, a4
+; RV32I-NEXT: sltu a0, s2, a5
; RV32I-NEXT: .LBB226_5: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB226_2 Depth=1
-; RV32I-NEXT: mv a2, a4
-; RV32I-NEXT: mv a3, a5
+; RV32I-NEXT: mv a2, a5
+; RV32I-NEXT: mv a3, a4
; RV32I-NEXT: beqz a0, .LBB226_1
; RV32I-NEXT: # %bb.6: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB226_2 Depth=1
@@ -27048,8 +27048,8 @@ define i64 @atomicrmw_min_i64_acquire(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: mv a3, s1
; RV32I-NEXT: j .LBB226_1
; RV32I-NEXT: .LBB226_7: # %atomicrmw.end
-; RV32I-NEXT: mv a0, a4
-; RV32I-NEXT: mv a1, a5
+; RV32I-NEXT: mv a0, a5
+; RV32I-NEXT: mv a1, a4
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -27065,36 +27065,36 @@ define i64 @atomicrmw_min_i64_acquire(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv s0, a0
-; RV32IA-NEXT: lw a5, 4(a0)
-; RV32IA-NEXT: lw a4, 0(a0)
+; RV32IA-NEXT: lw a5, 0(a0)
+; RV32IA-NEXT: lw a4, 4(a0)
; RV32IA-NEXT: mv s1, a2
; RV32IA-NEXT: mv s2, a1
; RV32IA-NEXT: j .LBB226_2
; RV32IA-NEXT: .LBB226_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB226_2 Depth=1
-; RV32IA-NEXT: sw a4, 8(sp)
-; RV32IA-NEXT: sw a5, 12(sp)
+; RV32IA-NEXT: sw a5, 8(sp)
+; RV32IA-NEXT: sw a4, 12(sp)
; RV32IA-NEXT: addi a1, sp, 8
; RV32IA-NEXT: li a4, 2
; RV32IA-NEXT: li a5, 2
; RV32IA-NEXT: mv a0, s0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 12(sp)
-; RV32IA-NEXT: lw a4, 8(sp)
+; RV32IA-NEXT: lw a5, 8(sp)
+; RV32IA-NEXT: lw a4, 12(sp)
; RV32IA-NEXT: bnez a0, .LBB226_7
; RV32IA-NEXT: .LBB226_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: beq a5, s1, .LBB226_4
+; RV32IA-NEXT: beq a4, s1, .LBB226_4
; RV32IA-NEXT: # %bb.3: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB226_2 Depth=1
-; RV32IA-NEXT: slt a0, s1, a5
+; RV32IA-NEXT: slt a0, s1, a4
; RV32IA-NEXT: j .LBB226_5
; RV32IA-NEXT: .LBB226_4: # in Loop: Header=BB226_2 Depth=1
-; RV32IA-NEXT: sltu a0, s2, a4
+; RV32IA-NEXT: sltu a0, s2, a5
; RV32IA-NEXT: .LBB226_5: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB226_2 Depth=1
-; RV32IA-NEXT: mv a2, a4
-; RV32IA-NEXT: mv a3, a5
+; RV32IA-NEXT: mv a2, a5
+; RV32IA-NEXT: mv a3, a4
; RV32IA-NEXT: beqz a0, .LBB226_1
; RV32IA-NEXT: # %bb.6: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB226_2 Depth=1
@@ -27102,8 +27102,8 @@ define i64 @atomicrmw_min_i64_acquire(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: mv a3, s1
; RV32IA-NEXT: j .LBB226_1
; RV32IA-NEXT: .LBB226_7: # %atomicrmw.end
-; RV32IA-NEXT: mv a0, a4
-; RV32IA-NEXT: mv a1, a5
+; RV32IA-NEXT: mv a0, a5
+; RV32IA-NEXT: mv a1, a4
; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -27169,36 +27169,36 @@ define i64 @atomicrmw_min_i64_release(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lw a5, 4(a0)
-; RV32I-NEXT: lw a4, 0(a0)
+; RV32I-NEXT: lw a5, 0(a0)
+; RV32I-NEXT: lw a4, 4(a0)
; RV32I-NEXT: mv s1, a2
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: j .LBB227_2
; RV32I-NEXT: .LBB227_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB227_2 Depth=1
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: sw a5, 12(sp)
+; RV32I-NEXT: sw a5, 8(sp)
+; RV32I-NEXT: sw a4, 12(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a4, 3
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a5, 0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 12(sp)
-; RV32I-NEXT: lw a4, 8(sp)
+; RV32I-NEXT: lw a5, 8(sp)
+; RV32I-NEXT: lw a4, 12(sp)
; RV32I-NEXT: bnez a0, .LBB227_7
; RV32I-NEXT: .LBB227_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT: beq a5, s1, .LBB227_4
+; RV32I-NEXT: beq a4, s1, .LBB227_4
; RV32I-NEXT: # %bb.3: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB227_2 Depth=1
-; RV32I-NEXT: slt a0, s1, a5
+; RV32I-NEXT: slt a0, s1, a4
; RV32I-NEXT: j .LBB227_5
; RV32I-NEXT: .LBB227_4: # in Loop: Header=BB227_2 Depth=1
-; RV32I-NEXT: sltu a0, s2, a4
+; RV32I-NEXT: sltu a0, s2, a5
; RV32I-NEXT: .LBB227_5: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB227_2 Depth=1
-; RV32I-NEXT: mv a2, a4
-; RV32I-NEXT: mv a3, a5
+; RV32I-NEXT: mv a2, a5
+; RV32I-NEXT: mv a3, a4
; RV32I-NEXT: beqz a0, .LBB227_1
; RV32I-NEXT: # %bb.6: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB227_2 Depth=1
@@ -27206,8 +27206,8 @@ define i64 @atomicrmw_min_i64_release(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: mv a3, s1
; RV32I-NEXT: j .LBB227_1
; RV32I-NEXT: .LBB227_7: # %atomicrmw.end
-; RV32I-NEXT: mv a0, a4
-; RV32I-NEXT: mv a1, a5
+; RV32I-NEXT: mv a0, a5
+; RV32I-NEXT: mv a1, a4
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -27223,36 +27223,36 @@ define i64 @atomicrmw_min_i64_release(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv s0, a0
-; RV32IA-NEXT: lw a5, 4(a0)
-; RV32IA-NEXT: lw a4, 0(a0)
+; RV32IA-NEXT: lw a5, 0(a0)
+; RV32IA-NEXT: lw a4, 4(a0)
; RV32IA-NEXT: mv s1, a2
; RV32IA-NEXT: mv s2, a1
; RV32IA-NEXT: j .LBB227_2
; RV32IA-NEXT: .LBB227_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB227_2 Depth=1
-; RV32IA-NEXT: sw a4, 8(sp)
-; RV32IA-NEXT: sw a5, 12(sp)
+; RV32IA-NEXT: sw a5, 8(sp)
+; RV32IA-NEXT: sw a4, 12(sp)
; RV32IA-NEXT: addi a1, sp, 8
; RV32IA-NEXT: li a4, 3
; RV32IA-NEXT: mv a0, s0
; RV32IA-NEXT: li a5, 0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 12(sp)
-; RV32IA-NEXT: lw a4, 8(sp)
+; RV32IA-NEXT: lw a5, 8(sp)
+; RV32IA-NEXT: lw a4, 12(sp)
; RV32IA-NEXT: bnez a0, .LBB227_7
; RV32IA-NEXT: .LBB227_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: beq a5, s1, .LBB227_4
+; RV32IA-NEXT: beq a4, s1, .LBB227_4
; RV32IA-NEXT: # %bb.3: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB227_2 Depth=1
-; RV32IA-NEXT: slt a0, s1, a5
+; RV32IA-NEXT: slt a0, s1, a4
; RV32IA-NEXT: j .LBB227_5
; RV32IA-NEXT: .LBB227_4: # in Loop: Header=BB227_2 Depth=1
-; RV32IA-NEXT: sltu a0, s2, a4
+; RV32IA-NEXT: sltu a0, s2, a5
; RV32IA-NEXT: .LBB227_5: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB227_2 Depth=1
-; RV32IA-NEXT: mv a2, a4
-; RV32IA-NEXT: mv a3, a5
+; RV32IA-NEXT: mv a2, a5
+; RV32IA-NEXT: mv a3, a4
; RV32IA-NEXT: beqz a0, .LBB227_1
; RV32IA-NEXT: # %bb.6: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB227_2 Depth=1
@@ -27260,8 +27260,8 @@ define i64 @atomicrmw_min_i64_release(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: mv a3, s1
; RV32IA-NEXT: j .LBB227_1
; RV32IA-NEXT: .LBB227_7: # %atomicrmw.end
-; RV32IA-NEXT: mv a0, a4
-; RV32IA-NEXT: mv a1, a5
+; RV32IA-NEXT: mv a0, a5
+; RV32IA-NEXT: mv a1, a4
; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -27327,36 +27327,36 @@ define i64 @atomicrmw_min_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lw a5, 4(a0)
-; RV32I-NEXT: lw a4, 0(a0)
+; RV32I-NEXT: lw a5, 0(a0)
+; RV32I-NEXT: lw a4, 4(a0)
; RV32I-NEXT: mv s1, a2
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: j .LBB228_2
; RV32I-NEXT: .LBB228_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB228_2 Depth=1
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: sw a5, 12(sp)
+; RV32I-NEXT: sw a5, 8(sp)
+; RV32I-NEXT: sw a4, 12(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a4, 4
; RV32I-NEXT: li a5, 2
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 12(sp)
-; RV32I-NEXT: lw a4, 8(sp)
+; RV32I-NEXT: lw a5, 8(sp)
+; RV32I-NEXT: lw a4, 12(sp)
; RV32I-NEXT: bnez a0, .LBB228_7
; RV32I-NEXT: .LBB228_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT: beq a5, s1, .LBB228_4
+; RV32I-NEXT: beq a4, s1, .LBB228_4
; RV32I-NEXT: # %bb.3: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB228_2 Depth=1
-; RV32I-NEXT: slt a0, s1, a5
+; RV32I-NEXT: slt a0, s1, a4
; RV32I-NEXT: j .LBB228_5
; RV32I-NEXT: .LBB228_4: # in Loop: Header=BB228_2 Depth=1
-; RV32I-NEXT: sltu a0, s2, a4
+; RV32I-NEXT: sltu a0, s2, a5
; RV32I-NEXT: .LBB228_5: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB228_2 Depth=1
-; RV32I-NEXT: mv a2, a4
-; RV32I-NEXT: mv a3, a5
+; RV32I-NEXT: mv a2, a5
+; RV32I-NEXT: mv a3, a4
; RV32I-NEXT: beqz a0, .LBB228_1
; RV32I-NEXT: # %bb.6: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB228_2 Depth=1
@@ -27364,8 +27364,8 @@ define i64 @atomicrmw_min_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: mv a3, s1
; RV32I-NEXT: j .LBB228_1
; RV32I-NEXT: .LBB228_7: # %atomicrmw.end
-; RV32I-NEXT: mv a0, a4
-; RV32I-NEXT: mv a1, a5
+; RV32I-NEXT: mv a0, a5
+; RV32I-NEXT: mv a1, a4
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -27381,36 +27381,36 @@ define i64 @atomicrmw_min_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv s0, a0
-; RV32IA-NEXT: lw a5, 4(a0)
-; RV32IA-NEXT: lw a4, 0(a0)
+; RV32IA-NEXT: lw a5, 0(a0)
+; RV32IA-NEXT: lw a4, 4(a0)
; RV32IA-NEXT: mv s1, a2
; RV32IA-NEXT: mv s2, a1
; RV32IA-NEXT: j .LBB228_2
; RV32IA-NEXT: .LBB228_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB228_2 Depth=1
-; RV32IA-NEXT: sw a4, 8(sp)
-; RV32IA-NEXT: sw a5, 12(sp)
+; RV32IA-NEXT: sw a5, 8(sp)
+; RV32IA-NEXT: sw a4, 12(sp)
; RV32IA-NEXT: addi a1, sp, 8
; RV32IA-NEXT: li a4, 4
; RV32IA-NEXT: li a5, 2
; RV32IA-NEXT: mv a0, s0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 12(sp)
-; RV32IA-NEXT: lw a4, 8(sp)
+; RV32IA-NEXT: lw a5, 8(sp)
+; RV32IA-NEXT: lw a4, 12(sp)
; RV32IA-NEXT: bnez a0, .LBB228_7
; RV32IA-NEXT: .LBB228_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: beq a5, s1, .LBB228_4
+; RV32IA-NEXT: beq a4, s1, .LBB228_4
; RV32IA-NEXT: # %bb.3: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB228_2 Depth=1
-; RV32IA-NEXT: slt a0, s1, a5
+; RV32IA-NEXT: slt a0, s1, a4
; RV32IA-NEXT: j .LBB228_5
; RV32IA-NEXT: .LBB228_4: # in Loop: Header=BB228_2 Depth=1
-; RV32IA-NEXT: sltu a0, s2, a4
+; RV32IA-NEXT: sltu a0, s2, a5
; RV32IA-NEXT: .LBB228_5: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB228_2 Depth=1
-; RV32IA-NEXT: mv a2, a4
-; RV32IA-NEXT: mv a3, a5
+; RV32IA-NEXT: mv a2, a5
+; RV32IA-NEXT: mv a3, a4
; RV32IA-NEXT: beqz a0, .LBB228_1
; RV32IA-NEXT: # %bb.6: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB228_2 Depth=1
@@ -27418,8 +27418,8 @@ define i64 @atomicrmw_min_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: mv a3, s1
; RV32IA-NEXT: j .LBB228_1
; RV32IA-NEXT: .LBB228_7: # %atomicrmw.end
-; RV32IA-NEXT: mv a0, a4
-; RV32IA-NEXT: mv a1, a5
+; RV32IA-NEXT: mv a0, a5
+; RV32IA-NEXT: mv a1, a4
; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -27485,36 +27485,36 @@ define i64 @atomicrmw_min_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lw a5, 4(a0)
-; RV32I-NEXT: lw a4, 0(a0)
+; RV32I-NEXT: lw a5, 0(a0)
+; RV32I-NEXT: lw a4, 4(a0)
; RV32I-NEXT: mv s1, a2
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: j .LBB229_2
; RV32I-NEXT: .LBB229_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB229_2 Depth=1
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: sw a5, 12(sp)
+; RV32I-NEXT: sw a5, 8(sp)
+; RV32I-NEXT: sw a4, 12(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: li a5, 5
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 12(sp)
-; RV32I-NEXT: lw a4, 8(sp)
+; RV32I-NEXT: lw a5, 8(sp)
+; RV32I-NEXT: lw a4, 12(sp)
; RV32I-NEXT: bnez a0, .LBB229_7
; RV32I-NEXT: .LBB229_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT: beq a5, s1, .LBB229_4
+; RV32I-NEXT: beq a4, s1, .LBB229_4
; RV32I-NEXT: # %bb.3: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB229_2 Depth=1
-; RV32I-NEXT: slt a0, s1, a5
+; RV32I-NEXT: slt a0, s1, a4
; RV32I-NEXT: j .LBB229_5
; RV32I-NEXT: .LBB229_4: # in Loop: Header=BB229_2 Depth=1
-; RV32I-NEXT: sltu a0, s2, a4
+; RV32I-NEXT: sltu a0, s2, a5
; RV32I-NEXT: .LBB229_5: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB229_2 Depth=1
-; RV32I-NEXT: mv a2, a4
-; RV32I-NEXT: mv a3, a5
+; RV32I-NEXT: mv a2, a5
+; RV32I-NEXT: mv a3, a4
; RV32I-NEXT: beqz a0, .LBB229_1
; RV32I-NEXT: # %bb.6: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB229_2 Depth=1
@@ -27522,8 +27522,8 @@ define i64 @atomicrmw_min_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: mv a3, s1
; RV32I-NEXT: j .LBB229_1
; RV32I-NEXT: .LBB229_7: # %atomicrmw.end
-; RV32I-NEXT: mv a0, a4
-; RV32I-NEXT: mv a1, a5
+; RV32I-NEXT: mv a0, a5
+; RV32I-NEXT: mv a1, a4
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -27539,36 +27539,36 @@ define i64 @atomicrmw_min_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv s0, a0
-; RV32IA-NEXT: lw a5, 4(a0)
-; RV32IA-NEXT: lw a4, 0(a0)
+; RV32IA-NEXT: lw a5, 0(a0)
+; RV32IA-NEXT: lw a4, 4(a0)
; RV32IA-NEXT: mv s1, a2
; RV32IA-NEXT: mv s2, a1
; RV32IA-NEXT: j .LBB229_2
; RV32IA-NEXT: .LBB229_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB229_2 Depth=1
-; RV32IA-NEXT: sw a4, 8(sp)
-; RV32IA-NEXT: sw a5, 12(sp)
+; RV32IA-NEXT: sw a5, 8(sp)
+; RV32IA-NEXT: sw a4, 12(sp)
; RV32IA-NEXT: addi a1, sp, 8
; RV32IA-NEXT: li a4, 5
; RV32IA-NEXT: li a5, 5
; RV32IA-NEXT: mv a0, s0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 12(sp)
-; RV32IA-NEXT: lw a4, 8(sp)
+; RV32IA-NEXT: lw a5, 8(sp)
+; RV32IA-NEXT: lw a4, 12(sp)
; RV32IA-NEXT: bnez a0, .LBB229_7
; RV32IA-NEXT: .LBB229_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: beq a5, s1, .LBB229_4
+; RV32IA-NEXT: beq a4, s1, .LBB229_4
; RV32IA-NEXT: # %bb.3: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB229_2 Depth=1
-; RV32IA-NEXT: slt a0, s1, a5
+; RV32IA-NEXT: slt a0, s1, a4
; RV32IA-NEXT: j .LBB229_5
; RV32IA-NEXT: .LBB229_4: # in Loop: Header=BB229_2 Depth=1
-; RV32IA-NEXT: sltu a0, s2, a4
+; RV32IA-NEXT: sltu a0, s2, a5
; RV32IA-NEXT: .LBB229_5: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB229_2 Depth=1
-; RV32IA-NEXT: mv a2, a4
-; RV32IA-NEXT: mv a3, a5
+; RV32IA-NEXT: mv a2, a5
+; RV32IA-NEXT: mv a3, a4
; RV32IA-NEXT: beqz a0, .LBB229_1
; RV32IA-NEXT: # %bb.6: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB229_2 Depth=1
@@ -27576,8 +27576,8 @@ define i64 @atomicrmw_min_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: mv a3, s1
; RV32IA-NEXT: j .LBB229_1
; RV32IA-NEXT: .LBB229_7: # %atomicrmw.end
-; RV32IA-NEXT: mv a0, a4
-; RV32IA-NEXT: mv a1, a5
+; RV32IA-NEXT: mv a0, a5
+; RV32IA-NEXT: mv a1, a4
; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -27643,36 +27643,36 @@ define i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lw a5, 4(a0)
-; RV32I-NEXT: lw a4, 0(a0)
+; RV32I-NEXT: lw a5, 0(a0)
+; RV32I-NEXT: lw a4, 4(a0)
; RV32I-NEXT: mv s1, a2
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: j .LBB230_2
; RV32I-NEXT: .LBB230_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB230_2 Depth=1
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: sw a5, 12(sp)
+; RV32I-NEXT: sw a5, 8(sp)
+; RV32I-NEXT: sw a4, 12(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: li a5, 0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 12(sp)
-; RV32I-NEXT: lw a4, 8(sp)
+; RV32I-NEXT: lw a5, 8(sp)
+; RV32I-NEXT: lw a4, 12(sp)
; RV32I-NEXT: bnez a0, .LBB230_7
; RV32I-NEXT: .LBB230_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT: beq a5, s1, .LBB230_4
+; RV32I-NEXT: beq a4, s1, .LBB230_4
; RV32I-NEXT: # %bb.3: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB230_2 Depth=1
-; RV32I-NEXT: sltu a0, s1, a5
+; RV32I-NEXT: sltu a0, s1, a4
; RV32I-NEXT: j .LBB230_5
; RV32I-NEXT: .LBB230_4: # in Loop: Header=BB230_2 Depth=1
-; RV32I-NEXT: sltu a0, s2, a4
+; RV32I-NEXT: sltu a0, s2, a5
; RV32I-NEXT: .LBB230_5: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB230_2 Depth=1
-; RV32I-NEXT: mv a2, a4
-; RV32I-NEXT: mv a3, a5
+; RV32I-NEXT: mv a2, a5
+; RV32I-NEXT: mv a3, a4
; RV32I-NEXT: bnez a0, .LBB230_1
; RV32I-NEXT: # %bb.6: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB230_2 Depth=1
@@ -27680,8 +27680,8 @@ define i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: mv a3, s1
; RV32I-NEXT: j .LBB230_1
; RV32I-NEXT: .LBB230_7: # %atomicrmw.end
-; RV32I-NEXT: mv a0, a4
-; RV32I-NEXT: mv a1, a5
+; RV32I-NEXT: mv a0, a5
+; RV32I-NEXT: mv a1, a4
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -27697,36 +27697,36 @@ define i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv s0, a0
-; RV32IA-NEXT: lw a5, 4(a0)
-; RV32IA-NEXT: lw a4, 0(a0)
+; RV32IA-NEXT: lw a5, 0(a0)
+; RV32IA-NEXT: lw a4, 4(a0)
; RV32IA-NEXT: mv s1, a2
; RV32IA-NEXT: mv s2, a1
; RV32IA-NEXT: j .LBB230_2
; RV32IA-NEXT: .LBB230_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB230_2 Depth=1
-; RV32IA-NEXT: sw a4, 8(sp)
-; RV32IA-NEXT: sw a5, 12(sp)
+; RV32IA-NEXT: sw a5, 8(sp)
+; RV32IA-NEXT: sw a4, 12(sp)
; RV32IA-NEXT: addi a1, sp, 8
; RV32IA-NEXT: mv a0, s0
; RV32IA-NEXT: li a4, 0
; RV32IA-NEXT: li a5, 0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 12(sp)
-; RV32IA-NEXT: lw a4, 8(sp)
+; RV32IA-NEXT: lw a5, 8(sp)
+; RV32IA-NEXT: lw a4, 12(sp)
; RV32IA-NEXT: bnez a0, .LBB230_7
; RV32IA-NEXT: .LBB230_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: beq a5, s1, .LBB230_4
+; RV32IA-NEXT: beq a4, s1, .LBB230_4
; RV32IA-NEXT: # %bb.3: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB230_2 Depth=1
-; RV32IA-NEXT: sltu a0, s1, a5
+; RV32IA-NEXT: sltu a0, s1, a4
; RV32IA-NEXT: j .LBB230_5
; RV32IA-NEXT: .LBB230_4: # in Loop: Header=BB230_2 Depth=1
-; RV32IA-NEXT: sltu a0, s2, a4
+; RV32IA-NEXT: sltu a0, s2, a5
; RV32IA-NEXT: .LBB230_5: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB230_2 Depth=1
-; RV32IA-NEXT: mv a2, a4
-; RV32IA-NEXT: mv a3, a5
+; RV32IA-NEXT: mv a2, a5
+; RV32IA-NEXT: mv a3, a4
; RV32IA-NEXT: bnez a0, .LBB230_1
; RV32IA-NEXT: # %bb.6: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB230_2 Depth=1
@@ -27734,8 +27734,8 @@ define i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: mv a3, s1
; RV32IA-NEXT: j .LBB230_1
; RV32IA-NEXT: .LBB230_7: # %atomicrmw.end
-; RV32IA-NEXT: mv a0, a4
-; RV32IA-NEXT: mv a1, a5
+; RV32IA-NEXT: mv a0, a5
+; RV32IA-NEXT: mv a1, a4
; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -27796,36 +27796,36 @@ define i64 @atomicrmw_umax_i64_acquire(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lw a5, 4(a0)
-; RV32I-NEXT: lw a4, 0(a0)
+; RV32I-NEXT: lw a5, 0(a0)
+; RV32I-NEXT: lw a4, 4(a0)
; RV32I-NEXT: mv s1, a2
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: j .LBB231_2
; RV32I-NEXT: .LBB231_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB231_2 Depth=1
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: sw a5, 12(sp)
+; RV32I-NEXT: sw a5, 8(sp)
+; RV32I-NEXT: sw a4, 12(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: li a5, 2
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 12(sp)
-; RV32I-NEXT: lw a4, 8(sp)
+; RV32I-NEXT: lw a5, 8(sp)
+; RV32I-NEXT: lw a4, 12(sp)
; RV32I-NEXT: bnez a0, .LBB231_7
; RV32I-NEXT: .LBB231_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT: beq a5, s1, .LBB231_4
+; RV32I-NEXT: beq a4, s1, .LBB231_4
; RV32I-NEXT: # %bb.3: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB231_2 Depth=1
-; RV32I-NEXT: sltu a0, s1, a5
+; RV32I-NEXT: sltu a0, s1, a4
; RV32I-NEXT: j .LBB231_5
; RV32I-NEXT: .LBB231_4: # in Loop: Header=BB231_2 Depth=1
-; RV32I-NEXT: sltu a0, s2, a4
+; RV32I-NEXT: sltu a0, s2, a5
; RV32I-NEXT: .LBB231_5: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB231_2 Depth=1
-; RV32I-NEXT: mv a2, a4
-; RV32I-NEXT: mv a3, a5
+; RV32I-NEXT: mv a2, a5
+; RV32I-NEXT: mv a3, a4
; RV32I-NEXT: bnez a0, .LBB231_1
; RV32I-NEXT: # %bb.6: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB231_2 Depth=1
@@ -27833,8 +27833,8 @@ define i64 @atomicrmw_umax_i64_acquire(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: mv a3, s1
; RV32I-NEXT: j .LBB231_1
; RV32I-NEXT: .LBB231_7: # %atomicrmw.end
-; RV32I-NEXT: mv a0, a4
-; RV32I-NEXT: mv a1, a5
+; RV32I-NEXT: mv a0, a5
+; RV32I-NEXT: mv a1, a4
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -27850,36 +27850,36 @@ define i64 @atomicrmw_umax_i64_acquire(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv s0, a0
-; RV32IA-NEXT: lw a5, 4(a0)
-; RV32IA-NEXT: lw a4, 0(a0)
+; RV32IA-NEXT: lw a5, 0(a0)
+; RV32IA-NEXT: lw a4, 4(a0)
; RV32IA-NEXT: mv s1, a2
; RV32IA-NEXT: mv s2, a1
; RV32IA-NEXT: j .LBB231_2
; RV32IA-NEXT: .LBB231_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB231_2 Depth=1
-; RV32IA-NEXT: sw a4, 8(sp)
-; RV32IA-NEXT: sw a5, 12(sp)
+; RV32IA-NEXT: sw a5, 8(sp)
+; RV32IA-NEXT: sw a4, 12(sp)
; RV32IA-NEXT: addi a1, sp, 8
; RV32IA-NEXT: li a4, 2
; RV32IA-NEXT: li a5, 2
; RV32IA-NEXT: mv a0, s0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 12(sp)
-; RV32IA-NEXT: lw a4, 8(sp)
+; RV32IA-NEXT: lw a5, 8(sp)
+; RV32IA-NEXT: lw a4, 12(sp)
; RV32IA-NEXT: bnez a0, .LBB231_7
; RV32IA-NEXT: .LBB231_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: beq a5, s1, .LBB231_4
+; RV32IA-NEXT: beq a4, s1, .LBB231_4
; RV32IA-NEXT: # %bb.3: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB231_2 Depth=1
-; RV32IA-NEXT: sltu a0, s1, a5
+; RV32IA-NEXT: sltu a0, s1, a4
; RV32IA-NEXT: j .LBB231_5
; RV32IA-NEXT: .LBB231_4: # in Loop: Header=BB231_2 Depth=1
-; RV32IA-NEXT: sltu a0, s2, a4
+; RV32IA-NEXT: sltu a0, s2, a5
; RV32IA-NEXT: .LBB231_5: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB231_2 Depth=1
-; RV32IA-NEXT: mv a2, a4
-; RV32IA-NEXT: mv a3, a5
+; RV32IA-NEXT: mv a2, a5
+; RV32IA-NEXT: mv a3, a4
; RV32IA-NEXT: bnez a0, .LBB231_1
; RV32IA-NEXT: # %bb.6: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB231_2 Depth=1
@@ -27887,8 +27887,8 @@ define i64 @atomicrmw_umax_i64_acquire(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: mv a3, s1
; RV32IA-NEXT: j .LBB231_1
; RV32IA-NEXT: .LBB231_7: # %atomicrmw.end
-; RV32IA-NEXT: mv a0, a4
-; RV32IA-NEXT: mv a1, a5
+; RV32IA-NEXT: mv a0, a5
+; RV32IA-NEXT: mv a1, a4
; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -27954,36 +27954,36 @@ define i64 @atomicrmw_umax_i64_release(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lw a5, 4(a0)
-; RV32I-NEXT: lw a4, 0(a0)
+; RV32I-NEXT: lw a5, 0(a0)
+; RV32I-NEXT: lw a4, 4(a0)
; RV32I-NEXT: mv s1, a2
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: j .LBB232_2
; RV32I-NEXT: .LBB232_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB232_2 Depth=1
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: sw a5, 12(sp)
+; RV32I-NEXT: sw a5, 8(sp)
+; RV32I-NEXT: sw a4, 12(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a4, 3
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a5, 0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 12(sp)
-; RV32I-NEXT: lw a4, 8(sp)
+; RV32I-NEXT: lw a5, 8(sp)
+; RV32I-NEXT: lw a4, 12(sp)
; RV32I-NEXT: bnez a0, .LBB232_7
; RV32I-NEXT: .LBB232_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT: beq a5, s1, .LBB232_4
+; RV32I-NEXT: beq a4, s1, .LBB232_4
; RV32I-NEXT: # %bb.3: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB232_2 Depth=1
-; RV32I-NEXT: sltu a0, s1, a5
+; RV32I-NEXT: sltu a0, s1, a4
; RV32I-NEXT: j .LBB232_5
; RV32I-NEXT: .LBB232_4: # in Loop: Header=BB232_2 Depth=1
-; RV32I-NEXT: sltu a0, s2, a4
+; RV32I-NEXT: sltu a0, s2, a5
; RV32I-NEXT: .LBB232_5: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB232_2 Depth=1
-; RV32I-NEXT: mv a2, a4
-; RV32I-NEXT: mv a3, a5
+; RV32I-NEXT: mv a2, a5
+; RV32I-NEXT: mv a3, a4
; RV32I-NEXT: bnez a0, .LBB232_1
; RV32I-NEXT: # %bb.6: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB232_2 Depth=1
@@ -27991,8 +27991,8 @@ define i64 @atomicrmw_umax_i64_release(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: mv a3, s1
; RV32I-NEXT: j .LBB232_1
; RV32I-NEXT: .LBB232_7: # %atomicrmw.end
-; RV32I-NEXT: mv a0, a4
-; RV32I-NEXT: mv a1, a5
+; RV32I-NEXT: mv a0, a5
+; RV32I-NEXT: mv a1, a4
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -28008,36 +28008,36 @@ define i64 @atomicrmw_umax_i64_release(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv s0, a0
-; RV32IA-NEXT: lw a5, 4(a0)
-; RV32IA-NEXT: lw a4, 0(a0)
+; RV32IA-NEXT: lw a5, 0(a0)
+; RV32IA-NEXT: lw a4, 4(a0)
; RV32IA-NEXT: mv s1, a2
; RV32IA-NEXT: mv s2, a1
; RV32IA-NEXT: j .LBB232_2
; RV32IA-NEXT: .LBB232_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB232_2 Depth=1
-; RV32IA-NEXT: sw a4, 8(sp)
-; RV32IA-NEXT: sw a5, 12(sp)
+; RV32IA-NEXT: sw a5, 8(sp)
+; RV32IA-NEXT: sw a4, 12(sp)
; RV32IA-NEXT: addi a1, sp, 8
; RV32IA-NEXT: li a4, 3
; RV32IA-NEXT: mv a0, s0
; RV32IA-NEXT: li a5, 0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 12(sp)
-; RV32IA-NEXT: lw a4, 8(sp)
+; RV32IA-NEXT: lw a5, 8(sp)
+; RV32IA-NEXT: lw a4, 12(sp)
; RV32IA-NEXT: bnez a0, .LBB232_7
; RV32IA-NEXT: .LBB232_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: beq a5, s1, .LBB232_4
+; RV32IA-NEXT: beq a4, s1, .LBB232_4
; RV32IA-NEXT: # %bb.3: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB232_2 Depth=1
-; RV32IA-NEXT: sltu a0, s1, a5
+; RV32IA-NEXT: sltu a0, s1, a4
; RV32IA-NEXT: j .LBB232_5
; RV32IA-NEXT: .LBB232_4: # in Loop: Header=BB232_2 Depth=1
-; RV32IA-NEXT: sltu a0, s2, a4
+; RV32IA-NEXT: sltu a0, s2, a5
; RV32IA-NEXT: .LBB232_5: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB232_2 Depth=1
-; RV32IA-NEXT: mv a2, a4
-; RV32IA-NEXT: mv a3, a5
+; RV32IA-NEXT: mv a2, a5
+; RV32IA-NEXT: mv a3, a4
; RV32IA-NEXT: bnez a0, .LBB232_1
; RV32IA-NEXT: # %bb.6: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB232_2 Depth=1
@@ -28045,8 +28045,8 @@ define i64 @atomicrmw_umax_i64_release(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: mv a3, s1
; RV32IA-NEXT: j .LBB232_1
; RV32IA-NEXT: .LBB232_7: # %atomicrmw.end
-; RV32IA-NEXT: mv a0, a4
-; RV32IA-NEXT: mv a1, a5
+; RV32IA-NEXT: mv a0, a5
+; RV32IA-NEXT: mv a1, a4
; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -28112,36 +28112,36 @@ define i64 @atomicrmw_umax_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lw a5, 4(a0)
-; RV32I-NEXT: lw a4, 0(a0)
+; RV32I-NEXT: lw a5, 0(a0)
+; RV32I-NEXT: lw a4, 4(a0)
; RV32I-NEXT: mv s1, a2
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: j .LBB233_2
; RV32I-NEXT: .LBB233_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB233_2 Depth=1
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: sw a5, 12(sp)
+; RV32I-NEXT: sw a5, 8(sp)
+; RV32I-NEXT: sw a4, 12(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a4, 4
; RV32I-NEXT: li a5, 2
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 12(sp)
-; RV32I-NEXT: lw a4, 8(sp)
+; RV32I-NEXT: lw a5, 8(sp)
+; RV32I-NEXT: lw a4, 12(sp)
; RV32I-NEXT: bnez a0, .LBB233_7
; RV32I-NEXT: .LBB233_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT: beq a5, s1, .LBB233_4
+; RV32I-NEXT: beq a4, s1, .LBB233_4
; RV32I-NEXT: # %bb.3: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB233_2 Depth=1
-; RV32I-NEXT: sltu a0, s1, a5
+; RV32I-NEXT: sltu a0, s1, a4
; RV32I-NEXT: j .LBB233_5
; RV32I-NEXT: .LBB233_4: # in Loop: Header=BB233_2 Depth=1
-; RV32I-NEXT: sltu a0, s2, a4
+; RV32I-NEXT: sltu a0, s2, a5
; RV32I-NEXT: .LBB233_5: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB233_2 Depth=1
-; RV32I-NEXT: mv a2, a4
-; RV32I-NEXT: mv a3, a5
+; RV32I-NEXT: mv a2, a5
+; RV32I-NEXT: mv a3, a4
; RV32I-NEXT: bnez a0, .LBB233_1
; RV32I-NEXT: # %bb.6: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB233_2 Depth=1
@@ -28149,8 +28149,8 @@ define i64 @atomicrmw_umax_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: mv a3, s1
; RV32I-NEXT: j .LBB233_1
; RV32I-NEXT: .LBB233_7: # %atomicrmw.end
-; RV32I-NEXT: mv a0, a4
-; RV32I-NEXT: mv a1, a5
+; RV32I-NEXT: mv a0, a5
+; RV32I-NEXT: mv a1, a4
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -28166,36 +28166,36 @@ define i64 @atomicrmw_umax_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv s0, a0
-; RV32IA-NEXT: lw a5, 4(a0)
-; RV32IA-NEXT: lw a4, 0(a0)
+; RV32IA-NEXT: lw a5, 0(a0)
+; RV32IA-NEXT: lw a4, 4(a0)
; RV32IA-NEXT: mv s1, a2
; RV32IA-NEXT: mv s2, a1
; RV32IA-NEXT: j .LBB233_2
; RV32IA-NEXT: .LBB233_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB233_2 Depth=1
-; RV32IA-NEXT: sw a4, 8(sp)
-; RV32IA-NEXT: sw a5, 12(sp)
+; RV32IA-NEXT: sw a5, 8(sp)
+; RV32IA-NEXT: sw a4, 12(sp)
; RV32IA-NEXT: addi a1, sp, 8
; RV32IA-NEXT: li a4, 4
; RV32IA-NEXT: li a5, 2
; RV32IA-NEXT: mv a0, s0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 12(sp)
-; RV32IA-NEXT: lw a4, 8(sp)
+; RV32IA-NEXT: lw a5, 8(sp)
+; RV32IA-NEXT: lw a4, 12(sp)
; RV32IA-NEXT: bnez a0, .LBB233_7
; RV32IA-NEXT: .LBB233_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: beq a5, s1, .LBB233_4
+; RV32IA-NEXT: beq a4, s1, .LBB233_4
; RV32IA-NEXT: # %bb.3: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB233_2 Depth=1
-; RV32IA-NEXT: sltu a0, s1, a5
+; RV32IA-NEXT: sltu a0, s1, a4
; RV32IA-NEXT: j .LBB233_5
; RV32IA-NEXT: .LBB233_4: # in Loop: Header=BB233_2 Depth=1
-; RV32IA-NEXT: sltu a0, s2, a4
+; RV32IA-NEXT: sltu a0, s2, a5
; RV32IA-NEXT: .LBB233_5: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB233_2 Depth=1
-; RV32IA-NEXT: mv a2, a4
-; RV32IA-NEXT: mv a3, a5
+; RV32IA-NEXT: mv a2, a5
+; RV32IA-NEXT: mv a3, a4
; RV32IA-NEXT: bnez a0, .LBB233_1
; RV32IA-NEXT: # %bb.6: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB233_2 Depth=1
@@ -28203,8 +28203,8 @@ define i64 @atomicrmw_umax_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: mv a3, s1
; RV32IA-NEXT: j .LBB233_1
; RV32IA-NEXT: .LBB233_7: # %atomicrmw.end
-; RV32IA-NEXT: mv a0, a4
-; RV32IA-NEXT: mv a1, a5
+; RV32IA-NEXT: mv a0, a5
+; RV32IA-NEXT: mv a1, a4
; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -28270,36 +28270,36 @@ define i64 @atomicrmw_umax_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lw a5, 4(a0)
-; RV32I-NEXT: lw a4, 0(a0)
+; RV32I-NEXT: lw a5, 0(a0)
+; RV32I-NEXT: lw a4, 4(a0)
; RV32I-NEXT: mv s1, a2
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: j .LBB234_2
; RV32I-NEXT: .LBB234_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB234_2 Depth=1
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: sw a5, 12(sp)
+; RV32I-NEXT: sw a5, 8(sp)
+; RV32I-NEXT: sw a4, 12(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: li a5, 5
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 12(sp)
-; RV32I-NEXT: lw a4, 8(sp)
+; RV32I-NEXT: lw a5, 8(sp)
+; RV32I-NEXT: lw a4, 12(sp)
; RV32I-NEXT: bnez a0, .LBB234_7
; RV32I-NEXT: .LBB234_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT: beq a5, s1, .LBB234_4
+; RV32I-NEXT: beq a4, s1, .LBB234_4
; RV32I-NEXT: # %bb.3: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB234_2 Depth=1
-; RV32I-NEXT: sltu a0, s1, a5
+; RV32I-NEXT: sltu a0, s1, a4
; RV32I-NEXT: j .LBB234_5
; RV32I-NEXT: .LBB234_4: # in Loop: Header=BB234_2 Depth=1
-; RV32I-NEXT: sltu a0, s2, a4
+; RV32I-NEXT: sltu a0, s2, a5
; RV32I-NEXT: .LBB234_5: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB234_2 Depth=1
-; RV32I-NEXT: mv a2, a4
-; RV32I-NEXT: mv a3, a5
+; RV32I-NEXT: mv a2, a5
+; RV32I-NEXT: mv a3, a4
; RV32I-NEXT: bnez a0, .LBB234_1
; RV32I-NEXT: # %bb.6: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB234_2 Depth=1
@@ -28307,8 +28307,8 @@ define i64 @atomicrmw_umax_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: mv a3, s1
; RV32I-NEXT: j .LBB234_1
; RV32I-NEXT: .LBB234_7: # %atomicrmw.end
-; RV32I-NEXT: mv a0, a4
-; RV32I-NEXT: mv a1, a5
+; RV32I-NEXT: mv a0, a5
+; RV32I-NEXT: mv a1, a4
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -28324,36 +28324,36 @@ define i64 @atomicrmw_umax_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv s0, a0
-; RV32IA-NEXT: lw a5, 4(a0)
-; RV32IA-NEXT: lw a4, 0(a0)
+; RV32IA-NEXT: lw a5, 0(a0)
+; RV32IA-NEXT: lw a4, 4(a0)
; RV32IA-NEXT: mv s1, a2
; RV32IA-NEXT: mv s2, a1
; RV32IA-NEXT: j .LBB234_2
; RV32IA-NEXT: .LBB234_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB234_2 Depth=1
-; RV32IA-NEXT: sw a4, 8(sp)
-; RV32IA-NEXT: sw a5, 12(sp)
+; RV32IA-NEXT: sw a5, 8(sp)
+; RV32IA-NEXT: sw a4, 12(sp)
; RV32IA-NEXT: addi a1, sp, 8
; RV32IA-NEXT: li a4, 5
; RV32IA-NEXT: li a5, 5
; RV32IA-NEXT: mv a0, s0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 12(sp)
-; RV32IA-NEXT: lw a4, 8(sp)
+; RV32IA-NEXT: lw a5, 8(sp)
+; RV32IA-NEXT: lw a4, 12(sp)
; RV32IA-NEXT: bnez a0, .LBB234_7
; RV32IA-NEXT: .LBB234_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: beq a5, s1, .LBB234_4
+; RV32IA-NEXT: beq a4, s1, .LBB234_4
; RV32IA-NEXT: # %bb.3: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB234_2 Depth=1
-; RV32IA-NEXT: sltu a0, s1, a5
+; RV32IA-NEXT: sltu a0, s1, a4
; RV32IA-NEXT: j .LBB234_5
; RV32IA-NEXT: .LBB234_4: # in Loop: Header=BB234_2 Depth=1
-; RV32IA-NEXT: sltu a0, s2, a4
+; RV32IA-NEXT: sltu a0, s2, a5
; RV32IA-NEXT: .LBB234_5: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB234_2 Depth=1
-; RV32IA-NEXT: mv a2, a4
-; RV32IA-NEXT: mv a3, a5
+; RV32IA-NEXT: mv a2, a5
+; RV32IA-NEXT: mv a3, a4
; RV32IA-NEXT: bnez a0, .LBB234_1
; RV32IA-NEXT: # %bb.6: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB234_2 Depth=1
@@ -28361,8 +28361,8 @@ define i64 @atomicrmw_umax_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: mv a3, s1
; RV32IA-NEXT: j .LBB234_1
; RV32IA-NEXT: .LBB234_7: # %atomicrmw.end
-; RV32IA-NEXT: mv a0, a4
-; RV32IA-NEXT: mv a1, a5
+; RV32IA-NEXT: mv a0, a5
+; RV32IA-NEXT: mv a1, a4
; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -28428,36 +28428,36 @@ define i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lw a5, 4(a0)
-; RV32I-NEXT: lw a4, 0(a0)
+; RV32I-NEXT: lw a5, 0(a0)
+; RV32I-NEXT: lw a4, 4(a0)
; RV32I-NEXT: mv s1, a2
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: j .LBB235_2
; RV32I-NEXT: .LBB235_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB235_2 Depth=1
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: sw a5, 12(sp)
+; RV32I-NEXT: sw a5, 8(sp)
+; RV32I-NEXT: sw a4, 12(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: li a5, 0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 12(sp)
-; RV32I-NEXT: lw a4, 8(sp)
+; RV32I-NEXT: lw a5, 8(sp)
+; RV32I-NEXT: lw a4, 12(sp)
; RV32I-NEXT: bnez a0, .LBB235_7
; RV32I-NEXT: .LBB235_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT: beq a5, s1, .LBB235_4
+; RV32I-NEXT: beq a4, s1, .LBB235_4
; RV32I-NEXT: # %bb.3: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB235_2 Depth=1
-; RV32I-NEXT: sltu a0, s1, a5
+; RV32I-NEXT: sltu a0, s1, a4
; RV32I-NEXT: j .LBB235_5
; RV32I-NEXT: .LBB235_4: # in Loop: Header=BB235_2 Depth=1
-; RV32I-NEXT: sltu a0, s2, a4
+; RV32I-NEXT: sltu a0, s2, a5
; RV32I-NEXT: .LBB235_5: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB235_2 Depth=1
-; RV32I-NEXT: mv a2, a4
-; RV32I-NEXT: mv a3, a5
+; RV32I-NEXT: mv a2, a5
+; RV32I-NEXT: mv a3, a4
; RV32I-NEXT: beqz a0, .LBB235_1
; RV32I-NEXT: # %bb.6: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB235_2 Depth=1
@@ -28465,8 +28465,8 @@ define i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: mv a3, s1
; RV32I-NEXT: j .LBB235_1
; RV32I-NEXT: .LBB235_7: # %atomicrmw.end
-; RV32I-NEXT: mv a0, a4
-; RV32I-NEXT: mv a1, a5
+; RV32I-NEXT: mv a0, a5
+; RV32I-NEXT: mv a1, a4
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -28482,36 +28482,36 @@ define i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv s0, a0
-; RV32IA-NEXT: lw a5, 4(a0)
-; RV32IA-NEXT: lw a4, 0(a0)
+; RV32IA-NEXT: lw a5, 0(a0)
+; RV32IA-NEXT: lw a4, 4(a0)
; RV32IA-NEXT: mv s1, a2
; RV32IA-NEXT: mv s2, a1
; RV32IA-NEXT: j .LBB235_2
; RV32IA-NEXT: .LBB235_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB235_2 Depth=1
-; RV32IA-NEXT: sw a4, 8(sp)
-; RV32IA-NEXT: sw a5, 12(sp)
+; RV32IA-NEXT: sw a5, 8(sp)
+; RV32IA-NEXT: sw a4, 12(sp)
; RV32IA-NEXT: addi a1, sp, 8
; RV32IA-NEXT: mv a0, s0
; RV32IA-NEXT: li a4, 0
; RV32IA-NEXT: li a5, 0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 12(sp)
-; RV32IA-NEXT: lw a4, 8(sp)
+; RV32IA-NEXT: lw a5, 8(sp)
+; RV32IA-NEXT: lw a4, 12(sp)
; RV32IA-NEXT: bnez a0, .LBB235_7
; RV32IA-NEXT: .LBB235_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: beq a5, s1, .LBB235_4
+; RV32IA-NEXT: beq a4, s1, .LBB235_4
; RV32IA-NEXT: # %bb.3: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB235_2 Depth=1
-; RV32IA-NEXT: sltu a0, s1, a5
+; RV32IA-NEXT: sltu a0, s1, a4
; RV32IA-NEXT: j .LBB235_5
; RV32IA-NEXT: .LBB235_4: # in Loop: Header=BB235_2 Depth=1
-; RV32IA-NEXT: sltu a0, s2, a4
+; RV32IA-NEXT: sltu a0, s2, a5
; RV32IA-NEXT: .LBB235_5: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB235_2 Depth=1
-; RV32IA-NEXT: mv a2, a4
-; RV32IA-NEXT: mv a3, a5
+; RV32IA-NEXT: mv a2, a5
+; RV32IA-NEXT: mv a3, a4
; RV32IA-NEXT: beqz a0, .LBB235_1
; RV32IA-NEXT: # %bb.6: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB235_2 Depth=1
@@ -28519,8 +28519,8 @@ define i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: mv a3, s1
; RV32IA-NEXT: j .LBB235_1
; RV32IA-NEXT: .LBB235_7: # %atomicrmw.end
-; RV32IA-NEXT: mv a0, a4
-; RV32IA-NEXT: mv a1, a5
+; RV32IA-NEXT: mv a0, a5
+; RV32IA-NEXT: mv a1, a4
; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -28581,36 +28581,36 @@ define i64 @atomicrmw_umin_i64_acquire(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lw a5, 4(a0)
-; RV32I-NEXT: lw a4, 0(a0)
+; RV32I-NEXT: lw a5, 0(a0)
+; RV32I-NEXT: lw a4, 4(a0)
; RV32I-NEXT: mv s1, a2
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: j .LBB236_2
; RV32I-NEXT: .LBB236_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB236_2 Depth=1
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: sw a5, 12(sp)
+; RV32I-NEXT: sw a5, 8(sp)
+; RV32I-NEXT: sw a4, 12(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a4, 2
; RV32I-NEXT: li a5, 2
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 12(sp)
-; RV32I-NEXT: lw a4, 8(sp)
+; RV32I-NEXT: lw a5, 8(sp)
+; RV32I-NEXT: lw a4, 12(sp)
; RV32I-NEXT: bnez a0, .LBB236_7
; RV32I-NEXT: .LBB236_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT: beq a5, s1, .LBB236_4
+; RV32I-NEXT: beq a4, s1, .LBB236_4
; RV32I-NEXT: # %bb.3: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB236_2 Depth=1
-; RV32I-NEXT: sltu a0, s1, a5
+; RV32I-NEXT: sltu a0, s1, a4
; RV32I-NEXT: j .LBB236_5
; RV32I-NEXT: .LBB236_4: # in Loop: Header=BB236_2 Depth=1
-; RV32I-NEXT: sltu a0, s2, a4
+; RV32I-NEXT: sltu a0, s2, a5
; RV32I-NEXT: .LBB236_5: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB236_2 Depth=1
-; RV32I-NEXT: mv a2, a4
-; RV32I-NEXT: mv a3, a5
+; RV32I-NEXT: mv a2, a5
+; RV32I-NEXT: mv a3, a4
; RV32I-NEXT: beqz a0, .LBB236_1
; RV32I-NEXT: # %bb.6: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB236_2 Depth=1
@@ -28618,8 +28618,8 @@ define i64 @atomicrmw_umin_i64_acquire(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: mv a3, s1
; RV32I-NEXT: j .LBB236_1
; RV32I-NEXT: .LBB236_7: # %atomicrmw.end
-; RV32I-NEXT: mv a0, a4
-; RV32I-NEXT: mv a1, a5
+; RV32I-NEXT: mv a0, a5
+; RV32I-NEXT: mv a1, a4
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -28635,36 +28635,36 @@ define i64 @atomicrmw_umin_i64_acquire(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv s0, a0
-; RV32IA-NEXT: lw a5, 4(a0)
-; RV32IA-NEXT: lw a4, 0(a0)
+; RV32IA-NEXT: lw a5, 0(a0)
+; RV32IA-NEXT: lw a4, 4(a0)
; RV32IA-NEXT: mv s1, a2
; RV32IA-NEXT: mv s2, a1
; RV32IA-NEXT: j .LBB236_2
; RV32IA-NEXT: .LBB236_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB236_2 Depth=1
-; RV32IA-NEXT: sw a4, 8(sp)
-; RV32IA-NEXT: sw a5, 12(sp)
+; RV32IA-NEXT: sw a5, 8(sp)
+; RV32IA-NEXT: sw a4, 12(sp)
; RV32IA-NEXT: addi a1, sp, 8
; RV32IA-NEXT: li a4, 2
; RV32IA-NEXT: li a5, 2
; RV32IA-NEXT: mv a0, s0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 12(sp)
-; RV32IA-NEXT: lw a4, 8(sp)
+; RV32IA-NEXT: lw a5, 8(sp)
+; RV32IA-NEXT: lw a4, 12(sp)
; RV32IA-NEXT: bnez a0, .LBB236_7
; RV32IA-NEXT: .LBB236_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: beq a5, s1, .LBB236_4
+; RV32IA-NEXT: beq a4, s1, .LBB236_4
; RV32IA-NEXT: # %bb.3: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB236_2 Depth=1
-; RV32IA-NEXT: sltu a0, s1, a5
+; RV32IA-NEXT: sltu a0, s1, a4
; RV32IA-NEXT: j .LBB236_5
; RV32IA-NEXT: .LBB236_4: # in Loop: Header=BB236_2 Depth=1
-; RV32IA-NEXT: sltu a0, s2, a4
+; RV32IA-NEXT: sltu a0, s2, a5
; RV32IA-NEXT: .LBB236_5: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB236_2 Depth=1
-; RV32IA-NEXT: mv a2, a4
-; RV32IA-NEXT: mv a3, a5
+; RV32IA-NEXT: mv a2, a5
+; RV32IA-NEXT: mv a3, a4
; RV32IA-NEXT: beqz a0, .LBB236_1
; RV32IA-NEXT: # %bb.6: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB236_2 Depth=1
@@ -28672,8 +28672,8 @@ define i64 @atomicrmw_umin_i64_acquire(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: mv a3, s1
; RV32IA-NEXT: j .LBB236_1
; RV32IA-NEXT: .LBB236_7: # %atomicrmw.end
-; RV32IA-NEXT: mv a0, a4
-; RV32IA-NEXT: mv a1, a5
+; RV32IA-NEXT: mv a0, a5
+; RV32IA-NEXT: mv a1, a4
; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -28739,36 +28739,36 @@ define i64 @atomicrmw_umin_i64_release(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lw a5, 4(a0)
-; RV32I-NEXT: lw a4, 0(a0)
+; RV32I-NEXT: lw a5, 0(a0)
+; RV32I-NEXT: lw a4, 4(a0)
; RV32I-NEXT: mv s1, a2
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: j .LBB237_2
; RV32I-NEXT: .LBB237_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB237_2 Depth=1
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: sw a5, 12(sp)
+; RV32I-NEXT: sw a5, 8(sp)
+; RV32I-NEXT: sw a4, 12(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a4, 3
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a5, 0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 12(sp)
-; RV32I-NEXT: lw a4, 8(sp)
+; RV32I-NEXT: lw a5, 8(sp)
+; RV32I-NEXT: lw a4, 12(sp)
; RV32I-NEXT: bnez a0, .LBB237_7
; RV32I-NEXT: .LBB237_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT: beq a5, s1, .LBB237_4
+; RV32I-NEXT: beq a4, s1, .LBB237_4
; RV32I-NEXT: # %bb.3: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB237_2 Depth=1
-; RV32I-NEXT: sltu a0, s1, a5
+; RV32I-NEXT: sltu a0, s1, a4
; RV32I-NEXT: j .LBB237_5
; RV32I-NEXT: .LBB237_4: # in Loop: Header=BB237_2 Depth=1
-; RV32I-NEXT: sltu a0, s2, a4
+; RV32I-NEXT: sltu a0, s2, a5
; RV32I-NEXT: .LBB237_5: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB237_2 Depth=1
-; RV32I-NEXT: mv a2, a4
-; RV32I-NEXT: mv a3, a5
+; RV32I-NEXT: mv a2, a5
+; RV32I-NEXT: mv a3, a4
; RV32I-NEXT: beqz a0, .LBB237_1
; RV32I-NEXT: # %bb.6: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB237_2 Depth=1
@@ -28776,8 +28776,8 @@ define i64 @atomicrmw_umin_i64_release(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: mv a3, s1
; RV32I-NEXT: j .LBB237_1
; RV32I-NEXT: .LBB237_7: # %atomicrmw.end
-; RV32I-NEXT: mv a0, a4
-; RV32I-NEXT: mv a1, a5
+; RV32I-NEXT: mv a0, a5
+; RV32I-NEXT: mv a1, a4
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -28793,36 +28793,36 @@ define i64 @atomicrmw_umin_i64_release(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv s0, a0
-; RV32IA-NEXT: lw a5, 4(a0)
-; RV32IA-NEXT: lw a4, 0(a0)
+; RV32IA-NEXT: lw a5, 0(a0)
+; RV32IA-NEXT: lw a4, 4(a0)
; RV32IA-NEXT: mv s1, a2
; RV32IA-NEXT: mv s2, a1
; RV32IA-NEXT: j .LBB237_2
; RV32IA-NEXT: .LBB237_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB237_2 Depth=1
-; RV32IA-NEXT: sw a4, 8(sp)
-; RV32IA-NEXT: sw a5, 12(sp)
+; RV32IA-NEXT: sw a5, 8(sp)
+; RV32IA-NEXT: sw a4, 12(sp)
; RV32IA-NEXT: addi a1, sp, 8
; RV32IA-NEXT: li a4, 3
; RV32IA-NEXT: mv a0, s0
; RV32IA-NEXT: li a5, 0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 12(sp)
-; RV32IA-NEXT: lw a4, 8(sp)
+; RV32IA-NEXT: lw a5, 8(sp)
+; RV32IA-NEXT: lw a4, 12(sp)
; RV32IA-NEXT: bnez a0, .LBB237_7
; RV32IA-NEXT: .LBB237_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: beq a5, s1, .LBB237_4
+; RV32IA-NEXT: beq a4, s1, .LBB237_4
; RV32IA-NEXT: # %bb.3: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB237_2 Depth=1
-; RV32IA-NEXT: sltu a0, s1, a5
+; RV32IA-NEXT: sltu a0, s1, a4
; RV32IA-NEXT: j .LBB237_5
; RV32IA-NEXT: .LBB237_4: # in Loop: Header=BB237_2 Depth=1
-; RV32IA-NEXT: sltu a0, s2, a4
+; RV32IA-NEXT: sltu a0, s2, a5
; RV32IA-NEXT: .LBB237_5: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB237_2 Depth=1
-; RV32IA-NEXT: mv a2, a4
-; RV32IA-NEXT: mv a3, a5
+; RV32IA-NEXT: mv a2, a5
+; RV32IA-NEXT: mv a3, a4
; RV32IA-NEXT: beqz a0, .LBB237_1
; RV32IA-NEXT: # %bb.6: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB237_2 Depth=1
@@ -28830,8 +28830,8 @@ define i64 @atomicrmw_umin_i64_release(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: mv a3, s1
; RV32IA-NEXT: j .LBB237_1
; RV32IA-NEXT: .LBB237_7: # %atomicrmw.end
-; RV32IA-NEXT: mv a0, a4
-; RV32IA-NEXT: mv a1, a5
+; RV32IA-NEXT: mv a0, a5
+; RV32IA-NEXT: mv a1, a4
; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -28897,36 +28897,36 @@ define i64 @atomicrmw_umin_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lw a5, 4(a0)
-; RV32I-NEXT: lw a4, 0(a0)
+; RV32I-NEXT: lw a5, 0(a0)
+; RV32I-NEXT: lw a4, 4(a0)
; RV32I-NEXT: mv s1, a2
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: j .LBB238_2
; RV32I-NEXT: .LBB238_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB238_2 Depth=1
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: sw a5, 12(sp)
+; RV32I-NEXT: sw a5, 8(sp)
+; RV32I-NEXT: sw a4, 12(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a4, 4
; RV32I-NEXT: li a5, 2
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 12(sp)
-; RV32I-NEXT: lw a4, 8(sp)
+; RV32I-NEXT: lw a5, 8(sp)
+; RV32I-NEXT: lw a4, 12(sp)
; RV32I-NEXT: bnez a0, .LBB238_7
; RV32I-NEXT: .LBB238_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT: beq a5, s1, .LBB238_4
+; RV32I-NEXT: beq a4, s1, .LBB238_4
; RV32I-NEXT: # %bb.3: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB238_2 Depth=1
-; RV32I-NEXT: sltu a0, s1, a5
+; RV32I-NEXT: sltu a0, s1, a4
; RV32I-NEXT: j .LBB238_5
; RV32I-NEXT: .LBB238_4: # in Loop: Header=BB238_2 Depth=1
-; RV32I-NEXT: sltu a0, s2, a4
+; RV32I-NEXT: sltu a0, s2, a5
; RV32I-NEXT: .LBB238_5: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB238_2 Depth=1
-; RV32I-NEXT: mv a2, a4
-; RV32I-NEXT: mv a3, a5
+; RV32I-NEXT: mv a2, a5
+; RV32I-NEXT: mv a3, a4
; RV32I-NEXT: beqz a0, .LBB238_1
; RV32I-NEXT: # %bb.6: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB238_2 Depth=1
@@ -28934,8 +28934,8 @@ define i64 @atomicrmw_umin_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: mv a3, s1
; RV32I-NEXT: j .LBB238_1
; RV32I-NEXT: .LBB238_7: # %atomicrmw.end
-; RV32I-NEXT: mv a0, a4
-; RV32I-NEXT: mv a1, a5
+; RV32I-NEXT: mv a0, a5
+; RV32I-NEXT: mv a1, a4
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -28951,36 +28951,36 @@ define i64 @atomicrmw_umin_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv s0, a0
-; RV32IA-NEXT: lw a5, 4(a0)
-; RV32IA-NEXT: lw a4, 0(a0)
+; RV32IA-NEXT: lw a5, 0(a0)
+; RV32IA-NEXT: lw a4, 4(a0)
; RV32IA-NEXT: mv s1, a2
; RV32IA-NEXT: mv s2, a1
; RV32IA-NEXT: j .LBB238_2
; RV32IA-NEXT: .LBB238_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB238_2 Depth=1
-; RV32IA-NEXT: sw a4, 8(sp)
-; RV32IA-NEXT: sw a5, 12(sp)
+; RV32IA-NEXT: sw a5, 8(sp)
+; RV32IA-NEXT: sw a4, 12(sp)
; RV32IA-NEXT: addi a1, sp, 8
; RV32IA-NEXT: li a4, 4
; RV32IA-NEXT: li a5, 2
; RV32IA-NEXT: mv a0, s0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 12(sp)
-; RV32IA-NEXT: lw a4, 8(sp)
+; RV32IA-NEXT: lw a5, 8(sp)
+; RV32IA-NEXT: lw a4, 12(sp)
; RV32IA-NEXT: bnez a0, .LBB238_7
; RV32IA-NEXT: .LBB238_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: beq a5, s1, .LBB238_4
+; RV32IA-NEXT: beq a4, s1, .LBB238_4
; RV32IA-NEXT: # %bb.3: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB238_2 Depth=1
-; RV32IA-NEXT: sltu a0, s1, a5
+; RV32IA-NEXT: sltu a0, s1, a4
; RV32IA-NEXT: j .LBB238_5
; RV32IA-NEXT: .LBB238_4: # in Loop: Header=BB238_2 Depth=1
-; RV32IA-NEXT: sltu a0, s2, a4
+; RV32IA-NEXT: sltu a0, s2, a5
; RV32IA-NEXT: .LBB238_5: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB238_2 Depth=1
-; RV32IA-NEXT: mv a2, a4
-; RV32IA-NEXT: mv a3, a5
+; RV32IA-NEXT: mv a2, a5
+; RV32IA-NEXT: mv a3, a4
; RV32IA-NEXT: beqz a0, .LBB238_1
; RV32IA-NEXT: # %bb.6: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB238_2 Depth=1
@@ -28988,8 +28988,8 @@ define i64 @atomicrmw_umin_i64_acq_rel(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: mv a3, s1
; RV32IA-NEXT: j .LBB238_1
; RV32IA-NEXT: .LBB238_7: # %atomicrmw.end
-; RV32IA-NEXT: mv a0, a4
-; RV32IA-NEXT: mv a1, a5
+; RV32IA-NEXT: mv a0, a5
+; RV32IA-NEXT: mv a1, a4
; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -29055,36 +29055,36 @@ define i64 @atomicrmw_umin_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lw a5, 4(a0)
-; RV32I-NEXT: lw a4, 0(a0)
+; RV32I-NEXT: lw a5, 0(a0)
+; RV32I-NEXT: lw a4, 4(a0)
; RV32I-NEXT: mv s1, a2
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: j .LBB239_2
; RV32I-NEXT: .LBB239_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB239_2 Depth=1
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: sw a5, 12(sp)
+; RV32I-NEXT: sw a5, 8(sp)
+; RV32I-NEXT: sw a4, 12(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: li a5, 5
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 12(sp)
-; RV32I-NEXT: lw a4, 8(sp)
+; RV32I-NEXT: lw a5, 8(sp)
+; RV32I-NEXT: lw a4, 12(sp)
; RV32I-NEXT: bnez a0, .LBB239_7
; RV32I-NEXT: .LBB239_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT: beq a5, s1, .LBB239_4
+; RV32I-NEXT: beq a4, s1, .LBB239_4
; RV32I-NEXT: # %bb.3: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB239_2 Depth=1
-; RV32I-NEXT: sltu a0, s1, a5
+; RV32I-NEXT: sltu a0, s1, a4
; RV32I-NEXT: j .LBB239_5
; RV32I-NEXT: .LBB239_4: # in Loop: Header=BB239_2 Depth=1
-; RV32I-NEXT: sltu a0, s2, a4
+; RV32I-NEXT: sltu a0, s2, a5
; RV32I-NEXT: .LBB239_5: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB239_2 Depth=1
-; RV32I-NEXT: mv a2, a4
-; RV32I-NEXT: mv a3, a5
+; RV32I-NEXT: mv a2, a5
+; RV32I-NEXT: mv a3, a4
; RV32I-NEXT: beqz a0, .LBB239_1
; RV32I-NEXT: # %bb.6: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB239_2 Depth=1
@@ -29092,8 +29092,8 @@ define i64 @atomicrmw_umin_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: mv a3, s1
; RV32I-NEXT: j .LBB239_1
; RV32I-NEXT: .LBB239_7: # %atomicrmw.end
-; RV32I-NEXT: mv a0, a4
-; RV32I-NEXT: mv a1, a5
+; RV32I-NEXT: mv a0, a5
+; RV32I-NEXT: mv a1, a4
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -29109,36 +29109,36 @@ define i64 @atomicrmw_umin_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv s0, a0
-; RV32IA-NEXT: lw a5, 4(a0)
-; RV32IA-NEXT: lw a4, 0(a0)
+; RV32IA-NEXT: lw a5, 0(a0)
+; RV32IA-NEXT: lw a4, 4(a0)
; RV32IA-NEXT: mv s1, a2
; RV32IA-NEXT: mv s2, a1
; RV32IA-NEXT: j .LBB239_2
; RV32IA-NEXT: .LBB239_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB239_2 Depth=1
-; RV32IA-NEXT: sw a4, 8(sp)
-; RV32IA-NEXT: sw a5, 12(sp)
+; RV32IA-NEXT: sw a5, 8(sp)
+; RV32IA-NEXT: sw a4, 12(sp)
; RV32IA-NEXT: addi a1, sp, 8
; RV32IA-NEXT: li a4, 5
; RV32IA-NEXT: li a5, 5
; RV32IA-NEXT: mv a0, s0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 12(sp)
-; RV32IA-NEXT: lw a4, 8(sp)
+; RV32IA-NEXT: lw a5, 8(sp)
+; RV32IA-NEXT: lw a4, 12(sp)
; RV32IA-NEXT: bnez a0, .LBB239_7
; RV32IA-NEXT: .LBB239_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: beq a5, s1, .LBB239_4
+; RV32IA-NEXT: beq a4, s1, .LBB239_4
; RV32IA-NEXT: # %bb.3: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB239_2 Depth=1
-; RV32IA-NEXT: sltu a0, s1, a5
+; RV32IA-NEXT: sltu a0, s1, a4
; RV32IA-NEXT: j .LBB239_5
; RV32IA-NEXT: .LBB239_4: # in Loop: Header=BB239_2 Depth=1
-; RV32IA-NEXT: sltu a0, s2, a4
+; RV32IA-NEXT: sltu a0, s2, a5
; RV32IA-NEXT: .LBB239_5: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB239_2 Depth=1
-; RV32IA-NEXT: mv a2, a4
-; RV32IA-NEXT: mv a3, a5
+; RV32IA-NEXT: mv a2, a5
+; RV32IA-NEXT: mv a3, a4
; RV32IA-NEXT: beqz a0, .LBB239_1
; RV32IA-NEXT: # %bb.6: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB239_2 Depth=1
@@ -29146,8 +29146,8 @@ define i64 @atomicrmw_umin_i64_seq_cst(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: mv a3, s1
; RV32IA-NEXT: j .LBB239_1
; RV32IA-NEXT: .LBB239_7: # %atomicrmw.end
-; RV32IA-NEXT: mv a0, a4
-; RV32IA-NEXT: mv a1, a5
+; RV32IA-NEXT: mv a0, a5
+; RV32IA-NEXT: mv a1, a4
; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/atomic-signext.ll b/llvm/test/CodeGen/RISCV/atomic-signext.ll
index ed0a160d3f58ad..06594e35be8703 100644
--- a/llvm/test/CodeGen/RISCV/atomic-signext.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-signext.ll
@@ -3183,36 +3183,36 @@ define signext i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lw a5, 4(a0)
-; RV32I-NEXT: lw a4, 0(a0)
+; RV32I-NEXT: lw a5, 0(a0)
+; RV32I-NEXT: lw a4, 4(a0)
; RV32I-NEXT: mv s1, a2
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: j .LBB43_2
; RV32I-NEXT: .LBB43_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB43_2 Depth=1
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: sw a5, 12(sp)
+; RV32I-NEXT: sw a5, 8(sp)
+; RV32I-NEXT: sw a4, 12(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: li a5, 0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 12(sp)
-; RV32I-NEXT: lw a4, 8(sp)
+; RV32I-NEXT: lw a5, 8(sp)
+; RV32I-NEXT: lw a4, 12(sp)
; RV32I-NEXT: bnez a0, .LBB43_7
; RV32I-NEXT: .LBB43_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT: beq a5, s1, .LBB43_4
+; RV32I-NEXT: beq a4, s1, .LBB43_4
; RV32I-NEXT: # %bb.3: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB43_2 Depth=1
-; RV32I-NEXT: slt a0, s1, a5
+; RV32I-NEXT: slt a0, s1, a4
; RV32I-NEXT: j .LBB43_5
; RV32I-NEXT: .LBB43_4: # in Loop: Header=BB43_2 Depth=1
-; RV32I-NEXT: sltu a0, s2, a4
+; RV32I-NEXT: sltu a0, s2, a5
; RV32I-NEXT: .LBB43_5: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB43_2 Depth=1
-; RV32I-NEXT: mv a2, a4
-; RV32I-NEXT: mv a3, a5
+; RV32I-NEXT: mv a2, a5
+; RV32I-NEXT: mv a3, a4
; RV32I-NEXT: bnez a0, .LBB43_1
; RV32I-NEXT: # %bb.6: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB43_2 Depth=1
@@ -3220,8 +3220,8 @@ define signext i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: mv a3, s1
; RV32I-NEXT: j .LBB43_1
; RV32I-NEXT: .LBB43_7: # %atomicrmw.end
-; RV32I-NEXT: mv a0, a4
-; RV32I-NEXT: mv a1, a5
+; RV32I-NEXT: mv a0, a5
+; RV32I-NEXT: mv a1, a4
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -3237,36 +3237,36 @@ define signext i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv s0, a0
-; RV32IA-NEXT: lw a5, 4(a0)
-; RV32IA-NEXT: lw a4, 0(a0)
+; RV32IA-NEXT: lw a5, 0(a0)
+; RV32IA-NEXT: lw a4, 4(a0)
; RV32IA-NEXT: mv s1, a2
; RV32IA-NEXT: mv s2, a1
; RV32IA-NEXT: j .LBB43_2
; RV32IA-NEXT: .LBB43_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB43_2 Depth=1
-; RV32IA-NEXT: sw a4, 8(sp)
-; RV32IA-NEXT: sw a5, 12(sp)
+; RV32IA-NEXT: sw a5, 8(sp)
+; RV32IA-NEXT: sw a4, 12(sp)
; RV32IA-NEXT: addi a1, sp, 8
; RV32IA-NEXT: mv a0, s0
; RV32IA-NEXT: li a4, 0
; RV32IA-NEXT: li a5, 0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 12(sp)
-; RV32IA-NEXT: lw a4, 8(sp)
+; RV32IA-NEXT: lw a5, 8(sp)
+; RV32IA-NEXT: lw a4, 12(sp)
; RV32IA-NEXT: bnez a0, .LBB43_7
; RV32IA-NEXT: .LBB43_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: beq a5, s1, .LBB43_4
+; RV32IA-NEXT: beq a4, s1, .LBB43_4
; RV32IA-NEXT: # %bb.3: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB43_2 Depth=1
-; RV32IA-NEXT: slt a0, s1, a5
+; RV32IA-NEXT: slt a0, s1, a4
; RV32IA-NEXT: j .LBB43_5
; RV32IA-NEXT: .LBB43_4: # in Loop: Header=BB43_2 Depth=1
-; RV32IA-NEXT: sltu a0, s2, a4
+; RV32IA-NEXT: sltu a0, s2, a5
; RV32IA-NEXT: .LBB43_5: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB43_2 Depth=1
-; RV32IA-NEXT: mv a2, a4
-; RV32IA-NEXT: mv a3, a5
+; RV32IA-NEXT: mv a2, a5
+; RV32IA-NEXT: mv a3, a4
; RV32IA-NEXT: bnez a0, .LBB43_1
; RV32IA-NEXT: # %bb.6: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB43_2 Depth=1
@@ -3274,8 +3274,8 @@ define signext i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: mv a3, s1
; RV32IA-NEXT: j .LBB43_1
; RV32IA-NEXT: .LBB43_7: # %atomicrmw.end
-; RV32IA-NEXT: mv a0, a4
-; RV32IA-NEXT: mv a1, a5
+; RV32IA-NEXT: mv a0, a5
+; RV32IA-NEXT: mv a1, a4
; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -3336,36 +3336,36 @@ define signext i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lw a5, 4(a0)
-; RV32I-NEXT: lw a4, 0(a0)
+; RV32I-NEXT: lw a5, 0(a0)
+; RV32I-NEXT: lw a4, 4(a0)
; RV32I-NEXT: mv s1, a2
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: j .LBB44_2
; RV32I-NEXT: .LBB44_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB44_2 Depth=1
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: sw a5, 12(sp)
+; RV32I-NEXT: sw a5, 8(sp)
+; RV32I-NEXT: sw a4, 12(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: li a5, 0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 12(sp)
-; RV32I-NEXT: lw a4, 8(sp)
+; RV32I-NEXT: lw a5, 8(sp)
+; RV32I-NEXT: lw a4, 12(sp)
; RV32I-NEXT: bnez a0, .LBB44_7
; RV32I-NEXT: .LBB44_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT: beq a5, s1, .LBB44_4
+; RV32I-NEXT: beq a4, s1, .LBB44_4
; RV32I-NEXT: # %bb.3: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB44_2 Depth=1
-; RV32I-NEXT: slt a0, s1, a5
+; RV32I-NEXT: slt a0, s1, a4
; RV32I-NEXT: j .LBB44_5
; RV32I-NEXT: .LBB44_4: # in Loop: Header=BB44_2 Depth=1
-; RV32I-NEXT: sltu a0, s2, a4
+; RV32I-NEXT: sltu a0, s2, a5
; RV32I-NEXT: .LBB44_5: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB44_2 Depth=1
-; RV32I-NEXT: mv a2, a4
-; RV32I-NEXT: mv a3, a5
+; RV32I-NEXT: mv a2, a5
+; RV32I-NEXT: mv a3, a4
; RV32I-NEXT: beqz a0, .LBB44_1
; RV32I-NEXT: # %bb.6: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB44_2 Depth=1
@@ -3373,8 +3373,8 @@ define signext i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: mv a3, s1
; RV32I-NEXT: j .LBB44_1
; RV32I-NEXT: .LBB44_7: # %atomicrmw.end
-; RV32I-NEXT: mv a0, a4
-; RV32I-NEXT: mv a1, a5
+; RV32I-NEXT: mv a0, a5
+; RV32I-NEXT: mv a1, a4
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -3390,36 +3390,36 @@ define signext i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv s0, a0
-; RV32IA-NEXT: lw a5, 4(a0)
-; RV32IA-NEXT: lw a4, 0(a0)
+; RV32IA-NEXT: lw a5, 0(a0)
+; RV32IA-NEXT: lw a4, 4(a0)
; RV32IA-NEXT: mv s1, a2
; RV32IA-NEXT: mv s2, a1
; RV32IA-NEXT: j .LBB44_2
; RV32IA-NEXT: .LBB44_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB44_2 Depth=1
-; RV32IA-NEXT: sw a4, 8(sp)
-; RV32IA-NEXT: sw a5, 12(sp)
+; RV32IA-NEXT: sw a5, 8(sp)
+; RV32IA-NEXT: sw a4, 12(sp)
; RV32IA-NEXT: addi a1, sp, 8
; RV32IA-NEXT: mv a0, s0
; RV32IA-NEXT: li a4, 0
; RV32IA-NEXT: li a5, 0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 12(sp)
-; RV32IA-NEXT: lw a4, 8(sp)
+; RV32IA-NEXT: lw a5, 8(sp)
+; RV32IA-NEXT: lw a4, 12(sp)
; RV32IA-NEXT: bnez a0, .LBB44_7
; RV32IA-NEXT: .LBB44_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: beq a5, s1, .LBB44_4
+; RV32IA-NEXT: beq a4, s1, .LBB44_4
; RV32IA-NEXT: # %bb.3: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB44_2 Depth=1
-; RV32IA-NEXT: slt a0, s1, a5
+; RV32IA-NEXT: slt a0, s1, a4
; RV32IA-NEXT: j .LBB44_5
; RV32IA-NEXT: .LBB44_4: # in Loop: Header=BB44_2 Depth=1
-; RV32IA-NEXT: sltu a0, s2, a4
+; RV32IA-NEXT: sltu a0, s2, a5
; RV32IA-NEXT: .LBB44_5: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB44_2 Depth=1
-; RV32IA-NEXT: mv a2, a4
-; RV32IA-NEXT: mv a3, a5
+; RV32IA-NEXT: mv a2, a5
+; RV32IA-NEXT: mv a3, a4
; RV32IA-NEXT: beqz a0, .LBB44_1
; RV32IA-NEXT: # %bb.6: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB44_2 Depth=1
@@ -3427,8 +3427,8 @@ define signext i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: mv a3, s1
; RV32IA-NEXT: j .LBB44_1
; RV32IA-NEXT: .LBB44_7: # %atomicrmw.end
-; RV32IA-NEXT: mv a0, a4
-; RV32IA-NEXT: mv a1, a5
+; RV32IA-NEXT: mv a0, a5
+; RV32IA-NEXT: mv a1, a4
; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -3489,36 +3489,36 @@ define signext i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lw a5, 4(a0)
-; RV32I-NEXT: lw a4, 0(a0)
+; RV32I-NEXT: lw a5, 0(a0)
+; RV32I-NEXT: lw a4, 4(a0)
; RV32I-NEXT: mv s1, a2
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: j .LBB45_2
; RV32I-NEXT: .LBB45_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB45_2 Depth=1
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: sw a5, 12(sp)
+; RV32I-NEXT: sw a5, 8(sp)
+; RV32I-NEXT: sw a4, 12(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: li a5, 0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 12(sp)
-; RV32I-NEXT: lw a4, 8(sp)
+; RV32I-NEXT: lw a5, 8(sp)
+; RV32I-NEXT: lw a4, 12(sp)
; RV32I-NEXT: bnez a0, .LBB45_7
; RV32I-NEXT: .LBB45_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT: beq a5, s1, .LBB45_4
+; RV32I-NEXT: beq a4, s1, .LBB45_4
; RV32I-NEXT: # %bb.3: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB45_2 Depth=1
-; RV32I-NEXT: sltu a0, s1, a5
+; RV32I-NEXT: sltu a0, s1, a4
; RV32I-NEXT: j .LBB45_5
; RV32I-NEXT: .LBB45_4: # in Loop: Header=BB45_2 Depth=1
-; RV32I-NEXT: sltu a0, s2, a4
+; RV32I-NEXT: sltu a0, s2, a5
; RV32I-NEXT: .LBB45_5: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB45_2 Depth=1
-; RV32I-NEXT: mv a2, a4
-; RV32I-NEXT: mv a3, a5
+; RV32I-NEXT: mv a2, a5
+; RV32I-NEXT: mv a3, a4
; RV32I-NEXT: bnez a0, .LBB45_1
; RV32I-NEXT: # %bb.6: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB45_2 Depth=1
@@ -3526,8 +3526,8 @@ define signext i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: mv a3, s1
; RV32I-NEXT: j .LBB45_1
; RV32I-NEXT: .LBB45_7: # %atomicrmw.end
-; RV32I-NEXT: mv a0, a4
-; RV32I-NEXT: mv a1, a5
+; RV32I-NEXT: mv a0, a5
+; RV32I-NEXT: mv a1, a4
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -3543,36 +3543,36 @@ define signext i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv s0, a0
-; RV32IA-NEXT: lw a5, 4(a0)
-; RV32IA-NEXT: lw a4, 0(a0)
+; RV32IA-NEXT: lw a5, 0(a0)
+; RV32IA-NEXT: lw a4, 4(a0)
; RV32IA-NEXT: mv s1, a2
; RV32IA-NEXT: mv s2, a1
; RV32IA-NEXT: j .LBB45_2
; RV32IA-NEXT: .LBB45_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB45_2 Depth=1
-; RV32IA-NEXT: sw a4, 8(sp)
-; RV32IA-NEXT: sw a5, 12(sp)
+; RV32IA-NEXT: sw a5, 8(sp)
+; RV32IA-NEXT: sw a4, 12(sp)
; RV32IA-NEXT: addi a1, sp, 8
; RV32IA-NEXT: mv a0, s0
; RV32IA-NEXT: li a4, 0
; RV32IA-NEXT: li a5, 0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 12(sp)
-; RV32IA-NEXT: lw a4, 8(sp)
+; RV32IA-NEXT: lw a5, 8(sp)
+; RV32IA-NEXT: lw a4, 12(sp)
; RV32IA-NEXT: bnez a0, .LBB45_7
; RV32IA-NEXT: .LBB45_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: beq a5, s1, .LBB45_4
+; RV32IA-NEXT: beq a4, s1, .LBB45_4
; RV32IA-NEXT: # %bb.3: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB45_2 Depth=1
-; RV32IA-NEXT: sltu a0, s1, a5
+; RV32IA-NEXT: sltu a0, s1, a4
; RV32IA-NEXT: j .LBB45_5
; RV32IA-NEXT: .LBB45_4: # in Loop: Header=BB45_2 Depth=1
-; RV32IA-NEXT: sltu a0, s2, a4
+; RV32IA-NEXT: sltu a0, s2, a5
; RV32IA-NEXT: .LBB45_5: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB45_2 Depth=1
-; RV32IA-NEXT: mv a2, a4
-; RV32IA-NEXT: mv a3, a5
+; RV32IA-NEXT: mv a2, a5
+; RV32IA-NEXT: mv a3, a4
; RV32IA-NEXT: bnez a0, .LBB45_1
; RV32IA-NEXT: # %bb.6: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB45_2 Depth=1
@@ -3580,8 +3580,8 @@ define signext i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: mv a3, s1
; RV32IA-NEXT: j .LBB45_1
; RV32IA-NEXT: .LBB45_7: # %atomicrmw.end
-; RV32IA-NEXT: mv a0, a4
-; RV32IA-NEXT: mv a1, a5
+; RV32IA-NEXT: mv a0, a5
+; RV32IA-NEXT: mv a1, a4
; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -3642,36 +3642,36 @@ define signext i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lw a5, 4(a0)
-; RV32I-NEXT: lw a4, 0(a0)
+; RV32I-NEXT: lw a5, 0(a0)
+; RV32I-NEXT: lw a4, 4(a0)
; RV32I-NEXT: mv s1, a2
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: j .LBB46_2
; RV32I-NEXT: .LBB46_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB46_2 Depth=1
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: sw a5, 12(sp)
+; RV32I-NEXT: sw a5, 8(sp)
+; RV32I-NEXT: sw a4, 12(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: li a4, 0
; RV32I-NEXT: li a5, 0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 12(sp)
-; RV32I-NEXT: lw a4, 8(sp)
+; RV32I-NEXT: lw a5, 8(sp)
+; RV32I-NEXT: lw a4, 12(sp)
; RV32I-NEXT: bnez a0, .LBB46_7
; RV32I-NEXT: .LBB46_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT: beq a5, s1, .LBB46_4
+; RV32I-NEXT: beq a4, s1, .LBB46_4
; RV32I-NEXT: # %bb.3: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB46_2 Depth=1
-; RV32I-NEXT: sltu a0, s1, a5
+; RV32I-NEXT: sltu a0, s1, a4
; RV32I-NEXT: j .LBB46_5
; RV32I-NEXT: .LBB46_4: # in Loop: Header=BB46_2 Depth=1
-; RV32I-NEXT: sltu a0, s2, a4
+; RV32I-NEXT: sltu a0, s2, a5
; RV32I-NEXT: .LBB46_5: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB46_2 Depth=1
-; RV32I-NEXT: mv a2, a4
-; RV32I-NEXT: mv a3, a5
+; RV32I-NEXT: mv a2, a5
+; RV32I-NEXT: mv a3, a4
; RV32I-NEXT: beqz a0, .LBB46_1
; RV32I-NEXT: # %bb.6: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB46_2 Depth=1
@@ -3679,8 +3679,8 @@ define signext i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32I-NEXT: mv a3, s1
; RV32I-NEXT: j .LBB46_1
; RV32I-NEXT: .LBB46_7: # %atomicrmw.end
-; RV32I-NEXT: mv a0, a4
-; RV32I-NEXT: mv a1, a5
+; RV32I-NEXT: mv a0, a5
+; RV32I-NEXT: mv a1, a4
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -3696,36 +3696,36 @@ define signext i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32IA-NEXT: mv s0, a0
-; RV32IA-NEXT: lw a5, 4(a0)
-; RV32IA-NEXT: lw a4, 0(a0)
+; RV32IA-NEXT: lw a5, 0(a0)
+; RV32IA-NEXT: lw a4, 4(a0)
; RV32IA-NEXT: mv s1, a2
; RV32IA-NEXT: mv s2, a1
; RV32IA-NEXT: j .LBB46_2
; RV32IA-NEXT: .LBB46_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB46_2 Depth=1
-; RV32IA-NEXT: sw a4, 8(sp)
-; RV32IA-NEXT: sw a5, 12(sp)
+; RV32IA-NEXT: sw a5, 8(sp)
+; RV32IA-NEXT: sw a4, 12(sp)
; RV32IA-NEXT: addi a1, sp, 8
; RV32IA-NEXT: mv a0, s0
; RV32IA-NEXT: li a4, 0
; RV32IA-NEXT: li a5, 0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 12(sp)
-; RV32IA-NEXT: lw a4, 8(sp)
+; RV32IA-NEXT: lw a5, 8(sp)
+; RV32IA-NEXT: lw a4, 12(sp)
; RV32IA-NEXT: bnez a0, .LBB46_7
; RV32IA-NEXT: .LBB46_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: beq a5, s1, .LBB46_4
+; RV32IA-NEXT: beq a4, s1, .LBB46_4
; RV32IA-NEXT: # %bb.3: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB46_2 Depth=1
-; RV32IA-NEXT: sltu a0, s1, a5
+; RV32IA-NEXT: sltu a0, s1, a4
; RV32IA-NEXT: j .LBB46_5
; RV32IA-NEXT: .LBB46_4: # in Loop: Header=BB46_2 Depth=1
-; RV32IA-NEXT: sltu a0, s2, a4
+; RV32IA-NEXT: sltu a0, s2, a5
; RV32IA-NEXT: .LBB46_5: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB46_2 Depth=1
-; RV32IA-NEXT: mv a2, a4
-; RV32IA-NEXT: mv a3, a5
+; RV32IA-NEXT: mv a2, a5
+; RV32IA-NEXT: mv a3, a4
; RV32IA-NEXT: beqz a0, .LBB46_1
; RV32IA-NEXT: # %bb.6: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB46_2 Depth=1
@@ -3733,8 +3733,8 @@ define signext i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind {
; RV32IA-NEXT: mv a3, s1
; RV32IA-NEXT: j .LBB46_1
; RV32IA-NEXT: .LBB46_7: # %atomicrmw.end
-; RV32IA-NEXT: mv a0, a4
-; RV32IA-NEXT: mv a1, a5
+; RV32IA-NEXT: mv a0, a5
+; RV32IA-NEXT: mv a1, a4
; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/atomicrmw-cond-sub-clamp.ll b/llvm/test/CodeGen/RISCV/atomicrmw-cond-sub-clamp.ll
index b5e892c0ff6aca..0d6ae3a51e2469 100644
--- a/llvm/test/CodeGen/RISCV/atomicrmw-cond-sub-clamp.ll
+++ b/llvm/test/CodeGen/RISCV/atomicrmw-cond-sub-clamp.ll
@@ -488,43 +488,43 @@ define i64 @atomicrmw_usub_cond_i64(ptr %ptr, i64 %val) {
; RV32I-NEXT: .cfi_offset s1, -12
; RV32I-NEXT: .cfi_offset s2, -16
; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lw a5, 4(a0)
-; RV32I-NEXT: lw a4, 0(a0)
+; RV32I-NEXT: lw a5, 0(a0)
+; RV32I-NEXT: lw a4, 4(a0)
; RV32I-NEXT: mv s1, a2
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: j .LBB3_3
; RV32I-NEXT: .LBB3_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB3_3 Depth=1
-; RV32I-NEXT: sltu a0, a5, s1
+; RV32I-NEXT: sltu a0, a4, s1
; RV32I-NEXT: .LBB3_2: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB3_3 Depth=1
; RV32I-NEXT: xori a0, a0, 1
; RV32I-NEXT: neg a0, a0
; RV32I-NEXT: and a1, a0, s2
-; RV32I-NEXT: sltu a2, a4, a1
+; RV32I-NEXT: sltu a2, a5, a1
; RV32I-NEXT: and a0, a0, s1
-; RV32I-NEXT: sub a3, a5, a0
+; RV32I-NEXT: sub a3, a4, a0
; RV32I-NEXT: sub a3, a3, a2
-; RV32I-NEXT: sub a2, a4, a1
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: sw a5, 12(sp)
+; RV32I-NEXT: sub a2, a5, a1
+; RV32I-NEXT: sw a5, 8(sp)
+; RV32I-NEXT: sw a4, 12(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: li a5, 5
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 12(sp)
-; RV32I-NEXT: lw a4, 8(sp)
+; RV32I-NEXT: lw a5, 8(sp)
+; RV32I-NEXT: lw a4, 12(sp)
; RV32I-NEXT: bnez a0, .LBB3_5
; RV32I-NEXT: .LBB3_3: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT: bne a5, s1, .LBB3_1
+; RV32I-NEXT: bne a4, s1, .LBB3_1
; RV32I-NEXT: # %bb.4: # in Loop: Header=BB3_3 Depth=1
-; RV32I-NEXT: sltu a0, a4, s2
+; RV32I-NEXT: sltu a0, a5, s2
; RV32I-NEXT: j .LBB3_2
; RV32I-NEXT: .LBB3_5: # %atomicrmw.end
-; RV32I-NEXT: mv a0, a4
-; RV32I-NEXT: mv a1, a5
+; RV32I-NEXT: mv a0, a5
+; RV32I-NEXT: mv a1, a4
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -545,43 +545,43 @@ define i64 @atomicrmw_usub_cond_i64(ptr %ptr, i64 %val) {
; RV32IA-NEXT: .cfi_offset s1, -12
; RV32IA-NEXT: .cfi_offset s2, -16
; RV32IA-NEXT: mv s0, a0
-; RV32IA-NEXT: lw a5, 4(a0)
-; RV32IA-NEXT: lw a4, 0(a0)
+; RV32IA-NEXT: lw a5, 0(a0)
+; RV32IA-NEXT: lw a4, 4(a0)
; RV32IA-NEXT: mv s1, a2
; RV32IA-NEXT: mv s2, a1
; RV32IA-NEXT: j .LBB3_3
; RV32IA-NEXT: .LBB3_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB3_3 Depth=1
-; RV32IA-NEXT: sltu a0, a5, s1
+; RV32IA-NEXT: sltu a0, a4, s1
; RV32IA-NEXT: .LBB3_2: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB3_3 Depth=1
; RV32IA-NEXT: xori a0, a0, 1
; RV32IA-NEXT: neg a0, a0
; RV32IA-NEXT: and a1, a0, s2
-; RV32IA-NEXT: sltu a2, a4, a1
+; RV32IA-NEXT: sltu a2, a5, a1
; RV32IA-NEXT: and a0, a0, s1
-; RV32IA-NEXT: sub a3, a5, a0
+; RV32IA-NEXT: sub a3, a4, a0
; RV32IA-NEXT: sub a3, a3, a2
-; RV32IA-NEXT: sub a2, a4, a1
-; RV32IA-NEXT: sw a4, 8(sp)
-; RV32IA-NEXT: sw a5, 12(sp)
+; RV32IA-NEXT: sub a2, a5, a1
+; RV32IA-NEXT: sw a5, 8(sp)
+; RV32IA-NEXT: sw a4, 12(sp)
; RV32IA-NEXT: addi a1, sp, 8
; RV32IA-NEXT: li a4, 5
; RV32IA-NEXT: li a5, 5
; RV32IA-NEXT: mv a0, s0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 12(sp)
-; RV32IA-NEXT: lw a4, 8(sp)
+; RV32IA-NEXT: lw a5, 8(sp)
+; RV32IA-NEXT: lw a4, 12(sp)
; RV32IA-NEXT: bnez a0, .LBB3_5
; RV32IA-NEXT: .LBB3_3: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: bne a5, s1, .LBB3_1
+; RV32IA-NEXT: bne a4, s1, .LBB3_1
; RV32IA-NEXT: # %bb.4: # in Loop: Header=BB3_3 Depth=1
-; RV32IA-NEXT: sltu a0, a4, s2
+; RV32IA-NEXT: sltu a0, a5, s2
; RV32IA-NEXT: j .LBB3_2
; RV32IA-NEXT: .LBB3_5: # %atomicrmw.end
-; RV32IA-NEXT: mv a0, a4
-; RV32IA-NEXT: mv a1, a5
+; RV32IA-NEXT: mv a0, a5
+; RV32IA-NEXT: mv a1, a4
; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -1102,42 +1102,42 @@ define i64 @atomicrmw_usub_sat_i64(ptr %ptr, i64 %val) {
; RV32I-NEXT: .cfi_offset s1, -12
; RV32I-NEXT: .cfi_offset s2, -16
; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lw a5, 4(a0)
-; RV32I-NEXT: lw a4, 0(a0)
+; RV32I-NEXT: lw a5, 0(a0)
+; RV32I-NEXT: lw a4, 4(a0)
; RV32I-NEXT: mv s1, a2
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: j .LBB7_3
; RV32I-NEXT: .LBB7_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB7_3 Depth=1
-; RV32I-NEXT: sltu a2, a5, a0
+; RV32I-NEXT: sltu a2, a4, a0
; RV32I-NEXT: .LBB7_2: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB7_3 Depth=1
; RV32I-NEXT: addi a3, a2, -1
; RV32I-NEXT: and a2, a3, a1
; RV32I-NEXT: and a3, a3, a0
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: sw a5, 12(sp)
+; RV32I-NEXT: sw a5, 8(sp)
+; RV32I-NEXT: sw a4, 12(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: li a5, 5
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 12(sp)
-; RV32I-NEXT: lw a4, 8(sp)
+; RV32I-NEXT: lw a5, 8(sp)
+; RV32I-NEXT: lw a4, 12(sp)
; RV32I-NEXT: bnez a0, .LBB7_5
; RV32I-NEXT: .LBB7_3: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT: sltu a0, a4, s2
-; RV32I-NEXT: sub a1, a5, s1
+; RV32I-NEXT: sltu a0, a5, s2
+; RV32I-NEXT: sub a1, a4, s1
; RV32I-NEXT: sub a0, a1, a0
-; RV32I-NEXT: sub a1, a4, s2
-; RV32I-NEXT: bne a0, a5, .LBB7_1
+; RV32I-NEXT: sub a1, a5, s2
+; RV32I-NEXT: bne a0, a4, .LBB7_1
; RV32I-NEXT: # %bb.4: # in Loop: Header=BB7_3 Depth=1
-; RV32I-NEXT: sltu a2, a4, a1
+; RV32I-NEXT: sltu a2, a5, a1
; RV32I-NEXT: j .LBB7_2
; RV32I-NEXT: .LBB7_5: # %atomicrmw.end
-; RV32I-NEXT: mv a0, a4
-; RV32I-NEXT: mv a1, a5
+; RV32I-NEXT: mv a0, a5
+; RV32I-NEXT: mv a1, a4
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -1158,42 +1158,42 @@ define i64 @atomicrmw_usub_sat_i64(ptr %ptr, i64 %val) {
; RV32IA-NEXT: .cfi_offset s1, -12
; RV32IA-NEXT: .cfi_offset s2, -16
; RV32IA-NEXT: mv s0, a0
-; RV32IA-NEXT: lw a5, 4(a0)
-; RV32IA-NEXT: lw a4, 0(a0)
+; RV32IA-NEXT: lw a5, 0(a0)
+; RV32IA-NEXT: lw a4, 4(a0)
; RV32IA-NEXT: mv s1, a2
; RV32IA-NEXT: mv s2, a1
; RV32IA-NEXT: j .LBB7_3
; RV32IA-NEXT: .LBB7_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB7_3 Depth=1
-; RV32IA-NEXT: sltu a2, a5, a0
+; RV32IA-NEXT: sltu a2, a4, a0
; RV32IA-NEXT: .LBB7_2: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB7_3 Depth=1
; RV32IA-NEXT: addi a3, a2, -1
; RV32IA-NEXT: and a2, a3, a1
; RV32IA-NEXT: and a3, a3, a0
-; RV32IA-NEXT: sw a4, 8(sp)
-; RV32IA-NEXT: sw a5, 12(sp)
+; RV32IA-NEXT: sw a5, 8(sp)
+; RV32IA-NEXT: sw a4, 12(sp)
; RV32IA-NEXT: addi a1, sp, 8
; RV32IA-NEXT: li a4, 5
; RV32IA-NEXT: li a5, 5
; RV32IA-NEXT: mv a0, s0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 12(sp)
-; RV32IA-NEXT: lw a4, 8(sp)
+; RV32IA-NEXT: lw a5, 8(sp)
+; RV32IA-NEXT: lw a4, 12(sp)
; RV32IA-NEXT: bnez a0, .LBB7_5
; RV32IA-NEXT: .LBB7_3: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: sltu a0, a4, s2
-; RV32IA-NEXT: sub a1, a5, s1
+; RV32IA-NEXT: sltu a0, a5, s2
+; RV32IA-NEXT: sub a1, a4, s1
; RV32IA-NEXT: sub a0, a1, a0
-; RV32IA-NEXT: sub a1, a4, s2
-; RV32IA-NEXT: bne a0, a5, .LBB7_1
+; RV32IA-NEXT: sub a1, a5, s2
+; RV32IA-NEXT: bne a0, a4, .LBB7_1
; RV32IA-NEXT: # %bb.4: # in Loop: Header=BB7_3 Depth=1
-; RV32IA-NEXT: sltu a2, a4, a1
+; RV32IA-NEXT: sltu a2, a5, a1
; RV32IA-NEXT: j .LBB7_2
; RV32IA-NEXT: .LBB7_5: # %atomicrmw.end
-; RV32IA-NEXT: mv a0, a4
-; RV32IA-NEXT: mv a1, a5
+; RV32IA-NEXT: mv a0, a5
+; RV32IA-NEXT: mv a1, a4
; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll
index 634ed45044ee21..927e778c9dd9c2 100644
--- a/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll
+++ b/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll
@@ -468,41 +468,41 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) {
; RV32I-NEXT: .cfi_offset s1, -12
; RV32I-NEXT: .cfi_offset s2, -16
; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lw a5, 4(a0)
-; RV32I-NEXT: lw a4, 0(a0)
+; RV32I-NEXT: lw a5, 0(a0)
+; RV32I-NEXT: lw a4, 4(a0)
; RV32I-NEXT: mv s1, a2
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: j .LBB3_3
; RV32I-NEXT: .LBB3_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB3_3 Depth=1
-; RV32I-NEXT: sltu a0, a5, s1
+; RV32I-NEXT: sltu a0, a4, s1
; RV32I-NEXT: .LBB3_2: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB3_3 Depth=1
-; RV32I-NEXT: addi a1, a4, 1
+; RV32I-NEXT: addi a1, a5, 1
; RV32I-NEXT: seqz a2, a1
-; RV32I-NEXT: add a3, a5, a2
+; RV32I-NEXT: add a3, a4, a2
; RV32I-NEXT: neg a0, a0
; RV32I-NEXT: and a2, a0, a1
; RV32I-NEXT: and a3, a0, a3
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: sw a5, 12(sp)
+; RV32I-NEXT: sw a5, 8(sp)
+; RV32I-NEXT: sw a4, 12(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: li a5, 5
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 12(sp)
-; RV32I-NEXT: lw a4, 8(sp)
+; RV32I-NEXT: lw a5, 8(sp)
+; RV32I-NEXT: lw a4, 12(sp)
; RV32I-NEXT: bnez a0, .LBB3_5
; RV32I-NEXT: .LBB3_3: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT: bne a5, s1, .LBB3_1
+; RV32I-NEXT: bne a4, s1, .LBB3_1
; RV32I-NEXT: # %bb.4: # in Loop: Header=BB3_3 Depth=1
-; RV32I-NEXT: sltu a0, a4, s2
+; RV32I-NEXT: sltu a0, a5, s2
; RV32I-NEXT: j .LBB3_2
; RV32I-NEXT: .LBB3_5: # %atomicrmw.end
-; RV32I-NEXT: mv a0, a4
-; RV32I-NEXT: mv a1, a5
+; RV32I-NEXT: mv a0, a5
+; RV32I-NEXT: mv a1, a4
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -523,41 +523,41 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) {
; RV32IA-NEXT: .cfi_offset s1, -12
; RV32IA-NEXT: .cfi_offset s2, -16
; RV32IA-NEXT: mv s0, a0
-; RV32IA-NEXT: lw a5, 4(a0)
-; RV32IA-NEXT: lw a4, 0(a0)
+; RV32IA-NEXT: lw a5, 0(a0)
+; RV32IA-NEXT: lw a4, 4(a0)
; RV32IA-NEXT: mv s1, a2
; RV32IA-NEXT: mv s2, a1
; RV32IA-NEXT: j .LBB3_3
; RV32IA-NEXT: .LBB3_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB3_3 Depth=1
-; RV32IA-NEXT: sltu a0, a5, s1
+; RV32IA-NEXT: sltu a0, a4, s1
; RV32IA-NEXT: .LBB3_2: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB3_3 Depth=1
-; RV32IA-NEXT: addi a1, a4, 1
+; RV32IA-NEXT: addi a1, a5, 1
; RV32IA-NEXT: seqz a2, a1
-; RV32IA-NEXT: add a3, a5, a2
+; RV32IA-NEXT: add a3, a4, a2
; RV32IA-NEXT: neg a0, a0
; RV32IA-NEXT: and a2, a0, a1
; RV32IA-NEXT: and a3, a0, a3
-; RV32IA-NEXT: sw a4, 8(sp)
-; RV32IA-NEXT: sw a5, 12(sp)
+; RV32IA-NEXT: sw a5, 8(sp)
+; RV32IA-NEXT: sw a4, 12(sp)
; RV32IA-NEXT: addi a1, sp, 8
; RV32IA-NEXT: li a4, 5
; RV32IA-NEXT: li a5, 5
; RV32IA-NEXT: mv a0, s0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 12(sp)
-; RV32IA-NEXT: lw a4, 8(sp)
+; RV32IA-NEXT: lw a5, 8(sp)
+; RV32IA-NEXT: lw a4, 12(sp)
; RV32IA-NEXT: bnez a0, .LBB3_5
; RV32IA-NEXT: .LBB3_3: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: bne a5, s1, .LBB3_1
+; RV32IA-NEXT: bne a4, s1, .LBB3_1
; RV32IA-NEXT: # %bb.4: # in Loop: Header=BB3_3 Depth=1
-; RV32IA-NEXT: sltu a0, a4, s2
+; RV32IA-NEXT: sltu a0, a5, s2
; RV32IA-NEXT: j .LBB3_2
; RV32IA-NEXT: .LBB3_5: # %atomicrmw.end
-; RV32IA-NEXT: mv a0, a4
-; RV32IA-NEXT: mv a1, a5
+; RV32IA-NEXT: mv a0, a5
+; RV32IA-NEXT: mv a1, a4
; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -1211,35 +1211,35 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) {
; RV32I-NEXT: .cfi_offset s1, -12
; RV32I-NEXT: .cfi_offset s2, -16
; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lw a5, 4(a0)
-; RV32I-NEXT: lw a4, 0(a0)
+; RV32I-NEXT: lw a5, 0(a0)
+; RV32I-NEXT: lw a4, 4(a0)
; RV32I-NEXT: mv s1, a2
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: j .LBB7_2
; RV32I-NEXT: .LBB7_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB7_2 Depth=1
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: sw a5, 12(sp)
+; RV32I-NEXT: sw a5, 8(sp)
+; RV32I-NEXT: sw a4, 12(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: li a5, 5
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_8
-; RV32I-NEXT: lw a5, 12(sp)
-; RV32I-NEXT: lw a4, 8(sp)
+; RV32I-NEXT: lw a5, 8(sp)
+; RV32I-NEXT: lw a4, 12(sp)
; RV32I-NEXT: bnez a0, .LBB7_7
; RV32I-NEXT: .LBB7_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT: beq a5, s1, .LBB7_4
+; RV32I-NEXT: beq a4, s1, .LBB7_4
; RV32I-NEXT: # %bb.3: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB7_2 Depth=1
-; RV32I-NEXT: sltu a0, s1, a5
+; RV32I-NEXT: sltu a0, s1, a4
; RV32I-NEXT: j .LBB7_5
; RV32I-NEXT: .LBB7_4: # in Loop: Header=BB7_2 Depth=1
-; RV32I-NEXT: sltu a0, s2, a4
+; RV32I-NEXT: sltu a0, s2, a5
; RV32I-NEXT: .LBB7_5: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB7_2 Depth=1
-; RV32I-NEXT: or a1, a4, a5
+; RV32I-NEXT: or a1, a5, a4
; RV32I-NEXT: seqz a1, a1
; RV32I-NEXT: or a0, a1, a0
; RV32I-NEXT: mv a2, s2
@@ -1247,13 +1247,13 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) {
; RV32I-NEXT: bnez a0, .LBB7_1
; RV32I-NEXT: # %bb.6: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB7_2 Depth=1
-; RV32I-NEXT: seqz a0, a4
-; RV32I-NEXT: sub a3, a5, a0
-; RV32I-NEXT: addi a2, a4, -1
+; RV32I-NEXT: seqz a0, a5
+; RV32I-NEXT: sub a3, a4, a0
+; RV32I-NEXT: addi a2, a5, -1
; RV32I-NEXT: j .LBB7_1
; RV32I-NEXT: .LBB7_7: # %atomicrmw.end
-; RV32I-NEXT: mv a0, a4
-; RV32I-NEXT: mv a1, a5
+; RV32I-NEXT: mv a0, a5
+; RV32I-NEXT: mv a1, a4
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -1274,35 +1274,35 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) {
; RV32IA-NEXT: .cfi_offset s1, -12
; RV32IA-NEXT: .cfi_offset s2, -16
; RV32IA-NEXT: mv s0, a0
-; RV32IA-NEXT: lw a5, 4(a0)
-; RV32IA-NEXT: lw a4, 0(a0)
+; RV32IA-NEXT: lw a5, 0(a0)
+; RV32IA-NEXT: lw a4, 4(a0)
; RV32IA-NEXT: mv s1, a2
; RV32IA-NEXT: mv s2, a1
; RV32IA-NEXT: j .LBB7_2
; RV32IA-NEXT: .LBB7_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB7_2 Depth=1
-; RV32IA-NEXT: sw a4, 8(sp)
-; RV32IA-NEXT: sw a5, 12(sp)
+; RV32IA-NEXT: sw a5, 8(sp)
+; RV32IA-NEXT: sw a4, 12(sp)
; RV32IA-NEXT: addi a1, sp, 8
; RV32IA-NEXT: li a4, 5
; RV32IA-NEXT: li a5, 5
; RV32IA-NEXT: mv a0, s0
; RV32IA-NEXT: call __atomic_compare_exchange_8
-; RV32IA-NEXT: lw a5, 12(sp)
-; RV32IA-NEXT: lw a4, 8(sp)
+; RV32IA-NEXT: lw a5, 8(sp)
+; RV32IA-NEXT: lw a4, 12(sp)
; RV32IA-NEXT: bnez a0, .LBB7_7
; RV32IA-NEXT: .LBB7_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: beq a5, s1, .LBB7_4
+; RV32IA-NEXT: beq a4, s1, .LBB7_4
; RV32IA-NEXT: # %bb.3: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB7_2 Depth=1
-; RV32IA-NEXT: sltu a0, s1, a5
+; RV32IA-NEXT: sltu a0, s1, a4
; RV32IA-NEXT: j .LBB7_5
; RV32IA-NEXT: .LBB7_4: # in Loop: Header=BB7_2 Depth=1
-; RV32IA-NEXT: sltu a0, s2, a4
+; RV32IA-NEXT: sltu a0, s2, a5
; RV32IA-NEXT: .LBB7_5: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB7_2 Depth=1
-; RV32IA-NEXT: or a1, a4, a5
+; RV32IA-NEXT: or a1, a5, a4
; RV32IA-NEXT: seqz a1, a1
; RV32IA-NEXT: or a0, a1, a0
; RV32IA-NEXT: mv a2, s2
@@ -1310,13 +1310,13 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) {
; RV32IA-NEXT: bnez a0, .LBB7_1
; RV32IA-NEXT: # %bb.6: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB7_2 Depth=1
-; RV32IA-NEXT: seqz a0, a4
-; RV32IA-NEXT: sub a3, a5, a0
-; RV32IA-NEXT: addi a2, a4, -1
+; RV32IA-NEXT: seqz a0, a5
+; RV32IA-NEXT: sub a3, a4, a0
+; RV32IA-NEXT: addi a2, a5, -1
; RV32IA-NEXT: j .LBB7_1
; RV32IA-NEXT: .LBB7_7: # %atomicrmw.end
-; RV32IA-NEXT: mv a0, a4
-; RV32IA-NEXT: mv a1, a5
+; RV32IA-NEXT: mv a0, a5
+; RV32IA-NEXT: mv a1, a4
; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll b/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll
index 2122b3fd91788b..337e9bc5845f94 100644
--- a/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll
+++ b/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll
@@ -53,22 +53,22 @@ define void @callee() nounwind {
; ILP32-NEXT: flw fs1, 84(a1)
; ILP32-NEXT: flw fs2, 88(a1)
; ILP32-NEXT: flw fs3, 92(a1)
-; ILP32-NEXT: flw fs4, 96(a1)
-; ILP32-NEXT: flw fs5, 100(a1)
-; ILP32-NEXT: flw fs6, 104(a1)
-; ILP32-NEXT: flw fs7, 108(a1)
-; ILP32-NEXT: flw fs8, 124(a1)
-; ILP32-NEXT: flw fs9, 120(a1)
-; ILP32-NEXT: flw fs10, 116(a1)
-; ILP32-NEXT: flw fs11, 112(a1)
-; ILP32-NEXT: fsw fs8, 124(a1)
-; ILP32-NEXT: fsw fs9, 120(a1)
-; ILP32-NEXT: fsw fs10, 116(a1)
-; ILP32-NEXT: fsw fs11, 112(a1)
-; ILP32-NEXT: fsw fs7, 108(a1)
-; ILP32-NEXT: fsw fs6, 104(a1)
-; ILP32-NEXT: fsw fs5, 100(a1)
-; ILP32-NEXT: fsw fs4, 96(a1)
+; ILP32-NEXT: flw fs4, 112(a1)
+; ILP32-NEXT: flw fs5, 116(a1)
+; ILP32-NEXT: flw fs6, 120(a1)
+; ILP32-NEXT: flw fs7, 124(a1)
+; ILP32-NEXT: flw fs8, 96(a1)
+; ILP32-NEXT: flw fs9, 100(a1)
+; ILP32-NEXT: flw fs10, 104(a1)
+; ILP32-NEXT: flw fs11, 108(a1)
+; ILP32-NEXT: fsw fs7, 124(a1)
+; ILP32-NEXT: fsw fs6, 120(a1)
+; ILP32-NEXT: fsw fs5, 116(a1)
+; ILP32-NEXT: fsw fs4, 112(a1)
+; ILP32-NEXT: fsw fs11, 108(a1)
+; ILP32-NEXT: fsw fs10, 104(a1)
+; ILP32-NEXT: fsw fs9, 100(a1)
+; ILP32-NEXT: fsw fs8, 96(a1)
; ILP32-NEXT: fsw fs3, 92(a1)
; ILP32-NEXT: fsw fs2, 88(a1)
; ILP32-NEXT: fsw fs1, 84(a1)
@@ -123,22 +123,22 @@ define void @callee() nounwind {
; ILP32E-NEXT: flw fs1, 84(a1)
; ILP32E-NEXT: flw fs2, 88(a1)
; ILP32E-NEXT: flw fs3, 92(a1)
-; ILP32E-NEXT: flw fs4, 96(a1)
-; ILP32E-NEXT: flw fs5, 100(a1)
-; ILP32E-NEXT: flw fs6, 104(a1)
-; ILP32E-NEXT: flw fs7, 108(a1)
-; ILP32E-NEXT: flw fs8, 124(a1)
-; ILP32E-NEXT: flw fs9, 120(a1)
-; ILP32E-NEXT: flw fs10, 116(a1)
-; ILP32E-NEXT: flw fs11, 112(a1)
-; ILP32E-NEXT: fsw fs8, 124(a1)
-; ILP32E-NEXT: fsw fs9, 120(a1)
-; ILP32E-NEXT: fsw fs10, 116(a1)
-; ILP32E-NEXT: fsw fs11, 112(a1)
-; ILP32E-NEXT: fsw fs7, 108(a1)
-; ILP32E-NEXT: fsw fs6, 104(a1)
-; ILP32E-NEXT: fsw fs5, 100(a1)
-; ILP32E-NEXT: fsw fs4, 96(a1)
+; ILP32E-NEXT: flw fs4, 112(a1)
+; ILP32E-NEXT: flw fs5, 116(a1)
+; ILP32E-NEXT: flw fs6, 120(a1)
+; ILP32E-NEXT: flw fs7, 124(a1)
+; ILP32E-NEXT: flw fs8, 96(a1)
+; ILP32E-NEXT: flw fs9, 100(a1)
+; ILP32E-NEXT: flw fs10, 104(a1)
+; ILP32E-NEXT: flw fs11, 108(a1)
+; ILP32E-NEXT: fsw fs7, 124(a1)
+; ILP32E-NEXT: fsw fs6, 120(a1)
+; ILP32E-NEXT: fsw fs5, 116(a1)
+; ILP32E-NEXT: fsw fs4, 112(a1)
+; ILP32E-NEXT: fsw fs11, 108(a1)
+; ILP32E-NEXT: fsw fs10, 104(a1)
+; ILP32E-NEXT: fsw fs9, 100(a1)
+; ILP32E-NEXT: fsw fs8, 96(a1)
; ILP32E-NEXT: fsw fs3, 92(a1)
; ILP32E-NEXT: fsw fs2, 88(a1)
; ILP32E-NEXT: fsw fs1, 84(a1)
@@ -193,22 +193,22 @@ define void @callee() nounwind {
; LP64-NEXT: flw fs1, 84(a1)
; LP64-NEXT: flw fs2, 88(a1)
; LP64-NEXT: flw fs3, 92(a1)
-; LP64-NEXT: flw fs4, 96(a1)
-; LP64-NEXT: flw fs5, 100(a1)
-; LP64-NEXT: flw fs6, 104(a1)
-; LP64-NEXT: flw fs7, 108(a1)
-; LP64-NEXT: flw fs8, 124(a1)
-; LP64-NEXT: flw fs9, 120(a1)
-; LP64-NEXT: flw fs10, 116(a1)
-; LP64-NEXT: flw fs11, 112(a1)
-; LP64-NEXT: fsw fs8, 124(a1)
-; LP64-NEXT: fsw fs9, 120(a1)
-; LP64-NEXT: fsw fs10, 116(a1)
-; LP64-NEXT: fsw fs11, 112(a1)
-; LP64-NEXT: fsw fs7, 108(a1)
-; LP64-NEXT: fsw fs6, 104(a1)
-; LP64-NEXT: fsw fs5, 100(a1)
-; LP64-NEXT: fsw fs4, 96(a1)
+; LP64-NEXT: flw fs4, 112(a1)
+; LP64-NEXT: flw fs5, 116(a1)
+; LP64-NEXT: flw fs6, 120(a1)
+; LP64-NEXT: flw fs7, 124(a1)
+; LP64-NEXT: flw fs8, 96(a1)
+; LP64-NEXT: flw fs9, 100(a1)
+; LP64-NEXT: flw fs10, 104(a1)
+; LP64-NEXT: flw fs11, 108(a1)
+; LP64-NEXT: fsw fs7, 124(a1)
+; LP64-NEXT: fsw fs6, 120(a1)
+; LP64-NEXT: fsw fs5, 116(a1)
+; LP64-NEXT: fsw fs4, 112(a1)
+; LP64-NEXT: fsw fs11, 108(a1)
+; LP64-NEXT: fsw fs10, 104(a1)
+; LP64-NEXT: fsw fs9, 100(a1)
+; LP64-NEXT: fsw fs8, 96(a1)
; LP64-NEXT: fsw fs3, 92(a1)
; LP64-NEXT: fsw fs2, 88(a1)
; LP64-NEXT: fsw fs1, 84(a1)
@@ -263,22 +263,22 @@ define void @callee() nounwind {
; LP64E-NEXT: flw fs1, 84(a1)
; LP64E-NEXT: flw fs2, 88(a1)
; LP64E-NEXT: flw fs3, 92(a1)
-; LP64E-NEXT: flw fs4, 96(a1)
-; LP64E-NEXT: flw fs5, 100(a1)
-; LP64E-NEXT: flw fs6, 104(a1)
-; LP64E-NEXT: flw fs7, 108(a1)
-; LP64E-NEXT: flw fs8, 124(a1)
-; LP64E-NEXT: flw fs9, 120(a1)
-; LP64E-NEXT: flw fs10, 116(a1)
-; LP64E-NEXT: flw fs11, 112(a1)
-; LP64E-NEXT: fsw fs8, 124(a1)
-; LP64E-NEXT: fsw fs9, 120(a1)
-; LP64E-NEXT: fsw fs10, 116(a1)
-; LP64E-NEXT: fsw fs11, 112(a1)
-; LP64E-NEXT: fsw fs7, 108(a1)
-; LP64E-NEXT: fsw fs6, 104(a1)
-; LP64E-NEXT: fsw fs5, 100(a1)
-; LP64E-NEXT: fsw fs4, 96(a1)
+; LP64E-NEXT: flw fs4, 112(a1)
+; LP64E-NEXT: flw fs5, 116(a1)
+; LP64E-NEXT: flw fs6, 120(a1)
+; LP64E-NEXT: flw fs7, 124(a1)
+; LP64E-NEXT: flw fs8, 96(a1)
+; LP64E-NEXT: flw fs9, 100(a1)
+; LP64E-NEXT: flw fs10, 104(a1)
+; LP64E-NEXT: flw fs11, 108(a1)
+; LP64E-NEXT: fsw fs7, 124(a1)
+; LP64E-NEXT: fsw fs6, 120(a1)
+; LP64E-NEXT: fsw fs5, 116(a1)
+; LP64E-NEXT: fsw fs4, 112(a1)
+; LP64E-NEXT: fsw fs11, 108(a1)
+; LP64E-NEXT: fsw fs10, 104(a1)
+; LP64E-NEXT: fsw fs9, 100(a1)
+; LP64E-NEXT: fsw fs8, 96(a1)
; LP64E-NEXT: fsw fs3, 92(a1)
; LP64E-NEXT: fsw fs2, 88(a1)
; LP64E-NEXT: fsw fs1, 84(a1)
@@ -346,22 +346,22 @@ define void @callee() nounwind {
; ILP32F-NEXT: flw fs1, 84(a1)
; ILP32F-NEXT: flw fs2, 88(a1)
; ILP32F-NEXT: flw fs3, 92(a1)
-; ILP32F-NEXT: flw fs4, 96(a1)
-; ILP32F-NEXT: flw fs5, 100(a1)
-; ILP32F-NEXT: flw fs6, 104(a1)
-; ILP32F-NEXT: flw fs7, 108(a1)
-; ILP32F-NEXT: flw fs8, 124(a1)
-; ILP32F-NEXT: flw fs9, 120(a1)
-; ILP32F-NEXT: flw fs10, 116(a1)
-; ILP32F-NEXT: flw fs11, 112(a1)
-; ILP32F-NEXT: fsw fs8, 124(a1)
-; ILP32F-NEXT: fsw fs9, 120(a1)
-; ILP32F-NEXT: fsw fs10, 116(a1)
-; ILP32F-NEXT: fsw fs11, 112(a1)
-; ILP32F-NEXT: fsw fs7, 108(a1)
-; ILP32F-NEXT: fsw fs6, 104(a1)
-; ILP32F-NEXT: fsw fs5, 100(a1)
-; ILP32F-NEXT: fsw fs4, 96(a1)
+; ILP32F-NEXT: flw fs4, 112(a1)
+; ILP32F-NEXT: flw fs5, 116(a1)
+; ILP32F-NEXT: flw fs6, 120(a1)
+; ILP32F-NEXT: flw fs7, 124(a1)
+; ILP32F-NEXT: flw fs8, 96(a1)
+; ILP32F-NEXT: flw fs9, 100(a1)
+; ILP32F-NEXT: flw fs10, 104(a1)
+; ILP32F-NEXT: flw fs11, 108(a1)
+; ILP32F-NEXT: fsw fs7, 124(a1)
+; ILP32F-NEXT: fsw fs6, 120(a1)
+; ILP32F-NEXT: fsw fs5, 116(a1)
+; ILP32F-NEXT: fsw fs4, 112(a1)
+; ILP32F-NEXT: fsw fs11, 108(a1)
+; ILP32F-NEXT: fsw fs10, 104(a1)
+; ILP32F-NEXT: fsw fs9, 100(a1)
+; ILP32F-NEXT: fsw fs8, 96(a1)
; ILP32F-NEXT: fsw fs3, 92(a1)
; ILP32F-NEXT: fsw fs2, 88(a1)
; ILP32F-NEXT: fsw fs1, 84(a1)
@@ -442,22 +442,22 @@ define void @callee() nounwind {
; LP64F-NEXT: flw fs1, 84(a1)
; LP64F-NEXT: flw fs2, 88(a1)
; LP64F-NEXT: flw fs3, 92(a1)
-; LP64F-NEXT: flw fs4, 96(a1)
-; LP64F-NEXT: flw fs5, 100(a1)
-; LP64F-NEXT: flw fs6, 104(a1)
-; LP64F-NEXT: flw fs7, 108(a1)
-; LP64F-NEXT: flw fs8, 124(a1)
-; LP64F-NEXT: flw fs9, 120(a1)
-; LP64F-NEXT: flw fs10, 116(a1)
-; LP64F-NEXT: flw fs11, 112(a1)
-; LP64F-NEXT: fsw fs8, 124(a1)
-; LP64F-NEXT: fsw fs9, 120(a1)
-; LP64F-NEXT: fsw fs10, 116(a1)
-; LP64F-NEXT: fsw fs11, 112(a1)
-; LP64F-NEXT: fsw fs7, 108(a1)
-; LP64F-NEXT: fsw fs6, 104(a1)
-; LP64F-NEXT: fsw fs5, 100(a1)
-; LP64F-NEXT: fsw fs4, 96(a1)
+; LP64F-NEXT: flw fs4, 112(a1)
+; LP64F-NEXT: flw fs5, 116(a1)
+; LP64F-NEXT: flw fs6, 120(a1)
+; LP64F-NEXT: flw fs7, 124(a1)
+; LP64F-NEXT: flw fs8, 96(a1)
+; LP64F-NEXT: flw fs9, 100(a1)
+; LP64F-NEXT: flw fs10, 104(a1)
+; LP64F-NEXT: flw fs11, 108(a1)
+; LP64F-NEXT: fsw fs7, 124(a1)
+; LP64F-NEXT: fsw fs6, 120(a1)
+; LP64F-NEXT: fsw fs5, 116(a1)
+; LP64F-NEXT: fsw fs4, 112(a1)
+; LP64F-NEXT: fsw fs11, 108(a1)
+; LP64F-NEXT: fsw fs10, 104(a1)
+; LP64F-NEXT: fsw fs9, 100(a1)
+; LP64F-NEXT: fsw fs8, 96(a1)
; LP64F-NEXT: fsw fs3, 92(a1)
; LP64F-NEXT: fsw fs2, 88(a1)
; LP64F-NEXT: fsw fs1, 84(a1)
@@ -538,22 +538,22 @@ define void @callee() nounwind {
; ILP32D-NEXT: flw fs1, 84(a1)
; ILP32D-NEXT: flw fs2, 88(a1)
; ILP32D-NEXT: flw fs3, 92(a1)
-; ILP32D-NEXT: flw fs4, 96(a1)
-; ILP32D-NEXT: flw fs5, 100(a1)
-; ILP32D-NEXT: flw fs6, 104(a1)
-; ILP32D-NEXT: flw fs7, 108(a1)
-; ILP32D-NEXT: flw fs8, 124(a1)
-; ILP32D-NEXT: flw fs9, 120(a1)
-; ILP32D-NEXT: flw fs10, 116(a1)
-; ILP32D-NEXT: flw fs11, 112(a1)
-; ILP32D-NEXT: fsw fs8, 124(a1)
-; ILP32D-NEXT: fsw fs9, 120(a1)
-; ILP32D-NEXT: fsw fs10, 116(a1)
-; ILP32D-NEXT: fsw fs11, 112(a1)
-; ILP32D-NEXT: fsw fs7, 108(a1)
-; ILP32D-NEXT: fsw fs6, 104(a1)
-; ILP32D-NEXT: fsw fs5, 100(a1)
-; ILP32D-NEXT: fsw fs4, 96(a1)
+; ILP32D-NEXT: flw fs4, 112(a1)
+; ILP32D-NEXT: flw fs5, 116(a1)
+; ILP32D-NEXT: flw fs6, 120(a1)
+; ILP32D-NEXT: flw fs7, 124(a1)
+; ILP32D-NEXT: flw fs8, 96(a1)
+; ILP32D-NEXT: flw fs9, 100(a1)
+; ILP32D-NEXT: flw fs10, 104(a1)
+; ILP32D-NEXT: flw fs11, 108(a1)
+; ILP32D-NEXT: fsw fs7, 124(a1)
+; ILP32D-NEXT: fsw fs6, 120(a1)
+; ILP32D-NEXT: fsw fs5, 116(a1)
+; ILP32D-NEXT: fsw fs4, 112(a1)
+; ILP32D-NEXT: fsw fs11, 108(a1)
+; ILP32D-NEXT: fsw fs10, 104(a1)
+; ILP32D-NEXT: fsw fs9, 100(a1)
+; ILP32D-NEXT: fsw fs8, 96(a1)
; ILP32D-NEXT: fsw fs3, 92(a1)
; ILP32D-NEXT: fsw fs2, 88(a1)
; ILP32D-NEXT: fsw fs1, 84(a1)
@@ -634,22 +634,22 @@ define void @callee() nounwind {
; LP64D-NEXT: flw fs1, 84(a1)
; LP64D-NEXT: flw fs2, 88(a1)
; LP64D-NEXT: flw fs3, 92(a1)
-; LP64D-NEXT: flw fs4, 96(a1)
-; LP64D-NEXT: flw fs5, 100(a1)
-; LP64D-NEXT: flw fs6, 104(a1)
-; LP64D-NEXT: flw fs7, 108(a1)
-; LP64D-NEXT: flw fs8, 124(a1)
-; LP64D-NEXT: flw fs9, 120(a1)
-; LP64D-NEXT: flw fs10, 116(a1)
-; LP64D-NEXT: flw fs11, 112(a1)
-; LP64D-NEXT: fsw fs8, 124(a1)
-; LP64D-NEXT: fsw fs9, 120(a1)
-; LP64D-NEXT: fsw fs10, 116(a1)
-; LP64D-NEXT: fsw fs11, 112(a1)
-; LP64D-NEXT: fsw fs7, 108(a1)
-; LP64D-NEXT: fsw fs6, 104(a1)
-; LP64D-NEXT: fsw fs5, 100(a1)
-; LP64D-NEXT: fsw fs4, 96(a1)
+; LP64D-NEXT: flw fs4, 112(a1)
+; LP64D-NEXT: flw fs5, 116(a1)
+; LP64D-NEXT: flw fs6, 120(a1)
+; LP64D-NEXT: flw fs7, 124(a1)
+; LP64D-NEXT: flw fs8, 96(a1)
+; LP64D-NEXT: flw fs9, 100(a1)
+; LP64D-NEXT: flw fs10, 104(a1)
+; LP64D-NEXT: flw fs11, 108(a1)
+; LP64D-NEXT: fsw fs7, 124(a1)
+; LP64D-NEXT: fsw fs6, 120(a1)
+; LP64D-NEXT: fsw fs5, 116(a1)
+; LP64D-NEXT: fsw fs4, 112(a1)
+; LP64D-NEXT: fsw fs11, 108(a1)
+; LP64D-NEXT: fsw fs10, 104(a1)
+; LP64D-NEXT: fsw fs9, 100(a1)
+; LP64D-NEXT: fsw fs8, 96(a1)
; LP64D-NEXT: fsw fs3, 92(a1)
; LP64D-NEXT: fsw fs2, 88(a1)
; LP64D-NEXT: fsw fs1, 84(a1)
diff --git a/llvm/test/CodeGen/RISCV/callee-saved-fpr64s.ll b/llvm/test/CodeGen/RISCV/callee-saved-fpr64s.ll
index 38e3c2d9256cdf..0501c700f57dfb 100644
--- a/llvm/test/CodeGen/RISCV/callee-saved-fpr64s.ll
+++ b/llvm/test/CodeGen/RISCV/callee-saved-fpr64s.ll
@@ -45,26 +45,26 @@ define void @callee() nounwind {
; ILP32-NEXT: fld ft11, 152(a1)
; ILP32-NEXT: fld fs0, 160(a1)
; ILP32-NEXT: fld fs1, 168(a1)
-; ILP32-NEXT: fld fs2, 176(a1)
-; ILP32-NEXT: fld fs3, 184(a1)
-; ILP32-NEXT: fld fs4, 192(a1)
-; ILP32-NEXT: fld fs5, 200(a1)
-; ILP32-NEXT: fld fs6, 208(a1)
-; ILP32-NEXT: fld fs7, 216(a1)
-; ILP32-NEXT: fld fs8, 248(a1)
-; ILP32-NEXT: fld fs9, 240(a1)
-; ILP32-NEXT: fld fs10, 232(a1)
-; ILP32-NEXT: fld fs11, 224(a1)
-; ILP32-NEXT: fsd fs8, 248(a1)
-; ILP32-NEXT: fsd fs9, 240(a1)
-; ILP32-NEXT: fsd fs10, 232(a1)
-; ILP32-NEXT: fsd fs11, 224(a1)
-; ILP32-NEXT: fsd fs7, 216(a1)
-; ILP32-NEXT: fsd fs6, 208(a1)
-; ILP32-NEXT: fsd fs5, 200(a1)
-; ILP32-NEXT: fsd fs4, 192(a1)
-; ILP32-NEXT: fsd fs3, 184(a1)
-; ILP32-NEXT: fsd fs2, 176(a1)
+; ILP32-NEXT: fld fs2, 208(a1)
+; ILP32-NEXT: fld fs3, 216(a1)
+; ILP32-NEXT: fld fs4, 224(a1)
+; ILP32-NEXT: fld fs5, 232(a1)
+; ILP32-NEXT: fld fs6, 240(a1)
+; ILP32-NEXT: fld fs7, 248(a1)
+; ILP32-NEXT: fld fs8, 176(a1)
+; ILP32-NEXT: fld fs9, 184(a1)
+; ILP32-NEXT: fld fs10, 192(a1)
+; ILP32-NEXT: fld fs11, 200(a1)
+; ILP32-NEXT: fsd fs7, 248(a1)
+; ILP32-NEXT: fsd fs6, 240(a1)
+; ILP32-NEXT: fsd fs5, 232(a1)
+; ILP32-NEXT: fsd fs4, 224(a1)
+; ILP32-NEXT: fsd fs3, 216(a1)
+; ILP32-NEXT: fsd fs2, 208(a1)
+; ILP32-NEXT: fsd fs11, 200(a1)
+; ILP32-NEXT: fsd fs10, 192(a1)
+; ILP32-NEXT: fsd fs9, 184(a1)
+; ILP32-NEXT: fsd fs8, 176(a1)
; ILP32-NEXT: fsd fs1, 168(a1)
; ILP32-NEXT: fsd fs0, 160(a1)
; ILP32-NEXT: fsd ft11, 152(a1)
@@ -115,26 +115,26 @@ define void @callee() nounwind {
; LP64-NEXT: fld ft11, 152(a1)
; LP64-NEXT: fld fs0, 160(a1)
; LP64-NEXT: fld fs1, 168(a1)
-; LP64-NEXT: fld fs2, 176(a1)
-; LP64-NEXT: fld fs3, 184(a1)
-; LP64-NEXT: fld fs4, 192(a1)
-; LP64-NEXT: fld fs5, 200(a1)
-; LP64-NEXT: fld fs6, 208(a1)
-; LP64-NEXT: fld fs7, 216(a1)
-; LP64-NEXT: fld fs8, 248(a1)
-; LP64-NEXT: fld fs9, 240(a1)
-; LP64-NEXT: fld fs10, 232(a1)
-; LP64-NEXT: fld fs11, 224(a1)
-; LP64-NEXT: fsd fs8, 248(a1)
-; LP64-NEXT: fsd fs9, 240(a1)
-; LP64-NEXT: fsd fs10, 232(a1)
-; LP64-NEXT: fsd fs11, 224(a1)
-; LP64-NEXT: fsd fs7, 216(a1)
-; LP64-NEXT: fsd fs6, 208(a1)
-; LP64-NEXT: fsd fs5, 200(a1)
-; LP64-NEXT: fsd fs4, 192(a1)
-; LP64-NEXT: fsd fs3, 184(a1)
-; LP64-NEXT: fsd fs2, 176(a1)
+; LP64-NEXT: fld fs2, 208(a1)
+; LP64-NEXT: fld fs3, 216(a1)
+; LP64-NEXT: fld fs4, 224(a1)
+; LP64-NEXT: fld fs5, 232(a1)
+; LP64-NEXT: fld fs6, 240(a1)
+; LP64-NEXT: fld fs7, 248(a1)
+; LP64-NEXT: fld fs8, 176(a1)
+; LP64-NEXT: fld fs9, 184(a1)
+; LP64-NEXT: fld fs10, 192(a1)
+; LP64-NEXT: fld fs11, 200(a1)
+; LP64-NEXT: fsd fs7, 248(a1)
+; LP64-NEXT: fsd fs6, 240(a1)
+; LP64-NEXT: fsd fs5, 232(a1)
+; LP64-NEXT: fsd fs4, 224(a1)
+; LP64-NEXT: fsd fs3, 216(a1)
+; LP64-NEXT: fsd fs2, 208(a1)
+; LP64-NEXT: fsd fs11, 200(a1)
+; LP64-NEXT: fsd fs10, 192(a1)
+; LP64-NEXT: fsd fs9, 184(a1)
+; LP64-NEXT: fsd fs8, 176(a1)
; LP64-NEXT: fsd fs1, 168(a1)
; LP64-NEXT: fsd fs0, 160(a1)
; LP64-NEXT: fsd ft11, 152(a1)
@@ -185,26 +185,26 @@ define void @callee() nounwind {
; LP64E-NEXT: fld ft11, 152(a1)
; LP64E-NEXT: fld fs0, 160(a1)
; LP64E-NEXT: fld fs1, 168(a1)
-; LP64E-NEXT: fld fs2, 176(a1)
-; LP64E-NEXT: fld fs3, 184(a1)
-; LP64E-NEXT: fld fs4, 192(a1)
-; LP64E-NEXT: fld fs5, 200(a1)
-; LP64E-NEXT: fld fs6, 208(a1)
-; LP64E-NEXT: fld fs7, 216(a1)
-; LP64E-NEXT: fld fs8, 248(a1)
-; LP64E-NEXT: fld fs9, 240(a1)
-; LP64E-NEXT: fld fs10, 232(a1)
-; LP64E-NEXT: fld fs11, 224(a1)
-; LP64E-NEXT: fsd fs8, 248(a1)
-; LP64E-NEXT: fsd fs9, 240(a1)
-; LP64E-NEXT: fsd fs10, 232(a1)
-; LP64E-NEXT: fsd fs11, 224(a1)
-; LP64E-NEXT: fsd fs7, 216(a1)
-; LP64E-NEXT: fsd fs6, 208(a1)
-; LP64E-NEXT: fsd fs5, 200(a1)
-; LP64E-NEXT: fsd fs4, 192(a1)
-; LP64E-NEXT: fsd fs3, 184(a1)
-; LP64E-NEXT: fsd fs2, 176(a1)
+; LP64E-NEXT: fld fs2, 208(a1)
+; LP64E-NEXT: fld fs3, 216(a1)
+; LP64E-NEXT: fld fs4, 224(a1)
+; LP64E-NEXT: fld fs5, 232(a1)
+; LP64E-NEXT: fld fs6, 240(a1)
+; LP64E-NEXT: fld fs7, 248(a1)
+; LP64E-NEXT: fld fs8, 176(a1)
+; LP64E-NEXT: fld fs9, 184(a1)
+; LP64E-NEXT: fld fs10, 192(a1)
+; LP64E-NEXT: fld fs11, 200(a1)
+; LP64E-NEXT: fsd fs7, 248(a1)
+; LP64E-NEXT: fsd fs6, 240(a1)
+; LP64E-NEXT: fsd fs5, 232(a1)
+; LP64E-NEXT: fsd fs4, 224(a1)
+; LP64E-NEXT: fsd fs3, 216(a1)
+; LP64E-NEXT: fsd fs2, 208(a1)
+; LP64E-NEXT: fsd fs11, 200(a1)
+; LP64E-NEXT: fsd fs10, 192(a1)
+; LP64E-NEXT: fsd fs9, 184(a1)
+; LP64E-NEXT: fsd fs8, 176(a1)
; LP64E-NEXT: fsd fs1, 168(a1)
; LP64E-NEXT: fsd fs0, 160(a1)
; LP64E-NEXT: fsd ft11, 152(a1)
@@ -268,26 +268,26 @@ define void @callee() nounwind {
; ILP32D-NEXT: fld ft11, 152(a1)
; ILP32D-NEXT: fld fs0, 160(a1)
; ILP32D-NEXT: fld fs1, 168(a1)
-; ILP32D-NEXT: fld fs2, 176(a1)
-; ILP32D-NEXT: fld fs3, 184(a1)
-; ILP32D-NEXT: fld fs4, 192(a1)
-; ILP32D-NEXT: fld fs5, 200(a1)
-; ILP32D-NEXT: fld fs6, 208(a1)
-; ILP32D-NEXT: fld fs7, 216(a1)
-; ILP32D-NEXT: fld fs8, 248(a1)
-; ILP32D-NEXT: fld fs9, 240(a1)
-; ILP32D-NEXT: fld fs10, 232(a1)
-; ILP32D-NEXT: fld fs11, 224(a1)
-; ILP32D-NEXT: fsd fs8, 248(a1)
-; ILP32D-NEXT: fsd fs9, 240(a1)
-; ILP32D-NEXT: fsd fs10, 232(a1)
-; ILP32D-NEXT: fsd fs11, 224(a1)
-; ILP32D-NEXT: fsd fs7, 216(a1)
-; ILP32D-NEXT: fsd fs6, 208(a1)
-; ILP32D-NEXT: fsd fs5, 200(a1)
-; ILP32D-NEXT: fsd fs4, 192(a1)
-; ILP32D-NEXT: fsd fs3, 184(a1)
-; ILP32D-NEXT: fsd fs2, 176(a1)
+; ILP32D-NEXT: fld fs2, 208(a1)
+; ILP32D-NEXT: fld fs3, 216(a1)
+; ILP32D-NEXT: fld fs4, 224(a1)
+; ILP32D-NEXT: fld fs5, 232(a1)
+; ILP32D-NEXT: fld fs6, 240(a1)
+; ILP32D-NEXT: fld fs7, 248(a1)
+; ILP32D-NEXT: fld fs8, 176(a1)
+; ILP32D-NEXT: fld fs9, 184(a1)
+; ILP32D-NEXT: fld fs10, 192(a1)
+; ILP32D-NEXT: fld fs11, 200(a1)
+; ILP32D-NEXT: fsd fs7, 248(a1)
+; ILP32D-NEXT: fsd fs6, 240(a1)
+; ILP32D-NEXT: fsd fs5, 232(a1)
+; ILP32D-NEXT: fsd fs4, 224(a1)
+; ILP32D-NEXT: fsd fs3, 216(a1)
+; ILP32D-NEXT: fsd fs2, 208(a1)
+; ILP32D-NEXT: fsd fs11, 200(a1)
+; ILP32D-NEXT: fsd fs10, 192(a1)
+; ILP32D-NEXT: fsd fs9, 184(a1)
+; ILP32D-NEXT: fsd fs8, 176(a1)
; ILP32D-NEXT: fsd fs1, 168(a1)
; ILP32D-NEXT: fsd fs0, 160(a1)
; ILP32D-NEXT: fsd ft11, 152(a1)
@@ -364,26 +364,26 @@ define void @callee() nounwind {
; LP64D-NEXT: fld ft11, 152(a1)
; LP64D-NEXT: fld fs0, 160(a1)
; LP64D-NEXT: fld fs1, 168(a1)
-; LP64D-NEXT: fld fs2, 176(a1)
-; LP64D-NEXT: fld fs3, 184(a1)
-; LP64D-NEXT: fld fs4, 192(a1)
-; LP64D-NEXT: fld fs5, 200(a1)
-; LP64D-NEXT: fld fs6, 208(a1)
-; LP64D-NEXT: fld fs7, 216(a1)
-; LP64D-NEXT: fld fs8, 248(a1)
-; LP64D-NEXT: fld fs9, 240(a1)
-; LP64D-NEXT: fld fs10, 232(a1)
-; LP64D-NEXT: fld fs11, 224(a1)
-; LP64D-NEXT: fsd fs8, 248(a1)
-; LP64D-NEXT: fsd fs9, 240(a1)
-; LP64D-NEXT: fsd fs10, 232(a1)
-; LP64D-NEXT: fsd fs11, 224(a1)
-; LP64D-NEXT: fsd fs7, 216(a1)
-; LP64D-NEXT: fsd fs6, 208(a1)
-; LP64D-NEXT: fsd fs5, 200(a1)
-; LP64D-NEXT: fsd fs4, 192(a1)
-; LP64D-NEXT: fsd fs3, 184(a1)
-; LP64D-NEXT: fsd fs2, 176(a1)
+; LP64D-NEXT: fld fs2, 208(a1)
+; LP64D-NEXT: fld fs3, 216(a1)
+; LP64D-NEXT: fld fs4, 224(a1)
+; LP64D-NEXT: fld fs5, 232(a1)
+; LP64D-NEXT: fld fs6, 240(a1)
+; LP64D-NEXT: fld fs7, 248(a1)
+; LP64D-NEXT: fld fs8, 176(a1)
+; LP64D-NEXT: fld fs9, 184(a1)
+; LP64D-NEXT: fld fs10, 192(a1)
+; LP64D-NEXT: fld fs11, 200(a1)
+; LP64D-NEXT: fsd fs7, 248(a1)
+; LP64D-NEXT: fsd fs6, 240(a1)
+; LP64D-NEXT: fsd fs5, 232(a1)
+; LP64D-NEXT: fsd fs4, 224(a1)
+; LP64D-NEXT: fsd fs3, 216(a1)
+; LP64D-NEXT: fsd fs2, 208(a1)
+; LP64D-NEXT: fsd fs11, 200(a1)
+; LP64D-NEXT: fsd fs10, 192(a1)
+; LP64D-NEXT: fsd fs9, 184(a1)
+; LP64D-NEXT: fsd fs8, 176(a1)
; LP64D-NEXT: fsd fs1, 168(a1)
; LP64D-NEXT: fsd fs0, 160(a1)
; LP64D-NEXT: fsd ft11, 152(a1)
diff --git a/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll b/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll
index 5e8ed4509b5357..6d2263f74062df 100644
--- a/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll
+++ b/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll
@@ -54,16 +54,16 @@ define void @callee() nounwind {
; RV32I-NEXT: sw s9, 36(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s10, 32(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s11, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: lui a6, %hi(var)
-; RV32I-NEXT: lw a0, %lo(var)(a6)
+; RV32I-NEXT: lui a7, %hi(var)
+; RV32I-NEXT: lw a0, %lo(var)(a7)
; RV32I-NEXT: sw a0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: lw a0, %lo(var+4)(a6)
+; RV32I-NEXT: lw a0, %lo(var+4)(a7)
; RV32I-NEXT: sw a0, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: lw a0, %lo(var+8)(a6)
+; RV32I-NEXT: lw a0, %lo(var+8)(a7)
; RV32I-NEXT: sw a0, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: lw a0, %lo(var+12)(a6)
+; RV32I-NEXT: lw a0, %lo(var+12)(a7)
; RV32I-NEXT: sw a0, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: addi a5, a6, %lo(var)
+; RV32I-NEXT: addi a5, a7, %lo(var)
; RV32I-NEXT: lw a0, 16(a5)
; RV32I-NEXT: sw a0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: lw a0, 20(a5)
@@ -86,22 +86,22 @@ define void @callee() nounwind {
; RV32I-NEXT: lw s8, 84(a5)
; RV32I-NEXT: lw s9, 88(a5)
; RV32I-NEXT: lw s10, 92(a5)
-; RV32I-NEXT: lw s11, 96(a5)
-; RV32I-NEXT: lw ra, 100(a5)
-; RV32I-NEXT: lw a7, 104(a5)
-; RV32I-NEXT: lw a4, 108(a5)
+; RV32I-NEXT: lw s11, 112(a5)
+; RV32I-NEXT: lw ra, 116(a5)
+; RV32I-NEXT: lw a3, 120(a5)
; RV32I-NEXT: lw a0, 124(a5)
-; RV32I-NEXT: lw a1, 120(a5)
-; RV32I-NEXT: lw a2, 116(a5)
-; RV32I-NEXT: lw a3, 112(a5)
+; RV32I-NEXT: lw a6, 96(a5)
+; RV32I-NEXT: lw a4, 100(a5)
+; RV32I-NEXT: lw a2, 104(a5)
+; RV32I-NEXT: lw a1, 108(a5)
; RV32I-NEXT: sw a0, 124(a5)
-; RV32I-NEXT: sw a1, 120(a5)
-; RV32I-NEXT: sw a2, 116(a5)
-; RV32I-NEXT: sw a3, 112(a5)
-; RV32I-NEXT: sw a4, 108(a5)
-; RV32I-NEXT: sw a7, 104(a5)
-; RV32I-NEXT: sw ra, 100(a5)
-; RV32I-NEXT: sw s11, 96(a5)
+; RV32I-NEXT: sw a3, 120(a5)
+; RV32I-NEXT: sw ra, 116(a5)
+; RV32I-NEXT: sw s11, 112(a5)
+; RV32I-NEXT: sw a1, 108(a5)
+; RV32I-NEXT: sw a2, 104(a5)
+; RV32I-NEXT: sw a4, 100(a5)
+; RV32I-NEXT: sw a6, 96(a5)
; RV32I-NEXT: sw s10, 92(a5)
; RV32I-NEXT: sw s9, 88(a5)
; RV32I-NEXT: sw s8, 84(a5)
@@ -125,13 +125,13 @@ define void @callee() nounwind {
; RV32I-NEXT: lw a0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: sw a0, 16(a5)
; RV32I-NEXT: lw a0, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: sw a0, %lo(var+12)(a6)
+; RV32I-NEXT: sw a0, %lo(var+12)(a7)
; RV32I-NEXT: lw a0, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: sw a0, %lo(var+8)(a6)
+; RV32I-NEXT: sw a0, %lo(var+8)(a7)
; RV32I-NEXT: lw a0, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: sw a0, %lo(var+4)(a6)
+; RV32I-NEXT: sw a0, %lo(var+4)(a7)
; RV32I-NEXT: lw a0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: sw a0, %lo(var)(a6)
+; RV32I-NEXT: sw a0, %lo(var)(a7)
; RV32I-NEXT: lw ra, 76(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 72(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 68(sp) # 4-byte Folded Reload
@@ -154,16 +154,16 @@ define void @callee() nounwind {
; RV32I-ILP32E-NEXT: sw ra, 32(sp) # 4-byte Folded Spill
; RV32I-ILP32E-NEXT: sw s0, 28(sp) # 4-byte Folded Spill
; RV32I-ILP32E-NEXT: sw s1, 24(sp) # 4-byte Folded Spill
-; RV32I-ILP32E-NEXT: lui a6, %hi(var)
-; RV32I-ILP32E-NEXT: lw a0, %lo(var)(a6)
+; RV32I-ILP32E-NEXT: lui a7, %hi(var)
+; RV32I-ILP32E-NEXT: lw a0, %lo(var)(a7)
; RV32I-ILP32E-NEXT: sw a0, 20(sp) # 4-byte Folded Spill
-; RV32I-ILP32E-NEXT: lw a0, %lo(var+4)(a6)
+; RV32I-ILP32E-NEXT: lw a0, %lo(var+4)(a7)
; RV32I-ILP32E-NEXT: sw a0, 16(sp) # 4-byte Folded Spill
-; RV32I-ILP32E-NEXT: lw a0, %lo(var+8)(a6)
+; RV32I-ILP32E-NEXT: lw a0, %lo(var+8)(a7)
; RV32I-ILP32E-NEXT: sw a0, 12(sp) # 4-byte Folded Spill
-; RV32I-ILP32E-NEXT: lw a0, %lo(var+12)(a6)
+; RV32I-ILP32E-NEXT: lw a0, %lo(var+12)(a7)
; RV32I-ILP32E-NEXT: sw a0, 8(sp) # 4-byte Folded Spill
-; RV32I-ILP32E-NEXT: addi a5, a6, %lo(var)
+; RV32I-ILP32E-NEXT: addi a5, a7, %lo(var)
; RV32I-ILP32E-NEXT: lw a0, 16(a5)
; RV32I-ILP32E-NEXT: sw a0, 4(sp) # 4-byte Folded Spill
; RV32I-ILP32E-NEXT: lw a0, 20(a5)
@@ -186,22 +186,22 @@ define void @callee() nounwind {
; RV32I-ILP32E-NEXT: lw s10, 84(a5)
; RV32I-ILP32E-NEXT: lw s11, 88(a5)
; RV32I-ILP32E-NEXT: lw s0, 92(a5)
-; RV32I-ILP32E-NEXT: lw s1, 96(a5)
-; RV32I-ILP32E-NEXT: lw ra, 100(a5)
-; RV32I-ILP32E-NEXT: lw a7, 104(a5)
-; RV32I-ILP32E-NEXT: lw a4, 108(a5)
+; RV32I-ILP32E-NEXT: lw s1, 112(a5)
+; RV32I-ILP32E-NEXT: lw ra, 116(a5)
+; RV32I-ILP32E-NEXT: lw a3, 120(a5)
; RV32I-ILP32E-NEXT: lw a0, 124(a5)
-; RV32I-ILP32E-NEXT: lw a1, 120(a5)
-; RV32I-ILP32E-NEXT: lw a2, 116(a5)
-; RV32I-ILP32E-NEXT: lw a3, 112(a5)
+; RV32I-ILP32E-NEXT: lw a6, 96(a5)
+; RV32I-ILP32E-NEXT: lw a4, 100(a5)
+; RV32I-ILP32E-NEXT: lw a2, 104(a5)
+; RV32I-ILP32E-NEXT: lw a1, 108(a5)
; RV32I-ILP32E-NEXT: sw a0, 124(a5)
-; RV32I-ILP32E-NEXT: sw a1, 120(a5)
-; RV32I-ILP32E-NEXT: sw a2, 116(a5)
-; RV32I-ILP32E-NEXT: sw a3, 112(a5)
-; RV32I-ILP32E-NEXT: sw a4, 108(a5)
-; RV32I-ILP32E-NEXT: sw a7, 104(a5)
-; RV32I-ILP32E-NEXT: sw ra, 100(a5)
-; RV32I-ILP32E-NEXT: sw s1, 96(a5)
+; RV32I-ILP32E-NEXT: sw a3, 120(a5)
+; RV32I-ILP32E-NEXT: sw ra, 116(a5)
+; RV32I-ILP32E-NEXT: sw s1, 112(a5)
+; RV32I-ILP32E-NEXT: sw a1, 108(a5)
+; RV32I-ILP32E-NEXT: sw a2, 104(a5)
+; RV32I-ILP32E-NEXT: sw a4, 100(a5)
+; RV32I-ILP32E-NEXT: sw a6, 96(a5)
; RV32I-ILP32E-NEXT: sw s0, 92(a5)
; RV32I-ILP32E-NEXT: sw s11, 88(a5)
; RV32I-ILP32E-NEXT: sw s10, 84(a5)
@@ -225,13 +225,13 @@ define void @callee() nounwind {
; RV32I-ILP32E-NEXT: lw a0, 4(sp) # 4-byte Folded Reload
; RV32I-ILP32E-NEXT: sw a0, 16(a5)
; RV32I-ILP32E-NEXT: lw a0, 8(sp) # 4-byte Folded Reload
-; RV32I-ILP32E-NEXT: sw a0, %lo(var+12)(a6)
+; RV32I-ILP32E-NEXT: sw a0, %lo(var+12)(a7)
; RV32I-ILP32E-NEXT: lw a0, 12(sp) # 4-byte Folded Reload
-; RV32I-ILP32E-NEXT: sw a0, %lo(var+8)(a6)
+; RV32I-ILP32E-NEXT: sw a0, %lo(var+8)(a7)
; RV32I-ILP32E-NEXT: lw a0, 16(sp) # 4-byte Folded Reload
-; RV32I-ILP32E-NEXT: sw a0, %lo(var+4)(a6)
+; RV32I-ILP32E-NEXT: sw a0, %lo(var+4)(a7)
; RV32I-ILP32E-NEXT: lw a0, 20(sp) # 4-byte Folded Reload
-; RV32I-ILP32E-NEXT: sw a0, %lo(var)(a6)
+; RV32I-ILP32E-NEXT: sw a0, %lo(var)(a7)
; RV32I-ILP32E-NEXT: lw ra, 32(sp) # 4-byte Folded Reload
; RV32I-ILP32E-NEXT: lw s0, 28(sp) # 4-byte Folded Reload
; RV32I-ILP32E-NEXT: lw s1, 24(sp) # 4-byte Folded Reload
@@ -255,16 +255,16 @@ define void @callee() nounwind {
; RV32I-WITH-FP-NEXT: sw s10, 32(sp) # 4-byte Folded Spill
; RV32I-WITH-FP-NEXT: sw s11, 28(sp) # 4-byte Folded Spill
; RV32I-WITH-FP-NEXT: addi s0, sp, 80
-; RV32I-WITH-FP-NEXT: lui a6, %hi(var)
-; RV32I-WITH-FP-NEXT: lw a0, %lo(var)(a6)
+; RV32I-WITH-FP-NEXT: lui t0, %hi(var)
+; RV32I-WITH-FP-NEXT: lw a0, %lo(var)(t0)
; RV32I-WITH-FP-NEXT: sw a0, -56(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT: lw a0, %lo(var+4)(a6)
+; RV32I-WITH-FP-NEXT: lw a0, %lo(var+4)(t0)
; RV32I-WITH-FP-NEXT: sw a0, -60(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT: lw a0, %lo(var+8)(a6)
+; RV32I-WITH-FP-NEXT: lw a0, %lo(var+8)(t0)
; RV32I-WITH-FP-NEXT: sw a0, -64(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT: lw a0, %lo(var+12)(a6)
+; RV32I-WITH-FP-NEXT: lw a0, %lo(var+12)(t0)
; RV32I-WITH-FP-NEXT: sw a0, -68(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT: addi a5, a6, %lo(var)
+; RV32I-WITH-FP-NEXT: addi a5, t0, %lo(var)
; RV32I-WITH-FP-NEXT: lw a0, 16(a5)
; RV32I-WITH-FP-NEXT: sw a0, -72(s0) # 4-byte Folded Spill
; RV32I-WITH-FP-NEXT: lw a0, 20(a5)
@@ -288,22 +288,22 @@ define void @callee() nounwind {
; RV32I-WITH-FP-NEXT: lw s9, 84(a5)
; RV32I-WITH-FP-NEXT: lw s10, 88(a5)
; RV32I-WITH-FP-NEXT: lw s11, 92(a5)
-; RV32I-WITH-FP-NEXT: lw ra, 96(a5)
-; RV32I-WITH-FP-NEXT: lw t0, 100(a5)
-; RV32I-WITH-FP-NEXT: lw a7, 104(a5)
-; RV32I-WITH-FP-NEXT: lw a4, 108(a5)
+; RV32I-WITH-FP-NEXT: lw ra, 112(a5)
+; RV32I-WITH-FP-NEXT: lw a4, 116(a5)
+; RV32I-WITH-FP-NEXT: lw a3, 120(a5)
; RV32I-WITH-FP-NEXT: lw a0, 124(a5)
-; RV32I-WITH-FP-NEXT: lw a1, 120(a5)
-; RV32I-WITH-FP-NEXT: lw a2, 116(a5)
-; RV32I-WITH-FP-NEXT: lw a3, 112(a5)
+; RV32I-WITH-FP-NEXT: lw a7, 96(a5)
+; RV32I-WITH-FP-NEXT: lw a6, 100(a5)
+; RV32I-WITH-FP-NEXT: lw a2, 104(a5)
+; RV32I-WITH-FP-NEXT: lw a1, 108(a5)
; RV32I-WITH-FP-NEXT: sw a0, 124(a5)
-; RV32I-WITH-FP-NEXT: sw a1, 120(a5)
-; RV32I-WITH-FP-NEXT: sw a2, 116(a5)
-; RV32I-WITH-FP-NEXT: sw a3, 112(a5)
-; RV32I-WITH-FP-NEXT: sw a4, 108(a5)
-; RV32I-WITH-FP-NEXT: sw a7, 104(a5)
-; RV32I-WITH-FP-NEXT: sw t0, 100(a5)
-; RV32I-WITH-FP-NEXT: sw ra, 96(a5)
+; RV32I-WITH-FP-NEXT: sw a3, 120(a5)
+; RV32I-WITH-FP-NEXT: sw a4, 116(a5)
+; RV32I-WITH-FP-NEXT: sw ra, 112(a5)
+; RV32I-WITH-FP-NEXT: sw a1, 108(a5)
+; RV32I-WITH-FP-NEXT: sw a2, 104(a5)
+; RV32I-WITH-FP-NEXT: sw a6, 100(a5)
+; RV32I-WITH-FP-NEXT: sw a7, 96(a5)
; RV32I-WITH-FP-NEXT: sw s11, 92(a5)
; RV32I-WITH-FP-NEXT: sw s10, 88(a5)
; RV32I-WITH-FP-NEXT: sw s9, 84(a5)
@@ -328,13 +328,13 @@ define void @callee() nounwind {
; RV32I-WITH-FP-NEXT: lw a0, -72(s0) # 4-byte Folded Reload
; RV32I-WITH-FP-NEXT: sw a0, 16(a5)
; RV32I-WITH-FP-NEXT: lw a0, -68(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT: sw a0, %lo(var+12)(a6)
+; RV32I-WITH-FP-NEXT: sw a0, %lo(var+12)(t0)
; RV32I-WITH-FP-NEXT: lw a0, -64(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT: sw a0, %lo(var+8)(a6)
+; RV32I-WITH-FP-NEXT: sw a0, %lo(var+8)(t0)
; RV32I-WITH-FP-NEXT: lw a0, -60(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT: sw a0, %lo(var+4)(a6)
+; RV32I-WITH-FP-NEXT: sw a0, %lo(var+4)(t0)
; RV32I-WITH-FP-NEXT: lw a0, -56(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT: sw a0, %lo(var)(a6)
+; RV32I-WITH-FP-NEXT: sw a0, %lo(var)(t0)
; RV32I-WITH-FP-NEXT: lw ra, 76(sp) # 4-byte Folded Reload
; RV32I-WITH-FP-NEXT: lw s0, 72(sp) # 4-byte Folded Reload
; RV32I-WITH-FP-NEXT: lw s1, 68(sp) # 4-byte Folded Reload
@@ -354,16 +354,16 @@ define void @callee() nounwind {
; RV32IZCMP-LABEL: callee:
; RV32IZCMP: # %bb.0:
; RV32IZCMP-NEXT: cm.push {ra, s0-s11}, -96
-; RV32IZCMP-NEXT: lui a6, %hi(var)
-; RV32IZCMP-NEXT: lw a0, %lo(var)(a6)
+; RV32IZCMP-NEXT: lui t0, %hi(var)
+; RV32IZCMP-NEXT: lw a0, %lo(var)(t0)
; RV32IZCMP-NEXT: sw a0, 28(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT: lw a0, %lo(var+4)(a6)
+; RV32IZCMP-NEXT: lw a0, %lo(var+4)(t0)
; RV32IZCMP-NEXT: sw a0, 24(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT: lw a0, %lo(var+8)(a6)
+; RV32IZCMP-NEXT: lw a0, %lo(var+8)(t0)
; RV32IZCMP-NEXT: sw a0, 20(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT: lw a0, %lo(var+12)(a6)
+; RV32IZCMP-NEXT: lw a0, %lo(var+12)(t0)
; RV32IZCMP-NEXT: sw a0, 16(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT: addi a5, a6, %lo(var)
+; RV32IZCMP-NEXT: addi a5, t0, %lo(var)
; RV32IZCMP-NEXT: lw a0, 16(a5)
; RV32IZCMP-NEXT: sw a0, 12(sp) # 4-byte Folded Spill
; RV32IZCMP-NEXT: lw a0, 20(a5)
@@ -386,22 +386,22 @@ define void @callee() nounwind {
; RV32IZCMP-NEXT: lw t3, 84(a5)
; RV32IZCMP-NEXT: lw t2, 88(a5)
; RV32IZCMP-NEXT: lw t1, 92(a5)
-; RV32IZCMP-NEXT: lw t0, 96(a5)
-; RV32IZCMP-NEXT: lw s0, 100(a5)
-; RV32IZCMP-NEXT: lw a7, 104(a5)
-; RV32IZCMP-NEXT: lw a4, 108(a5)
+; RV32IZCMP-NEXT: lw a7, 112(a5)
+; RV32IZCMP-NEXT: lw s0, 116(a5)
+; RV32IZCMP-NEXT: lw a3, 120(a5)
; RV32IZCMP-NEXT: lw a0, 124(a5)
-; RV32IZCMP-NEXT: lw a1, 120(a5)
-; RV32IZCMP-NEXT: lw a2, 116(a5)
-; RV32IZCMP-NEXT: lw a3, 112(a5)
+; RV32IZCMP-NEXT: lw a6, 96(a5)
+; RV32IZCMP-NEXT: lw a4, 100(a5)
+; RV32IZCMP-NEXT: lw a2, 104(a5)
+; RV32IZCMP-NEXT: lw a1, 108(a5)
; RV32IZCMP-NEXT: sw a0, 124(a5)
-; RV32IZCMP-NEXT: sw a1, 120(a5)
-; RV32IZCMP-NEXT: sw a2, 116(a5)
-; RV32IZCMP-NEXT: sw a3, 112(a5)
-; RV32IZCMP-NEXT: sw a4, 108(a5)
-; RV32IZCMP-NEXT: sw a7, 104(a5)
-; RV32IZCMP-NEXT: sw s0, 100(a5)
-; RV32IZCMP-NEXT: sw t0, 96(a5)
+; RV32IZCMP-NEXT: sw a3, 120(a5)
+; RV32IZCMP-NEXT: sw s0, 116(a5)
+; RV32IZCMP-NEXT: sw a7, 112(a5)
+; RV32IZCMP-NEXT: sw a1, 108(a5)
+; RV32IZCMP-NEXT: sw a2, 104(a5)
+; RV32IZCMP-NEXT: sw a4, 100(a5)
+; RV32IZCMP-NEXT: sw a6, 96(a5)
; RV32IZCMP-NEXT: sw t1, 92(a5)
; RV32IZCMP-NEXT: sw t2, 88(a5)
; RV32IZCMP-NEXT: sw t3, 84(a5)
@@ -425,13 +425,13 @@ define void @callee() nounwind {
; RV32IZCMP-NEXT: lw a0, 12(sp) # 4-byte Folded Reload
; RV32IZCMP-NEXT: sw a0, 16(a5)
; RV32IZCMP-NEXT: lw a0, 16(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT: sw a0, %lo(var+12)(a6)
+; RV32IZCMP-NEXT: sw a0, %lo(var+12)(t0)
; RV32IZCMP-NEXT: lw a0, 20(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT: sw a0, %lo(var+8)(a6)
+; RV32IZCMP-NEXT: sw a0, %lo(var+8)(t0)
; RV32IZCMP-NEXT: lw a0, 24(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT: sw a0, %lo(var+4)(a6)
+; RV32IZCMP-NEXT: sw a0, %lo(var+4)(t0)
; RV32IZCMP-NEXT: lw a0, 28(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT: sw a0, %lo(var)(a6)
+; RV32IZCMP-NEXT: sw a0, %lo(var)(t0)
; RV32IZCMP-NEXT: cm.popret {ra, s0-s11}, 96
;
; RV32IZCMP-WITH-FP-LABEL: callee:
@@ -451,16 +451,16 @@ define void @callee() nounwind {
; RV32IZCMP-WITH-FP-NEXT: sw s10, 32(sp) # 4-byte Folded Spill
; RV32IZCMP-WITH-FP-NEXT: sw s11, 28(sp) # 4-byte Folded Spill
; RV32IZCMP-WITH-FP-NEXT: addi s0, sp, 80
-; RV32IZCMP-WITH-FP-NEXT: lui a6, %hi(var)
-; RV32IZCMP-WITH-FP-NEXT: lw a0, %lo(var)(a6)
+; RV32IZCMP-WITH-FP-NEXT: lui t1, %hi(var)
+; RV32IZCMP-WITH-FP-NEXT: lw a0, %lo(var)(t1)
; RV32IZCMP-WITH-FP-NEXT: sw a0, -56(s0) # 4-byte Folded Spill
-; RV32IZCMP-WITH-FP-NEXT: lw a0, %lo(var+4)(a6)
+; RV32IZCMP-WITH-FP-NEXT: lw a0, %lo(var+4)(t1)
; RV32IZCMP-WITH-FP-NEXT: sw a0, -60(s0) # 4-byte Folded Spill
-; RV32IZCMP-WITH-FP-NEXT: lw a0, %lo(var+8)(a6)
+; RV32IZCMP-WITH-FP-NEXT: lw a0, %lo(var+8)(t1)
; RV32IZCMP-WITH-FP-NEXT: sw a0, -64(s0) # 4-byte Folded Spill
-; RV32IZCMP-WITH-FP-NEXT: lw a0, %lo(var+12)(a6)
+; RV32IZCMP-WITH-FP-NEXT: lw a0, %lo(var+12)(t1)
; RV32IZCMP-WITH-FP-NEXT: sw a0, -68(s0) # 4-byte Folded Spill
-; RV32IZCMP-WITH-FP-NEXT: addi a5, a6, %lo(var)
+; RV32IZCMP-WITH-FP-NEXT: addi a5, t1, %lo(var)
; RV32IZCMP-WITH-FP-NEXT: lw a0, 16(a5)
; RV32IZCMP-WITH-FP-NEXT: sw a0, -72(s0) # 4-byte Folded Spill
; RV32IZCMP-WITH-FP-NEXT: lw a0, 20(a5)
@@ -484,22 +484,22 @@ define void @callee() nounwind {
; RV32IZCMP-WITH-FP-NEXT: lw t3, 84(a5)
; RV32IZCMP-WITH-FP-NEXT: lw t2, 88(a5)
; RV32IZCMP-WITH-FP-NEXT: lw s1, 92(a5)
-; RV32IZCMP-WITH-FP-NEXT: lw t1, 96(a5)
-; RV32IZCMP-WITH-FP-NEXT: lw t0, 100(a5)
-; RV32IZCMP-WITH-FP-NEXT: lw a7, 104(a5)
-; RV32IZCMP-WITH-FP-NEXT: lw a4, 108(a5)
+; RV32IZCMP-WITH-FP-NEXT: lw t0, 112(a5)
+; RV32IZCMP-WITH-FP-NEXT: lw a4, 116(a5)
+; RV32IZCMP-WITH-FP-NEXT: lw a3, 120(a5)
; RV32IZCMP-WITH-FP-NEXT: lw a0, 124(a5)
-; RV32IZCMP-WITH-FP-NEXT: lw a1, 120(a5)
-; RV32IZCMP-WITH-FP-NEXT: lw a2, 116(a5)
-; RV32IZCMP-WITH-FP-NEXT: lw a3, 112(a5)
+; RV32IZCMP-WITH-FP-NEXT: lw a7, 96(a5)
+; RV32IZCMP-WITH-FP-NEXT: lw a6, 100(a5)
+; RV32IZCMP-WITH-FP-NEXT: lw a2, 104(a5)
+; RV32IZCMP-WITH-FP-NEXT: lw a1, 108(a5)
; RV32IZCMP-WITH-FP-NEXT: sw a0, 124(a5)
-; RV32IZCMP-WITH-FP-NEXT: sw a1, 120(a5)
-; RV32IZCMP-WITH-FP-NEXT: sw a2, 116(a5)
-; RV32IZCMP-WITH-FP-NEXT: sw a3, 112(a5)
-; RV32IZCMP-WITH-FP-NEXT: sw a4, 108(a5)
-; RV32IZCMP-WITH-FP-NEXT: sw a7, 104(a5)
-; RV32IZCMP-WITH-FP-NEXT: sw t0, 100(a5)
-; RV32IZCMP-WITH-FP-NEXT: sw t1, 96(a5)
+; RV32IZCMP-WITH-FP-NEXT: sw a3, 120(a5)
+; RV32IZCMP-WITH-FP-NEXT: sw a4, 116(a5)
+; RV32IZCMP-WITH-FP-NEXT: sw t0, 112(a5)
+; RV32IZCMP-WITH-FP-NEXT: sw a1, 108(a5)
+; RV32IZCMP-WITH-FP-NEXT: sw a2, 104(a5)
+; RV32IZCMP-WITH-FP-NEXT: sw a6, 100(a5)
+; RV32IZCMP-WITH-FP-NEXT: sw a7, 96(a5)
; RV32IZCMP-WITH-FP-NEXT: sw s1, 92(a5)
; RV32IZCMP-WITH-FP-NEXT: sw t2, 88(a5)
; RV32IZCMP-WITH-FP-NEXT: sw t3, 84(a5)
@@ -524,13 +524,13 @@ define void @callee() nounwind {
; RV32IZCMP-WITH-FP-NEXT: lw a0, -72(s0) # 4-byte Folded Reload
; RV32IZCMP-WITH-FP-NEXT: sw a0, 16(a5)
; RV32IZCMP-WITH-FP-NEXT: lw a0, -68(s0) # 4-byte Folded Reload
-; RV32IZCMP-WITH-FP-NEXT: sw a0, %lo(var+12)(a6)
+; RV32IZCMP-WITH-FP-NEXT: sw a0, %lo(var+12)(t1)
; RV32IZCMP-WITH-FP-NEXT: lw a0, -64(s0) # 4-byte Folded Reload
-; RV32IZCMP-WITH-FP-NEXT: sw a0, %lo(var+8)(a6)
+; RV32IZCMP-WITH-FP-NEXT: sw a0, %lo(var+8)(t1)
; RV32IZCMP-WITH-FP-NEXT: lw a0, -60(s0) # 4-byte Folded Reload
-; RV32IZCMP-WITH-FP-NEXT: sw a0, %lo(var+4)(a6)
+; RV32IZCMP-WITH-FP-NEXT: sw a0, %lo(var+4)(t1)
; RV32IZCMP-WITH-FP-NEXT: lw a0, -56(s0) # 4-byte Folded Reload
-; RV32IZCMP-WITH-FP-NEXT: sw a0, %lo(var)(a6)
+; RV32IZCMP-WITH-FP-NEXT: sw a0, %lo(var)(t1)
; RV32IZCMP-WITH-FP-NEXT: lw ra, 76(sp) # 4-byte Folded Reload
; RV32IZCMP-WITH-FP-NEXT: lw s0, 72(sp) # 4-byte Folded Reload
; RV32IZCMP-WITH-FP-NEXT: lw s1, 68(sp) # 4-byte Folded Reload
@@ -563,16 +563,16 @@ define void @callee() nounwind {
; RV64I-NEXT: sd s9, 72(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s10, 64(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s11, 56(sp) # 8-byte Folded Spill
-; RV64I-NEXT: lui a6, %hi(var)
-; RV64I-NEXT: lw a0, %lo(var)(a6)
+; RV64I-NEXT: lui a7, %hi(var)
+; RV64I-NEXT: lw a0, %lo(var)(a7)
; RV64I-NEXT: sd a0, 48(sp) # 8-byte Folded Spill
-; RV64I-NEXT: lw a0, %lo(var+4)(a6)
+; RV64I-NEXT: lw a0, %lo(var+4)(a7)
; RV64I-NEXT: sd a0, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: lw a0, %lo(var+8)(a6)
+; RV64I-NEXT: lw a0, %lo(var+8)(a7)
; RV64I-NEXT: sd a0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: lw a0, %lo(var+12)(a6)
+; RV64I-NEXT: lw a0, %lo(var+12)(a7)
; RV64I-NEXT: sd a0, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: addi a5, a6, %lo(var)
+; RV64I-NEXT: addi a5, a7, %lo(var)
; RV64I-NEXT: lw a0, 16(a5)
; RV64I-NEXT: sd a0, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: lw a0, 20(a5)
@@ -595,22 +595,22 @@ define void @callee() nounwind {
; RV64I-NEXT: lw s8, 84(a5)
; RV64I-NEXT: lw s9, 88(a5)
; RV64I-NEXT: lw s10, 92(a5)
-; RV64I-NEXT: lw s11, 96(a5)
-; RV64I-NEXT: lw ra, 100(a5)
-; RV64I-NEXT: lw a7, 104(a5)
-; RV64I-NEXT: lw a4, 108(a5)
+; RV64I-NEXT: lw s11, 112(a5)
+; RV64I-NEXT: lw ra, 116(a5)
+; RV64I-NEXT: lw a3, 120(a5)
; RV64I-NEXT: lw a0, 124(a5)
-; RV64I-NEXT: lw a1, 120(a5)
-; RV64I-NEXT: lw a2, 116(a5)
-; RV64I-NEXT: lw a3, 112(a5)
+; RV64I-NEXT: lw a6, 96(a5)
+; RV64I-NEXT: lw a4, 100(a5)
+; RV64I-NEXT: lw a2, 104(a5)
+; RV64I-NEXT: lw a1, 108(a5)
; RV64I-NEXT: sw a0, 124(a5)
-; RV64I-NEXT: sw a1, 120(a5)
-; RV64I-NEXT: sw a2, 116(a5)
-; RV64I-NEXT: sw a3, 112(a5)
-; RV64I-NEXT: sw a4, 108(a5)
-; RV64I-NEXT: sw a7, 104(a5)
-; RV64I-NEXT: sw ra, 100(a5)
-; RV64I-NEXT: sw s11, 96(a5)
+; RV64I-NEXT: sw a3, 120(a5)
+; RV64I-NEXT: sw ra, 116(a5)
+; RV64I-NEXT: sw s11, 112(a5)
+; RV64I-NEXT: sw a1, 108(a5)
+; RV64I-NEXT: sw a2, 104(a5)
+; RV64I-NEXT: sw a4, 100(a5)
+; RV64I-NEXT: sw a6, 96(a5)
; RV64I-NEXT: sw s10, 92(a5)
; RV64I-NEXT: sw s9, 88(a5)
; RV64I-NEXT: sw s8, 84(a5)
@@ -634,13 +634,13 @@ define void @callee() nounwind {
; RV64I-NEXT: ld a0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT: sw a0, 16(a5)
; RV64I-NEXT: ld a0, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: sw a0, %lo(var+12)(a6)
+; RV64I-NEXT: sw a0, %lo(var+12)(a7)
; RV64I-NEXT: ld a0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: sw a0, %lo(var+8)(a6)
+; RV64I-NEXT: sw a0, %lo(var+8)(a7)
; RV64I-NEXT: ld a0, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: sw a0, %lo(var+4)(a6)
+; RV64I-NEXT: sw a0, %lo(var+4)(a7)
; RV64I-NEXT: ld a0, 48(sp) # 8-byte Folded Reload
-; RV64I-NEXT: sw a0, %lo(var)(a6)
+; RV64I-NEXT: sw a0, %lo(var)(a7)
; RV64I-NEXT: ld ra, 152(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 144(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 136(sp) # 8-byte Folded Reload
@@ -663,16 +663,16 @@ define void @callee() nounwind {
; RV64I-LP64E-NEXT: sd ra, 64(sp) # 8-byte Folded Spill
; RV64I-LP64E-NEXT: sd s0, 56(sp) # 8-byte Folded Spill
; RV64I-LP64E-NEXT: sd s1, 48(sp) # 8-byte Folded Spill
-; RV64I-LP64E-NEXT: lui a6, %hi(var)
-; RV64I-LP64E-NEXT: lw a0, %lo(var)(a6)
+; RV64I-LP64E-NEXT: lui a7, %hi(var)
+; RV64I-LP64E-NEXT: lw a0, %lo(var)(a7)
; RV64I-LP64E-NEXT: sd a0, 40(sp) # 8-byte Folded Spill
-; RV64I-LP64E-NEXT: lw a0, %lo(var+4)(a6)
+; RV64I-LP64E-NEXT: lw a0, %lo(var+4)(a7)
; RV64I-LP64E-NEXT: sd a0, 32(sp) # 8-byte Folded Spill
-; RV64I-LP64E-NEXT: lw a0, %lo(var+8)(a6)
+; RV64I-LP64E-NEXT: lw a0, %lo(var+8)(a7)
; RV64I-LP64E-NEXT: sd a0, 24(sp) # 8-byte Folded Spill
-; RV64I-LP64E-NEXT: lw a0, %lo(var+12)(a6)
+; RV64I-LP64E-NEXT: lw a0, %lo(var+12)(a7)
; RV64I-LP64E-NEXT: sd a0, 16(sp) # 8-byte Folded Spill
-; RV64I-LP64E-NEXT: addi a5, a6, %lo(var)
+; RV64I-LP64E-NEXT: addi a5, a7, %lo(var)
; RV64I-LP64E-NEXT: lw a0, 16(a5)
; RV64I-LP64E-NEXT: sd a0, 8(sp) # 8-byte Folded Spill
; RV64I-LP64E-NEXT: lw a0, 20(a5)
@@ -695,22 +695,22 @@ define void @callee() nounwind {
; RV64I-LP64E-NEXT: lw s10, 84(a5)
; RV64I-LP64E-NEXT: lw s11, 88(a5)
; RV64I-LP64E-NEXT: lw s0, 92(a5)
-; RV64I-LP64E-NEXT: lw s1, 96(a5)
-; RV64I-LP64E-NEXT: lw ra, 100(a5)
-; RV64I-LP64E-NEXT: lw a7, 104(a5)
-; RV64I-LP64E-NEXT: lw a4, 108(a5)
+; RV64I-LP64E-NEXT: lw s1, 112(a5)
+; RV64I-LP64E-NEXT: lw ra, 116(a5)
+; RV64I-LP64E-NEXT: lw a3, 120(a5)
; RV64I-LP64E-NEXT: lw a0, 124(a5)
-; RV64I-LP64E-NEXT: lw a1, 120(a5)
-; RV64I-LP64E-NEXT: lw a2, 116(a5)
-; RV64I-LP64E-NEXT: lw a3, 112(a5)
+; RV64I-LP64E-NEXT: lw a6, 96(a5)
+; RV64I-LP64E-NEXT: lw a4, 100(a5)
+; RV64I-LP64E-NEXT: lw a2, 104(a5)
+; RV64I-LP64E-NEXT: lw a1, 108(a5)
; RV64I-LP64E-NEXT: sw a0, 124(a5)
-; RV64I-LP64E-NEXT: sw a1, 120(a5)
-; RV64I-LP64E-NEXT: sw a2, 116(a5)
-; RV64I-LP64E-NEXT: sw a3, 112(a5)
-; RV64I-LP64E-NEXT: sw a4, 108(a5)
-; RV64I-LP64E-NEXT: sw a7, 104(a5)
-; RV64I-LP64E-NEXT: sw ra, 100(a5)
-; RV64I-LP64E-NEXT: sw s1, 96(a5)
+; RV64I-LP64E-NEXT: sw a3, 120(a5)
+; RV64I-LP64E-NEXT: sw ra, 116(a5)
+; RV64I-LP64E-NEXT: sw s1, 112(a5)
+; RV64I-LP64E-NEXT: sw a1, 108(a5)
+; RV64I-LP64E-NEXT: sw a2, 104(a5)
+; RV64I-LP64E-NEXT: sw a4, 100(a5)
+; RV64I-LP64E-NEXT: sw a6, 96(a5)
; RV64I-LP64E-NEXT: sw s0, 92(a5)
; RV64I-LP64E-NEXT: sw s11, 88(a5)
; RV64I-LP64E-NEXT: sw s10, 84(a5)
@@ -734,13 +734,13 @@ define void @callee() nounwind {
; RV64I-LP64E-NEXT: ld a0, 8(sp) # 8-byte Folded Reload
; RV64I-LP64E-NEXT: sw a0, 16(a5)
; RV64I-LP64E-NEXT: ld a0, 16(sp) # 8-byte Folded Reload
-; RV64I-LP64E-NEXT: sw a0, %lo(var+12)(a6)
+; RV64I-LP64E-NEXT: sw a0, %lo(var+12)(a7)
; RV64I-LP64E-NEXT: ld a0, 24(sp) # 8-byte Folded Reload
-; RV64I-LP64E-NEXT: sw a0, %lo(var+8)(a6)
+; RV64I-LP64E-NEXT: sw a0, %lo(var+8)(a7)
; RV64I-LP64E-NEXT: ld a0, 32(sp) # 8-byte Folded Reload
-; RV64I-LP64E-NEXT: sw a0, %lo(var+4)(a6)
+; RV64I-LP64E-NEXT: sw a0, %lo(var+4)(a7)
; RV64I-LP64E-NEXT: ld a0, 40(sp) # 8-byte Folded Reload
-; RV64I-LP64E-NEXT: sw a0, %lo(var)(a6)
+; RV64I-LP64E-NEXT: sw a0, %lo(var)(a7)
; RV64I-LP64E-NEXT: ld ra, 64(sp) # 8-byte Folded Reload
; RV64I-LP64E-NEXT: ld s0, 56(sp) # 8-byte Folded Reload
; RV64I-LP64E-NEXT: ld s1, 48(sp) # 8-byte Folded Reload
@@ -764,16 +764,16 @@ define void @callee() nounwind {
; RV64I-WITH-FP-NEXT: sd s10, 64(sp) # 8-byte Folded Spill
; RV64I-WITH-FP-NEXT: sd s11, 56(sp) # 8-byte Folded Spill
; RV64I-WITH-FP-NEXT: addi s0, sp, 160
-; RV64I-WITH-FP-NEXT: lui a6, %hi(var)
-; RV64I-WITH-FP-NEXT: lw a0, %lo(var)(a6)
+; RV64I-WITH-FP-NEXT: lui t0, %hi(var)
+; RV64I-WITH-FP-NEXT: lw a0, %lo(var)(t0)
; RV64I-WITH-FP-NEXT: sd a0, -112(s0) # 8-byte Folded Spill
-; RV64I-WITH-FP-NEXT: lw a0, %lo(var+4)(a6)
+; RV64I-WITH-FP-NEXT: lw a0, %lo(var+4)(t0)
; RV64I-WITH-FP-NEXT: sd a0, -120(s0) # 8-byte Folded Spill
-; RV64I-WITH-FP-NEXT: lw a0, %lo(var+8)(a6)
+; RV64I-WITH-FP-NEXT: lw a0, %lo(var+8)(t0)
; RV64I-WITH-FP-NEXT: sd a0, -128(s0) # 8-byte Folded Spill
-; RV64I-WITH-FP-NEXT: lw a0, %lo(var+12)(a6)
+; RV64I-WITH-FP-NEXT: lw a0, %lo(var+12)(t0)
; RV64I-WITH-FP-NEXT: sd a0, -136(s0) # 8-byte Folded Spill
-; RV64I-WITH-FP-NEXT: addi a5, a6, %lo(var)
+; RV64I-WITH-FP-NEXT: addi a5, t0, %lo(var)
; RV64I-WITH-FP-NEXT: lw a0, 16(a5)
; RV64I-WITH-FP-NEXT: sd a0, -144(s0) # 8-byte Folded Spill
; RV64I-WITH-FP-NEXT: lw a0, 20(a5)
@@ -797,22 +797,22 @@ define void @callee() nounwind {
; RV64I-WITH-FP-NEXT: lw s9, 84(a5)
; RV64I-WITH-FP-NEXT: lw s10, 88(a5)
; RV64I-WITH-FP-NEXT: lw s11, 92(a5)
-; RV64I-WITH-FP-NEXT: lw ra, 96(a5)
-; RV64I-WITH-FP-NEXT: lw t0, 100(a5)
-; RV64I-WITH-FP-NEXT: lw a7, 104(a5)
-; RV64I-WITH-FP-NEXT: lw a4, 108(a5)
+; RV64I-WITH-FP-NEXT: lw ra, 112(a5)
+; RV64I-WITH-FP-NEXT: lw a4, 116(a5)
+; RV64I-WITH-FP-NEXT: lw a3, 120(a5)
; RV64I-WITH-FP-NEXT: lw a0, 124(a5)
-; RV64I-WITH-FP-NEXT: lw a1, 120(a5)
-; RV64I-WITH-FP-NEXT: lw a2, 116(a5)
-; RV64I-WITH-FP-NEXT: lw a3, 112(a5)
+; RV64I-WITH-FP-NEXT: lw a7, 96(a5)
+; RV64I-WITH-FP-NEXT: lw a6, 100(a5)
+; RV64I-WITH-FP-NEXT: lw a2, 104(a5)
+; RV64I-WITH-FP-NEXT: lw a1, 108(a5)
; RV64I-WITH-FP-NEXT: sw a0, 124(a5)
-; RV64I-WITH-FP-NEXT: sw a1, 120(a5)
-; RV64I-WITH-FP-NEXT: sw a2, 116(a5)
-; RV64I-WITH-FP-NEXT: sw a3, 112(a5)
-; RV64I-WITH-FP-NEXT: sw a4, 108(a5)
-; RV64I-WITH-FP-NEXT: sw a7, 104(a5)
-; RV64I-WITH-FP-NEXT: sw t0, 100(a5)
-; RV64I-WITH-FP-NEXT: sw ra, 96(a5)
+; RV64I-WITH-FP-NEXT: sw a3, 120(a5)
+; RV64I-WITH-FP-NEXT: sw a4, 116(a5)
+; RV64I-WITH-FP-NEXT: sw ra, 112(a5)
+; RV64I-WITH-FP-NEXT: sw a1, 108(a5)
+; RV64I-WITH-FP-NEXT: sw a2, 104(a5)
+; RV64I-WITH-FP-NEXT: sw a6, 100(a5)
+; RV64I-WITH-FP-NEXT: sw a7, 96(a5)
; RV64I-WITH-FP-NEXT: sw s11, 92(a5)
; RV64I-WITH-FP-NEXT: sw s10, 88(a5)
; RV64I-WITH-FP-NEXT: sw s9, 84(a5)
@@ -837,13 +837,13 @@ define void @callee() nounwind {
; RV64I-WITH-FP-NEXT: ld a0, -144(s0) # 8-byte Folded Reload
; RV64I-WITH-FP-NEXT: sw a0, 16(a5)
; RV64I-WITH-FP-NEXT: ld a0, -136(s0) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT: sw a0, %lo(var+12)(a6)
+; RV64I-WITH-FP-NEXT: sw a0, %lo(var+12)(t0)
; RV64I-WITH-FP-NEXT: ld a0, -128(s0) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT: sw a0, %lo(var+8)(a6)
+; RV64I-WITH-FP-NEXT: sw a0, %lo(var+8)(t0)
; RV64I-WITH-FP-NEXT: ld a0, -120(s0) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT: sw a0, %lo(var+4)(a6)
+; RV64I-WITH-FP-NEXT: sw a0, %lo(var+4)(t0)
; RV64I-WITH-FP-NEXT: ld a0, -112(s0) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT: sw a0, %lo(var)(a6)
+; RV64I-WITH-FP-NEXT: sw a0, %lo(var)(t0)
; RV64I-WITH-FP-NEXT: ld ra, 152(sp) # 8-byte Folded Reload
; RV64I-WITH-FP-NEXT: ld s0, 144(sp) # 8-byte Folded Reload
; RV64I-WITH-FP-NEXT: ld s1, 136(sp) # 8-byte Folded Reload
@@ -863,16 +863,16 @@ define void @callee() nounwind {
; RV64IZCMP-LABEL: callee:
; RV64IZCMP: # %bb.0:
; RV64IZCMP-NEXT: cm.push {ra, s0-s11}, -160
-; RV64IZCMP-NEXT: lui a6, %hi(var)
-; RV64IZCMP-NEXT: lw a0, %lo(var)(a6)
+; RV64IZCMP-NEXT: lui t0, %hi(var)
+; RV64IZCMP-NEXT: lw a0, %lo(var)(t0)
; RV64IZCMP-NEXT: sd a0, 40(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT: lw a0, %lo(var+4)(a6)
+; RV64IZCMP-NEXT: lw a0, %lo(var+4)(t0)
; RV64IZCMP-NEXT: sd a0, 32(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT: lw a0, %lo(var+8)(a6)
+; RV64IZCMP-NEXT: lw a0, %lo(var+8)(t0)
; RV64IZCMP-NEXT: sd a0, 24(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT: lw a0, %lo(var+12)(a6)
+; RV64IZCMP-NEXT: lw a0, %lo(var+12)(t0)
; RV64IZCMP-NEXT: sd a0, 16(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT: addi a5, a6, %lo(var)
+; RV64IZCMP-NEXT: addi a5, t0, %lo(var)
; RV64IZCMP-NEXT: lw a0, 16(a5)
; RV64IZCMP-NEXT: sd a0, 8(sp) # 8-byte Folded Spill
; RV64IZCMP-NEXT: lw a0, 20(a5)
@@ -895,22 +895,22 @@ define void @callee() nounwind {
; RV64IZCMP-NEXT: lw t3, 84(a5)
; RV64IZCMP-NEXT: lw t2, 88(a5)
; RV64IZCMP-NEXT: lw t1, 92(a5)
-; RV64IZCMP-NEXT: lw t0, 96(a5)
-; RV64IZCMP-NEXT: lw s0, 100(a5)
-; RV64IZCMP-NEXT: lw a7, 104(a5)
-; RV64IZCMP-NEXT: lw a4, 108(a5)
+; RV64IZCMP-NEXT: lw a7, 112(a5)
+; RV64IZCMP-NEXT: lw s0, 116(a5)
+; RV64IZCMP-NEXT: lw a3, 120(a5)
; RV64IZCMP-NEXT: lw a0, 124(a5)
-; RV64IZCMP-NEXT: lw a1, 120(a5)
-; RV64IZCMP-NEXT: lw a2, 116(a5)
-; RV64IZCMP-NEXT: lw a3, 112(a5)
+; RV64IZCMP-NEXT: lw a6, 96(a5)
+; RV64IZCMP-NEXT: lw a4, 100(a5)
+; RV64IZCMP-NEXT: lw a2, 104(a5)
+; RV64IZCMP-NEXT: lw a1, 108(a5)
; RV64IZCMP-NEXT: sw a0, 124(a5)
-; RV64IZCMP-NEXT: sw a1, 120(a5)
-; RV64IZCMP-NEXT: sw a2, 116(a5)
-; RV64IZCMP-NEXT: sw a3, 112(a5)
-; RV64IZCMP-NEXT: sw a4, 108(a5)
-; RV64IZCMP-NEXT: sw a7, 104(a5)
-; RV64IZCMP-NEXT: sw s0, 100(a5)
-; RV64IZCMP-NEXT: sw t0, 96(a5)
+; RV64IZCMP-NEXT: sw a3, 120(a5)
+; RV64IZCMP-NEXT: sw s0, 116(a5)
+; RV64IZCMP-NEXT: sw a7, 112(a5)
+; RV64IZCMP-NEXT: sw a1, 108(a5)
+; RV64IZCMP-NEXT: sw a2, 104(a5)
+; RV64IZCMP-NEXT: sw a4, 100(a5)
+; RV64IZCMP-NEXT: sw a6, 96(a5)
; RV64IZCMP-NEXT: sw t1, 92(a5)
; RV64IZCMP-NEXT: sw t2, 88(a5)
; RV64IZCMP-NEXT: sw t3, 84(a5)
@@ -934,13 +934,13 @@ define void @callee() nounwind {
; RV64IZCMP-NEXT: ld a0, 8(sp) # 8-byte Folded Reload
; RV64IZCMP-NEXT: sw a0, 16(a5)
; RV64IZCMP-NEXT: ld a0, 16(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT: sw a0, %lo(var+12)(a6)
+; RV64IZCMP-NEXT: sw a0, %lo(var+12)(t0)
; RV64IZCMP-NEXT: ld a0, 24(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT: sw a0, %lo(var+8)(a6)
+; RV64IZCMP-NEXT: sw a0, %lo(var+8)(t0)
; RV64IZCMP-NEXT: ld a0, 32(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT: sw a0, %lo(var+4)(a6)
+; RV64IZCMP-NEXT: sw a0, %lo(var+4)(t0)
; RV64IZCMP-NEXT: ld a0, 40(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT: sw a0, %lo(var)(a6)
+; RV64IZCMP-NEXT: sw a0, %lo(var)(t0)
; RV64IZCMP-NEXT: cm.popret {ra, s0-s11}, 160
;
; RV64IZCMP-WITH-FP-LABEL: callee:
@@ -960,16 +960,16 @@ define void @callee() nounwind {
; RV64IZCMP-WITH-FP-NEXT: sd s10, 64(sp) # 8-byte Folded Spill
; RV64IZCMP-WITH-FP-NEXT: sd s11, 56(sp) # 8-byte Folded Spill
; RV64IZCMP-WITH-FP-NEXT: addi s0, sp, 160
-; RV64IZCMP-WITH-FP-NEXT: lui a6, %hi(var)
-; RV64IZCMP-WITH-FP-NEXT: lw a0, %lo(var)(a6)
+; RV64IZCMP-WITH-FP-NEXT: lui t1, %hi(var)
+; RV64IZCMP-WITH-FP-NEXT: lw a0, %lo(var)(t1)
; RV64IZCMP-WITH-FP-NEXT: sd a0, -112(s0) # 8-byte Folded Spill
-; RV64IZCMP-WITH-FP-NEXT: lw a0, %lo(var+4)(a6)
+; RV64IZCMP-WITH-FP-NEXT: lw a0, %lo(var+4)(t1)
; RV64IZCMP-WITH-FP-NEXT: sd a0, -120(s0) # 8-byte Folded Spill
-; RV64IZCMP-WITH-FP-NEXT: lw a0, %lo(var+8)(a6)
+; RV64IZCMP-WITH-FP-NEXT: lw a0, %lo(var+8)(t1)
; RV64IZCMP-WITH-FP-NEXT: sd a0, -128(s0) # 8-byte Folded Spill
-; RV64IZCMP-WITH-FP-NEXT: lw a0, %lo(var+12)(a6)
+; RV64IZCMP-WITH-FP-NEXT: lw a0, %lo(var+12)(t1)
; RV64IZCMP-WITH-FP-NEXT: sd a0, -136(s0) # 8-byte Folded Spill
-; RV64IZCMP-WITH-FP-NEXT: addi a5, a6, %lo(var)
+; RV64IZCMP-WITH-FP-NEXT: addi a5, t1, %lo(var)
; RV64IZCMP-WITH-FP-NEXT: lw a0, 16(a5)
; RV64IZCMP-WITH-FP-NEXT: sd a0, -144(s0) # 8-byte Folded Spill
; RV64IZCMP-WITH-FP-NEXT: lw a0, 20(a5)
@@ -993,22 +993,22 @@ define void @callee() nounwind {
; RV64IZCMP-WITH-FP-NEXT: lw t3, 84(a5)
; RV64IZCMP-WITH-FP-NEXT: lw t2, 88(a5)
; RV64IZCMP-WITH-FP-NEXT: lw s1, 92(a5)
-; RV64IZCMP-WITH-FP-NEXT: lw t1, 96(a5)
-; RV64IZCMP-WITH-FP-NEXT: lw t0, 100(a5)
-; RV64IZCMP-WITH-FP-NEXT: lw a7, 104(a5)
-; RV64IZCMP-WITH-FP-NEXT: lw a4, 108(a5)
+; RV64IZCMP-WITH-FP-NEXT: lw t0, 112(a5)
+; RV64IZCMP-WITH-FP-NEXT: lw a4, 116(a5)
+; RV64IZCMP-WITH-FP-NEXT: lw a3, 120(a5)
; RV64IZCMP-WITH-FP-NEXT: lw a0, 124(a5)
-; RV64IZCMP-WITH-FP-NEXT: lw a1, 120(a5)
-; RV64IZCMP-WITH-FP-NEXT: lw a2, 116(a5)
-; RV64IZCMP-WITH-FP-NEXT: lw a3, 112(a5)
+; RV64IZCMP-WITH-FP-NEXT: lw a7, 96(a5)
+; RV64IZCMP-WITH-FP-NEXT: lw a6, 100(a5)
+; RV64IZCMP-WITH-FP-NEXT: lw a2, 104(a5)
+; RV64IZCMP-WITH-FP-NEXT: lw a1, 108(a5)
; RV64IZCMP-WITH-FP-NEXT: sw a0, 124(a5)
-; RV64IZCMP-WITH-FP-NEXT: sw a1, 120(a5)
-; RV64IZCMP-WITH-FP-NEXT: sw a2, 116(a5)
-; RV64IZCMP-WITH-FP-NEXT: sw a3, 112(a5)
-; RV64IZCMP-WITH-FP-NEXT: sw a4, 108(a5)
-; RV64IZCMP-WITH-FP-NEXT: sw a7, 104(a5)
-; RV64IZCMP-WITH-FP-NEXT: sw t0, 100(a5)
-; RV64IZCMP-WITH-FP-NEXT: sw t1, 96(a5)
+; RV64IZCMP-WITH-FP-NEXT: sw a3, 120(a5)
+; RV64IZCMP-WITH-FP-NEXT: sw a4, 116(a5)
+; RV64IZCMP-WITH-FP-NEXT: sw t0, 112(a5)
+; RV64IZCMP-WITH-FP-NEXT: sw a1, 108(a5)
+; RV64IZCMP-WITH-FP-NEXT: sw a2, 104(a5)
+; RV64IZCMP-WITH-FP-NEXT: sw a6, 100(a5)
+; RV64IZCMP-WITH-FP-NEXT: sw a7, 96(a5)
; RV64IZCMP-WITH-FP-NEXT: sw s1, 92(a5)
; RV64IZCMP-WITH-FP-NEXT: sw t2, 88(a5)
; RV64IZCMP-WITH-FP-NEXT: sw t3, 84(a5)
@@ -1033,13 +1033,13 @@ define void @callee() nounwind {
; RV64IZCMP-WITH-FP-NEXT: ld a0, -144(s0) # 8-byte Folded Reload
; RV64IZCMP-WITH-FP-NEXT: sw a0, 16(a5)
; RV64IZCMP-WITH-FP-NEXT: ld a0, -136(s0) # 8-byte Folded Reload
-; RV64IZCMP-WITH-FP-NEXT: sw a0, %lo(var+12)(a6)
+; RV64IZCMP-WITH-FP-NEXT: sw a0, %lo(var+12)(t1)
; RV64IZCMP-WITH-FP-NEXT: ld a0, -128(s0) # 8-byte Folded Reload
-; RV64IZCMP-WITH-FP-NEXT: sw a0, %lo(var+8)(a6)
+; RV64IZCMP-WITH-FP-NEXT: sw a0, %lo(var+8)(t1)
; RV64IZCMP-WITH-FP-NEXT: ld a0, -120(s0) # 8-byte Folded Reload
-; RV64IZCMP-WITH-FP-NEXT: sw a0, %lo(var+4)(a6)
+; RV64IZCMP-WITH-FP-NEXT: sw a0, %lo(var+4)(t1)
; RV64IZCMP-WITH-FP-NEXT: ld a0, -112(s0) # 8-byte Folded Reload
-; RV64IZCMP-WITH-FP-NEXT: sw a0, %lo(var)(a6)
+; RV64IZCMP-WITH-FP-NEXT: sw a0, %lo(var)(t1)
; RV64IZCMP-WITH-FP-NEXT: ld ra, 152(sp) # 8-byte Folded Reload
; RV64IZCMP-WITH-FP-NEXT: ld s0, 144(sp) # 8-byte Folded Reload
; RV64IZCMP-WITH-FP-NEXT: ld s1, 136(sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-ilp32d-common.ll b/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-ilp32d-common.ll
index 231ed159ab2061..bb082b0314d599 100644
--- a/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-ilp32d-common.ll
+++ b/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-ilp32d-common.ll
@@ -190,21 +190,21 @@ define i32 @caller_many_scalars() nounwind {
define i32 @callee_large_scalars(i128 %a, fp128 %b) nounwind {
; RV32I-FPELIM-LABEL: callee_large_scalars:
; RV32I-FPELIM: # %bb.0:
-; RV32I-FPELIM-NEXT: lw a2, 0(a1)
-; RV32I-FPELIM-NEXT: lw a3, 0(a0)
-; RV32I-FPELIM-NEXT: lw a4, 4(a1)
-; RV32I-FPELIM-NEXT: lw a5, 12(a1)
-; RV32I-FPELIM-NEXT: lw a6, 12(a0)
-; RV32I-FPELIM-NEXT: lw a7, 4(a0)
+; RV32I-FPELIM-NEXT: lw a2, 0(a0)
+; RV32I-FPELIM-NEXT: lw a3, 4(a0)
+; RV32I-FPELIM-NEXT: lw a4, 12(a1)
+; RV32I-FPELIM-NEXT: lw a5, 12(a0)
+; RV32I-FPELIM-NEXT: lw a6, 0(a1)
+; RV32I-FPELIM-NEXT: lw a7, 4(a1)
; RV32I-FPELIM-NEXT: lw a1, 8(a1)
; RV32I-FPELIM-NEXT: lw a0, 8(a0)
-; RV32I-FPELIM-NEXT: xor a5, a6, a5
-; RV32I-FPELIM-NEXT: xor a4, a7, a4
-; RV32I-FPELIM-NEXT: or a4, a4, a5
+; RV32I-FPELIM-NEXT: xor a4, a5, a4
+; RV32I-FPELIM-NEXT: xor a3, a3, a7
+; RV32I-FPELIM-NEXT: or a3, a3, a4
; RV32I-FPELIM-NEXT: xor a0, a0, a1
-; RV32I-FPELIM-NEXT: xor a2, a3, a2
-; RV32I-FPELIM-NEXT: or a0, a2, a0
-; RV32I-FPELIM-NEXT: or a0, a0, a4
+; RV32I-FPELIM-NEXT: xor a1, a2, a6
+; RV32I-FPELIM-NEXT: or a0, a1, a0
+; RV32I-FPELIM-NEXT: or a0, a0, a3
; RV32I-FPELIM-NEXT: seqz a0, a0
; RV32I-FPELIM-NEXT: ret
;
@@ -214,21 +214,21 @@ define i32 @callee_large_scalars(i128 %a, fp128 %b) nounwind {
; RV32I-WITHFP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-WITHFP-NEXT: addi s0, sp, 16
-; RV32I-WITHFP-NEXT: lw a2, 0(a1)
-; RV32I-WITHFP-NEXT: lw a3, 0(a0)
-; RV32I-WITHFP-NEXT: lw a4, 4(a1)
-; RV32I-WITHFP-NEXT: lw a5, 12(a1)
-; RV32I-WITHFP-NEXT: lw a6, 12(a0)
-; RV32I-WITHFP-NEXT: lw a7, 4(a0)
+; RV32I-WITHFP-NEXT: lw a2, 0(a0)
+; RV32I-WITHFP-NEXT: lw a3, 4(a0)
+; RV32I-WITHFP-NEXT: lw a4, 12(a1)
+; RV32I-WITHFP-NEXT: lw a5, 12(a0)
+; RV32I-WITHFP-NEXT: lw a6, 0(a1)
+; RV32I-WITHFP-NEXT: lw a7, 4(a1)
; RV32I-WITHFP-NEXT: lw a1, 8(a1)
; RV32I-WITHFP-NEXT: lw a0, 8(a0)
-; RV32I-WITHFP-NEXT: xor a5, a6, a5
-; RV32I-WITHFP-NEXT: xor a4, a7, a4
-; RV32I-WITHFP-NEXT: or a4, a4, a5
+; RV32I-WITHFP-NEXT: xor a4, a5, a4
+; RV32I-WITHFP-NEXT: xor a3, a3, a7
+; RV32I-WITHFP-NEXT: or a3, a3, a4
; RV32I-WITHFP-NEXT: xor a0, a0, a1
-; RV32I-WITHFP-NEXT: xor a2, a3, a2
-; RV32I-WITHFP-NEXT: or a0, a2, a0
-; RV32I-WITHFP-NEXT: or a0, a0, a4
+; RV32I-WITHFP-NEXT: xor a1, a2, a6
+; RV32I-WITHFP-NEXT: or a0, a1, a0
+; RV32I-WITHFP-NEXT: or a0, a0, a3
; RV32I-WITHFP-NEXT: seqz a0, a0
; RV32I-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
@@ -297,21 +297,21 @@ define i32 @callee_large_scalars_exhausted_regs(i32 %a, i32 %b, i32 %c, i32 %d,
; RV32I-FPELIM-LABEL: callee_large_scalars_exhausted_regs:
; RV32I-FPELIM: # %bb.0:
; RV32I-FPELIM-NEXT: lw a0, 4(sp)
-; RV32I-FPELIM-NEXT: lw a1, 0(a0)
-; RV32I-FPELIM-NEXT: lw a2, 0(a7)
-; RV32I-FPELIM-NEXT: lw a3, 4(a0)
-; RV32I-FPELIM-NEXT: lw a4, 12(a0)
-; RV32I-FPELIM-NEXT: lw a5, 12(a7)
-; RV32I-FPELIM-NEXT: lw a6, 4(a7)
+; RV32I-FPELIM-NEXT: lw a1, 0(a7)
+; RV32I-FPELIM-NEXT: lw a2, 4(a7)
+; RV32I-FPELIM-NEXT: lw a3, 12(a0)
+; RV32I-FPELIM-NEXT: lw a4, 12(a7)
+; RV32I-FPELIM-NEXT: lw a5, 0(a0)
+; RV32I-FPELIM-NEXT: lw a6, 4(a0)
; RV32I-FPELIM-NEXT: lw a0, 8(a0)
; RV32I-FPELIM-NEXT: lw a7, 8(a7)
-; RV32I-FPELIM-NEXT: xor a4, a5, a4
-; RV32I-FPELIM-NEXT: xor a3, a6, a3
-; RV32I-FPELIM-NEXT: or a3, a3, a4
+; RV32I-FPELIM-NEXT: xor a3, a4, a3
+; RV32I-FPELIM-NEXT: xor a2, a2, a6
+; RV32I-FPELIM-NEXT: or a2, a2, a3
; RV32I-FPELIM-NEXT: xor a0, a7, a0
-; RV32I-FPELIM-NEXT: xor a1, a2, a1
+; RV32I-FPELIM-NEXT: xor a1, a1, a5
; RV32I-FPELIM-NEXT: or a0, a1, a0
-; RV32I-FPELIM-NEXT: or a0, a0, a3
+; RV32I-FPELIM-NEXT: or a0, a0, a2
; RV32I-FPELIM-NEXT: seqz a0, a0
; RV32I-FPELIM-NEXT: ret
;
@@ -322,21 +322,21 @@ define i32 @callee_large_scalars_exhausted_regs(i32 %a, i32 %b, i32 %c, i32 %d,
; RV32I-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-WITHFP-NEXT: addi s0, sp, 16
; RV32I-WITHFP-NEXT: lw a0, 4(s0)
-; RV32I-WITHFP-NEXT: lw a1, 0(a0)
-; RV32I-WITHFP-NEXT: lw a2, 0(a7)
-; RV32I-WITHFP-NEXT: lw a3, 4(a0)
-; RV32I-WITHFP-NEXT: lw a4, 12(a0)
-; RV32I-WITHFP-NEXT: lw a5, 12(a7)
-; RV32I-WITHFP-NEXT: lw a6, 4(a7)
+; RV32I-WITHFP-NEXT: lw a1, 0(a7)
+; RV32I-WITHFP-NEXT: lw a2, 4(a7)
+; RV32I-WITHFP-NEXT: lw a3, 12(a0)
+; RV32I-WITHFP-NEXT: lw a4, 12(a7)
+; RV32I-WITHFP-NEXT: lw a5, 0(a0)
+; RV32I-WITHFP-NEXT: lw a6, 4(a0)
; RV32I-WITHFP-NEXT: lw a0, 8(a0)
; RV32I-WITHFP-NEXT: lw a7, 8(a7)
-; RV32I-WITHFP-NEXT: xor a4, a5, a4
-; RV32I-WITHFP-NEXT: xor a3, a6, a3
-; RV32I-WITHFP-NEXT: or a3, a3, a4
+; RV32I-WITHFP-NEXT: xor a3, a4, a3
+; RV32I-WITHFP-NEXT: xor a2, a2, a6
+; RV32I-WITHFP-NEXT: or a2, a2, a3
; RV32I-WITHFP-NEXT: xor a0, a7, a0
-; RV32I-WITHFP-NEXT: xor a1, a2, a1
+; RV32I-WITHFP-NEXT: xor a1, a1, a5
; RV32I-WITHFP-NEXT: or a0, a1, a0
-; RV32I-WITHFP-NEXT: or a0, a0, a3
+; RV32I-WITHFP-NEXT: or a0, a0, a2
; RV32I-WITHFP-NEXT: seqz a0, a0
; RV32I-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/calling-conv-ilp32e.ll b/llvm/test/CodeGen/RISCV/calling-conv-ilp32e.ll
index d08cf577b1bdd3..708cb00d1c45c6 100644
--- a/llvm/test/CodeGen/RISCV/calling-conv-ilp32e.ll
+++ b/llvm/test/CodeGen/RISCV/calling-conv-ilp32e.ll
@@ -1267,21 +1267,21 @@ define i32 @caller_many_scalars() {
define i32 @callee_large_scalars(i128 %a, fp128 %b) {
; ILP32E-FPELIM-LABEL: callee_large_scalars:
; ILP32E-FPELIM: # %bb.0:
-; ILP32E-FPELIM-NEXT: lw a2, 0(a1)
-; ILP32E-FPELIM-NEXT: lw a3, 0(a0)
-; ILP32E-FPELIM-NEXT: lw a4, 4(a1)
-; ILP32E-FPELIM-NEXT: lw a5, 12(a1)
-; ILP32E-FPELIM-NEXT: lw a6, 12(a0)
-; ILP32E-FPELIM-NEXT: lw a7, 4(a0)
+; ILP32E-FPELIM-NEXT: lw a2, 0(a0)
+; ILP32E-FPELIM-NEXT: lw a3, 4(a0)
+; ILP32E-FPELIM-NEXT: lw a4, 12(a1)
+; ILP32E-FPELIM-NEXT: lw a5, 12(a0)
+; ILP32E-FPELIM-NEXT: lw a6, 0(a1)
+; ILP32E-FPELIM-NEXT: lw a7, 4(a1)
; ILP32E-FPELIM-NEXT: lw a1, 8(a1)
; ILP32E-FPELIM-NEXT: lw a0, 8(a0)
-; ILP32E-FPELIM-NEXT: xor a5, a6, a5
-; ILP32E-FPELIM-NEXT: xor a4, a7, a4
-; ILP32E-FPELIM-NEXT: or a4, a4, a5
+; ILP32E-FPELIM-NEXT: xor a4, a5, a4
+; ILP32E-FPELIM-NEXT: xor a3, a3, a7
+; ILP32E-FPELIM-NEXT: or a3, a3, a4
; ILP32E-FPELIM-NEXT: xor a0, a0, a1
-; ILP32E-FPELIM-NEXT: xor a2, a3, a2
-; ILP32E-FPELIM-NEXT: or a0, a2, a0
-; ILP32E-FPELIM-NEXT: or a0, a0, a4
+; ILP32E-FPELIM-NEXT: xor a1, a2, a6
+; ILP32E-FPELIM-NEXT: or a0, a1, a0
+; ILP32E-FPELIM-NEXT: or a0, a0, a3
; ILP32E-FPELIM-NEXT: seqz a0, a0
; ILP32E-FPELIM-NEXT: ret
;
@@ -1295,21 +1295,21 @@ define i32 @callee_large_scalars(i128 %a, fp128 %b) {
; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8
; ILP32E-WITHFP-NEXT: addi s0, sp, 8
; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0
-; ILP32E-WITHFP-NEXT: lw a2, 0(a1)
-; ILP32E-WITHFP-NEXT: lw a3, 0(a0)
-; ILP32E-WITHFP-NEXT: lw a4, 4(a1)
-; ILP32E-WITHFP-NEXT: lw a5, 12(a1)
-; ILP32E-WITHFP-NEXT: lw a6, 12(a0)
-; ILP32E-WITHFP-NEXT: lw a7, 4(a0)
+; ILP32E-WITHFP-NEXT: lw a2, 0(a0)
+; ILP32E-WITHFP-NEXT: lw a3, 4(a0)
+; ILP32E-WITHFP-NEXT: lw a4, 12(a1)
+; ILP32E-WITHFP-NEXT: lw a5, 12(a0)
+; ILP32E-WITHFP-NEXT: lw a6, 0(a1)
+; ILP32E-WITHFP-NEXT: lw a7, 4(a1)
; ILP32E-WITHFP-NEXT: lw a1, 8(a1)
; ILP32E-WITHFP-NEXT: lw a0, 8(a0)
-; ILP32E-WITHFP-NEXT: xor a5, a6, a5
-; ILP32E-WITHFP-NEXT: xor a4, a7, a4
-; ILP32E-WITHFP-NEXT: or a4, a4, a5
+; ILP32E-WITHFP-NEXT: xor a4, a5, a4
+; ILP32E-WITHFP-NEXT: xor a3, a3, a7
+; ILP32E-WITHFP-NEXT: or a3, a3, a4
; ILP32E-WITHFP-NEXT: xor a0, a0, a1
-; ILP32E-WITHFP-NEXT: xor a2, a3, a2
-; ILP32E-WITHFP-NEXT: or a0, a2, a0
-; ILP32E-WITHFP-NEXT: or a0, a0, a4
+; ILP32E-WITHFP-NEXT: xor a1, a2, a6
+; ILP32E-WITHFP-NEXT: or a0, a1, a0
+; ILP32E-WITHFP-NEXT: or a0, a0, a3
; ILP32E-WITHFP-NEXT: seqz a0, a0
; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload
; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload
@@ -1318,21 +1318,21 @@ define i32 @callee_large_scalars(i128 %a, fp128 %b) {
;
; ILP32E-FPELIM-SAVE-RESTORE-LABEL: callee_large_scalars:
; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0:
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a2, 0(a1)
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a3, 0(a0)
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a4, 4(a1)
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a5, 12(a1)
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a6, 12(a0)
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a7, 4(a0)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a2, 0(a0)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a3, 4(a0)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a4, 12(a1)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a5, 12(a0)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a6, 0(a1)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a7, 4(a1)
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a1, 8(a1)
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a0, 8(a0)
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a5, a6, a5
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a4, a7, a4
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a4, a4, a5
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a4, a5, a4
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a3, a3, a7
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a3, a3, a4
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a0, a0, a1
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a2, a3, a2
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a0, a2, a0
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a0, a0, a4
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a1, a2, a6
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a0, a1, a0
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a0, a0, a3
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: seqz a0, a0
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: ret
;
@@ -1344,21 +1344,21 @@ define i32 @callee_large_scalars(i128 %a, fp128 %b) {
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a2, 0(a1)
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a3, 0(a0)
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a4, 4(a1)
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a5, 12(a1)
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a6, 12(a0)
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a7, 4(a0)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a2, 0(a0)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a3, 4(a0)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a4, 12(a1)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a5, 12(a0)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a6, 0(a1)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a7, 4(a1)
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a1, 8(a1)
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a0, 8(a0)
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a5, a6, a5
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a4, a7, a4
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a4, a4, a5
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a4, a5, a4
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a3, a3, a7
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a3, a3, a4
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a0, a0, a1
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a2, a3, a2
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a0, a2, a0
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a0, a0, a4
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a1, a2, a6
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a0, a1, a0
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a0, a0, a3
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: seqz a0, a0
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1
%b_bitcast = bitcast fp128 %b to i128
@@ -1492,23 +1492,23 @@ define i32 @caller_large_scalars() {
define i32 @callee_large_scalars_exhausted_regs(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i128 %h, i32 %i, fp128 %j) {
; ILP32E-FPELIM-LABEL: callee_large_scalars_exhausted_regs:
; ILP32E-FPELIM: # %bb.0:
-; ILP32E-FPELIM-NEXT: lw a0, 12(sp)
-; ILP32E-FPELIM-NEXT: lw a1, 4(sp)
+; ILP32E-FPELIM-NEXT: lw a0, 4(sp)
+; ILP32E-FPELIM-NEXT: lw a1, 12(sp)
; ILP32E-FPELIM-NEXT: lw a2, 0(a0)
-; ILP32E-FPELIM-NEXT: lw a3, 0(a1)
-; ILP32E-FPELIM-NEXT: lw a4, 4(a0)
+; ILP32E-FPELIM-NEXT: lw a3, 4(a0)
+; ILP32E-FPELIM-NEXT: lw a4, 12(a1)
; ILP32E-FPELIM-NEXT: lw a5, 12(a0)
-; ILP32E-FPELIM-NEXT: lw a6, 12(a1)
+; ILP32E-FPELIM-NEXT: lw a6, 0(a1)
; ILP32E-FPELIM-NEXT: lw a7, 4(a1)
-; ILP32E-FPELIM-NEXT: lw a0, 8(a0)
; ILP32E-FPELIM-NEXT: lw a1, 8(a1)
-; ILP32E-FPELIM-NEXT: xor a5, a6, a5
-; ILP32E-FPELIM-NEXT: xor a4, a7, a4
-; ILP32E-FPELIM-NEXT: or a4, a4, a5
-; ILP32E-FPELIM-NEXT: xor a0, a1, a0
-; ILP32E-FPELIM-NEXT: xor a2, a3, a2
-; ILP32E-FPELIM-NEXT: or a0, a2, a0
-; ILP32E-FPELIM-NEXT: or a0, a0, a4
+; ILP32E-FPELIM-NEXT: lw a0, 8(a0)
+; ILP32E-FPELIM-NEXT: xor a4, a5, a4
+; ILP32E-FPELIM-NEXT: xor a3, a3, a7
+; ILP32E-FPELIM-NEXT: or a3, a3, a4
+; ILP32E-FPELIM-NEXT: xor a0, a0, a1
+; ILP32E-FPELIM-NEXT: xor a1, a2, a6
+; ILP32E-FPELIM-NEXT: or a0, a1, a0
+; ILP32E-FPELIM-NEXT: or a0, a0, a3
; ILP32E-FPELIM-NEXT: seqz a0, a0
; ILP32E-FPELIM-NEXT: ret
;
@@ -1522,23 +1522,23 @@ define i32 @callee_large_scalars_exhausted_regs(i32 %a, i32 %b, i32 %c, i32 %d,
; ILP32E-WITHFP-NEXT: .cfi_offset s0, -8
; ILP32E-WITHFP-NEXT: addi s0, sp, 8
; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0
-; ILP32E-WITHFP-NEXT: lw a0, 12(s0)
-; ILP32E-WITHFP-NEXT: lw a1, 4(s0)
+; ILP32E-WITHFP-NEXT: lw a0, 4(s0)
+; ILP32E-WITHFP-NEXT: lw a1, 12(s0)
; ILP32E-WITHFP-NEXT: lw a2, 0(a0)
-; ILP32E-WITHFP-NEXT: lw a3, 0(a1)
-; ILP32E-WITHFP-NEXT: lw a4, 4(a0)
+; ILP32E-WITHFP-NEXT: lw a3, 4(a0)
+; ILP32E-WITHFP-NEXT: lw a4, 12(a1)
; ILP32E-WITHFP-NEXT: lw a5, 12(a0)
-; ILP32E-WITHFP-NEXT: lw a6, 12(a1)
+; ILP32E-WITHFP-NEXT: lw a6, 0(a1)
; ILP32E-WITHFP-NEXT: lw a7, 4(a1)
-; ILP32E-WITHFP-NEXT: lw a0, 8(a0)
; ILP32E-WITHFP-NEXT: lw a1, 8(a1)
-; ILP32E-WITHFP-NEXT: xor a5, a6, a5
-; ILP32E-WITHFP-NEXT: xor a4, a7, a4
-; ILP32E-WITHFP-NEXT: or a4, a4, a5
-; ILP32E-WITHFP-NEXT: xor a0, a1, a0
-; ILP32E-WITHFP-NEXT: xor a2, a3, a2
-; ILP32E-WITHFP-NEXT: or a0, a2, a0
-; ILP32E-WITHFP-NEXT: or a0, a0, a4
+; ILP32E-WITHFP-NEXT: lw a0, 8(a0)
+; ILP32E-WITHFP-NEXT: xor a4, a5, a4
+; ILP32E-WITHFP-NEXT: xor a3, a3, a7
+; ILP32E-WITHFP-NEXT: or a3, a3, a4
+; ILP32E-WITHFP-NEXT: xor a0, a0, a1
+; ILP32E-WITHFP-NEXT: xor a1, a2, a6
+; ILP32E-WITHFP-NEXT: or a0, a1, a0
+; ILP32E-WITHFP-NEXT: or a0, a0, a3
; ILP32E-WITHFP-NEXT: seqz a0, a0
; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload
; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload
@@ -1547,23 +1547,23 @@ define i32 @callee_large_scalars_exhausted_regs(i32 %a, i32 %b, i32 %c, i32 %d,
;
; ILP32E-FPELIM-SAVE-RESTORE-LABEL: callee_large_scalars_exhausted_regs:
; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0:
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a0, 12(sp)
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a1, 4(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a0, 4(sp)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a1, 12(sp)
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a2, 0(a0)
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a3, 0(a1)
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a4, 4(a0)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a3, 4(a0)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a4, 12(a1)
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a5, 12(a0)
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a6, 12(a1)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a6, 0(a1)
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a7, 4(a1)
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a0, 8(a0)
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a1, 8(a1)
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a5, a6, a5
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a4, a7, a4
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a4, a4, a5
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a0, a1, a0
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a2, a3, a2
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a0, a2, a0
-; ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a0, a0, a4
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a0, 8(a0)
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a4, a5, a4
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a3, a3, a7
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a3, a3, a4
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a0, a0, a1
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a1, a2, a6
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a0, a1, a0
+; ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a0, a0, a3
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: seqz a0, a0
; ILP32E-FPELIM-SAVE-RESTORE-NEXT: ret
;
@@ -1575,23 +1575,23 @@ define i32 @callee_large_scalars_exhausted_regs(i32 %a, i32 %b, i32 %c, i32 %d,
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_offset s0, -8
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a0, 12(s0)
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a1, 4(s0)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a0, 4(s0)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a1, 12(s0)
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a2, 0(a0)
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a3, 0(a1)
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a4, 4(a0)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a3, 4(a0)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a4, 12(a1)
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a5, 12(a0)
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a6, 12(a1)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a6, 0(a1)
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a7, 4(a1)
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a0, 8(a0)
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a1, 8(a1)
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a5, a6, a5
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a4, a7, a4
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a4, a4, a5
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a0, a1, a0
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a2, a3, a2
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a0, a2, a0
-; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a0, a0, a4
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a0, 8(a0)
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a4, a5, a4
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a3, a3, a7
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a3, a3, a4
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a0, a0, a1
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a1, a2, a6
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a0, a1, a0
+; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a0, a0, a3
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: seqz a0, a0
; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1
%j_bitcast = bitcast fp128 %j to i128
diff --git a/llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-lp64d-common.ll b/llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-lp64d-common.ll
index 67123466354c41..a0e1b002b7260d 100644
--- a/llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-lp64d-common.ll
+++ b/llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-lp64d-common.ll
@@ -106,21 +106,21 @@ define i32 @caller_many_scalars() nounwind {
define i64 @callee_large_scalars(i256 %a, i256 %b) nounwind {
; RV64I-LABEL: callee_large_scalars:
; RV64I: # %bb.0:
-; RV64I-NEXT: ld a2, 0(a1)
-; RV64I-NEXT: ld a3, 0(a0)
-; RV64I-NEXT: ld a4, 8(a1)
-; RV64I-NEXT: ld a5, 24(a1)
-; RV64I-NEXT: ld a6, 24(a0)
-; RV64I-NEXT: ld a7, 8(a0)
+; RV64I-NEXT: ld a2, 0(a0)
+; RV64I-NEXT: ld a3, 8(a0)
+; RV64I-NEXT: ld a4, 24(a1)
+; RV64I-NEXT: ld a5, 24(a0)
+; RV64I-NEXT: ld a6, 0(a1)
+; RV64I-NEXT: ld a7, 8(a1)
; RV64I-NEXT: ld a1, 16(a1)
; RV64I-NEXT: ld a0, 16(a0)
-; RV64I-NEXT: xor a5, a6, a5
-; RV64I-NEXT: xor a4, a7, a4
-; RV64I-NEXT: or a4, a4, a5
+; RV64I-NEXT: xor a4, a5, a4
+; RV64I-NEXT: xor a3, a3, a7
+; RV64I-NEXT: or a3, a3, a4
; RV64I-NEXT: xor a0, a0, a1
-; RV64I-NEXT: xor a2, a3, a2
-; RV64I-NEXT: or a0, a2, a0
-; RV64I-NEXT: or a0, a0, a4
+; RV64I-NEXT: xor a1, a2, a6
+; RV64I-NEXT: or a0, a1, a0
+; RV64I-NEXT: or a0, a0, a3
; RV64I-NEXT: seqz a0, a0
; RV64I-NEXT: ret
%1 = icmp eq i256 %a, %b
@@ -161,21 +161,21 @@ define i64 @callee_large_scalars_exhausted_regs(i64 %a, i64 %b, i64 %c, i64 %d,
; RV64I-LABEL: callee_large_scalars_exhausted_regs:
; RV64I: # %bb.0:
; RV64I-NEXT: ld a0, 8(sp)
-; RV64I-NEXT: ld a1, 0(a0)
-; RV64I-NEXT: ld a2, 0(a7)
-; RV64I-NEXT: ld a3, 8(a0)
-; RV64I-NEXT: ld a4, 24(a0)
-; RV64I-NEXT: ld a5, 24(a7)
-; RV64I-NEXT: ld a6, 8(a7)
+; RV64I-NEXT: ld a1, 0(a7)
+; RV64I-NEXT: ld a2, 8(a7)
+; RV64I-NEXT: ld a3, 24(a0)
+; RV64I-NEXT: ld a4, 24(a7)
+; RV64I-NEXT: ld a5, 0(a0)
+; RV64I-NEXT: ld a6, 8(a0)
; RV64I-NEXT: ld a0, 16(a0)
; RV64I-NEXT: ld a7, 16(a7)
-; RV64I-NEXT: xor a4, a5, a4
-; RV64I-NEXT: xor a3, a6, a3
-; RV64I-NEXT: or a3, a3, a4
+; RV64I-NEXT: xor a3, a4, a3
+; RV64I-NEXT: xor a2, a2, a6
+; RV64I-NEXT: or a2, a2, a3
; RV64I-NEXT: xor a0, a7, a0
-; RV64I-NEXT: xor a1, a2, a1
+; RV64I-NEXT: xor a1, a1, a5
; RV64I-NEXT: or a0, a1, a0
-; RV64I-NEXT: or a0, a0, a3
+; RV64I-NEXT: or a0, a0, a2
; RV64I-NEXT: seqz a0, a0
; RV64I-NEXT: ret
%1 = icmp eq i256 %h, %j
diff --git a/llvm/test/CodeGen/RISCV/forced-atomics.ll b/llvm/test/CodeGen/RISCV/forced-atomics.ll
index 35900f8a0717aa..603491bf3d3003 100644
--- a/llvm/test/CodeGen/RISCV/forced-atomics.ll
+++ b/llvm/test/CodeGen/RISCV/forced-atomics.ll
@@ -3348,8 +3348,8 @@ define i64 @rmw64_max_seq_cst(ptr %p) nounwind {
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32-NEXT: mv s0, a0
-; RV32-NEXT: lw a1, 4(a0)
; RV32-NEXT: lw a4, 0(a0)
+; RV32-NEXT: lw a1, 4(a0)
; RV32-NEXT: j .LBB49_2
; RV32-NEXT: .LBB49_1: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB49_2 Depth=1
@@ -3362,8 +3362,8 @@ define i64 @rmw64_max_seq_cst(ptr %p) nounwind {
; RV32-NEXT: li a5, 5
; RV32-NEXT: mv a0, s0
; RV32-NEXT: call __atomic_compare_exchange_8
-; RV32-NEXT: lw a1, 4(sp)
; RV32-NEXT: lw a4, 0(sp)
+; RV32-NEXT: lw a1, 4(sp)
; RV32-NEXT: bnez a0, .LBB49_6
; RV32-NEXT: .LBB49_2: # %atomicrmw.start
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
@@ -3453,8 +3453,8 @@ define i64 @rmw64_min_seq_cst(ptr %p) nounwind {
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32-NEXT: mv s0, a0
-; RV32-NEXT: lw a1, 4(a0)
; RV32-NEXT: lw a4, 0(a0)
+; RV32-NEXT: lw a1, 4(a0)
; RV32-NEXT: j .LBB50_2
; RV32-NEXT: .LBB50_1: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB50_2 Depth=1
@@ -3467,8 +3467,8 @@ define i64 @rmw64_min_seq_cst(ptr %p) nounwind {
; RV32-NEXT: li a5, 5
; RV32-NEXT: mv a0, s0
; RV32-NEXT: call __atomic_compare_exchange_8
-; RV32-NEXT: lw a1, 4(sp)
; RV32-NEXT: lw a4, 0(sp)
+; RV32-NEXT: lw a1, 4(sp)
; RV32-NEXT: bnez a0, .LBB50_6
; RV32-NEXT: .LBB50_2: # %atomicrmw.start
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
@@ -3560,8 +3560,8 @@ define i64 @rmw64_umax_seq_cst(ptr %p) nounwind {
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32-NEXT: mv s0, a0
-; RV32-NEXT: lw a1, 4(a0)
; RV32-NEXT: lw a4, 0(a0)
+; RV32-NEXT: lw a1, 4(a0)
; RV32-NEXT: j .LBB51_2
; RV32-NEXT: .LBB51_1: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB51_2 Depth=1
@@ -3574,8 +3574,8 @@ define i64 @rmw64_umax_seq_cst(ptr %p) nounwind {
; RV32-NEXT: li a5, 5
; RV32-NEXT: mv a0, s0
; RV32-NEXT: call __atomic_compare_exchange_8
-; RV32-NEXT: lw a1, 4(sp)
; RV32-NEXT: lw a4, 0(sp)
+; RV32-NEXT: lw a1, 4(sp)
; RV32-NEXT: bnez a0, .LBB51_4
; RV32-NEXT: .LBB51_2: # %atomicrmw.start
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
@@ -3652,8 +3652,8 @@ define i64 @rmw64_umin_seq_cst(ptr %p) nounwind {
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32-NEXT: mv s0, a0
-; RV32-NEXT: lw a1, 4(a0)
; RV32-NEXT: lw a4, 0(a0)
+; RV32-NEXT: lw a1, 4(a0)
; RV32-NEXT: j .LBB52_2
; RV32-NEXT: .LBB52_1: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB52_2 Depth=1
@@ -3666,8 +3666,8 @@ define i64 @rmw64_umin_seq_cst(ptr %p) nounwind {
; RV32-NEXT: li a5, 5
; RV32-NEXT: mv a0, s0
; RV32-NEXT: call __atomic_compare_exchange_8
-; RV32-NEXT: lw a1, 4(sp)
; RV32-NEXT: lw a4, 0(sp)
+; RV32-NEXT: lw a1, 4(sp)
; RV32-NEXT: bnez a0, .LBB52_4
; RV32-NEXT: .LBB52_2: # %atomicrmw.start
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
@@ -3802,30 +3802,30 @@ define double @rmw64_fadd_seq_cst(ptr %p) nounwind {
; RV32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32-NEXT: mv s0, a0
-; RV32-NEXT: lw s1, 4(a0)
-; RV32-NEXT: lw s2, 0(a0)
+; RV32-NEXT: lw s1, 0(a0)
+; RV32-NEXT: lw s2, 4(a0)
; RV32-NEXT: .LBB54_1: # %atomicrmw.start
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
; RV32-NEXT: lui a3, 261888
-; RV32-NEXT: mv a0, s2
-; RV32-NEXT: mv a1, s1
+; RV32-NEXT: mv a0, s1
+; RV32-NEXT: mv a1, s2
; RV32-NEXT: li a2, 0
; RV32-NEXT: call __adddf3
; RV32-NEXT: mv a2, a0
; RV32-NEXT: mv a3, a1
-; RV32-NEXT: sw s2, 8(sp)
-; RV32-NEXT: sw s1, 12(sp)
+; RV32-NEXT: sw s1, 8(sp)
+; RV32-NEXT: sw s2, 12(sp)
; RV32-NEXT: addi a1, sp, 8
; RV32-NEXT: li a4, 5
; RV32-NEXT: li a5, 5
; RV32-NEXT: mv a0, s0
; RV32-NEXT: call __atomic_compare_exchange_8
-; RV32-NEXT: lw s1, 12(sp)
-; RV32-NEXT: lw s2, 8(sp)
+; RV32-NEXT: lw s1, 8(sp)
+; RV32-NEXT: lw s2, 12(sp)
; RV32-NEXT: beqz a0, .LBB54_1
; RV32-NEXT: # %bb.2: # %atomicrmw.end
-; RV32-NEXT: mv a0, s2
-; RV32-NEXT: mv a1, s1
+; RV32-NEXT: mv a0, s1
+; RV32-NEXT: mv a1, s2
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -3937,30 +3937,30 @@ define double @rmw64_fsub_seq_cst(ptr %p) nounwind {
; RV32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32-NEXT: mv s0, a0
-; RV32-NEXT: lw s1, 4(a0)
-; RV32-NEXT: lw s2, 0(a0)
+; RV32-NEXT: lw s1, 0(a0)
+; RV32-NEXT: lw s2, 4(a0)
; RV32-NEXT: .LBB55_1: # %atomicrmw.start
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
; RV32-NEXT: lui a3, 786176
-; RV32-NEXT: mv a0, s2
-; RV32-NEXT: mv a1, s1
+; RV32-NEXT: mv a0, s1
+; RV32-NEXT: mv a1, s2
; RV32-NEXT: li a2, 0
; RV32-NEXT: call __adddf3
; RV32-NEXT: mv a2, a0
; RV32-NEXT: mv a3, a1
-; RV32-NEXT: sw s2, 8(sp)
-; RV32-NEXT: sw s1, 12(sp)
+; RV32-NEXT: sw s1, 8(sp)
+; RV32-NEXT: sw s2, 12(sp)
; RV32-NEXT: addi a1, sp, 8
; RV32-NEXT: li a4, 5
; RV32-NEXT: li a5, 5
; RV32-NEXT: mv a0, s0
; RV32-NEXT: call __atomic_compare_exchange_8
-; RV32-NEXT: lw s1, 12(sp)
-; RV32-NEXT: lw s2, 8(sp)
+; RV32-NEXT: lw s1, 8(sp)
+; RV32-NEXT: lw s2, 12(sp)
; RV32-NEXT: beqz a0, .LBB55_1
; RV32-NEXT: # %bb.2: # %atomicrmw.end
-; RV32-NEXT: mv a0, s2
-; RV32-NEXT: mv a1, s1
+; RV32-NEXT: mv a0, s1
+; RV32-NEXT: mv a1, s2
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -4072,30 +4072,30 @@ define double @rmw64_fmin_seq_cst(ptr %p) nounwind {
; RV32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32-NEXT: mv s0, a0
-; RV32-NEXT: lw s1, 4(a0)
-; RV32-NEXT: lw s2, 0(a0)
+; RV32-NEXT: lw s1, 0(a0)
+; RV32-NEXT: lw s2, 4(a0)
; RV32-NEXT: .LBB56_1: # %atomicrmw.start
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
; RV32-NEXT: lui a3, 261888
-; RV32-NEXT: mv a0, s2
-; RV32-NEXT: mv a1, s1
+; RV32-NEXT: mv a0, s1
+; RV32-NEXT: mv a1, s2
; RV32-NEXT: li a2, 0
; RV32-NEXT: call fmin
; RV32-NEXT: mv a2, a0
; RV32-NEXT: mv a3, a1
-; RV32-NEXT: sw s2, 8(sp)
-; RV32-NEXT: sw s1, 12(sp)
+; RV32-NEXT: sw s1, 8(sp)
+; RV32-NEXT: sw s2, 12(sp)
; RV32-NEXT: addi a1, sp, 8
; RV32-NEXT: li a4, 5
; RV32-NEXT: li a5, 5
; RV32-NEXT: mv a0, s0
; RV32-NEXT: call __atomic_compare_exchange_8
-; RV32-NEXT: lw s1, 12(sp)
-; RV32-NEXT: lw s2, 8(sp)
+; RV32-NEXT: lw s1, 8(sp)
+; RV32-NEXT: lw s2, 12(sp)
; RV32-NEXT: beqz a0, .LBB56_1
; RV32-NEXT: # %bb.2: # %atomicrmw.end
-; RV32-NEXT: mv a0, s2
-; RV32-NEXT: mv a1, s1
+; RV32-NEXT: mv a0, s1
+; RV32-NEXT: mv a1, s2
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -4207,30 +4207,30 @@ define double @rmw64_fmax_seq_cst(ptr %p) nounwind {
; RV32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32-NEXT: mv s0, a0
-; RV32-NEXT: lw s1, 4(a0)
-; RV32-NEXT: lw s2, 0(a0)
+; RV32-NEXT: lw s1, 0(a0)
+; RV32-NEXT: lw s2, 4(a0)
; RV32-NEXT: .LBB57_1: # %atomicrmw.start
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
; RV32-NEXT: lui a3, 261888
-; RV32-NEXT: mv a0, s2
-; RV32-NEXT: mv a1, s1
+; RV32-NEXT: mv a0, s1
+; RV32-NEXT: mv a1, s2
; RV32-NEXT: li a2, 0
; RV32-NEXT: call fmax
; RV32-NEXT: mv a2, a0
; RV32-NEXT: mv a3, a1
-; RV32-NEXT: sw s2, 8(sp)
-; RV32-NEXT: sw s1, 12(sp)
+; RV32-NEXT: sw s1, 8(sp)
+; RV32-NEXT: sw s2, 12(sp)
; RV32-NEXT: addi a1, sp, 8
; RV32-NEXT: li a4, 5
; RV32-NEXT: li a5, 5
; RV32-NEXT: mv a0, s0
; RV32-NEXT: call __atomic_compare_exchange_8
-; RV32-NEXT: lw s1, 12(sp)
-; RV32-NEXT: lw s2, 8(sp)
+; RV32-NEXT: lw s1, 8(sp)
+; RV32-NEXT: lw s2, 12(sp)
; RV32-NEXT: beqz a0, .LBB57_1
; RV32-NEXT: # %bb.2: # %atomicrmw.end
-; RV32-NEXT: mv a0, s2
-; RV32-NEXT: mv a1, s1
+; RV32-NEXT: mv a0, s1
+; RV32-NEXT: mv a1, s2
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -4346,8 +4346,8 @@ define i64 @cmpxchg64_monotonic(ptr %p) nounwind {
; RV32-NEXT: li a4, 0
; RV32-NEXT: li a5, 0
; RV32-NEXT: call __atomic_compare_exchange_8
-; RV32-NEXT: lw a1, 4(sp)
; RV32-NEXT: lw a0, 0(sp)
+; RV32-NEXT: lw a1, 4(sp)
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
@@ -4406,8 +4406,8 @@ define i64 @cmpxchg64_seq_cst(ptr %p) nounwind {
; RV32-NEXT: li a5, 5
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __atomic_compare_exchange_8
-; RV32-NEXT: lw a1, 4(sp)
; RV32-NEXT: lw a0, 0(sp)
+; RV32-NEXT: lw a1, 4(sp)
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
@@ -4531,25 +4531,25 @@ define i128 @rmw128(ptr %p) nounwind {
; RV32-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
; RV32-NEXT: mv s0, a1
-; RV32-NEXT: lw a1, 12(a1)
-; RV32-NEXT: lw a2, 8(s0)
-; RV32-NEXT: lw a3, 4(s0)
-; RV32-NEXT: lw a4, 0(s0)
+; RV32-NEXT: lw a1, 0(a1)
+; RV32-NEXT: lw a2, 4(s0)
+; RV32-NEXT: lw a3, 8(s0)
+; RV32-NEXT: lw a4, 12(s0)
; RV32-NEXT: mv s1, a0
; RV32-NEXT: .LBB62_1: # %atomicrmw.start
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32-NEXT: addi a0, a4, 1
+; RV32-NEXT: addi a0, a1, 1
; RV32-NEXT: seqz a5, a0
-; RV32-NEXT: add a5, a3, a5
+; RV32-NEXT: add a5, a2, a5
; RV32-NEXT: or a6, a0, a5
; RV32-NEXT: seqz a6, a6
-; RV32-NEXT: add a6, a2, a6
-; RV32-NEXT: sltu a7, a6, a2
-; RV32-NEXT: add a7, a1, a7
-; RV32-NEXT: sw a4, 16(sp)
-; RV32-NEXT: sw a3, 20(sp)
-; RV32-NEXT: sw a2, 24(sp)
-; RV32-NEXT: sw a1, 28(sp)
+; RV32-NEXT: add a6, a3, a6
+; RV32-NEXT: sltu a7, a6, a3
+; RV32-NEXT: add a7, a4, a7
+; RV32-NEXT: sw a1, 16(sp)
+; RV32-NEXT: sw a2, 20(sp)
+; RV32-NEXT: sw a3, 24(sp)
+; RV32-NEXT: sw a4, 28(sp)
; RV32-NEXT: sw a5, 4(sp)
; RV32-NEXT: sw a0, 0(sp)
; RV32-NEXT: sw a6, 8(sp)
@@ -4561,16 +4561,16 @@ define i128 @rmw128(ptr %p) nounwind {
; RV32-NEXT: li a5, 5
; RV32-NEXT: mv a1, s0
; RV32-NEXT: call __atomic_compare_exchange
-; RV32-NEXT: lw a1, 28(sp)
-; RV32-NEXT: lw a2, 24(sp)
-; RV32-NEXT: lw a3, 20(sp)
-; RV32-NEXT: lw a4, 16(sp)
+; RV32-NEXT: lw a1, 16(sp)
+; RV32-NEXT: lw a2, 20(sp)
+; RV32-NEXT: lw a3, 24(sp)
+; RV32-NEXT: lw a4, 28(sp)
; RV32-NEXT: beqz a0, .LBB62_1
; RV32-NEXT: # %bb.2: # %atomicrmw.end
-; RV32-NEXT: sw a4, 0(s1)
-; RV32-NEXT: sw a3, 4(s1)
-; RV32-NEXT: sw a2, 8(s1)
-; RV32-NEXT: sw a1, 12(s1)
+; RV32-NEXT: sw a1, 0(s1)
+; RV32-NEXT: sw a2, 4(s1)
+; RV32-NEXT: sw a3, 8(s1)
+; RV32-NEXT: sw a4, 12(s1)
; RV32-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
@@ -4639,8 +4639,8 @@ define i128 @cmpxchg128(ptr %p) nounwind {
; RV64-NEXT: li a5, 5
; RV64-NEXT: li a3, 0
; RV64-NEXT: call __atomic_compare_exchange_16
-; RV64-NEXT: ld a1, 8(sp)
; RV64-NEXT: ld a0, 0(sp)
+; RV64-NEXT: ld a1, 8(sp)
; RV64-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 32
; RV64-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat.ll b/llvm/test/CodeGen/RISCV/fpclamptosat.ll
index deb5a6d4013d49..c44f4942e9e699 100644
--- a/llvm/test/CodeGen/RISCV/fpclamptosat.ll
+++ b/llvm/test/CodeGen/RISCV/fpclamptosat.ll
@@ -1043,24 +1043,24 @@ define i64 @stest_f64i64(double %x) {
; RV32IF-NEXT: mv a1, a0
; RV32IF-NEXT: addi a0, sp, 8
; RV32IF-NEXT: call __fixdfti
-; RV32IF-NEXT: lw a0, 16(sp)
-; RV32IF-NEXT: lw a2, 20(sp)
+; RV32IF-NEXT: lw a3, 8(sp)
; RV32IF-NEXT: lw a1, 12(sp)
-; RV32IF-NEXT: lw a4, 8(sp)
-; RV32IF-NEXT: lui a3, 524288
-; RV32IF-NEXT: addi a5, a3, -1
+; RV32IF-NEXT: lw a2, 16(sp)
+; RV32IF-NEXT: lw a4, 20(sp)
+; RV32IF-NEXT: lui a0, 524288
+; RV32IF-NEXT: addi a5, a0, -1
; RV32IF-NEXT: beq a1, a5, .LBB18_2
; RV32IF-NEXT: # %bb.1: # %entry
; RV32IF-NEXT: sltu a6, a1, a5
-; RV32IF-NEXT: or a7, a0, a2
+; RV32IF-NEXT: or a7, a2, a4
; RV32IF-NEXT: bnez a7, .LBB18_3
; RV32IF-NEXT: j .LBB18_4
; RV32IF-NEXT: .LBB18_2:
-; RV32IF-NEXT: sltiu a6, a4, -1
-; RV32IF-NEXT: or a7, a0, a2
+; RV32IF-NEXT: sltiu a6, a3, -1
+; RV32IF-NEXT: or a7, a2, a4
; RV32IF-NEXT: beqz a7, .LBB18_4
; RV32IF-NEXT: .LBB18_3: # %entry
-; RV32IF-NEXT: slti a6, a2, 0
+; RV32IF-NEXT: slti a6, a4, 0
; RV32IF-NEXT: .LBB18_4: # %entry
; RV32IF-NEXT: addi a7, a6, -1
; RV32IF-NEXT: neg t0, a6
@@ -1068,21 +1068,21 @@ define i64 @stest_f64i64(double %x) {
; RV32IF-NEXT: # %bb.5: # %entry
; RV32IF-NEXT: mv a1, a5
; RV32IF-NEXT: .LBB18_6: # %entry
-; RV32IF-NEXT: or a4, a7, a4
+; RV32IF-NEXT: or a3, a7, a3
+; RV32IF-NEXT: and a4, t0, a4
; RV32IF-NEXT: and a2, t0, a2
-; RV32IF-NEXT: and a5, t0, a0
-; RV32IF-NEXT: beq a1, a3, .LBB18_8
+; RV32IF-NEXT: beq a1, a0, .LBB18_8
; RV32IF-NEXT: # %bb.7: # %entry
-; RV32IF-NEXT: sltu a0, a3, a1
+; RV32IF-NEXT: sltu a0, a0, a1
; RV32IF-NEXT: j .LBB18_9
; RV32IF-NEXT: .LBB18_8:
-; RV32IF-NEXT: snez a0, a4
+; RV32IF-NEXT: snez a0, a3
; RV32IF-NEXT: .LBB18_9: # %entry
-; RV32IF-NEXT: and a5, a5, a2
-; RV32IF-NEXT: li a3, -1
-; RV32IF-NEXT: beq a5, a3, .LBB18_11
+; RV32IF-NEXT: and a2, a2, a4
+; RV32IF-NEXT: li a5, -1
+; RV32IF-NEXT: beq a2, a5, .LBB18_11
; RV32IF-NEXT: # %bb.10: # %entry
-; RV32IF-NEXT: slti a0, a2, 0
+; RV32IF-NEXT: slti a0, a4, 0
; RV32IF-NEXT: xori a0, a0, 1
; RV32IF-NEXT: .LBB18_11: # %entry
; RV32IF-NEXT: bnez a0, .LBB18_13
@@ -1090,7 +1090,7 @@ define i64 @stest_f64i64(double %x) {
; RV32IF-NEXT: lui a1, 524288
; RV32IF-NEXT: .LBB18_13: # %entry
; RV32IF-NEXT: neg a0, a0
-; RV32IF-NEXT: and a0, a0, a4
+; RV32IF-NEXT: and a0, a0, a3
; RV32IF-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IF-NEXT: addi sp, sp, 32
; RV32IF-NEXT: ret
@@ -1142,24 +1142,24 @@ define i64 @stest_f64i64(double %x) {
; RV32IFD-NEXT: .cfi_offset ra, -4
; RV32IFD-NEXT: addi a0, sp, 8
; RV32IFD-NEXT: call __fixdfti
-; RV32IFD-NEXT: lw a0, 16(sp)
-; RV32IFD-NEXT: lw a2, 20(sp)
+; RV32IFD-NEXT: lw a3, 8(sp)
; RV32IFD-NEXT: lw a1, 12(sp)
-; RV32IFD-NEXT: lw a4, 8(sp)
-; RV32IFD-NEXT: lui a3, 524288
-; RV32IFD-NEXT: addi a5, a3, -1
+; RV32IFD-NEXT: lw a2, 16(sp)
+; RV32IFD-NEXT: lw a4, 20(sp)
+; RV32IFD-NEXT: lui a0, 524288
+; RV32IFD-NEXT: addi a5, a0, -1
; RV32IFD-NEXT: beq a1, a5, .LBB18_2
; RV32IFD-NEXT: # %bb.1: # %entry
; RV32IFD-NEXT: sltu a6, a1, a5
-; RV32IFD-NEXT: or a7, a0, a2
+; RV32IFD-NEXT: or a7, a2, a4
; RV32IFD-NEXT: bnez a7, .LBB18_3
; RV32IFD-NEXT: j .LBB18_4
; RV32IFD-NEXT: .LBB18_2:
-; RV32IFD-NEXT: sltiu a6, a4, -1
-; RV32IFD-NEXT: or a7, a0, a2
+; RV32IFD-NEXT: sltiu a6, a3, -1
+; RV32IFD-NEXT: or a7, a2, a4
; RV32IFD-NEXT: beqz a7, .LBB18_4
; RV32IFD-NEXT: .LBB18_3: # %entry
-; RV32IFD-NEXT: slti a6, a2, 0
+; RV32IFD-NEXT: slti a6, a4, 0
; RV32IFD-NEXT: .LBB18_4: # %entry
; RV32IFD-NEXT: addi a7, a6, -1
; RV32IFD-NEXT: neg t0, a6
@@ -1167,21 +1167,21 @@ define i64 @stest_f64i64(double %x) {
; RV32IFD-NEXT: # %bb.5: # %entry
; RV32IFD-NEXT: mv a1, a5
; RV32IFD-NEXT: .LBB18_6: # %entry
-; RV32IFD-NEXT: or a4, a7, a4
+; RV32IFD-NEXT: or a3, a7, a3
+; RV32IFD-NEXT: and a4, t0, a4
; RV32IFD-NEXT: and a2, t0, a2
-; RV32IFD-NEXT: and a5, t0, a0
-; RV32IFD-NEXT: beq a1, a3, .LBB18_8
+; RV32IFD-NEXT: beq a1, a0, .LBB18_8
; RV32IFD-NEXT: # %bb.7: # %entry
-; RV32IFD-NEXT: sltu a0, a3, a1
+; RV32IFD-NEXT: sltu a0, a0, a1
; RV32IFD-NEXT: j .LBB18_9
; RV32IFD-NEXT: .LBB18_8:
-; RV32IFD-NEXT: snez a0, a4
+; RV32IFD-NEXT: snez a0, a3
; RV32IFD-NEXT: .LBB18_9: # %entry
-; RV32IFD-NEXT: and a5, a5, a2
-; RV32IFD-NEXT: li a3, -1
-; RV32IFD-NEXT: beq a5, a3, .LBB18_11
+; RV32IFD-NEXT: and a2, a2, a4
+; RV32IFD-NEXT: li a5, -1
+; RV32IFD-NEXT: beq a2, a5, .LBB18_11
; RV32IFD-NEXT: # %bb.10: # %entry
-; RV32IFD-NEXT: slti a0, a2, 0
+; RV32IFD-NEXT: slti a0, a4, 0
; RV32IFD-NEXT: xori a0, a0, 1
; RV32IFD-NEXT: .LBB18_11: # %entry
; RV32IFD-NEXT: bnez a0, .LBB18_13
@@ -1189,7 +1189,7 @@ define i64 @stest_f64i64(double %x) {
; RV32IFD-NEXT: lui a1, 524288
; RV32IFD-NEXT: .LBB18_13: # %entry
; RV32IFD-NEXT: neg a0, a0
-; RV32IFD-NEXT: and a0, a0, a4
+; RV32IFD-NEXT: and a0, a0, a3
; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: addi sp, sp, 32
; RV32IFD-NEXT: ret
@@ -1440,24 +1440,24 @@ define i64 @stest_f32i64(float %x) {
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: call __fixsfti
-; RV32-NEXT: lw a0, 16(sp)
-; RV32-NEXT: lw a2, 20(sp)
+; RV32-NEXT: lw a3, 8(sp)
; RV32-NEXT: lw a1, 12(sp)
-; RV32-NEXT: lw a4, 8(sp)
-; RV32-NEXT: lui a3, 524288
-; RV32-NEXT: addi a5, a3, -1
+; RV32-NEXT: lw a2, 16(sp)
+; RV32-NEXT: lw a4, 20(sp)
+; RV32-NEXT: lui a0, 524288
+; RV32-NEXT: addi a5, a0, -1
; RV32-NEXT: beq a1, a5, .LBB21_2
; RV32-NEXT: # %bb.1: # %entry
; RV32-NEXT: sltu a6, a1, a5
-; RV32-NEXT: or a7, a0, a2
+; RV32-NEXT: or a7, a2, a4
; RV32-NEXT: bnez a7, .LBB21_3
; RV32-NEXT: j .LBB21_4
; RV32-NEXT: .LBB21_2:
-; RV32-NEXT: sltiu a6, a4, -1
-; RV32-NEXT: or a7, a0, a2
+; RV32-NEXT: sltiu a6, a3, -1
+; RV32-NEXT: or a7, a2, a4
; RV32-NEXT: beqz a7, .LBB21_4
; RV32-NEXT: .LBB21_3: # %entry
-; RV32-NEXT: slti a6, a2, 0
+; RV32-NEXT: slti a6, a4, 0
; RV32-NEXT: .LBB21_4: # %entry
; RV32-NEXT: addi a7, a6, -1
; RV32-NEXT: neg t0, a6
@@ -1465,21 +1465,21 @@ define i64 @stest_f32i64(float %x) {
; RV32-NEXT: # %bb.5: # %entry
; RV32-NEXT: mv a1, a5
; RV32-NEXT: .LBB21_6: # %entry
-; RV32-NEXT: or a4, a7, a4
+; RV32-NEXT: or a3, a7, a3
+; RV32-NEXT: and a4, t0, a4
; RV32-NEXT: and a2, t0, a2
-; RV32-NEXT: and a5, t0, a0
-; RV32-NEXT: beq a1, a3, .LBB21_8
+; RV32-NEXT: beq a1, a0, .LBB21_8
; RV32-NEXT: # %bb.7: # %entry
-; RV32-NEXT: sltu a0, a3, a1
+; RV32-NEXT: sltu a0, a0, a1
; RV32-NEXT: j .LBB21_9
; RV32-NEXT: .LBB21_8:
-; RV32-NEXT: snez a0, a4
+; RV32-NEXT: snez a0, a3
; RV32-NEXT: .LBB21_9: # %entry
-; RV32-NEXT: and a5, a5, a2
-; RV32-NEXT: li a3, -1
-; RV32-NEXT: beq a5, a3, .LBB21_11
+; RV32-NEXT: and a2, a2, a4
+; RV32-NEXT: li a5, -1
+; RV32-NEXT: beq a2, a5, .LBB21_11
; RV32-NEXT: # %bb.10: # %entry
-; RV32-NEXT: slti a0, a2, 0
+; RV32-NEXT: slti a0, a4, 0
; RV32-NEXT: xori a0, a0, 1
; RV32-NEXT: .LBB21_11: # %entry
; RV32-NEXT: bnez a0, .LBB21_13
@@ -1487,7 +1487,7 @@ define i64 @stest_f32i64(float %x) {
; RV32-NEXT: lui a1, 524288
; RV32-NEXT: .LBB21_13: # %entry
; RV32-NEXT: neg a0, a0
-; RV32-NEXT: and a0, a0, a4
+; RV32-NEXT: and a0, a0, a3
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
@@ -1657,24 +1657,24 @@ define i64 @stest_f16i64(half %x) {
; RV32-NEXT: call __extendhfsf2
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: call __fixsfti
-; RV32-NEXT: lw a0, 16(sp)
-; RV32-NEXT: lw a2, 20(sp)
+; RV32-NEXT: lw a3, 8(sp)
; RV32-NEXT: lw a1, 12(sp)
-; RV32-NEXT: lw a4, 8(sp)
-; RV32-NEXT: lui a3, 524288
-; RV32-NEXT: addi a5, a3, -1
+; RV32-NEXT: lw a2, 16(sp)
+; RV32-NEXT: lw a4, 20(sp)
+; RV32-NEXT: lui a0, 524288
+; RV32-NEXT: addi a5, a0, -1
; RV32-NEXT: beq a1, a5, .LBB24_2
; RV32-NEXT: # %bb.1: # %entry
; RV32-NEXT: sltu a6, a1, a5
-; RV32-NEXT: or a7, a0, a2
+; RV32-NEXT: or a7, a2, a4
; RV32-NEXT: bnez a7, .LBB24_3
; RV32-NEXT: j .LBB24_4
; RV32-NEXT: .LBB24_2:
-; RV32-NEXT: sltiu a6, a4, -1
-; RV32-NEXT: or a7, a0, a2
+; RV32-NEXT: sltiu a6, a3, -1
+; RV32-NEXT: or a7, a2, a4
; RV32-NEXT: beqz a7, .LBB24_4
; RV32-NEXT: .LBB24_3: # %entry
-; RV32-NEXT: slti a6, a2, 0
+; RV32-NEXT: slti a6, a4, 0
; RV32-NEXT: .LBB24_4: # %entry
; RV32-NEXT: addi a7, a6, -1
; RV32-NEXT: neg t0, a6
@@ -1682,21 +1682,21 @@ define i64 @stest_f16i64(half %x) {
; RV32-NEXT: # %bb.5: # %entry
; RV32-NEXT: mv a1, a5
; RV32-NEXT: .LBB24_6: # %entry
-; RV32-NEXT: or a4, a7, a4
+; RV32-NEXT: or a3, a7, a3
+; RV32-NEXT: and a4, t0, a4
; RV32-NEXT: and a2, t0, a2
-; RV32-NEXT: and a5, t0, a0
-; RV32-NEXT: beq a1, a3, .LBB24_8
+; RV32-NEXT: beq a1, a0, .LBB24_8
; RV32-NEXT: # %bb.7: # %entry
-; RV32-NEXT: sltu a0, a3, a1
+; RV32-NEXT: sltu a0, a0, a1
; RV32-NEXT: j .LBB24_9
; RV32-NEXT: .LBB24_8:
-; RV32-NEXT: snez a0, a4
+; RV32-NEXT: snez a0, a3
; RV32-NEXT: .LBB24_9: # %entry
-; RV32-NEXT: and a5, a5, a2
-; RV32-NEXT: li a3, -1
-; RV32-NEXT: beq a5, a3, .LBB24_11
+; RV32-NEXT: and a2, a2, a4
+; RV32-NEXT: li a5, -1
+; RV32-NEXT: beq a2, a5, .LBB24_11
; RV32-NEXT: # %bb.10: # %entry
-; RV32-NEXT: slti a0, a2, 0
+; RV32-NEXT: slti a0, a4, 0
; RV32-NEXT: xori a0, a0, 1
; RV32-NEXT: .LBB24_11: # %entry
; RV32-NEXT: bnez a0, .LBB24_13
@@ -1704,7 +1704,7 @@ define i64 @stest_f16i64(half %x) {
; RV32-NEXT: lui a1, 524288
; RV32-NEXT: .LBB24_13: # %entry
; RV32-NEXT: neg a0, a0
-; RV32-NEXT: and a0, a0, a4
+; RV32-NEXT: and a0, a0, a3
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
@@ -2891,24 +2891,24 @@ define i64 @stest_f64i64_mm(double %x) {
; RV32IF-NEXT: mv a1, a0
; RV32IF-NEXT: addi a0, sp, 8
; RV32IF-NEXT: call __fixdfti
-; RV32IF-NEXT: lw a0, 16(sp)
-; RV32IF-NEXT: lw a2, 20(sp)
+; RV32IF-NEXT: lw a3, 8(sp)
; RV32IF-NEXT: lw a1, 12(sp)
-; RV32IF-NEXT: lw a4, 8(sp)
-; RV32IF-NEXT: lui a3, 524288
-; RV32IF-NEXT: addi a5, a3, -1
+; RV32IF-NEXT: lw a2, 16(sp)
+; RV32IF-NEXT: lw a4, 20(sp)
+; RV32IF-NEXT: lui a0, 524288
+; RV32IF-NEXT: addi a5, a0, -1
; RV32IF-NEXT: beq a1, a5, .LBB45_2
; RV32IF-NEXT: # %bb.1: # %entry
; RV32IF-NEXT: sltu a6, a1, a5
-; RV32IF-NEXT: or a7, a0, a2
+; RV32IF-NEXT: or a7, a2, a4
; RV32IF-NEXT: bnez a7, .LBB45_3
; RV32IF-NEXT: j .LBB45_4
; RV32IF-NEXT: .LBB45_2:
-; RV32IF-NEXT: sltiu a6, a4, -1
-; RV32IF-NEXT: or a7, a0, a2
+; RV32IF-NEXT: sltiu a6, a3, -1
+; RV32IF-NEXT: or a7, a2, a4
; RV32IF-NEXT: beqz a7, .LBB45_4
; RV32IF-NEXT: .LBB45_3: # %entry
-; RV32IF-NEXT: slti a6, a2, 0
+; RV32IF-NEXT: slti a6, a4, 0
; RV32IF-NEXT: .LBB45_4: # %entry
; RV32IF-NEXT: addi a7, a6, -1
; RV32IF-NEXT: neg t0, a6
@@ -2916,21 +2916,21 @@ define i64 @stest_f64i64_mm(double %x) {
; RV32IF-NEXT: # %bb.5: # %entry
; RV32IF-NEXT: mv a1, a5
; RV32IF-NEXT: .LBB45_6: # %entry
-; RV32IF-NEXT: or a4, a7, a4
+; RV32IF-NEXT: or a3, a7, a3
+; RV32IF-NEXT: and a4, t0, a4
; RV32IF-NEXT: and a2, t0, a2
-; RV32IF-NEXT: and a5, t0, a0
-; RV32IF-NEXT: beq a1, a3, .LBB45_8
+; RV32IF-NEXT: beq a1, a0, .LBB45_8
; RV32IF-NEXT: # %bb.7: # %entry
-; RV32IF-NEXT: sltu a0, a3, a1
+; RV32IF-NEXT: sltu a0, a0, a1
; RV32IF-NEXT: j .LBB45_9
; RV32IF-NEXT: .LBB45_8:
-; RV32IF-NEXT: snez a0, a4
+; RV32IF-NEXT: snez a0, a3
; RV32IF-NEXT: .LBB45_9: # %entry
-; RV32IF-NEXT: and a5, a5, a2
-; RV32IF-NEXT: li a3, -1
-; RV32IF-NEXT: beq a5, a3, .LBB45_11
+; RV32IF-NEXT: and a2, a2, a4
+; RV32IF-NEXT: li a5, -1
+; RV32IF-NEXT: beq a2, a5, .LBB45_11
; RV32IF-NEXT: # %bb.10: # %entry
-; RV32IF-NEXT: slti a0, a2, 0
+; RV32IF-NEXT: slti a0, a4, 0
; RV32IF-NEXT: xori a0, a0, 1
; RV32IF-NEXT: .LBB45_11: # %entry
; RV32IF-NEXT: bnez a0, .LBB45_13
@@ -2938,7 +2938,7 @@ define i64 @stest_f64i64_mm(double %x) {
; RV32IF-NEXT: lui a1, 524288
; RV32IF-NEXT: .LBB45_13: # %entry
; RV32IF-NEXT: neg a0, a0
-; RV32IF-NEXT: and a0, a0, a4
+; RV32IF-NEXT: and a0, a0, a3
; RV32IF-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IF-NEXT: addi sp, sp, 32
; RV32IF-NEXT: ret
@@ -2990,24 +2990,24 @@ define i64 @stest_f64i64_mm(double %x) {
; RV32IFD-NEXT: .cfi_offset ra, -4
; RV32IFD-NEXT: addi a0, sp, 8
; RV32IFD-NEXT: call __fixdfti
-; RV32IFD-NEXT: lw a0, 16(sp)
-; RV32IFD-NEXT: lw a2, 20(sp)
+; RV32IFD-NEXT: lw a3, 8(sp)
; RV32IFD-NEXT: lw a1, 12(sp)
-; RV32IFD-NEXT: lw a4, 8(sp)
-; RV32IFD-NEXT: lui a3, 524288
-; RV32IFD-NEXT: addi a5, a3, -1
+; RV32IFD-NEXT: lw a2, 16(sp)
+; RV32IFD-NEXT: lw a4, 20(sp)
+; RV32IFD-NEXT: lui a0, 524288
+; RV32IFD-NEXT: addi a5, a0, -1
; RV32IFD-NEXT: beq a1, a5, .LBB45_2
; RV32IFD-NEXT: # %bb.1: # %entry
; RV32IFD-NEXT: sltu a6, a1, a5
-; RV32IFD-NEXT: or a7, a0, a2
+; RV32IFD-NEXT: or a7, a2, a4
; RV32IFD-NEXT: bnez a7, .LBB45_3
; RV32IFD-NEXT: j .LBB45_4
; RV32IFD-NEXT: .LBB45_2:
-; RV32IFD-NEXT: sltiu a6, a4, -1
-; RV32IFD-NEXT: or a7, a0, a2
+; RV32IFD-NEXT: sltiu a6, a3, -1
+; RV32IFD-NEXT: or a7, a2, a4
; RV32IFD-NEXT: beqz a7, .LBB45_4
; RV32IFD-NEXT: .LBB45_3: # %entry
-; RV32IFD-NEXT: slti a6, a2, 0
+; RV32IFD-NEXT: slti a6, a4, 0
; RV32IFD-NEXT: .LBB45_4: # %entry
; RV32IFD-NEXT: addi a7, a6, -1
; RV32IFD-NEXT: neg t0, a6
@@ -3015,21 +3015,21 @@ define i64 @stest_f64i64_mm(double %x) {
; RV32IFD-NEXT: # %bb.5: # %entry
; RV32IFD-NEXT: mv a1, a5
; RV32IFD-NEXT: .LBB45_6: # %entry
-; RV32IFD-NEXT: or a4, a7, a4
+; RV32IFD-NEXT: or a3, a7, a3
+; RV32IFD-NEXT: and a4, t0, a4
; RV32IFD-NEXT: and a2, t0, a2
-; RV32IFD-NEXT: and a5, t0, a0
-; RV32IFD-NEXT: beq a1, a3, .LBB45_8
+; RV32IFD-NEXT: beq a1, a0, .LBB45_8
; RV32IFD-NEXT: # %bb.7: # %entry
-; RV32IFD-NEXT: sltu a0, a3, a1
+; RV32IFD-NEXT: sltu a0, a0, a1
; RV32IFD-NEXT: j .LBB45_9
; RV32IFD-NEXT: .LBB45_8:
-; RV32IFD-NEXT: snez a0, a4
+; RV32IFD-NEXT: snez a0, a3
; RV32IFD-NEXT: .LBB45_9: # %entry
-; RV32IFD-NEXT: and a5, a5, a2
-; RV32IFD-NEXT: li a3, -1
-; RV32IFD-NEXT: beq a5, a3, .LBB45_11
+; RV32IFD-NEXT: and a2, a2, a4
+; RV32IFD-NEXT: li a5, -1
+; RV32IFD-NEXT: beq a2, a5, .LBB45_11
; RV32IFD-NEXT: # %bb.10: # %entry
-; RV32IFD-NEXT: slti a0, a2, 0
+; RV32IFD-NEXT: slti a0, a4, 0
; RV32IFD-NEXT: xori a0, a0, 1
; RV32IFD-NEXT: .LBB45_11: # %entry
; RV32IFD-NEXT: bnez a0, .LBB45_13
@@ -3037,7 +3037,7 @@ define i64 @stest_f64i64_mm(double %x) {
; RV32IFD-NEXT: lui a1, 524288
; RV32IFD-NEXT: .LBB45_13: # %entry
; RV32IFD-NEXT: neg a0, a0
-; RV32IFD-NEXT: and a0, a0, a4
+; RV32IFD-NEXT: and a0, a0, a3
; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: addi sp, sp, 32
; RV32IFD-NEXT: ret
@@ -3246,24 +3246,24 @@ define i64 @stest_f32i64_mm(float %x) {
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: call __fixsfti
-; RV32-NEXT: lw a0, 16(sp)
-; RV32-NEXT: lw a2, 20(sp)
+; RV32-NEXT: lw a3, 8(sp)
; RV32-NEXT: lw a1, 12(sp)
-; RV32-NEXT: lw a4, 8(sp)
-; RV32-NEXT: lui a3, 524288
-; RV32-NEXT: addi a5, a3, -1
+; RV32-NEXT: lw a2, 16(sp)
+; RV32-NEXT: lw a4, 20(sp)
+; RV32-NEXT: lui a0, 524288
+; RV32-NEXT: addi a5, a0, -1
; RV32-NEXT: beq a1, a5, .LBB48_2
; RV32-NEXT: # %bb.1: # %entry
; RV32-NEXT: sltu a6, a1, a5
-; RV32-NEXT: or a7, a0, a2
+; RV32-NEXT: or a7, a2, a4
; RV32-NEXT: bnez a7, .LBB48_3
; RV32-NEXT: j .LBB48_4
; RV32-NEXT: .LBB48_2:
-; RV32-NEXT: sltiu a6, a4, -1
-; RV32-NEXT: or a7, a0, a2
+; RV32-NEXT: sltiu a6, a3, -1
+; RV32-NEXT: or a7, a2, a4
; RV32-NEXT: beqz a7, .LBB48_4
; RV32-NEXT: .LBB48_3: # %entry
-; RV32-NEXT: slti a6, a2, 0
+; RV32-NEXT: slti a6, a4, 0
; RV32-NEXT: .LBB48_4: # %entry
; RV32-NEXT: addi a7, a6, -1
; RV32-NEXT: neg t0, a6
@@ -3271,21 +3271,21 @@ define i64 @stest_f32i64_mm(float %x) {
; RV32-NEXT: # %bb.5: # %entry
; RV32-NEXT: mv a1, a5
; RV32-NEXT: .LBB48_6: # %entry
-; RV32-NEXT: or a4, a7, a4
+; RV32-NEXT: or a3, a7, a3
+; RV32-NEXT: and a4, t0, a4
; RV32-NEXT: and a2, t0, a2
-; RV32-NEXT: and a5, t0, a0
-; RV32-NEXT: beq a1, a3, .LBB48_8
+; RV32-NEXT: beq a1, a0, .LBB48_8
; RV32-NEXT: # %bb.7: # %entry
-; RV32-NEXT: sltu a0, a3, a1
+; RV32-NEXT: sltu a0, a0, a1
; RV32-NEXT: j .LBB48_9
; RV32-NEXT: .LBB48_8:
-; RV32-NEXT: snez a0, a4
+; RV32-NEXT: snez a0, a3
; RV32-NEXT: .LBB48_9: # %entry
-; RV32-NEXT: and a5, a5, a2
-; RV32-NEXT: li a3, -1
-; RV32-NEXT: beq a5, a3, .LBB48_11
+; RV32-NEXT: and a2, a2, a4
+; RV32-NEXT: li a5, -1
+; RV32-NEXT: beq a2, a5, .LBB48_11
; RV32-NEXT: # %bb.10: # %entry
-; RV32-NEXT: slti a0, a2, 0
+; RV32-NEXT: slti a0, a4, 0
; RV32-NEXT: xori a0, a0, 1
; RV32-NEXT: .LBB48_11: # %entry
; RV32-NEXT: bnez a0, .LBB48_13
@@ -3293,7 +3293,7 @@ define i64 @stest_f32i64_mm(float %x) {
; RV32-NEXT: lui a1, 524288
; RV32-NEXT: .LBB48_13: # %entry
; RV32-NEXT: neg a0, a0
-; RV32-NEXT: and a0, a0, a4
+; RV32-NEXT: and a0, a0, a3
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
@@ -3437,24 +3437,24 @@ define i64 @stest_f16i64_mm(half %x) {
; RV32-NEXT: call __extendhfsf2
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: call __fixsfti
-; RV32-NEXT: lw a0, 16(sp)
-; RV32-NEXT: lw a2, 20(sp)
+; RV32-NEXT: lw a3, 8(sp)
; RV32-NEXT: lw a1, 12(sp)
-; RV32-NEXT: lw a4, 8(sp)
-; RV32-NEXT: lui a3, 524288
-; RV32-NEXT: addi a5, a3, -1
+; RV32-NEXT: lw a2, 16(sp)
+; RV32-NEXT: lw a4, 20(sp)
+; RV32-NEXT: lui a0, 524288
+; RV32-NEXT: addi a5, a0, -1
; RV32-NEXT: beq a1, a5, .LBB51_2
; RV32-NEXT: # %bb.1: # %entry
; RV32-NEXT: sltu a6, a1, a5
-; RV32-NEXT: or a7, a0, a2
+; RV32-NEXT: or a7, a2, a4
; RV32-NEXT: bnez a7, .LBB51_3
; RV32-NEXT: j .LBB51_4
; RV32-NEXT: .LBB51_2:
-; RV32-NEXT: sltiu a6, a4, -1
-; RV32-NEXT: or a7, a0, a2
+; RV32-NEXT: sltiu a6, a3, -1
+; RV32-NEXT: or a7, a2, a4
; RV32-NEXT: beqz a7, .LBB51_4
; RV32-NEXT: .LBB51_3: # %entry
-; RV32-NEXT: slti a6, a2, 0
+; RV32-NEXT: slti a6, a4, 0
; RV32-NEXT: .LBB51_4: # %entry
; RV32-NEXT: addi a7, a6, -1
; RV32-NEXT: neg t0, a6
@@ -3462,21 +3462,21 @@ define i64 @stest_f16i64_mm(half %x) {
; RV32-NEXT: # %bb.5: # %entry
; RV32-NEXT: mv a1, a5
; RV32-NEXT: .LBB51_6: # %entry
-; RV32-NEXT: or a4, a7, a4
+; RV32-NEXT: or a3, a7, a3
+; RV32-NEXT: and a4, t0, a4
; RV32-NEXT: and a2, t0, a2
-; RV32-NEXT: and a5, t0, a0
-; RV32-NEXT: beq a1, a3, .LBB51_8
+; RV32-NEXT: beq a1, a0, .LBB51_8
; RV32-NEXT: # %bb.7: # %entry
-; RV32-NEXT: sltu a0, a3, a1
+; RV32-NEXT: sltu a0, a0, a1
; RV32-NEXT: j .LBB51_9
; RV32-NEXT: .LBB51_8:
-; RV32-NEXT: snez a0, a4
+; RV32-NEXT: snez a0, a3
; RV32-NEXT: .LBB51_9: # %entry
-; RV32-NEXT: and a5, a5, a2
-; RV32-NEXT: li a3, -1
-; RV32-NEXT: beq a5, a3, .LBB51_11
+; RV32-NEXT: and a2, a2, a4
+; RV32-NEXT: li a5, -1
+; RV32-NEXT: beq a2, a5, .LBB51_11
; RV32-NEXT: # %bb.10: # %entry
-; RV32-NEXT: slti a0, a2, 0
+; RV32-NEXT: slti a0, a4, 0
; RV32-NEXT: xori a0, a0, 1
; RV32-NEXT: .LBB51_11: # %entry
; RV32-NEXT: bnez a0, .LBB51_13
@@ -3484,7 +3484,7 @@ define i64 @stest_f16i64_mm(half %x) {
; RV32-NEXT: lui a1, 524288
; RV32-NEXT: .LBB51_13: # %entry
; RV32-NEXT: neg a0, a0
-; RV32-NEXT: and a0, a0, a4
+; RV32-NEXT: and a0, a0, a3
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/legalize-fneg.ll b/llvm/test/CodeGen/RISCV/legalize-fneg.ll
index 13d03c5217fb1b..dfd62e8d5f9f56 100644
--- a/llvm/test/CodeGen/RISCV/legalize-fneg.ll
+++ b/llvm/test/CodeGen/RISCV/legalize-fneg.ll
@@ -56,16 +56,16 @@ entry:
define void @test3(ptr %a, ptr %b) nounwind {
; RV32-LABEL: test3:
; RV32: # %bb.0: # %entry
-; RV32-NEXT: lw a2, 4(a1)
-; RV32-NEXT: lw a3, 12(a1)
+; RV32-NEXT: lw a2, 12(a1)
+; RV32-NEXT: lw a3, 4(a1)
; RV32-NEXT: lw a4, 8(a1)
; RV32-NEXT: lw a1, 0(a1)
; RV32-NEXT: lui a5, 524288
-; RV32-NEXT: xor a3, a3, a5
+; RV32-NEXT: xor a2, a2, a5
; RV32-NEXT: sw a4, 8(a0)
; RV32-NEXT: sw a1, 0(a0)
-; RV32-NEXT: sw a2, 4(a0)
-; RV32-NEXT: sw a3, 12(a0)
+; RV32-NEXT: sw a3, 4(a0)
+; RV32-NEXT: sw a2, 12(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: test3:
diff --git a/llvm/test/CodeGen/RISCV/llvm.exp10.ll b/llvm/test/CodeGen/RISCV/llvm.exp10.ll
index 6fde86733b07f7..0941f6a73da280 100644
--- a/llvm/test/CodeGen/RISCV/llvm.exp10.ll
+++ b/llvm/test/CodeGen/RISCV/llvm.exp10.ll
@@ -222,32 +222,32 @@ define <3 x half> @exp10_v3f16(<3 x half> %x) {
; RV64IFD-NEXT: .cfi_offset s1, -24
; RV64IFD-NEXT: .cfi_offset s2, -32
; RV64IFD-NEXT: .cfi_offset fs0, -40
-; RV64IFD-NEXT: lhu s1, 16(a1)
-; RV64IFD-NEXT: lhu s2, 0(a1)
-; RV64IFD-NEXT: lhu a1, 8(a1)
+; RV64IFD-NEXT: lhu s1, 0(a1)
+; RV64IFD-NEXT: lhu a2, 8(a1)
+; RV64IFD-NEXT: lhu s2, 16(a1)
; RV64IFD-NEXT: mv s0, a0
-; RV64IFD-NEXT: fmv.w.x fa0, a1
+; RV64IFD-NEXT: fmv.w.x fa0, a2
; RV64IFD-NEXT: call __extendhfsf2
; RV64IFD-NEXT: call exp10f
; RV64IFD-NEXT: call __truncsfhf2
; RV64IFD-NEXT: fmv.s fs0, fa0
-; RV64IFD-NEXT: fmv.w.x fa0, s2
+; RV64IFD-NEXT: fmv.w.x fa0, s1
; RV64IFD-NEXT: call __extendhfsf2
; RV64IFD-NEXT: call exp10f
; RV64IFD-NEXT: fmv.x.w a0, fs0
-; RV64IFD-NEXT: slli s2, a0, 16
+; RV64IFD-NEXT: slli s1, a0, 16
; RV64IFD-NEXT: call __truncsfhf2
; RV64IFD-NEXT: fmv.x.w a0, fa0
; RV64IFD-NEXT: slli a0, a0, 48
; RV64IFD-NEXT: srli a0, a0, 48
-; RV64IFD-NEXT: or s2, a0, s2
-; RV64IFD-NEXT: fmv.w.x fa0, s1
+; RV64IFD-NEXT: or s1, a0, s1
+; RV64IFD-NEXT: fmv.w.x fa0, s2
; RV64IFD-NEXT: call __extendhfsf2
; RV64IFD-NEXT: call exp10f
; RV64IFD-NEXT: call __truncsfhf2
; RV64IFD-NEXT: fmv.x.w a0, fa0
; RV64IFD-NEXT: sh a0, 4(s0)
-; RV64IFD-NEXT: sw s2, 0(s0)
+; RV64IFD-NEXT: sw s1, 0(s0)
; RV64IFD-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64IFD-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64IFD-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
@@ -349,27 +349,27 @@ define <4 x half> @exp10_v4f16(<4 x half> %x) {
; RV64IFD-NEXT: .cfi_offset fs0, -48
; RV64IFD-NEXT: .cfi_offset fs1, -56
; RV64IFD-NEXT: .cfi_offset fs2, -64
-; RV64IFD-NEXT: lhu s1, 24(a1)
-; RV64IFD-NEXT: lhu s2, 0(a1)
-; RV64IFD-NEXT: lhu s3, 8(a1)
-; RV64IFD-NEXT: lhu a1, 16(a1)
+; RV64IFD-NEXT: lhu s1, 0(a1)
+; RV64IFD-NEXT: lhu s2, 8(a1)
+; RV64IFD-NEXT: lhu a2, 16(a1)
+; RV64IFD-NEXT: lhu s3, 24(a1)
; RV64IFD-NEXT: mv s0, a0
-; RV64IFD-NEXT: fmv.w.x fa0, a1
+; RV64IFD-NEXT: fmv.w.x fa0, a2
; RV64IFD-NEXT: call __extendhfsf2
; RV64IFD-NEXT: call exp10f
; RV64IFD-NEXT: call __truncsfhf2
; RV64IFD-NEXT: fmv.s fs0, fa0
-; RV64IFD-NEXT: fmv.w.x fa0, s3
+; RV64IFD-NEXT: fmv.w.x fa0, s2
; RV64IFD-NEXT: call __extendhfsf2
; RV64IFD-NEXT: call exp10f
; RV64IFD-NEXT: call __truncsfhf2
; RV64IFD-NEXT: fmv.s fs1, fa0
-; RV64IFD-NEXT: fmv.w.x fa0, s2
+; RV64IFD-NEXT: fmv.w.x fa0, s1
; RV64IFD-NEXT: call __extendhfsf2
; RV64IFD-NEXT: call exp10f
; RV64IFD-NEXT: call __truncsfhf2
; RV64IFD-NEXT: fmv.s fs2, fa0
-; RV64IFD-NEXT: fmv.w.x fa0, s1
+; RV64IFD-NEXT: fmv.w.x fa0, s3
; RV64IFD-NEXT: call __extendhfsf2
; RV64IFD-NEXT: call exp10f
; RV64IFD-NEXT: fmv.x.w s1, fs2
diff --git a/llvm/test/CodeGen/RISCV/llvm.frexp.ll b/llvm/test/CodeGen/RISCV/llvm.frexp.ll
index 30f9dd1e516585..442b0cf5b4a856 100644
--- a/llvm/test/CodeGen/RISCV/llvm.frexp.ll
+++ b/llvm/test/CodeGen/RISCV/llvm.frexp.ll
@@ -738,25 +738,25 @@ define { <4 x float>, <4 x i32> } @test_frexp_v4f32_v4i32(<4 x float> %a) nounwi
; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s3, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s4, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: lw s0, 12(a1)
-; RV32I-NEXT: lw s1, 8(a1)
-; RV32I-NEXT: lw s2, 4(a1)
; RV32I-NEXT: lw a2, 0(a1)
+; RV32I-NEXT: lw s0, 4(a1)
+; RV32I-NEXT: lw s1, 8(a1)
+; RV32I-NEXT: lw s2, 12(a1)
; RV32I-NEXT: mv s3, a0
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: call frexpf
; RV32I-NEXT: mv s4, a0
; RV32I-NEXT: addi a1, sp, 12
-; RV32I-NEXT: mv a0, s2
+; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call frexpf
-; RV32I-NEXT: mv s2, a0
+; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: addi a1, sp, 16
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: call frexpf
; RV32I-NEXT: mv s1, a0
; RV32I-NEXT: addi a1, sp, 20
-; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a0, s2
; RV32I-NEXT: call frexpf
; RV32I-NEXT: lw a1, 8(sp)
; RV32I-NEXT: lw a2, 12(sp)
@@ -764,7 +764,7 @@ define { <4 x float>, <4 x i32> } @test_frexp_v4f32_v4i32(<4 x float> %a) nounwi
; RV32I-NEXT: lw a4, 20(sp)
; RV32I-NEXT: sw a0, 12(s3)
; RV32I-NEXT: sw s1, 8(s3)
-; RV32I-NEXT: sw s2, 4(s3)
+; RV32I-NEXT: sw s0, 4(s3)
; RV32I-NEXT: sw s4, 0(s3)
; RV32I-NEXT: sw a4, 28(s3)
; RV32I-NEXT: sw a3, 24(s3)
@@ -788,25 +788,25 @@ define { <4 x float>, <4 x i32> } @test_frexp_v4f32_v4i32(<4 x float> %a) nounwi
; RV64I-NEXT: sd s2, 32(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s3, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s4, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: lw s0, 24(a1)
-; RV64I-NEXT: lw s1, 16(a1)
-; RV64I-NEXT: lw s2, 8(a1)
; RV64I-NEXT: lw a2, 0(a1)
+; RV64I-NEXT: lw s0, 8(a1)
+; RV64I-NEXT: lw s1, 16(a1)
+; RV64I-NEXT: lw s2, 24(a1)
; RV64I-NEXT: mv s3, a0
; RV64I-NEXT: mv a1, sp
; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: call frexpf
; RV64I-NEXT: mv s4, a0
; RV64I-NEXT: addi a1, sp, 4
-; RV64I-NEXT: mv a0, s2
+; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call frexpf
-; RV64I-NEXT: mv s2, a0
+; RV64I-NEXT: mv s0, a0
; RV64I-NEXT: addi a1, sp, 8
; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: call frexpf
; RV64I-NEXT: mv s1, a0
; RV64I-NEXT: addi a1, sp, 12
-; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv a0, s2
; RV64I-NEXT: call frexpf
; RV64I-NEXT: lw a1, 0(sp)
; RV64I-NEXT: lw a2, 4(sp)
@@ -814,7 +814,7 @@ define { <4 x float>, <4 x i32> } @test_frexp_v4f32_v4i32(<4 x float> %a) nounwi
; RV64I-NEXT: lw a4, 12(sp)
; RV64I-NEXT: sw a0, 12(s3)
; RV64I-NEXT: sw s1, 8(s3)
-; RV64I-NEXT: sw s2, 4(s3)
+; RV64I-NEXT: sw s0, 4(s3)
; RV64I-NEXT: sw s4, 0(s3)
; RV64I-NEXT: sw a4, 28(s3)
; RV64I-NEXT: sw a3, 24(s3)
@@ -1006,29 +1006,29 @@ define <4 x float> @test_frexp_v4f32_v4i32_only_use_fract(<4 x float> %a) nounwi
; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s3, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s4, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: lw s0, 12(a1)
-; RV32I-NEXT: lw s1, 8(a1)
-; RV32I-NEXT: lw s2, 4(a1)
; RV32I-NEXT: lw a2, 0(a1)
+; RV32I-NEXT: lw s0, 4(a1)
+; RV32I-NEXT: lw s1, 8(a1)
+; RV32I-NEXT: lw s2, 12(a1)
; RV32I-NEXT: mv s3, a0
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: call frexpf
; RV32I-NEXT: mv s4, a0
; RV32I-NEXT: addi a1, sp, 12
-; RV32I-NEXT: mv a0, s2
+; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call frexpf
-; RV32I-NEXT: mv s2, a0
+; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: addi a1, sp, 16
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: call frexpf
; RV32I-NEXT: mv s1, a0
; RV32I-NEXT: addi a1, sp, 20
-; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a0, s2
; RV32I-NEXT: call frexpf
; RV32I-NEXT: sw a0, 12(s3)
; RV32I-NEXT: sw s1, 8(s3)
-; RV32I-NEXT: sw s2, 4(s3)
+; RV32I-NEXT: sw s0, 4(s3)
; RV32I-NEXT: sw s4, 0(s3)
; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
@@ -1048,29 +1048,29 @@ define <4 x float> @test_frexp_v4f32_v4i32_only_use_fract(<4 x float> %a) nounwi
; RV64I-NEXT: sd s2, 32(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s3, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s4, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: lw s0, 24(a1)
-; RV64I-NEXT: lw s1, 16(a1)
-; RV64I-NEXT: lw s2, 8(a1)
; RV64I-NEXT: lw a2, 0(a1)
+; RV64I-NEXT: lw s0, 8(a1)
+; RV64I-NEXT: lw s1, 16(a1)
+; RV64I-NEXT: lw s2, 24(a1)
; RV64I-NEXT: mv s3, a0
; RV64I-NEXT: mv a1, sp
; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: call frexpf
; RV64I-NEXT: mv s4, a0
; RV64I-NEXT: addi a1, sp, 4
-; RV64I-NEXT: mv a0, s2
+; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call frexpf
-; RV64I-NEXT: mv s2, a0
+; RV64I-NEXT: mv s0, a0
; RV64I-NEXT: addi a1, sp, 8
; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: call frexpf
; RV64I-NEXT: mv s1, a0
; RV64I-NEXT: addi a1, sp, 12
-; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv a0, s2
; RV64I-NEXT: call frexpf
; RV64I-NEXT: sw a0, 12(s3)
; RV64I-NEXT: sw s1, 8(s3)
-; RV64I-NEXT: sw s2, 4(s3)
+; RV64I-NEXT: sw s0, 4(s3)
; RV64I-NEXT: sw s4, 0(s3)
; RV64I-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
@@ -1254,22 +1254,22 @@ define <4 x i32> @test_frexp_v4f32_v4i32_only_use_exp(<4 x float> %a) nounwind {
; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s3, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: lw s0, 12(a1)
-; RV32I-NEXT: lw s1, 8(a1)
-; RV32I-NEXT: lw s2, 4(a1)
; RV32I-NEXT: lw a2, 0(a1)
+; RV32I-NEXT: lw s0, 4(a1)
+; RV32I-NEXT: lw s1, 8(a1)
+; RV32I-NEXT: lw s2, 12(a1)
; RV32I-NEXT: mv s3, a0
; RV32I-NEXT: addi a1, sp, 12
; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: call frexpf
; RV32I-NEXT: addi a1, sp, 16
-; RV32I-NEXT: mv a0, s2
+; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call frexpf
; RV32I-NEXT: addi a1, sp, 20
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: call frexpf
; RV32I-NEXT: addi a1, sp, 24
-; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a0, s2
; RV32I-NEXT: call frexpf
; RV32I-NEXT: lw a0, 24(sp)
; RV32I-NEXT: lw a1, 20(sp)
@@ -1295,22 +1295,22 @@ define <4 x i32> @test_frexp_v4f32_v4i32_only_use_exp(<4 x float> %a) nounwind {
; RV64I-NEXT: sd s1, 40(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 32(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s3, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: lw s0, 24(a1)
-; RV64I-NEXT: lw s1, 16(a1)
-; RV64I-NEXT: lw s2, 8(a1)
; RV64I-NEXT: lw a2, 0(a1)
+; RV64I-NEXT: lw s0, 8(a1)
+; RV64I-NEXT: lw s1, 16(a1)
+; RV64I-NEXT: lw s2, 24(a1)
; RV64I-NEXT: mv s3, a0
; RV64I-NEXT: addi a1, sp, 8
; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: call frexpf
; RV64I-NEXT: addi a1, sp, 12
-; RV64I-NEXT: mv a0, s2
+; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call frexpf
; RV64I-NEXT: addi a1, sp, 16
; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: call frexpf
; RV64I-NEXT: addi a1, sp, 20
-; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv a0, s2
; RV64I-NEXT: call frexpf
; RV64I-NEXT: lw a0, 20(sp)
; RV64I-NEXT: lw a1, 16(sp)
@@ -1584,16 +1584,16 @@ define { fp128, i32 } @test_frexp_f128_i32(fp128 %a) nounwind {
; RV32IFD-NEXT: addi a2, sp, 36
; RV32IFD-NEXT: sw a3, 0(sp)
; RV32IFD-NEXT: call frexpl
-; RV32IFD-NEXT: lw a0, 36(sp)
+; RV32IFD-NEXT: lw a0, 24(sp)
; RV32IFD-NEXT: lw a1, 28(sp)
-; RV32IFD-NEXT: lw a2, 24(sp)
+; RV32IFD-NEXT: lw a2, 16(sp)
; RV32IFD-NEXT: lw a3, 20(sp)
-; RV32IFD-NEXT: lw a4, 16(sp)
+; RV32IFD-NEXT: lw a4, 36(sp)
; RV32IFD-NEXT: sw a1, 12(s0)
-; RV32IFD-NEXT: sw a2, 8(s0)
+; RV32IFD-NEXT: sw a0, 8(s0)
; RV32IFD-NEXT: sw a3, 4(s0)
-; RV32IFD-NEXT: sw a4, 0(s0)
-; RV32IFD-NEXT: sw a0, 16(s0)
+; RV32IFD-NEXT: sw a2, 0(s0)
+; RV32IFD-NEXT: sw a4, 16(s0)
; RV32IFD-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: addi sp, sp, 48
@@ -1637,16 +1637,16 @@ define { fp128, i32 } @test_frexp_f128_i32(fp128 %a) nounwind {
; RV32IZFINXZDINX-NEXT: addi a2, sp, 36
; RV32IZFINXZDINX-NEXT: sw a3, 0(sp)
; RV32IZFINXZDINX-NEXT: call frexpl
-; RV32IZFINXZDINX-NEXT: lw a0, 36(sp)
+; RV32IZFINXZDINX-NEXT: lw a0, 24(sp)
; RV32IZFINXZDINX-NEXT: lw a1, 28(sp)
-; RV32IZFINXZDINX-NEXT: lw a2, 24(sp)
+; RV32IZFINXZDINX-NEXT: lw a2, 16(sp)
; RV32IZFINXZDINX-NEXT: lw a3, 20(sp)
-; RV32IZFINXZDINX-NEXT: lw a4, 16(sp)
+; RV32IZFINXZDINX-NEXT: lw a4, 36(sp)
; RV32IZFINXZDINX-NEXT: sw a1, 12(s0)
-; RV32IZFINXZDINX-NEXT: sw a2, 8(s0)
+; RV32IZFINXZDINX-NEXT: sw a0, 8(s0)
; RV32IZFINXZDINX-NEXT: sw a3, 4(s0)
-; RV32IZFINXZDINX-NEXT: sw a4, 0(s0)
-; RV32IZFINXZDINX-NEXT: sw a0, 16(s0)
+; RV32IZFINXZDINX-NEXT: sw a2, 0(s0)
+; RV32IZFINXZDINX-NEXT: sw a4, 16(s0)
; RV32IZFINXZDINX-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
; RV32IZFINXZDINX-NEXT: addi sp, sp, 48
@@ -1690,16 +1690,16 @@ define { fp128, i32 } @test_frexp_f128_i32(fp128 %a) nounwind {
; RV32I-NEXT: addi a2, sp, 36
; RV32I-NEXT: sw a3, 0(sp)
; RV32I-NEXT: call frexpl
-; RV32I-NEXT: lw a0, 36(sp)
+; RV32I-NEXT: lw a0, 24(sp)
; RV32I-NEXT: lw a1, 28(sp)
-; RV32I-NEXT: lw a2, 24(sp)
+; RV32I-NEXT: lw a2, 16(sp)
; RV32I-NEXT: lw a3, 20(sp)
-; RV32I-NEXT: lw a4, 16(sp)
+; RV32I-NEXT: lw a4, 36(sp)
; RV32I-NEXT: sw a1, 12(s0)
-; RV32I-NEXT: sw a2, 8(s0)
+; RV32I-NEXT: sw a0, 8(s0)
; RV32I-NEXT: sw a3, 4(s0)
-; RV32I-NEXT: sw a4, 0(s0)
-; RV32I-NEXT: sw a0, 16(s0)
+; RV32I-NEXT: sw a2, 0(s0)
+; RV32I-NEXT: sw a4, 16(s0)
; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 48
diff --git a/llvm/test/CodeGen/RISCV/memcpy.ll b/llvm/test/CodeGen/RISCV/memcpy.ll
index 02f582339d0b78..41c27d83defe61 100644
--- a/llvm/test/CodeGen/RISCV/memcpy.ll
+++ b/llvm/test/CodeGen/RISCV/memcpy.ll
@@ -25,16 +25,16 @@ define i32 @t0() {
; RV32: # %bb.0: # %entry
; RV32-NEXT: lui a0, %hi(src)
; RV32-NEXT: lw a1, %lo(src)(a0)
-; RV32-NEXT: lui a2, %hi(dst)
-; RV32-NEXT: sw a1, %lo(dst)(a2)
; RV32-NEXT: addi a0, a0, %lo(src)
-; RV32-NEXT: lbu a1, 10(a0)
+; RV32-NEXT: lw a2, 4(a0)
; RV32-NEXT: lh a3, 8(a0)
-; RV32-NEXT: lw a0, 4(a0)
-; RV32-NEXT: addi a2, a2, %lo(dst)
-; RV32-NEXT: sb a1, 10(a2)
-; RV32-NEXT: sh a3, 8(a2)
-; RV32-NEXT: sw a0, 4(a2)
+; RV32-NEXT: lbu a0, 10(a0)
+; RV32-NEXT: lui a4, %hi(dst)
+; RV32-NEXT: sw a1, %lo(dst)(a4)
+; RV32-NEXT: addi a1, a4, %lo(dst)
+; RV32-NEXT: sb a0, 10(a1)
+; RV32-NEXT: sh a3, 8(a1)
+; RV32-NEXT: sw a2, 4(a1)
; RV32-NEXT: li a0, 0
; RV32-NEXT: ret
;
@@ -42,14 +42,14 @@ define i32 @t0() {
; RV64: # %bb.0: # %entry
; RV64-NEXT: lui a0, %hi(src)
; RV64-NEXT: ld a1, %lo(src)(a0)
-; RV64-NEXT: lui a2, %hi(dst)
; RV64-NEXT: addi a0, a0, %lo(src)
-; RV64-NEXT: lbu a3, 10(a0)
-; RV64-NEXT: lh a0, 8(a0)
-; RV64-NEXT: sd a1, %lo(dst)(a2)
-; RV64-NEXT: addi a1, a2, %lo(dst)
-; RV64-NEXT: sb a3, 10(a1)
-; RV64-NEXT: sh a0, 8(a1)
+; RV64-NEXT: lh a2, 8(a0)
+; RV64-NEXT: lbu a0, 10(a0)
+; RV64-NEXT: lui a3, %hi(dst)
+; RV64-NEXT: sd a1, %lo(dst)(a3)
+; RV64-NEXT: addi a1, a3, %lo(dst)
+; RV64-NEXT: sb a0, 10(a1)
+; RV64-NEXT: sh a2, 8(a1)
; RV64-NEXT: li a0, 0
; RV64-NEXT: ret
;
@@ -57,14 +57,14 @@ define i32 @t0() {
; RV32-FAST: # %bb.0: # %entry
; RV32-FAST-NEXT: lui a0, %hi(src)
; RV32-FAST-NEXT: lw a1, %lo(src)(a0)
-; RV32-FAST-NEXT: lui a2, %hi(dst)
; RV32-FAST-NEXT: addi a0, a0, %lo(src)
-; RV32-FAST-NEXT: lw a3, 7(a0)
-; RV32-FAST-NEXT: lw a0, 4(a0)
-; RV32-FAST-NEXT: sw a1, %lo(dst)(a2)
-; RV32-FAST-NEXT: addi a1, a2, %lo(dst)
-; RV32-FAST-NEXT: sw a3, 7(a1)
-; RV32-FAST-NEXT: sw a0, 4(a1)
+; RV32-FAST-NEXT: lw a2, 4(a0)
+; RV32-FAST-NEXT: lw a0, 7(a0)
+; RV32-FAST-NEXT: lui a3, %hi(dst)
+; RV32-FAST-NEXT: sw a1, %lo(dst)(a3)
+; RV32-FAST-NEXT: addi a1, a3, %lo(dst)
+; RV32-FAST-NEXT: sw a0, 7(a1)
+; RV32-FAST-NEXT: sw a2, 4(a1)
; RV32-FAST-NEXT: li a0, 0
; RV32-FAST-NEXT: ret
;
@@ -166,16 +166,16 @@ define void @t2(ptr nocapture %C) nounwind {
; RV64-FAST-NEXT: lui a1, %hi(.L.str2)
; RV64-FAST-NEXT: ld a2, %lo(.L.str2)(a1)
; RV64-FAST-NEXT: sd a2, 0(a0)
-; RV64-FAST-NEXT: lui a2, 1156
-; RV64-FAST-NEXT: addi a2, a2, 332
; RV64-FAST-NEXT: addi a1, a1, %lo(.L.str2)
-; RV64-FAST-NEXT: ld a3, 24(a1)
-; RV64-FAST-NEXT: ld a4, 16(a1)
-; RV64-FAST-NEXT: ld a1, 8(a1)
-; RV64-FAST-NEXT: sw a2, 32(a0)
-; RV64-FAST-NEXT: sd a3, 24(a0)
-; RV64-FAST-NEXT: sd a4, 16(a0)
-; RV64-FAST-NEXT: sd a1, 8(a0)
+; RV64-FAST-NEXT: ld a2, 8(a1)
+; RV64-FAST-NEXT: ld a3, 16(a1)
+; RV64-FAST-NEXT: ld a1, 24(a1)
+; RV64-FAST-NEXT: lui a4, 1156
+; RV64-FAST-NEXT: addi a4, a4, 332
+; RV64-FAST-NEXT: sw a4, 32(a0)
+; RV64-FAST-NEXT: sd a1, 24(a0)
+; RV64-FAST-NEXT: sd a3, 16(a0)
+; RV64-FAST-NEXT: sd a2, 8(a0)
; RV64-FAST-NEXT: ret
entry:
tail call void @llvm.memcpy.p0.p0.i64(ptr %C, ptr @.str2, i64 36, i1 false)
diff --git a/llvm/test/CodeGen/RISCV/misched-load-clustering.ll b/llvm/test/CodeGen/RISCV/misched-load-clustering.ll
index db41b262718141..cf290a0b8682da 100644
--- a/llvm/test/CodeGen/RISCV/misched-load-clustering.ll
+++ b/llvm/test/CodeGen/RISCV/misched-load-clustering.ll
@@ -1,12 +1,14 @@
; REQUIRES: asserts
-; RUN: llc -mtriple=riscv32 -verify-misched -debug-only=machine-scheduler -o - 2>&1 < %s \
+; RUN: llc -mtriple=riscv32 -verify-misched -riscv-misched-load-clustering=false \
+; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
; RUN: | FileCheck -check-prefix=NOCLUSTER %s
-; RUN: llc -mtriple=riscv64 -verify-misched -debug-only=machine-scheduler -o - 2>&1 < %s \
+; RUN: llc -mtriple=riscv64 -verify-misched -riscv-misched-load-clustering=false \
+; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
; RUN: | FileCheck -check-prefix=NOCLUSTER %s
-; RUN: llc -mtriple=riscv32 -riscv-misched-load-clustering -verify-misched \
+; RUN: llc -mtriple=riscv32 -verify-misched \
; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
; RUN: | FileCheck -check-prefix=LDCLUSTER %s
-; RUN: llc -mtriple=riscv64 -riscv-misched-load-clustering -verify-misched \
+; RUN: llc -mtriple=riscv64 -verify-misched \
; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
; RUN: | FileCheck -check-prefix=LDCLUSTER %s
diff --git a/llvm/test/CodeGen/RISCV/mul.ll b/llvm/test/CodeGen/RISCV/mul.ll
index 14f2777fdd06d2..e9b84b3cd97ed2 100644
--- a/llvm/test/CodeGen/RISCV/mul.ll
+++ b/llvm/test/CodeGen/RISCV/mul.ll
@@ -1351,48 +1351,48 @@ define i128 @muli128_m3840(i128 %a) nounwind {
; RV32IM-NEXT: addi sp, sp, -16
; RV32IM-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32IM-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a2, 12(a1)
-; RV32IM-NEXT: lw a3, 8(a1)
-; RV32IM-NEXT: lw a4, 0(a1)
-; RV32IM-NEXT: lw a1, 4(a1)
+; RV32IM-NEXT: lw a2, 0(a1)
+; RV32IM-NEXT: lw a3, 4(a1)
+; RV32IM-NEXT: lw a4, 8(a1)
+; RV32IM-NEXT: lw a1, 12(a1)
; RV32IM-NEXT: li a5, -15
; RV32IM-NEXT: slli a5, a5, 8
-; RV32IM-NEXT: mulhu a6, a4, a5
-; RV32IM-NEXT: mul a7, a1, a5
+; RV32IM-NEXT: mulhu a6, a2, a5
+; RV32IM-NEXT: mul a7, a3, a5
; RV32IM-NEXT: add a6, a7, a6
; RV32IM-NEXT: sltu a7, a6, a7
-; RV32IM-NEXT: mulhu t0, a1, a5
+; RV32IM-NEXT: mulhu t0, a3, a5
; RV32IM-NEXT: add a7, t0, a7
-; RV32IM-NEXT: sub a6, a6, a4
-; RV32IM-NEXT: neg t0, a4
+; RV32IM-NEXT: sub a6, a6, a2
+; RV32IM-NEXT: neg t0, a2
; RV32IM-NEXT: sltu t1, a6, t0
; RV32IM-NEXT: li t2, -1
-; RV32IM-NEXT: mulhu t3, a4, t2
+; RV32IM-NEXT: mulhu t3, a2, t2
; RV32IM-NEXT: add t1, t3, t1
; RV32IM-NEXT: add t1, a7, t1
-; RV32IM-NEXT: sub t4, t1, a1
-; RV32IM-NEXT: mul t5, a3, a5
-; RV32IM-NEXT: sub t5, t5, a4
+; RV32IM-NEXT: sub t4, t1, a3
+; RV32IM-NEXT: mul t5, a4, a5
+; RV32IM-NEXT: sub t5, t5, a2
; RV32IM-NEXT: add t6, t4, t5
; RV32IM-NEXT: sltu s0, t6, t4
-; RV32IM-NEXT: neg s1, a1
+; RV32IM-NEXT: neg s1, a3
; RV32IM-NEXT: sltu t4, t4, s1
; RV32IM-NEXT: sltu a7, t1, a7
-; RV32IM-NEXT: mulhu t1, a1, t2
+; RV32IM-NEXT: mulhu t1, a3, t2
; RV32IM-NEXT: add a7, t1, a7
; RV32IM-NEXT: add a7, a7, t4
; RV32IM-NEXT: sltu t0, t5, t0
-; RV32IM-NEXT: mul a2, a2, a5
-; RV32IM-NEXT: mulhu t1, a3, a5
-; RV32IM-NEXT: sub a3, t1, a3
-; RV32IM-NEXT: add a2, a3, a2
+; RV32IM-NEXT: mul a1, a1, a5
+; RV32IM-NEXT: mulhu t1, a4, a5
+; RV32IM-NEXT: sub a4, t1, a4
; RV32IM-NEXT: add a1, a4, a1
-; RV32IM-NEXT: sub a1, t3, a1
-; RV32IM-NEXT: add a1, a1, a2
+; RV32IM-NEXT: add a3, a2, a3
+; RV32IM-NEXT: sub a3, t3, a3
+; RV32IM-NEXT: add a1, a3, a1
; RV32IM-NEXT: add a1, a1, t0
; RV32IM-NEXT: add a1, a7, a1
; RV32IM-NEXT: add a1, a1, s0
-; RV32IM-NEXT: mul a2, a4, a5
+; RV32IM-NEXT: mul a2, a2, a5
; RV32IM-NEXT: sw a2, 0(a0)
; RV32IM-NEXT: sw a6, 4(a0)
; RV32IM-NEXT: sw t6, 8(a0)
@@ -1436,39 +1436,39 @@ define i128 @muli128_m63(i128 %a) nounwind {
; RV32I-LABEL: muli128_m63:
; RV32I: # %bb.0:
; RV32I-NEXT: lw a2, 0(a1)
-; RV32I-NEXT: lw a4, 12(a1)
+; RV32I-NEXT: lw a3, 4(a1)
; RV32I-NEXT: lw a6, 8(a1)
-; RV32I-NEXT: lw a1, 4(a1)
-; RV32I-NEXT: slli a3, a2, 6
-; RV32I-NEXT: sltu a5, a2, a3
+; RV32I-NEXT: lw a5, 12(a1)
+; RV32I-NEXT: slli a1, a2, 6
+; RV32I-NEXT: sltu a4, a2, a1
; RV32I-NEXT: srli a7, a2, 26
-; RV32I-NEXT: slli t0, a1, 6
+; RV32I-NEXT: slli t0, a3, 6
; RV32I-NEXT: or a7, t0, a7
-; RV32I-NEXT: mv t0, a5
-; RV32I-NEXT: beq a1, a7, .LBB37_2
+; RV32I-NEXT: mv t0, a4
+; RV32I-NEXT: beq a3, a7, .LBB37_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: sltu t0, a1, a7
+; RV32I-NEXT: sltu t0, a3, a7
; RV32I-NEXT: .LBB37_2:
-; RV32I-NEXT: srli t1, a1, 26
+; RV32I-NEXT: srli t1, a3, 26
; RV32I-NEXT: slli t2, a6, 6
; RV32I-NEXT: or t1, t2, t1
; RV32I-NEXT: sub t2, a6, t1
; RV32I-NEXT: sltu t3, t2, t0
; RV32I-NEXT: sltu t1, a6, t1
; RV32I-NEXT: srli a6, a6, 26
-; RV32I-NEXT: slli t4, a4, 6
+; RV32I-NEXT: slli t4, a5, 6
; RV32I-NEXT: or a6, t4, a6
-; RV32I-NEXT: sub a4, a4, a6
-; RV32I-NEXT: sub a4, a4, t1
-; RV32I-NEXT: sub a4, a4, t3
+; RV32I-NEXT: sub a5, a5, a6
+; RV32I-NEXT: sub a5, a5, t1
+; RV32I-NEXT: sub a5, a5, t3
; RV32I-NEXT: sub a6, t2, t0
-; RV32I-NEXT: sub a1, a1, a7
-; RV32I-NEXT: sub a1, a1, a5
-; RV32I-NEXT: sub a2, a2, a3
+; RV32I-NEXT: sub a3, a3, a7
+; RV32I-NEXT: sub a3, a3, a4
+; RV32I-NEXT: sub a2, a2, a1
; RV32I-NEXT: sw a2, 0(a0)
-; RV32I-NEXT: sw a1, 4(a0)
+; RV32I-NEXT: sw a3, 4(a0)
; RV32I-NEXT: sw a6, 8(a0)
-; RV32I-NEXT: sw a4, 12(a0)
+; RV32I-NEXT: sw a5, 12(a0)
; RV32I-NEXT: ret
;
; RV32IM-LABEL: muli128_m63:
@@ -1476,52 +1476,52 @@ define i128 @muli128_m63(i128 %a) nounwind {
; RV32IM-NEXT: addi sp, sp, -16
; RV32IM-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32IM-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a2, 12(a1)
-; RV32IM-NEXT: lw a3, 0(a1)
-; RV32IM-NEXT: lw a4, 4(a1)
-; RV32IM-NEXT: lw a1, 8(a1)
+; RV32IM-NEXT: lw a2, 0(a1)
+; RV32IM-NEXT: lw a3, 4(a1)
+; RV32IM-NEXT: lw a4, 8(a1)
+; RV32IM-NEXT: lw a1, 12(a1)
; RV32IM-NEXT: li a5, -63
-; RV32IM-NEXT: mulhu a6, a3, a5
-; RV32IM-NEXT: slli a7, a4, 6
-; RV32IM-NEXT: sub a7, a4, a7
+; RV32IM-NEXT: mulhu a6, a2, a5
+; RV32IM-NEXT: slli a7, a3, 6
+; RV32IM-NEXT: sub a7, a3, a7
; RV32IM-NEXT: add a6, a7, a6
; RV32IM-NEXT: sltu a7, a6, a7
-; RV32IM-NEXT: mulhu t0, a4, a5
+; RV32IM-NEXT: mulhu t0, a3, a5
; RV32IM-NEXT: add a7, t0, a7
-; RV32IM-NEXT: sub a6, a6, a3
-; RV32IM-NEXT: neg t0, a3
+; RV32IM-NEXT: sub a6, a6, a2
+; RV32IM-NEXT: neg t0, a2
; RV32IM-NEXT: sltu t1, a6, t0
; RV32IM-NEXT: li t2, -1
-; RV32IM-NEXT: mulhu t3, a3, t2
+; RV32IM-NEXT: mulhu t3, a2, t2
; RV32IM-NEXT: add t1, t3, t1
; RV32IM-NEXT: add t1, a7, t1
-; RV32IM-NEXT: sub t4, t1, a4
-; RV32IM-NEXT: slli t5, a1, 6
-; RV32IM-NEXT: sub t6, a1, a3
+; RV32IM-NEXT: sub t4, t1, a3
+; RV32IM-NEXT: slli t5, a4, 6
+; RV32IM-NEXT: sub t6, a4, a2
; RV32IM-NEXT: sub t5, t6, t5
; RV32IM-NEXT: add t6, t4, t5
; RV32IM-NEXT: sltu s0, t6, t4
-; RV32IM-NEXT: neg s1, a4
+; RV32IM-NEXT: neg s1, a3
; RV32IM-NEXT: sltu t4, t4, s1
; RV32IM-NEXT: sltu a7, t1, a7
-; RV32IM-NEXT: mulhu t1, a4, t2
+; RV32IM-NEXT: mulhu t1, a3, t2
; RV32IM-NEXT: add a7, t1, a7
; RV32IM-NEXT: add a7, a7, t4
; RV32IM-NEXT: sltu t0, t5, t0
-; RV32IM-NEXT: slli t1, a2, 6
-; RV32IM-NEXT: sub a2, a2, t1
-; RV32IM-NEXT: mulhu a5, a1, a5
-; RV32IM-NEXT: sub a5, a5, a1
-; RV32IM-NEXT: add a2, a5, a2
-; RV32IM-NEXT: add a4, a3, a4
-; RV32IM-NEXT: sub a1, t3, a4
-; RV32IM-NEXT: add a1, a1, a2
+; RV32IM-NEXT: slli t1, a1, 6
+; RV32IM-NEXT: sub a1, a1, t1
+; RV32IM-NEXT: mulhu a5, a4, a5
+; RV32IM-NEXT: sub a5, a5, a4
+; RV32IM-NEXT: add a1, a5, a1
+; RV32IM-NEXT: add a3, a2, a3
+; RV32IM-NEXT: sub a3, t3, a3
+; RV32IM-NEXT: add a1, a3, a1
; RV32IM-NEXT: add a1, a1, t0
; RV32IM-NEXT: add a1, a7, a1
; RV32IM-NEXT: add a1, a1, s0
-; RV32IM-NEXT: slli a2, a3, 6
-; RV32IM-NEXT: sub a3, a3, a2
-; RV32IM-NEXT: sw a3, 0(a0)
+; RV32IM-NEXT: slli a3, a2, 6
+; RV32IM-NEXT: sub a2, a2, a3
+; RV32IM-NEXT: sw a2, 0(a0)
; RV32IM-NEXT: sw a6, 4(a0)
; RV32IM-NEXT: sw t6, 8(a0)
; RV32IM-NEXT: sw a1, 12(a0)
diff --git a/llvm/test/CodeGen/RISCV/nontemporal.ll b/llvm/test/CodeGen/RISCV/nontemporal.ll
index 4c5c36fc72d14d..55bd32e4857345 100644
--- a/llvm/test/CodeGen/RISCV/nontemporal.ll
+++ b/llvm/test/CodeGen/RISCV/nontemporal.ll
@@ -915,30 +915,30 @@ define void @test_nontemporal_store_v16i8(ptr %p, <16 x i8> %v) {
; CHECK-RV64-NEXT: lbu a7, 40(a1)
; CHECK-RV64-NEXT: lbu t0, 48(a1)
; CHECK-RV64-NEXT: lbu t1, 56(a1)
-; CHECK-RV64-NEXT: lbu t2, 64(a1)
-; CHECK-RV64-NEXT: lbu t3, 72(a1)
-; CHECK-RV64-NEXT: lbu t4, 80(a1)
-; CHECK-RV64-NEXT: lbu t5, 88(a1)
-; CHECK-RV64-NEXT: lbu t6, 120(a1)
-; CHECK-RV64-NEXT: lbu s0, 112(a1)
-; CHECK-RV64-NEXT: lbu s1, 104(a1)
-; CHECK-RV64-NEXT: lbu a1, 96(a1)
+; CHECK-RV64-NEXT: lbu t2, 96(a1)
+; CHECK-RV64-NEXT: lbu t3, 104(a1)
+; CHECK-RV64-NEXT: lbu t4, 112(a1)
+; CHECK-RV64-NEXT: lbu t5, 120(a1)
+; CHECK-RV64-NEXT: lbu t6, 64(a1)
+; CHECK-RV64-NEXT: lbu s0, 72(a1)
+; CHECK-RV64-NEXT: lbu s1, 80(a1)
+; CHECK-RV64-NEXT: lbu a1, 88(a1)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sb t6, 15(a0)
+; CHECK-RV64-NEXT: sb t5, 15(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sb s0, 14(a0)
+; CHECK-RV64-NEXT: sb t4, 14(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sb s1, 13(a0)
+; CHECK-RV64-NEXT: sb t3, 13(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sb a1, 12(a0)
+; CHECK-RV64-NEXT: sb t2, 12(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sb t5, 11(a0)
+; CHECK-RV64-NEXT: sb a1, 11(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sb t4, 10(a0)
+; CHECK-RV64-NEXT: sb s1, 10(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sb t3, 9(a0)
+; CHECK-RV64-NEXT: sb s0, 9(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sb t2, 8(a0)
+; CHECK-RV64-NEXT: sb t6, 8(a0)
; CHECK-RV64-NEXT: ntl.all
; CHECK-RV64-NEXT: sb t1, 7(a0)
; CHECK-RV64-NEXT: ntl.all
@@ -976,30 +976,30 @@ define void @test_nontemporal_store_v16i8(ptr %p, <16 x i8> %v) {
; CHECK-RV32-NEXT: lbu a7, 20(a1)
; CHECK-RV32-NEXT: lbu t0, 24(a1)
; CHECK-RV32-NEXT: lbu t1, 28(a1)
-; CHECK-RV32-NEXT: lbu t2, 32(a1)
-; CHECK-RV32-NEXT: lbu t3, 36(a1)
-; CHECK-RV32-NEXT: lbu t4, 40(a1)
-; CHECK-RV32-NEXT: lbu t5, 44(a1)
-; CHECK-RV32-NEXT: lbu t6, 60(a1)
-; CHECK-RV32-NEXT: lbu s0, 56(a1)
-; CHECK-RV32-NEXT: lbu s1, 52(a1)
-; CHECK-RV32-NEXT: lbu a1, 48(a1)
+; CHECK-RV32-NEXT: lbu t2, 48(a1)
+; CHECK-RV32-NEXT: lbu t3, 52(a1)
+; CHECK-RV32-NEXT: lbu t4, 56(a1)
+; CHECK-RV32-NEXT: lbu t5, 60(a1)
+; CHECK-RV32-NEXT: lbu t6, 32(a1)
+; CHECK-RV32-NEXT: lbu s0, 36(a1)
+; CHECK-RV32-NEXT: lbu s1, 40(a1)
+; CHECK-RV32-NEXT: lbu a1, 44(a1)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sb t6, 15(a0)
+; CHECK-RV32-NEXT: sb t5, 15(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sb s0, 14(a0)
+; CHECK-RV32-NEXT: sb t4, 14(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sb s1, 13(a0)
+; CHECK-RV32-NEXT: sb t3, 13(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sb a1, 12(a0)
+; CHECK-RV32-NEXT: sb t2, 12(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sb t5, 11(a0)
+; CHECK-RV32-NEXT: sb a1, 11(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sb t4, 10(a0)
+; CHECK-RV32-NEXT: sb s1, 10(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sb t3, 9(a0)
+; CHECK-RV32-NEXT: sb s0, 9(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sb t2, 8(a0)
+; CHECK-RV32-NEXT: sb t6, 8(a0)
; CHECK-RV32-NEXT: ntl.all
; CHECK-RV32-NEXT: sb t1, 7(a0)
; CHECK-RV32-NEXT: ntl.all
@@ -1037,28 +1037,28 @@ define void @test_nontemporal_store_v16i8(ptr %p, <16 x i8> %v) {
; CHECK-RV64C-NEXT: lbu t3, 40(a1)
; CHECK-RV64C-NEXT: lbu t4, 48(a1)
; CHECK-RV64C-NEXT: lbu t5, 56(a1)
+; CHECK-RV64C-NEXT: lbu a2, 96(a1)
+; CHECK-RV64C-NEXT: lbu a3, 104(a1)
+; CHECK-RV64C-NEXT: lbu a4, 112(a1)
+; CHECK-RV64C-NEXT: lbu a5, 120(a1)
; CHECK-RV64C-NEXT: lbu t6, 64(a1)
-; CHECK-RV64C-NEXT: lbu a3, 72(a1)
-; CHECK-RV64C-NEXT: lbu a4, 80(a1)
-; CHECK-RV64C-NEXT: lbu a5, 88(a1)
-; CHECK-RV64C-NEXT: lbu a2, 120(a1)
-; CHECK-RV64C-NEXT: lbu s0, 112(a1)
-; CHECK-RV64C-NEXT: lbu s1, 104(a1)
-; CHECK-RV64C-NEXT: lbu a1, 96(a1)
+; CHECK-RV64C-NEXT: lbu s0, 72(a1)
+; CHECK-RV64C-NEXT: lbu s1, 80(a1)
+; CHECK-RV64C-NEXT: lbu a1, 88(a1)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sb a2, 15(a0)
+; CHECK-RV64C-NEXT: sb a5, 15(a0)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sb s0, 14(a0)
+; CHECK-RV64C-NEXT: sb a4, 14(a0)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sb s1, 13(a0)
+; CHECK-RV64C-NEXT: sb a3, 13(a0)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sb a1, 12(a0)
+; CHECK-RV64C-NEXT: sb a2, 12(a0)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sb a5, 11(a0)
+; CHECK-RV64C-NEXT: sb a1, 11(a0)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sb a4, 10(a0)
+; CHECK-RV64C-NEXT: sb s1, 10(a0)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sb a3, 9(a0)
+; CHECK-RV64C-NEXT: sb s0, 9(a0)
; CHECK-RV64C-NEXT: c.ntl.all
; CHECK-RV64C-NEXT: sb t6, 8(a0)
; CHECK-RV64C-NEXT: c.ntl.all
@@ -1098,28 +1098,28 @@ define void @test_nontemporal_store_v16i8(ptr %p, <16 x i8> %v) {
; CHECK-RV32C-NEXT: lbu t3, 20(a1)
; CHECK-RV32C-NEXT: lbu t4, 24(a1)
; CHECK-RV32C-NEXT: lbu t5, 28(a1)
+; CHECK-RV32C-NEXT: lbu a2, 48(a1)
+; CHECK-RV32C-NEXT: lbu a3, 52(a1)
+; CHECK-RV32C-NEXT: lbu a4, 56(a1)
+; CHECK-RV32C-NEXT: lbu a5, 60(a1)
; CHECK-RV32C-NEXT: lbu t6, 32(a1)
-; CHECK-RV32C-NEXT: lbu a3, 36(a1)
-; CHECK-RV32C-NEXT: lbu a4, 40(a1)
-; CHECK-RV32C-NEXT: lbu a5, 44(a1)
-; CHECK-RV32C-NEXT: lbu a2, 60(a1)
-; CHECK-RV32C-NEXT: lbu s0, 56(a1)
-; CHECK-RV32C-NEXT: lbu s1, 52(a1)
-; CHECK-RV32C-NEXT: lbu a1, 48(a1)
+; CHECK-RV32C-NEXT: lbu s0, 36(a1)
+; CHECK-RV32C-NEXT: lbu s1, 40(a1)
+; CHECK-RV32C-NEXT: lbu a1, 44(a1)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sb a2, 15(a0)
+; CHECK-RV32C-NEXT: sb a5, 15(a0)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sb s0, 14(a0)
+; CHECK-RV32C-NEXT: sb a4, 14(a0)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sb s1, 13(a0)
+; CHECK-RV32C-NEXT: sb a3, 13(a0)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sb a1, 12(a0)
+; CHECK-RV32C-NEXT: sb a2, 12(a0)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sb a5, 11(a0)
+; CHECK-RV32C-NEXT: sb a1, 11(a0)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sb a4, 10(a0)
+; CHECK-RV32C-NEXT: sb s1, 10(a0)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sb a3, 9(a0)
+; CHECK-RV32C-NEXT: sb s0, 9(a0)
; CHECK-RV32C-NEXT: c.ntl.all
; CHECK-RV32C-NEXT: sb t6, 8(a0)
; CHECK-RV32C-NEXT: c.ntl.all
@@ -1163,112 +1163,112 @@ define void @test_nontemporal_store_v16i8(ptr %p, <16 x i8> %v) {
define void @test_nontemporal_store_v8i16(ptr %p, <8 x i16> %v) {
; CHECK-RV64-LABEL: test_nontemporal_store_v8i16:
; CHECK-RV64: # %bb.0:
-; CHECK-RV64-NEXT: lh a2, 0(a1)
-; CHECK-RV64-NEXT: lh a3, 8(a1)
-; CHECK-RV64-NEXT: lh a4, 16(a1)
-; CHECK-RV64-NEXT: lh a5, 24(a1)
-; CHECK-RV64-NEXT: lh a6, 56(a1)
-; CHECK-RV64-NEXT: lh a7, 48(a1)
-; CHECK-RV64-NEXT: lh t0, 40(a1)
-; CHECK-RV64-NEXT: lh a1, 32(a1)
+; CHECK-RV64-NEXT: lh a2, 32(a1)
+; CHECK-RV64-NEXT: lh a3, 40(a1)
+; CHECK-RV64-NEXT: lh a4, 48(a1)
+; CHECK-RV64-NEXT: lh a5, 56(a1)
+; CHECK-RV64-NEXT: lh a6, 0(a1)
+; CHECK-RV64-NEXT: lh a7, 8(a1)
+; CHECK-RV64-NEXT: lh t0, 16(a1)
+; CHECK-RV64-NEXT: lh a1, 24(a1)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sh a6, 14(a0)
+; CHECK-RV64-NEXT: sh a5, 14(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sh a7, 12(a0)
+; CHECK-RV64-NEXT: sh a4, 12(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sh t0, 10(a0)
+; CHECK-RV64-NEXT: sh a3, 10(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sh a1, 8(a0)
+; CHECK-RV64-NEXT: sh a2, 8(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sh a5, 6(a0)
+; CHECK-RV64-NEXT: sh a1, 6(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sh a4, 4(a0)
+; CHECK-RV64-NEXT: sh t0, 4(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sh a3, 2(a0)
+; CHECK-RV64-NEXT: sh a7, 2(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sh a2, 0(a0)
+; CHECK-RV64-NEXT: sh a6, 0(a0)
; CHECK-RV64-NEXT: ret
;
; CHECK-RV32-LABEL: test_nontemporal_store_v8i16:
; CHECK-RV32: # %bb.0:
-; CHECK-RV32-NEXT: lh a2, 0(a1)
-; CHECK-RV32-NEXT: lh a3, 4(a1)
-; CHECK-RV32-NEXT: lh a4, 8(a1)
-; CHECK-RV32-NEXT: lh a5, 12(a1)
-; CHECK-RV32-NEXT: lh a6, 28(a1)
-; CHECK-RV32-NEXT: lh a7, 24(a1)
-; CHECK-RV32-NEXT: lh t0, 20(a1)
-; CHECK-RV32-NEXT: lh a1, 16(a1)
+; CHECK-RV32-NEXT: lh a2, 16(a1)
+; CHECK-RV32-NEXT: lh a3, 20(a1)
+; CHECK-RV32-NEXT: lh a4, 24(a1)
+; CHECK-RV32-NEXT: lh a5, 28(a1)
+; CHECK-RV32-NEXT: lh a6, 0(a1)
+; CHECK-RV32-NEXT: lh a7, 4(a1)
+; CHECK-RV32-NEXT: lh t0, 8(a1)
+; CHECK-RV32-NEXT: lh a1, 12(a1)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sh a6, 14(a0)
+; CHECK-RV32-NEXT: sh a5, 14(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sh a7, 12(a0)
+; CHECK-RV32-NEXT: sh a4, 12(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sh t0, 10(a0)
+; CHECK-RV32-NEXT: sh a3, 10(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sh a1, 8(a0)
+; CHECK-RV32-NEXT: sh a2, 8(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sh a5, 6(a0)
+; CHECK-RV32-NEXT: sh a1, 6(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sh a4, 4(a0)
+; CHECK-RV32-NEXT: sh t0, 4(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sh a3, 2(a0)
+; CHECK-RV32-NEXT: sh a7, 2(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sh a2, 0(a0)
+; CHECK-RV32-NEXT: sh a6, 0(a0)
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64C-LABEL: test_nontemporal_store_v8i16:
; CHECK-RV64C: # %bb.0:
+; CHECK-RV64C-NEXT: lh a7, 32(a1)
+; CHECK-RV64C-NEXT: lh a3, 40(a1)
+; CHECK-RV64C-NEXT: lh a4, 48(a1)
+; CHECK-RV64C-NEXT: lh a5, 56(a1)
; CHECK-RV64C-NEXT: lh a6, 0(a1)
-; CHECK-RV64C-NEXT: lh a7, 8(a1)
-; CHECK-RV64C-NEXT: lh t0, 16(a1)
-; CHECK-RV64C-NEXT: lh a5, 24(a1)
-; CHECK-RV64C-NEXT: lh a2, 56(a1)
-; CHECK-RV64C-NEXT: lh a3, 48(a1)
-; CHECK-RV64C-NEXT: lh a4, 40(a1)
-; CHECK-RV64C-NEXT: lh a1, 32(a1)
+; CHECK-RV64C-NEXT: lh t0, 8(a1)
+; CHECK-RV64C-NEXT: lh a2, 16(a1)
+; CHECK-RV64C-NEXT: lh a1, 24(a1)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sh a2, 14(a0)
+; CHECK-RV64C-NEXT: sh a5, 14(a0)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sh a3, 12(a0)
+; CHECK-RV64C-NEXT: sh a4, 12(a0)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sh a4, 10(a0)
+; CHECK-RV64C-NEXT: sh a3, 10(a0)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sh a1, 8(a0)
+; CHECK-RV64C-NEXT: sh a7, 8(a0)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sh a5, 6(a0)
+; CHECK-RV64C-NEXT: sh a1, 6(a0)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sh t0, 4(a0)
+; CHECK-RV64C-NEXT: sh a2, 4(a0)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sh a7, 2(a0)
+; CHECK-RV64C-NEXT: sh t0, 2(a0)
; CHECK-RV64C-NEXT: c.ntl.all
; CHECK-RV64C-NEXT: sh a6, 0(a0)
; CHECK-RV64C-NEXT: ret
;
; CHECK-RV32C-LABEL: test_nontemporal_store_v8i16:
; CHECK-RV32C: # %bb.0:
+; CHECK-RV32C-NEXT: lh a7, 16(a1)
+; CHECK-RV32C-NEXT: lh a3, 20(a1)
+; CHECK-RV32C-NEXT: lh a4, 24(a1)
+; CHECK-RV32C-NEXT: lh a5, 28(a1)
; CHECK-RV32C-NEXT: lh a6, 0(a1)
-; CHECK-RV32C-NEXT: lh a7, 4(a1)
-; CHECK-RV32C-NEXT: lh t0, 8(a1)
-; CHECK-RV32C-NEXT: lh a5, 12(a1)
-; CHECK-RV32C-NEXT: lh a2, 28(a1)
-; CHECK-RV32C-NEXT: lh a3, 24(a1)
-; CHECK-RV32C-NEXT: lh a4, 20(a1)
-; CHECK-RV32C-NEXT: lh a1, 16(a1)
+; CHECK-RV32C-NEXT: lh t0, 4(a1)
+; CHECK-RV32C-NEXT: lh a2, 8(a1)
+; CHECK-RV32C-NEXT: lh a1, 12(a1)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sh a2, 14(a0)
+; CHECK-RV32C-NEXT: sh a5, 14(a0)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sh a3, 12(a0)
+; CHECK-RV32C-NEXT: sh a4, 12(a0)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sh a4, 10(a0)
+; CHECK-RV32C-NEXT: sh a3, 10(a0)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sh a1, 8(a0)
+; CHECK-RV32C-NEXT: sh a7, 8(a0)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sh a5, 6(a0)
+; CHECK-RV32C-NEXT: sh a1, 6(a0)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sh t0, 4(a0)
+; CHECK-RV32C-NEXT: sh a2, 4(a0)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sh a7, 2(a0)
+; CHECK-RV32C-NEXT: sh t0, 2(a0)
; CHECK-RV32C-NEXT: c.ntl.all
; CHECK-RV32C-NEXT: sh a6, 0(a0)
; CHECK-RV32C-NEXT: ret
@@ -2329,30 +2329,30 @@ define void @test_nontemporal_P1_store_v16i8(ptr %p, <16 x i8> %v) {
; CHECK-RV64-NEXT: lbu a7, 40(a1)
; CHECK-RV64-NEXT: lbu t0, 48(a1)
; CHECK-RV64-NEXT: lbu t1, 56(a1)
-; CHECK-RV64-NEXT: lbu t2, 64(a1)
-; CHECK-RV64-NEXT: lbu t3, 72(a1)
-; CHECK-RV64-NEXT: lbu t4, 80(a1)
-; CHECK-RV64-NEXT: lbu t5, 88(a1)
-; CHECK-RV64-NEXT: lbu t6, 120(a1)
-; CHECK-RV64-NEXT: lbu s0, 112(a1)
-; CHECK-RV64-NEXT: lbu s1, 104(a1)
-; CHECK-RV64-NEXT: lbu a1, 96(a1)
+; CHECK-RV64-NEXT: lbu t2, 96(a1)
+; CHECK-RV64-NEXT: lbu t3, 104(a1)
+; CHECK-RV64-NEXT: lbu t4, 112(a1)
+; CHECK-RV64-NEXT: lbu t5, 120(a1)
+; CHECK-RV64-NEXT: lbu t6, 64(a1)
+; CHECK-RV64-NEXT: lbu s0, 72(a1)
+; CHECK-RV64-NEXT: lbu s1, 80(a1)
+; CHECK-RV64-NEXT: lbu a1, 88(a1)
; CHECK-RV64-NEXT: ntl.p1
-; CHECK-RV64-NEXT: sb t6, 15(a0)
+; CHECK-RV64-NEXT: sb t5, 15(a0)
; CHECK-RV64-NEXT: ntl.p1
-; CHECK-RV64-NEXT: sb s0, 14(a0)
+; CHECK-RV64-NEXT: sb t4, 14(a0)
; CHECK-RV64-NEXT: ntl.p1
-; CHECK-RV64-NEXT: sb s1, 13(a0)
+; CHECK-RV64-NEXT: sb t3, 13(a0)
; CHECK-RV64-NEXT: ntl.p1
-; CHECK-RV64-NEXT: sb a1, 12(a0)
+; CHECK-RV64-NEXT: sb t2, 12(a0)
; CHECK-RV64-NEXT: ntl.p1
-; CHECK-RV64-NEXT: sb t5, 11(a0)
+; CHECK-RV64-NEXT: sb a1, 11(a0)
; CHECK-RV64-NEXT: ntl.p1
-; CHECK-RV64-NEXT: sb t4, 10(a0)
+; CHECK-RV64-NEXT: sb s1, 10(a0)
; CHECK-RV64-NEXT: ntl.p1
-; CHECK-RV64-NEXT: sb t3, 9(a0)
+; CHECK-RV64-NEXT: sb s0, 9(a0)
; CHECK-RV64-NEXT: ntl.p1
-; CHECK-RV64-NEXT: sb t2, 8(a0)
+; CHECK-RV64-NEXT: sb t6, 8(a0)
; CHECK-RV64-NEXT: ntl.p1
; CHECK-RV64-NEXT: sb t1, 7(a0)
; CHECK-RV64-NEXT: ntl.p1
@@ -2390,30 +2390,30 @@ define void @test_nontemporal_P1_store_v16i8(ptr %p, <16 x i8> %v) {
; CHECK-RV32-NEXT: lbu a7, 20(a1)
; CHECK-RV32-NEXT: lbu t0, 24(a1)
; CHECK-RV32-NEXT: lbu t1, 28(a1)
-; CHECK-RV32-NEXT: lbu t2, 32(a1)
-; CHECK-RV32-NEXT: lbu t3, 36(a1)
-; CHECK-RV32-NEXT: lbu t4, 40(a1)
-; CHECK-RV32-NEXT: lbu t5, 44(a1)
-; CHECK-RV32-NEXT: lbu t6, 60(a1)
-; CHECK-RV32-NEXT: lbu s0, 56(a1)
-; CHECK-RV32-NEXT: lbu s1, 52(a1)
-; CHECK-RV32-NEXT: lbu a1, 48(a1)
+; CHECK-RV32-NEXT: lbu t2, 48(a1)
+; CHECK-RV32-NEXT: lbu t3, 52(a1)
+; CHECK-RV32-NEXT: lbu t4, 56(a1)
+; CHECK-RV32-NEXT: lbu t5, 60(a1)
+; CHECK-RV32-NEXT: lbu t6, 32(a1)
+; CHECK-RV32-NEXT: lbu s0, 36(a1)
+; CHECK-RV32-NEXT: lbu s1, 40(a1)
+; CHECK-RV32-NEXT: lbu a1, 44(a1)
; CHECK-RV32-NEXT: ntl.p1
-; CHECK-RV32-NEXT: sb t6, 15(a0)
+; CHECK-RV32-NEXT: sb t5, 15(a0)
; CHECK-RV32-NEXT: ntl.p1
-; CHECK-RV32-NEXT: sb s0, 14(a0)
+; CHECK-RV32-NEXT: sb t4, 14(a0)
; CHECK-RV32-NEXT: ntl.p1
-; CHECK-RV32-NEXT: sb s1, 13(a0)
+; CHECK-RV32-NEXT: sb t3, 13(a0)
; CHECK-RV32-NEXT: ntl.p1
-; CHECK-RV32-NEXT: sb a1, 12(a0)
+; CHECK-RV32-NEXT: sb t2, 12(a0)
; CHECK-RV32-NEXT: ntl.p1
-; CHECK-RV32-NEXT: sb t5, 11(a0)
+; CHECK-RV32-NEXT: sb a1, 11(a0)
; CHECK-RV32-NEXT: ntl.p1
-; CHECK-RV32-NEXT: sb t4, 10(a0)
+; CHECK-RV32-NEXT: sb s1, 10(a0)
; CHECK-RV32-NEXT: ntl.p1
-; CHECK-RV32-NEXT: sb t3, 9(a0)
+; CHECK-RV32-NEXT: sb s0, 9(a0)
; CHECK-RV32-NEXT: ntl.p1
-; CHECK-RV32-NEXT: sb t2, 8(a0)
+; CHECK-RV32-NEXT: sb t6, 8(a0)
; CHECK-RV32-NEXT: ntl.p1
; CHECK-RV32-NEXT: sb t1, 7(a0)
; CHECK-RV32-NEXT: ntl.p1
@@ -2451,28 +2451,28 @@ define void @test_nontemporal_P1_store_v16i8(ptr %p, <16 x i8> %v) {
; CHECK-RV64C-NEXT: lbu t3, 40(a1)
; CHECK-RV64C-NEXT: lbu t4, 48(a1)
; CHECK-RV64C-NEXT: lbu t5, 56(a1)
+; CHECK-RV64C-NEXT: lbu a2, 96(a1)
+; CHECK-RV64C-NEXT: lbu a3, 104(a1)
+; CHECK-RV64C-NEXT: lbu a4, 112(a1)
+; CHECK-RV64C-NEXT: lbu a5, 120(a1)
; CHECK-RV64C-NEXT: lbu t6, 64(a1)
-; CHECK-RV64C-NEXT: lbu a3, 72(a1)
-; CHECK-RV64C-NEXT: lbu a4, 80(a1)
-; CHECK-RV64C-NEXT: lbu a5, 88(a1)
-; CHECK-RV64C-NEXT: lbu a2, 120(a1)
-; CHECK-RV64C-NEXT: lbu s0, 112(a1)
-; CHECK-RV64C-NEXT: lbu s1, 104(a1)
-; CHECK-RV64C-NEXT: lbu a1, 96(a1)
+; CHECK-RV64C-NEXT: lbu s0, 72(a1)
+; CHECK-RV64C-NEXT: lbu s1, 80(a1)
+; CHECK-RV64C-NEXT: lbu a1, 88(a1)
; CHECK-RV64C-NEXT: c.ntl.p1
-; CHECK-RV64C-NEXT: sb a2, 15(a0)
+; CHECK-RV64C-NEXT: sb a5, 15(a0)
; CHECK-RV64C-NEXT: c.ntl.p1
-; CHECK-RV64C-NEXT: sb s0, 14(a0)
+; CHECK-RV64C-NEXT: sb a4, 14(a0)
; CHECK-RV64C-NEXT: c.ntl.p1
-; CHECK-RV64C-NEXT: sb s1, 13(a0)
+; CHECK-RV64C-NEXT: sb a3, 13(a0)
; CHECK-RV64C-NEXT: c.ntl.p1
-; CHECK-RV64C-NEXT: sb a1, 12(a0)
+; CHECK-RV64C-NEXT: sb a2, 12(a0)
; CHECK-RV64C-NEXT: c.ntl.p1
-; CHECK-RV64C-NEXT: sb a5, 11(a0)
+; CHECK-RV64C-NEXT: sb a1, 11(a0)
; CHECK-RV64C-NEXT: c.ntl.p1
-; CHECK-RV64C-NEXT: sb a4, 10(a0)
+; CHECK-RV64C-NEXT: sb s1, 10(a0)
; CHECK-RV64C-NEXT: c.ntl.p1
-; CHECK-RV64C-NEXT: sb a3, 9(a0)
+; CHECK-RV64C-NEXT: sb s0, 9(a0)
; CHECK-RV64C-NEXT: c.ntl.p1
; CHECK-RV64C-NEXT: sb t6, 8(a0)
; CHECK-RV64C-NEXT: c.ntl.p1
@@ -2512,28 +2512,28 @@ define void @test_nontemporal_P1_store_v16i8(ptr %p, <16 x i8> %v) {
; CHECK-RV32C-NEXT: lbu t3, 20(a1)
; CHECK-RV32C-NEXT: lbu t4, 24(a1)
; CHECK-RV32C-NEXT: lbu t5, 28(a1)
+; CHECK-RV32C-NEXT: lbu a2, 48(a1)
+; CHECK-RV32C-NEXT: lbu a3, 52(a1)
+; CHECK-RV32C-NEXT: lbu a4, 56(a1)
+; CHECK-RV32C-NEXT: lbu a5, 60(a1)
; CHECK-RV32C-NEXT: lbu t6, 32(a1)
-; CHECK-RV32C-NEXT: lbu a3, 36(a1)
-; CHECK-RV32C-NEXT: lbu a4, 40(a1)
-; CHECK-RV32C-NEXT: lbu a5, 44(a1)
-; CHECK-RV32C-NEXT: lbu a2, 60(a1)
-; CHECK-RV32C-NEXT: lbu s0, 56(a1)
-; CHECK-RV32C-NEXT: lbu s1, 52(a1)
-; CHECK-RV32C-NEXT: lbu a1, 48(a1)
+; CHECK-RV32C-NEXT: lbu s0, 36(a1)
+; CHECK-RV32C-NEXT: lbu s1, 40(a1)
+; CHECK-RV32C-NEXT: lbu a1, 44(a1)
; CHECK-RV32C-NEXT: c.ntl.p1
-; CHECK-RV32C-NEXT: sb a2, 15(a0)
+; CHECK-RV32C-NEXT: sb a5, 15(a0)
; CHECK-RV32C-NEXT: c.ntl.p1
-; CHECK-RV32C-NEXT: sb s0, 14(a0)
+; CHECK-RV32C-NEXT: sb a4, 14(a0)
; CHECK-RV32C-NEXT: c.ntl.p1
-; CHECK-RV32C-NEXT: sb s1, 13(a0)
+; CHECK-RV32C-NEXT: sb a3, 13(a0)
; CHECK-RV32C-NEXT: c.ntl.p1
-; CHECK-RV32C-NEXT: sb a1, 12(a0)
+; CHECK-RV32C-NEXT: sb a2, 12(a0)
; CHECK-RV32C-NEXT: c.ntl.p1
-; CHECK-RV32C-NEXT: sb a5, 11(a0)
+; CHECK-RV32C-NEXT: sb a1, 11(a0)
; CHECK-RV32C-NEXT: c.ntl.p1
-; CHECK-RV32C-NEXT: sb a4, 10(a0)
+; CHECK-RV32C-NEXT: sb s1, 10(a0)
; CHECK-RV32C-NEXT: c.ntl.p1
-; CHECK-RV32C-NEXT: sb a3, 9(a0)
+; CHECK-RV32C-NEXT: sb s0, 9(a0)
; CHECK-RV32C-NEXT: c.ntl.p1
; CHECK-RV32C-NEXT: sb t6, 8(a0)
; CHECK-RV32C-NEXT: c.ntl.p1
@@ -2577,112 +2577,112 @@ define void @test_nontemporal_P1_store_v16i8(ptr %p, <16 x i8> %v) {
define void @test_nontemporal_P1_store_v8i16(ptr %p, <8 x i16> %v) {
; CHECK-RV64-LABEL: test_nontemporal_P1_store_v8i16:
; CHECK-RV64: # %bb.0:
-; CHECK-RV64-NEXT: lh a2, 0(a1)
-; CHECK-RV64-NEXT: lh a3, 8(a1)
-; CHECK-RV64-NEXT: lh a4, 16(a1)
-; CHECK-RV64-NEXT: lh a5, 24(a1)
-; CHECK-RV64-NEXT: lh a6, 56(a1)
-; CHECK-RV64-NEXT: lh a7, 48(a1)
-; CHECK-RV64-NEXT: lh t0, 40(a1)
-; CHECK-RV64-NEXT: lh a1, 32(a1)
+; CHECK-RV64-NEXT: lh a2, 32(a1)
+; CHECK-RV64-NEXT: lh a3, 40(a1)
+; CHECK-RV64-NEXT: lh a4, 48(a1)
+; CHECK-RV64-NEXT: lh a5, 56(a1)
+; CHECK-RV64-NEXT: lh a6, 0(a1)
+; CHECK-RV64-NEXT: lh a7, 8(a1)
+; CHECK-RV64-NEXT: lh t0, 16(a1)
+; CHECK-RV64-NEXT: lh a1, 24(a1)
; CHECK-RV64-NEXT: ntl.p1
-; CHECK-RV64-NEXT: sh a6, 14(a0)
+; CHECK-RV64-NEXT: sh a5, 14(a0)
; CHECK-RV64-NEXT: ntl.p1
-; CHECK-RV64-NEXT: sh a7, 12(a0)
+; CHECK-RV64-NEXT: sh a4, 12(a0)
; CHECK-RV64-NEXT: ntl.p1
-; CHECK-RV64-NEXT: sh t0, 10(a0)
+; CHECK-RV64-NEXT: sh a3, 10(a0)
; CHECK-RV64-NEXT: ntl.p1
-; CHECK-RV64-NEXT: sh a1, 8(a0)
+; CHECK-RV64-NEXT: sh a2, 8(a0)
; CHECK-RV64-NEXT: ntl.p1
-; CHECK-RV64-NEXT: sh a5, 6(a0)
+; CHECK-RV64-NEXT: sh a1, 6(a0)
; CHECK-RV64-NEXT: ntl.p1
-; CHECK-RV64-NEXT: sh a4, 4(a0)
+; CHECK-RV64-NEXT: sh t0, 4(a0)
; CHECK-RV64-NEXT: ntl.p1
-; CHECK-RV64-NEXT: sh a3, 2(a0)
+; CHECK-RV64-NEXT: sh a7, 2(a0)
; CHECK-RV64-NEXT: ntl.p1
-; CHECK-RV64-NEXT: sh a2, 0(a0)
+; CHECK-RV64-NEXT: sh a6, 0(a0)
; CHECK-RV64-NEXT: ret
;
; CHECK-RV32-LABEL: test_nontemporal_P1_store_v8i16:
; CHECK-RV32: # %bb.0:
-; CHECK-RV32-NEXT: lh a2, 0(a1)
-; CHECK-RV32-NEXT: lh a3, 4(a1)
-; CHECK-RV32-NEXT: lh a4, 8(a1)
-; CHECK-RV32-NEXT: lh a5, 12(a1)
-; CHECK-RV32-NEXT: lh a6, 28(a1)
-; CHECK-RV32-NEXT: lh a7, 24(a1)
-; CHECK-RV32-NEXT: lh t0, 20(a1)
-; CHECK-RV32-NEXT: lh a1, 16(a1)
+; CHECK-RV32-NEXT: lh a2, 16(a1)
+; CHECK-RV32-NEXT: lh a3, 20(a1)
+; CHECK-RV32-NEXT: lh a4, 24(a1)
+; CHECK-RV32-NEXT: lh a5, 28(a1)
+; CHECK-RV32-NEXT: lh a6, 0(a1)
+; CHECK-RV32-NEXT: lh a7, 4(a1)
+; CHECK-RV32-NEXT: lh t0, 8(a1)
+; CHECK-RV32-NEXT: lh a1, 12(a1)
; CHECK-RV32-NEXT: ntl.p1
-; CHECK-RV32-NEXT: sh a6, 14(a0)
+; CHECK-RV32-NEXT: sh a5, 14(a0)
; CHECK-RV32-NEXT: ntl.p1
-; CHECK-RV32-NEXT: sh a7, 12(a0)
+; CHECK-RV32-NEXT: sh a4, 12(a0)
; CHECK-RV32-NEXT: ntl.p1
-; CHECK-RV32-NEXT: sh t0, 10(a0)
+; CHECK-RV32-NEXT: sh a3, 10(a0)
; CHECK-RV32-NEXT: ntl.p1
-; CHECK-RV32-NEXT: sh a1, 8(a0)
+; CHECK-RV32-NEXT: sh a2, 8(a0)
; CHECK-RV32-NEXT: ntl.p1
-; CHECK-RV32-NEXT: sh a5, 6(a0)
+; CHECK-RV32-NEXT: sh a1, 6(a0)
; CHECK-RV32-NEXT: ntl.p1
-; CHECK-RV32-NEXT: sh a4, 4(a0)
+; CHECK-RV32-NEXT: sh t0, 4(a0)
; CHECK-RV32-NEXT: ntl.p1
-; CHECK-RV32-NEXT: sh a3, 2(a0)
+; CHECK-RV32-NEXT: sh a7, 2(a0)
; CHECK-RV32-NEXT: ntl.p1
-; CHECK-RV32-NEXT: sh a2, 0(a0)
+; CHECK-RV32-NEXT: sh a6, 0(a0)
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64C-LABEL: test_nontemporal_P1_store_v8i16:
; CHECK-RV64C: # %bb.0:
+; CHECK-RV64C-NEXT: lh a7, 32(a1)
+; CHECK-RV64C-NEXT: lh a3, 40(a1)
+; CHECK-RV64C-NEXT: lh a4, 48(a1)
+; CHECK-RV64C-NEXT: lh a5, 56(a1)
; CHECK-RV64C-NEXT: lh a6, 0(a1)
-; CHECK-RV64C-NEXT: lh a7, 8(a1)
-; CHECK-RV64C-NEXT: lh t0, 16(a1)
-; CHECK-RV64C-NEXT: lh a5, 24(a1)
-; CHECK-RV64C-NEXT: lh a2, 56(a1)
-; CHECK-RV64C-NEXT: lh a3, 48(a1)
-; CHECK-RV64C-NEXT: lh a4, 40(a1)
-; CHECK-RV64C-NEXT: lh a1, 32(a1)
+; CHECK-RV64C-NEXT: lh t0, 8(a1)
+; CHECK-RV64C-NEXT: lh a2, 16(a1)
+; CHECK-RV64C-NEXT: lh a1, 24(a1)
; CHECK-RV64C-NEXT: c.ntl.p1
-; CHECK-RV64C-NEXT: sh a2, 14(a0)
+; CHECK-RV64C-NEXT: sh a5, 14(a0)
; CHECK-RV64C-NEXT: c.ntl.p1
-; CHECK-RV64C-NEXT: sh a3, 12(a0)
+; CHECK-RV64C-NEXT: sh a4, 12(a0)
; CHECK-RV64C-NEXT: c.ntl.p1
-; CHECK-RV64C-NEXT: sh a4, 10(a0)
+; CHECK-RV64C-NEXT: sh a3, 10(a0)
; CHECK-RV64C-NEXT: c.ntl.p1
-; CHECK-RV64C-NEXT: sh a1, 8(a0)
+; CHECK-RV64C-NEXT: sh a7, 8(a0)
; CHECK-RV64C-NEXT: c.ntl.p1
-; CHECK-RV64C-NEXT: sh a5, 6(a0)
+; CHECK-RV64C-NEXT: sh a1, 6(a0)
; CHECK-RV64C-NEXT: c.ntl.p1
-; CHECK-RV64C-NEXT: sh t0, 4(a0)
+; CHECK-RV64C-NEXT: sh a2, 4(a0)
; CHECK-RV64C-NEXT: c.ntl.p1
-; CHECK-RV64C-NEXT: sh a7, 2(a0)
+; CHECK-RV64C-NEXT: sh t0, 2(a0)
; CHECK-RV64C-NEXT: c.ntl.p1
; CHECK-RV64C-NEXT: sh a6, 0(a0)
; CHECK-RV64C-NEXT: ret
;
; CHECK-RV32C-LABEL: test_nontemporal_P1_store_v8i16:
; CHECK-RV32C: # %bb.0:
+; CHECK-RV32C-NEXT: lh a7, 16(a1)
+; CHECK-RV32C-NEXT: lh a3, 20(a1)
+; CHECK-RV32C-NEXT: lh a4, 24(a1)
+; CHECK-RV32C-NEXT: lh a5, 28(a1)
; CHECK-RV32C-NEXT: lh a6, 0(a1)
-; CHECK-RV32C-NEXT: lh a7, 4(a1)
-; CHECK-RV32C-NEXT: lh t0, 8(a1)
-; CHECK-RV32C-NEXT: lh a5, 12(a1)
-; CHECK-RV32C-NEXT: lh a2, 28(a1)
-; CHECK-RV32C-NEXT: lh a3, 24(a1)
-; CHECK-RV32C-NEXT: lh a4, 20(a1)
-; CHECK-RV32C-NEXT: lh a1, 16(a1)
+; CHECK-RV32C-NEXT: lh t0, 4(a1)
+; CHECK-RV32C-NEXT: lh a2, 8(a1)
+; CHECK-RV32C-NEXT: lh a1, 12(a1)
; CHECK-RV32C-NEXT: c.ntl.p1
-; CHECK-RV32C-NEXT: sh a2, 14(a0)
+; CHECK-RV32C-NEXT: sh a5, 14(a0)
; CHECK-RV32C-NEXT: c.ntl.p1
-; CHECK-RV32C-NEXT: sh a3, 12(a0)
+; CHECK-RV32C-NEXT: sh a4, 12(a0)
; CHECK-RV32C-NEXT: c.ntl.p1
-; CHECK-RV32C-NEXT: sh a4, 10(a0)
+; CHECK-RV32C-NEXT: sh a3, 10(a0)
; CHECK-RV32C-NEXT: c.ntl.p1
-; CHECK-RV32C-NEXT: sh a1, 8(a0)
+; CHECK-RV32C-NEXT: sh a7, 8(a0)
; CHECK-RV32C-NEXT: c.ntl.p1
-; CHECK-RV32C-NEXT: sh a5, 6(a0)
+; CHECK-RV32C-NEXT: sh a1, 6(a0)
; CHECK-RV32C-NEXT: c.ntl.p1
-; CHECK-RV32C-NEXT: sh t0, 4(a0)
+; CHECK-RV32C-NEXT: sh a2, 4(a0)
; CHECK-RV32C-NEXT: c.ntl.p1
-; CHECK-RV32C-NEXT: sh a7, 2(a0)
+; CHECK-RV32C-NEXT: sh t0, 2(a0)
; CHECK-RV32C-NEXT: c.ntl.p1
; CHECK-RV32C-NEXT: sh a6, 0(a0)
; CHECK-RV32C-NEXT: ret
@@ -3743,30 +3743,30 @@ define void @test_nontemporal_PALL_store_v16i8(ptr %p, <16 x i8> %v) {
; CHECK-RV64-NEXT: lbu a7, 40(a1)
; CHECK-RV64-NEXT: lbu t0, 48(a1)
; CHECK-RV64-NEXT: lbu t1, 56(a1)
-; CHECK-RV64-NEXT: lbu t2, 64(a1)
-; CHECK-RV64-NEXT: lbu t3, 72(a1)
-; CHECK-RV64-NEXT: lbu t4, 80(a1)
-; CHECK-RV64-NEXT: lbu t5, 88(a1)
-; CHECK-RV64-NEXT: lbu t6, 120(a1)
-; CHECK-RV64-NEXT: lbu s0, 112(a1)
-; CHECK-RV64-NEXT: lbu s1, 104(a1)
-; CHECK-RV64-NEXT: lbu a1, 96(a1)
+; CHECK-RV64-NEXT: lbu t2, 96(a1)
+; CHECK-RV64-NEXT: lbu t3, 104(a1)
+; CHECK-RV64-NEXT: lbu t4, 112(a1)
+; CHECK-RV64-NEXT: lbu t5, 120(a1)
+; CHECK-RV64-NEXT: lbu t6, 64(a1)
+; CHECK-RV64-NEXT: lbu s0, 72(a1)
+; CHECK-RV64-NEXT: lbu s1, 80(a1)
+; CHECK-RV64-NEXT: lbu a1, 88(a1)
; CHECK-RV64-NEXT: ntl.pall
-; CHECK-RV64-NEXT: sb t6, 15(a0)
+; CHECK-RV64-NEXT: sb t5, 15(a0)
; CHECK-RV64-NEXT: ntl.pall
-; CHECK-RV64-NEXT: sb s0, 14(a0)
+; CHECK-RV64-NEXT: sb t4, 14(a0)
; CHECK-RV64-NEXT: ntl.pall
-; CHECK-RV64-NEXT: sb s1, 13(a0)
+; CHECK-RV64-NEXT: sb t3, 13(a0)
; CHECK-RV64-NEXT: ntl.pall
-; CHECK-RV64-NEXT: sb a1, 12(a0)
+; CHECK-RV64-NEXT: sb t2, 12(a0)
; CHECK-RV64-NEXT: ntl.pall
-; CHECK-RV64-NEXT: sb t5, 11(a0)
+; CHECK-RV64-NEXT: sb a1, 11(a0)
; CHECK-RV64-NEXT: ntl.pall
-; CHECK-RV64-NEXT: sb t4, 10(a0)
+; CHECK-RV64-NEXT: sb s1, 10(a0)
; CHECK-RV64-NEXT: ntl.pall
-; CHECK-RV64-NEXT: sb t3, 9(a0)
+; CHECK-RV64-NEXT: sb s0, 9(a0)
; CHECK-RV64-NEXT: ntl.pall
-; CHECK-RV64-NEXT: sb t2, 8(a0)
+; CHECK-RV64-NEXT: sb t6, 8(a0)
; CHECK-RV64-NEXT: ntl.pall
; CHECK-RV64-NEXT: sb t1, 7(a0)
; CHECK-RV64-NEXT: ntl.pall
@@ -3804,30 +3804,30 @@ define void @test_nontemporal_PALL_store_v16i8(ptr %p, <16 x i8> %v) {
; CHECK-RV32-NEXT: lbu a7, 20(a1)
; CHECK-RV32-NEXT: lbu t0, 24(a1)
; CHECK-RV32-NEXT: lbu t1, 28(a1)
-; CHECK-RV32-NEXT: lbu t2, 32(a1)
-; CHECK-RV32-NEXT: lbu t3, 36(a1)
-; CHECK-RV32-NEXT: lbu t4, 40(a1)
-; CHECK-RV32-NEXT: lbu t5, 44(a1)
-; CHECK-RV32-NEXT: lbu t6, 60(a1)
-; CHECK-RV32-NEXT: lbu s0, 56(a1)
-; CHECK-RV32-NEXT: lbu s1, 52(a1)
-; CHECK-RV32-NEXT: lbu a1, 48(a1)
+; CHECK-RV32-NEXT: lbu t2, 48(a1)
+; CHECK-RV32-NEXT: lbu t3, 52(a1)
+; CHECK-RV32-NEXT: lbu t4, 56(a1)
+; CHECK-RV32-NEXT: lbu t5, 60(a1)
+; CHECK-RV32-NEXT: lbu t6, 32(a1)
+; CHECK-RV32-NEXT: lbu s0, 36(a1)
+; CHECK-RV32-NEXT: lbu s1, 40(a1)
+; CHECK-RV32-NEXT: lbu a1, 44(a1)
; CHECK-RV32-NEXT: ntl.pall
-; CHECK-RV32-NEXT: sb t6, 15(a0)
+; CHECK-RV32-NEXT: sb t5, 15(a0)
; CHECK-RV32-NEXT: ntl.pall
-; CHECK-RV32-NEXT: sb s0, 14(a0)
+; CHECK-RV32-NEXT: sb t4, 14(a0)
; CHECK-RV32-NEXT: ntl.pall
-; CHECK-RV32-NEXT: sb s1, 13(a0)
+; CHECK-RV32-NEXT: sb t3, 13(a0)
; CHECK-RV32-NEXT: ntl.pall
-; CHECK-RV32-NEXT: sb a1, 12(a0)
+; CHECK-RV32-NEXT: sb t2, 12(a0)
; CHECK-RV32-NEXT: ntl.pall
-; CHECK-RV32-NEXT: sb t5, 11(a0)
+; CHECK-RV32-NEXT: sb a1, 11(a0)
; CHECK-RV32-NEXT: ntl.pall
-; CHECK-RV32-NEXT: sb t4, 10(a0)
+; CHECK-RV32-NEXT: sb s1, 10(a0)
; CHECK-RV32-NEXT: ntl.pall
-; CHECK-RV32-NEXT: sb t3, 9(a0)
+; CHECK-RV32-NEXT: sb s0, 9(a0)
; CHECK-RV32-NEXT: ntl.pall
-; CHECK-RV32-NEXT: sb t2, 8(a0)
+; CHECK-RV32-NEXT: sb t6, 8(a0)
; CHECK-RV32-NEXT: ntl.pall
; CHECK-RV32-NEXT: sb t1, 7(a0)
; CHECK-RV32-NEXT: ntl.pall
@@ -3865,28 +3865,28 @@ define void @test_nontemporal_PALL_store_v16i8(ptr %p, <16 x i8> %v) {
; CHECK-RV64C-NEXT: lbu t3, 40(a1)
; CHECK-RV64C-NEXT: lbu t4, 48(a1)
; CHECK-RV64C-NEXT: lbu t5, 56(a1)
+; CHECK-RV64C-NEXT: lbu a2, 96(a1)
+; CHECK-RV64C-NEXT: lbu a3, 104(a1)
+; CHECK-RV64C-NEXT: lbu a4, 112(a1)
+; CHECK-RV64C-NEXT: lbu a5, 120(a1)
; CHECK-RV64C-NEXT: lbu t6, 64(a1)
-; CHECK-RV64C-NEXT: lbu a3, 72(a1)
-; CHECK-RV64C-NEXT: lbu a4, 80(a1)
-; CHECK-RV64C-NEXT: lbu a5, 88(a1)
-; CHECK-RV64C-NEXT: lbu a2, 120(a1)
-; CHECK-RV64C-NEXT: lbu s0, 112(a1)
-; CHECK-RV64C-NEXT: lbu s1, 104(a1)
-; CHECK-RV64C-NEXT: lbu a1, 96(a1)
+; CHECK-RV64C-NEXT: lbu s0, 72(a1)
+; CHECK-RV64C-NEXT: lbu s1, 80(a1)
+; CHECK-RV64C-NEXT: lbu a1, 88(a1)
; CHECK-RV64C-NEXT: c.ntl.pall
-; CHECK-RV64C-NEXT: sb a2, 15(a0)
+; CHECK-RV64C-NEXT: sb a5, 15(a0)
; CHECK-RV64C-NEXT: c.ntl.pall
-; CHECK-RV64C-NEXT: sb s0, 14(a0)
+; CHECK-RV64C-NEXT: sb a4, 14(a0)
; CHECK-RV64C-NEXT: c.ntl.pall
-; CHECK-RV64C-NEXT: sb s1, 13(a0)
+; CHECK-RV64C-NEXT: sb a3, 13(a0)
; CHECK-RV64C-NEXT: c.ntl.pall
-; CHECK-RV64C-NEXT: sb a1, 12(a0)
+; CHECK-RV64C-NEXT: sb a2, 12(a0)
; CHECK-RV64C-NEXT: c.ntl.pall
-; CHECK-RV64C-NEXT: sb a5, 11(a0)
+; CHECK-RV64C-NEXT: sb a1, 11(a0)
; CHECK-RV64C-NEXT: c.ntl.pall
-; CHECK-RV64C-NEXT: sb a4, 10(a0)
+; CHECK-RV64C-NEXT: sb s1, 10(a0)
; CHECK-RV64C-NEXT: c.ntl.pall
-; CHECK-RV64C-NEXT: sb a3, 9(a0)
+; CHECK-RV64C-NEXT: sb s0, 9(a0)
; CHECK-RV64C-NEXT: c.ntl.pall
; CHECK-RV64C-NEXT: sb t6, 8(a0)
; CHECK-RV64C-NEXT: c.ntl.pall
@@ -3926,28 +3926,28 @@ define void @test_nontemporal_PALL_store_v16i8(ptr %p, <16 x i8> %v) {
; CHECK-RV32C-NEXT: lbu t3, 20(a1)
; CHECK-RV32C-NEXT: lbu t4, 24(a1)
; CHECK-RV32C-NEXT: lbu t5, 28(a1)
+; CHECK-RV32C-NEXT: lbu a2, 48(a1)
+; CHECK-RV32C-NEXT: lbu a3, 52(a1)
+; CHECK-RV32C-NEXT: lbu a4, 56(a1)
+; CHECK-RV32C-NEXT: lbu a5, 60(a1)
; CHECK-RV32C-NEXT: lbu t6, 32(a1)
-; CHECK-RV32C-NEXT: lbu a3, 36(a1)
-; CHECK-RV32C-NEXT: lbu a4, 40(a1)
-; CHECK-RV32C-NEXT: lbu a5, 44(a1)
-; CHECK-RV32C-NEXT: lbu a2, 60(a1)
-; CHECK-RV32C-NEXT: lbu s0, 56(a1)
-; CHECK-RV32C-NEXT: lbu s1, 52(a1)
-; CHECK-RV32C-NEXT: lbu a1, 48(a1)
+; CHECK-RV32C-NEXT: lbu s0, 36(a1)
+; CHECK-RV32C-NEXT: lbu s1, 40(a1)
+; CHECK-RV32C-NEXT: lbu a1, 44(a1)
; CHECK-RV32C-NEXT: c.ntl.pall
-; CHECK-RV32C-NEXT: sb a2, 15(a0)
+; CHECK-RV32C-NEXT: sb a5, 15(a0)
; CHECK-RV32C-NEXT: c.ntl.pall
-; CHECK-RV32C-NEXT: sb s0, 14(a0)
+; CHECK-RV32C-NEXT: sb a4, 14(a0)
; CHECK-RV32C-NEXT: c.ntl.pall
-; CHECK-RV32C-NEXT: sb s1, 13(a0)
+; CHECK-RV32C-NEXT: sb a3, 13(a0)
; CHECK-RV32C-NEXT: c.ntl.pall
-; CHECK-RV32C-NEXT: sb a1, 12(a0)
+; CHECK-RV32C-NEXT: sb a2, 12(a0)
; CHECK-RV32C-NEXT: c.ntl.pall
-; CHECK-RV32C-NEXT: sb a5, 11(a0)
+; CHECK-RV32C-NEXT: sb a1, 11(a0)
; CHECK-RV32C-NEXT: c.ntl.pall
-; CHECK-RV32C-NEXT: sb a4, 10(a0)
+; CHECK-RV32C-NEXT: sb s1, 10(a0)
; CHECK-RV32C-NEXT: c.ntl.pall
-; CHECK-RV32C-NEXT: sb a3, 9(a0)
+; CHECK-RV32C-NEXT: sb s0, 9(a0)
; CHECK-RV32C-NEXT: c.ntl.pall
; CHECK-RV32C-NEXT: sb t6, 8(a0)
; CHECK-RV32C-NEXT: c.ntl.pall
@@ -3991,112 +3991,112 @@ define void @test_nontemporal_PALL_store_v16i8(ptr %p, <16 x i8> %v) {
define void @test_nontemporal_PALL_store_v8i16(ptr %p, <8 x i16> %v) {
; CHECK-RV64-LABEL: test_nontemporal_PALL_store_v8i16:
; CHECK-RV64: # %bb.0:
-; CHECK-RV64-NEXT: lh a2, 0(a1)
-; CHECK-RV64-NEXT: lh a3, 8(a1)
-; CHECK-RV64-NEXT: lh a4, 16(a1)
-; CHECK-RV64-NEXT: lh a5, 24(a1)
-; CHECK-RV64-NEXT: lh a6, 56(a1)
-; CHECK-RV64-NEXT: lh a7, 48(a1)
-; CHECK-RV64-NEXT: lh t0, 40(a1)
-; CHECK-RV64-NEXT: lh a1, 32(a1)
+; CHECK-RV64-NEXT: lh a2, 32(a1)
+; CHECK-RV64-NEXT: lh a3, 40(a1)
+; CHECK-RV64-NEXT: lh a4, 48(a1)
+; CHECK-RV64-NEXT: lh a5, 56(a1)
+; CHECK-RV64-NEXT: lh a6, 0(a1)
+; CHECK-RV64-NEXT: lh a7, 8(a1)
+; CHECK-RV64-NEXT: lh t0, 16(a1)
+; CHECK-RV64-NEXT: lh a1, 24(a1)
; CHECK-RV64-NEXT: ntl.pall
-; CHECK-RV64-NEXT: sh a6, 14(a0)
+; CHECK-RV64-NEXT: sh a5, 14(a0)
; CHECK-RV64-NEXT: ntl.pall
-; CHECK-RV64-NEXT: sh a7, 12(a0)
+; CHECK-RV64-NEXT: sh a4, 12(a0)
; CHECK-RV64-NEXT: ntl.pall
-; CHECK-RV64-NEXT: sh t0, 10(a0)
+; CHECK-RV64-NEXT: sh a3, 10(a0)
; CHECK-RV64-NEXT: ntl.pall
-; CHECK-RV64-NEXT: sh a1, 8(a0)
+; CHECK-RV64-NEXT: sh a2, 8(a0)
; CHECK-RV64-NEXT: ntl.pall
-; CHECK-RV64-NEXT: sh a5, 6(a0)
+; CHECK-RV64-NEXT: sh a1, 6(a0)
; CHECK-RV64-NEXT: ntl.pall
-; CHECK-RV64-NEXT: sh a4, 4(a0)
+; CHECK-RV64-NEXT: sh t0, 4(a0)
; CHECK-RV64-NEXT: ntl.pall
-; CHECK-RV64-NEXT: sh a3, 2(a0)
+; CHECK-RV64-NEXT: sh a7, 2(a0)
; CHECK-RV64-NEXT: ntl.pall
-; CHECK-RV64-NEXT: sh a2, 0(a0)
+; CHECK-RV64-NEXT: sh a6, 0(a0)
; CHECK-RV64-NEXT: ret
;
; CHECK-RV32-LABEL: test_nontemporal_PALL_store_v8i16:
; CHECK-RV32: # %bb.0:
-; CHECK-RV32-NEXT: lh a2, 0(a1)
-; CHECK-RV32-NEXT: lh a3, 4(a1)
-; CHECK-RV32-NEXT: lh a4, 8(a1)
-; CHECK-RV32-NEXT: lh a5, 12(a1)
-; CHECK-RV32-NEXT: lh a6, 28(a1)
-; CHECK-RV32-NEXT: lh a7, 24(a1)
-; CHECK-RV32-NEXT: lh t0, 20(a1)
-; CHECK-RV32-NEXT: lh a1, 16(a1)
+; CHECK-RV32-NEXT: lh a2, 16(a1)
+; CHECK-RV32-NEXT: lh a3, 20(a1)
+; CHECK-RV32-NEXT: lh a4, 24(a1)
+; CHECK-RV32-NEXT: lh a5, 28(a1)
+; CHECK-RV32-NEXT: lh a6, 0(a1)
+; CHECK-RV32-NEXT: lh a7, 4(a1)
+; CHECK-RV32-NEXT: lh t0, 8(a1)
+; CHECK-RV32-NEXT: lh a1, 12(a1)
; CHECK-RV32-NEXT: ntl.pall
-; CHECK-RV32-NEXT: sh a6, 14(a0)
+; CHECK-RV32-NEXT: sh a5, 14(a0)
; CHECK-RV32-NEXT: ntl.pall
-; CHECK-RV32-NEXT: sh a7, 12(a0)
+; CHECK-RV32-NEXT: sh a4, 12(a0)
; CHECK-RV32-NEXT: ntl.pall
-; CHECK-RV32-NEXT: sh t0, 10(a0)
+; CHECK-RV32-NEXT: sh a3, 10(a0)
; CHECK-RV32-NEXT: ntl.pall
-; CHECK-RV32-NEXT: sh a1, 8(a0)
+; CHECK-RV32-NEXT: sh a2, 8(a0)
; CHECK-RV32-NEXT: ntl.pall
-; CHECK-RV32-NEXT: sh a5, 6(a0)
+; CHECK-RV32-NEXT: sh a1, 6(a0)
; CHECK-RV32-NEXT: ntl.pall
-; CHECK-RV32-NEXT: sh a4, 4(a0)
+; CHECK-RV32-NEXT: sh t0, 4(a0)
; CHECK-RV32-NEXT: ntl.pall
-; CHECK-RV32-NEXT: sh a3, 2(a0)
+; CHECK-RV32-NEXT: sh a7, 2(a0)
; CHECK-RV32-NEXT: ntl.pall
-; CHECK-RV32-NEXT: sh a2, 0(a0)
+; CHECK-RV32-NEXT: sh a6, 0(a0)
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64C-LABEL: test_nontemporal_PALL_store_v8i16:
; CHECK-RV64C: # %bb.0:
+; CHECK-RV64C-NEXT: lh a7, 32(a1)
+; CHECK-RV64C-NEXT: lh a3, 40(a1)
+; CHECK-RV64C-NEXT: lh a4, 48(a1)
+; CHECK-RV64C-NEXT: lh a5, 56(a1)
; CHECK-RV64C-NEXT: lh a6, 0(a1)
-; CHECK-RV64C-NEXT: lh a7, 8(a1)
-; CHECK-RV64C-NEXT: lh t0, 16(a1)
-; CHECK-RV64C-NEXT: lh a5, 24(a1)
-; CHECK-RV64C-NEXT: lh a2, 56(a1)
-; CHECK-RV64C-NEXT: lh a3, 48(a1)
-; CHECK-RV64C-NEXT: lh a4, 40(a1)
-; CHECK-RV64C-NEXT: lh a1, 32(a1)
+; CHECK-RV64C-NEXT: lh t0, 8(a1)
+; CHECK-RV64C-NEXT: lh a2, 16(a1)
+; CHECK-RV64C-NEXT: lh a1, 24(a1)
; CHECK-RV64C-NEXT: c.ntl.pall
-; CHECK-RV64C-NEXT: sh a2, 14(a0)
+; CHECK-RV64C-NEXT: sh a5, 14(a0)
; CHECK-RV64C-NEXT: c.ntl.pall
-; CHECK-RV64C-NEXT: sh a3, 12(a0)
+; CHECK-RV64C-NEXT: sh a4, 12(a0)
; CHECK-RV64C-NEXT: c.ntl.pall
-; CHECK-RV64C-NEXT: sh a4, 10(a0)
+; CHECK-RV64C-NEXT: sh a3, 10(a0)
; CHECK-RV64C-NEXT: c.ntl.pall
-; CHECK-RV64C-NEXT: sh a1, 8(a0)
+; CHECK-RV64C-NEXT: sh a7, 8(a0)
; CHECK-RV64C-NEXT: c.ntl.pall
-; CHECK-RV64C-NEXT: sh a5, 6(a0)
+; CHECK-RV64C-NEXT: sh a1, 6(a0)
; CHECK-RV64C-NEXT: c.ntl.pall
-; CHECK-RV64C-NEXT: sh t0, 4(a0)
+; CHECK-RV64C-NEXT: sh a2, 4(a0)
; CHECK-RV64C-NEXT: c.ntl.pall
-; CHECK-RV64C-NEXT: sh a7, 2(a0)
+; CHECK-RV64C-NEXT: sh t0, 2(a0)
; CHECK-RV64C-NEXT: c.ntl.pall
; CHECK-RV64C-NEXT: sh a6, 0(a0)
; CHECK-RV64C-NEXT: ret
;
; CHECK-RV32C-LABEL: test_nontemporal_PALL_store_v8i16:
; CHECK-RV32C: # %bb.0:
+; CHECK-RV32C-NEXT: lh a7, 16(a1)
+; CHECK-RV32C-NEXT: lh a3, 20(a1)
+; CHECK-RV32C-NEXT: lh a4, 24(a1)
+; CHECK-RV32C-NEXT: lh a5, 28(a1)
; CHECK-RV32C-NEXT: lh a6, 0(a1)
-; CHECK-RV32C-NEXT: lh a7, 4(a1)
-; CHECK-RV32C-NEXT: lh t0, 8(a1)
-; CHECK-RV32C-NEXT: lh a5, 12(a1)
-; CHECK-RV32C-NEXT: lh a2, 28(a1)
-; CHECK-RV32C-NEXT: lh a3, 24(a1)
-; CHECK-RV32C-NEXT: lh a4, 20(a1)
-; CHECK-RV32C-NEXT: lh a1, 16(a1)
+; CHECK-RV32C-NEXT: lh t0, 4(a1)
+; CHECK-RV32C-NEXT: lh a2, 8(a1)
+; CHECK-RV32C-NEXT: lh a1, 12(a1)
; CHECK-RV32C-NEXT: c.ntl.pall
-; CHECK-RV32C-NEXT: sh a2, 14(a0)
+; CHECK-RV32C-NEXT: sh a5, 14(a0)
; CHECK-RV32C-NEXT: c.ntl.pall
-; CHECK-RV32C-NEXT: sh a3, 12(a0)
+; CHECK-RV32C-NEXT: sh a4, 12(a0)
; CHECK-RV32C-NEXT: c.ntl.pall
-; CHECK-RV32C-NEXT: sh a4, 10(a0)
+; CHECK-RV32C-NEXT: sh a3, 10(a0)
; CHECK-RV32C-NEXT: c.ntl.pall
-; CHECK-RV32C-NEXT: sh a1, 8(a0)
+; CHECK-RV32C-NEXT: sh a7, 8(a0)
; CHECK-RV32C-NEXT: c.ntl.pall
-; CHECK-RV32C-NEXT: sh a5, 6(a0)
+; CHECK-RV32C-NEXT: sh a1, 6(a0)
; CHECK-RV32C-NEXT: c.ntl.pall
-; CHECK-RV32C-NEXT: sh t0, 4(a0)
+; CHECK-RV32C-NEXT: sh a2, 4(a0)
; CHECK-RV32C-NEXT: c.ntl.pall
-; CHECK-RV32C-NEXT: sh a7, 2(a0)
+; CHECK-RV32C-NEXT: sh t0, 2(a0)
; CHECK-RV32C-NEXT: c.ntl.pall
; CHECK-RV32C-NEXT: sh a6, 0(a0)
; CHECK-RV32C-NEXT: ret
@@ -5157,30 +5157,30 @@ define void @test_nontemporal_S1_store_v16i8(ptr %p, <16 x i8> %v) {
; CHECK-RV64-NEXT: lbu a7, 40(a1)
; CHECK-RV64-NEXT: lbu t0, 48(a1)
; CHECK-RV64-NEXT: lbu t1, 56(a1)
-; CHECK-RV64-NEXT: lbu t2, 64(a1)
-; CHECK-RV64-NEXT: lbu t3, 72(a1)
-; CHECK-RV64-NEXT: lbu t4, 80(a1)
-; CHECK-RV64-NEXT: lbu t5, 88(a1)
-; CHECK-RV64-NEXT: lbu t6, 120(a1)
-; CHECK-RV64-NEXT: lbu s0, 112(a1)
-; CHECK-RV64-NEXT: lbu s1, 104(a1)
-; CHECK-RV64-NEXT: lbu a1, 96(a1)
+; CHECK-RV64-NEXT: lbu t2, 96(a1)
+; CHECK-RV64-NEXT: lbu t3, 104(a1)
+; CHECK-RV64-NEXT: lbu t4, 112(a1)
+; CHECK-RV64-NEXT: lbu t5, 120(a1)
+; CHECK-RV64-NEXT: lbu t6, 64(a1)
+; CHECK-RV64-NEXT: lbu s0, 72(a1)
+; CHECK-RV64-NEXT: lbu s1, 80(a1)
+; CHECK-RV64-NEXT: lbu a1, 88(a1)
; CHECK-RV64-NEXT: ntl.s1
-; CHECK-RV64-NEXT: sb t6, 15(a0)
+; CHECK-RV64-NEXT: sb t5, 15(a0)
; CHECK-RV64-NEXT: ntl.s1
-; CHECK-RV64-NEXT: sb s0, 14(a0)
+; CHECK-RV64-NEXT: sb t4, 14(a0)
; CHECK-RV64-NEXT: ntl.s1
-; CHECK-RV64-NEXT: sb s1, 13(a0)
+; CHECK-RV64-NEXT: sb t3, 13(a0)
; CHECK-RV64-NEXT: ntl.s1
-; CHECK-RV64-NEXT: sb a1, 12(a0)
+; CHECK-RV64-NEXT: sb t2, 12(a0)
; CHECK-RV64-NEXT: ntl.s1
-; CHECK-RV64-NEXT: sb t5, 11(a0)
+; CHECK-RV64-NEXT: sb a1, 11(a0)
; CHECK-RV64-NEXT: ntl.s1
-; CHECK-RV64-NEXT: sb t4, 10(a0)
+; CHECK-RV64-NEXT: sb s1, 10(a0)
; CHECK-RV64-NEXT: ntl.s1
-; CHECK-RV64-NEXT: sb t3, 9(a0)
+; CHECK-RV64-NEXT: sb s0, 9(a0)
; CHECK-RV64-NEXT: ntl.s1
-; CHECK-RV64-NEXT: sb t2, 8(a0)
+; CHECK-RV64-NEXT: sb t6, 8(a0)
; CHECK-RV64-NEXT: ntl.s1
; CHECK-RV64-NEXT: sb t1, 7(a0)
; CHECK-RV64-NEXT: ntl.s1
@@ -5218,30 +5218,30 @@ define void @test_nontemporal_S1_store_v16i8(ptr %p, <16 x i8> %v) {
; CHECK-RV32-NEXT: lbu a7, 20(a1)
; CHECK-RV32-NEXT: lbu t0, 24(a1)
; CHECK-RV32-NEXT: lbu t1, 28(a1)
-; CHECK-RV32-NEXT: lbu t2, 32(a1)
-; CHECK-RV32-NEXT: lbu t3, 36(a1)
-; CHECK-RV32-NEXT: lbu t4, 40(a1)
-; CHECK-RV32-NEXT: lbu t5, 44(a1)
-; CHECK-RV32-NEXT: lbu t6, 60(a1)
-; CHECK-RV32-NEXT: lbu s0, 56(a1)
-; CHECK-RV32-NEXT: lbu s1, 52(a1)
-; CHECK-RV32-NEXT: lbu a1, 48(a1)
+; CHECK-RV32-NEXT: lbu t2, 48(a1)
+; CHECK-RV32-NEXT: lbu t3, 52(a1)
+; CHECK-RV32-NEXT: lbu t4, 56(a1)
+; CHECK-RV32-NEXT: lbu t5, 60(a1)
+; CHECK-RV32-NEXT: lbu t6, 32(a1)
+; CHECK-RV32-NEXT: lbu s0, 36(a1)
+; CHECK-RV32-NEXT: lbu s1, 40(a1)
+; CHECK-RV32-NEXT: lbu a1, 44(a1)
; CHECK-RV32-NEXT: ntl.s1
-; CHECK-RV32-NEXT: sb t6, 15(a0)
+; CHECK-RV32-NEXT: sb t5, 15(a0)
; CHECK-RV32-NEXT: ntl.s1
-; CHECK-RV32-NEXT: sb s0, 14(a0)
+; CHECK-RV32-NEXT: sb t4, 14(a0)
; CHECK-RV32-NEXT: ntl.s1
-; CHECK-RV32-NEXT: sb s1, 13(a0)
+; CHECK-RV32-NEXT: sb t3, 13(a0)
; CHECK-RV32-NEXT: ntl.s1
-; CHECK-RV32-NEXT: sb a1, 12(a0)
+; CHECK-RV32-NEXT: sb t2, 12(a0)
; CHECK-RV32-NEXT: ntl.s1
-; CHECK-RV32-NEXT: sb t5, 11(a0)
+; CHECK-RV32-NEXT: sb a1, 11(a0)
; CHECK-RV32-NEXT: ntl.s1
-; CHECK-RV32-NEXT: sb t4, 10(a0)
+; CHECK-RV32-NEXT: sb s1, 10(a0)
; CHECK-RV32-NEXT: ntl.s1
-; CHECK-RV32-NEXT: sb t3, 9(a0)
+; CHECK-RV32-NEXT: sb s0, 9(a0)
; CHECK-RV32-NEXT: ntl.s1
-; CHECK-RV32-NEXT: sb t2, 8(a0)
+; CHECK-RV32-NEXT: sb t6, 8(a0)
; CHECK-RV32-NEXT: ntl.s1
; CHECK-RV32-NEXT: sb t1, 7(a0)
; CHECK-RV32-NEXT: ntl.s1
@@ -5279,28 +5279,28 @@ define void @test_nontemporal_S1_store_v16i8(ptr %p, <16 x i8> %v) {
; CHECK-RV64C-NEXT: lbu t3, 40(a1)
; CHECK-RV64C-NEXT: lbu t4, 48(a1)
; CHECK-RV64C-NEXT: lbu t5, 56(a1)
+; CHECK-RV64C-NEXT: lbu a2, 96(a1)
+; CHECK-RV64C-NEXT: lbu a3, 104(a1)
+; CHECK-RV64C-NEXT: lbu a4, 112(a1)
+; CHECK-RV64C-NEXT: lbu a5, 120(a1)
; CHECK-RV64C-NEXT: lbu t6, 64(a1)
-; CHECK-RV64C-NEXT: lbu a3, 72(a1)
-; CHECK-RV64C-NEXT: lbu a4, 80(a1)
-; CHECK-RV64C-NEXT: lbu a5, 88(a1)
-; CHECK-RV64C-NEXT: lbu a2, 120(a1)
-; CHECK-RV64C-NEXT: lbu s0, 112(a1)
-; CHECK-RV64C-NEXT: lbu s1, 104(a1)
-; CHECK-RV64C-NEXT: lbu a1, 96(a1)
+; CHECK-RV64C-NEXT: lbu s0, 72(a1)
+; CHECK-RV64C-NEXT: lbu s1, 80(a1)
+; CHECK-RV64C-NEXT: lbu a1, 88(a1)
; CHECK-RV64C-NEXT: c.ntl.s1
-; CHECK-RV64C-NEXT: sb a2, 15(a0)
+; CHECK-RV64C-NEXT: sb a5, 15(a0)
; CHECK-RV64C-NEXT: c.ntl.s1
-; CHECK-RV64C-NEXT: sb s0, 14(a0)
+; CHECK-RV64C-NEXT: sb a4, 14(a0)
; CHECK-RV64C-NEXT: c.ntl.s1
-; CHECK-RV64C-NEXT: sb s1, 13(a0)
+; CHECK-RV64C-NEXT: sb a3, 13(a0)
; CHECK-RV64C-NEXT: c.ntl.s1
-; CHECK-RV64C-NEXT: sb a1, 12(a0)
+; CHECK-RV64C-NEXT: sb a2, 12(a0)
; CHECK-RV64C-NEXT: c.ntl.s1
-; CHECK-RV64C-NEXT: sb a5, 11(a0)
+; CHECK-RV64C-NEXT: sb a1, 11(a0)
; CHECK-RV64C-NEXT: c.ntl.s1
-; CHECK-RV64C-NEXT: sb a4, 10(a0)
+; CHECK-RV64C-NEXT: sb s1, 10(a0)
; CHECK-RV64C-NEXT: c.ntl.s1
-; CHECK-RV64C-NEXT: sb a3, 9(a0)
+; CHECK-RV64C-NEXT: sb s0, 9(a0)
; CHECK-RV64C-NEXT: c.ntl.s1
; CHECK-RV64C-NEXT: sb t6, 8(a0)
; CHECK-RV64C-NEXT: c.ntl.s1
@@ -5340,28 +5340,28 @@ define void @test_nontemporal_S1_store_v16i8(ptr %p, <16 x i8> %v) {
; CHECK-RV32C-NEXT: lbu t3, 20(a1)
; CHECK-RV32C-NEXT: lbu t4, 24(a1)
; CHECK-RV32C-NEXT: lbu t5, 28(a1)
+; CHECK-RV32C-NEXT: lbu a2, 48(a1)
+; CHECK-RV32C-NEXT: lbu a3, 52(a1)
+; CHECK-RV32C-NEXT: lbu a4, 56(a1)
+; CHECK-RV32C-NEXT: lbu a5, 60(a1)
; CHECK-RV32C-NEXT: lbu t6, 32(a1)
-; CHECK-RV32C-NEXT: lbu a3, 36(a1)
-; CHECK-RV32C-NEXT: lbu a4, 40(a1)
-; CHECK-RV32C-NEXT: lbu a5, 44(a1)
-; CHECK-RV32C-NEXT: lbu a2, 60(a1)
-; CHECK-RV32C-NEXT: lbu s0, 56(a1)
-; CHECK-RV32C-NEXT: lbu s1, 52(a1)
-; CHECK-RV32C-NEXT: lbu a1, 48(a1)
+; CHECK-RV32C-NEXT: lbu s0, 36(a1)
+; CHECK-RV32C-NEXT: lbu s1, 40(a1)
+; CHECK-RV32C-NEXT: lbu a1, 44(a1)
; CHECK-RV32C-NEXT: c.ntl.s1
-; CHECK-RV32C-NEXT: sb a2, 15(a0)
+; CHECK-RV32C-NEXT: sb a5, 15(a0)
; CHECK-RV32C-NEXT: c.ntl.s1
-; CHECK-RV32C-NEXT: sb s0, 14(a0)
+; CHECK-RV32C-NEXT: sb a4, 14(a0)
; CHECK-RV32C-NEXT: c.ntl.s1
-; CHECK-RV32C-NEXT: sb s1, 13(a0)
+; CHECK-RV32C-NEXT: sb a3, 13(a0)
; CHECK-RV32C-NEXT: c.ntl.s1
-; CHECK-RV32C-NEXT: sb a1, 12(a0)
+; CHECK-RV32C-NEXT: sb a2, 12(a0)
; CHECK-RV32C-NEXT: c.ntl.s1
-; CHECK-RV32C-NEXT: sb a5, 11(a0)
+; CHECK-RV32C-NEXT: sb a1, 11(a0)
; CHECK-RV32C-NEXT: c.ntl.s1
-; CHECK-RV32C-NEXT: sb a4, 10(a0)
+; CHECK-RV32C-NEXT: sb s1, 10(a0)
; CHECK-RV32C-NEXT: c.ntl.s1
-; CHECK-RV32C-NEXT: sb a3, 9(a0)
+; CHECK-RV32C-NEXT: sb s0, 9(a0)
; CHECK-RV32C-NEXT: c.ntl.s1
; CHECK-RV32C-NEXT: sb t6, 8(a0)
; CHECK-RV32C-NEXT: c.ntl.s1
@@ -5405,112 +5405,112 @@ define void @test_nontemporal_S1_store_v16i8(ptr %p, <16 x i8> %v) {
define void @test_nontemporal_S1_store_v8i16(ptr %p, <8 x i16> %v) {
; CHECK-RV64-LABEL: test_nontemporal_S1_store_v8i16:
; CHECK-RV64: # %bb.0:
-; CHECK-RV64-NEXT: lh a2, 0(a1)
-; CHECK-RV64-NEXT: lh a3, 8(a1)
-; CHECK-RV64-NEXT: lh a4, 16(a1)
-; CHECK-RV64-NEXT: lh a5, 24(a1)
-; CHECK-RV64-NEXT: lh a6, 56(a1)
-; CHECK-RV64-NEXT: lh a7, 48(a1)
-; CHECK-RV64-NEXT: lh t0, 40(a1)
-; CHECK-RV64-NEXT: lh a1, 32(a1)
+; CHECK-RV64-NEXT: lh a2, 32(a1)
+; CHECK-RV64-NEXT: lh a3, 40(a1)
+; CHECK-RV64-NEXT: lh a4, 48(a1)
+; CHECK-RV64-NEXT: lh a5, 56(a1)
+; CHECK-RV64-NEXT: lh a6, 0(a1)
+; CHECK-RV64-NEXT: lh a7, 8(a1)
+; CHECK-RV64-NEXT: lh t0, 16(a1)
+; CHECK-RV64-NEXT: lh a1, 24(a1)
; CHECK-RV64-NEXT: ntl.s1
-; CHECK-RV64-NEXT: sh a6, 14(a0)
+; CHECK-RV64-NEXT: sh a5, 14(a0)
; CHECK-RV64-NEXT: ntl.s1
-; CHECK-RV64-NEXT: sh a7, 12(a0)
+; CHECK-RV64-NEXT: sh a4, 12(a0)
; CHECK-RV64-NEXT: ntl.s1
-; CHECK-RV64-NEXT: sh t0, 10(a0)
+; CHECK-RV64-NEXT: sh a3, 10(a0)
; CHECK-RV64-NEXT: ntl.s1
-; CHECK-RV64-NEXT: sh a1, 8(a0)
+; CHECK-RV64-NEXT: sh a2, 8(a0)
; CHECK-RV64-NEXT: ntl.s1
-; CHECK-RV64-NEXT: sh a5, 6(a0)
+; CHECK-RV64-NEXT: sh a1, 6(a0)
; CHECK-RV64-NEXT: ntl.s1
-; CHECK-RV64-NEXT: sh a4, 4(a0)
+; CHECK-RV64-NEXT: sh t0, 4(a0)
; CHECK-RV64-NEXT: ntl.s1
-; CHECK-RV64-NEXT: sh a3, 2(a0)
+; CHECK-RV64-NEXT: sh a7, 2(a0)
; CHECK-RV64-NEXT: ntl.s1
-; CHECK-RV64-NEXT: sh a2, 0(a0)
+; CHECK-RV64-NEXT: sh a6, 0(a0)
; CHECK-RV64-NEXT: ret
;
; CHECK-RV32-LABEL: test_nontemporal_S1_store_v8i16:
; CHECK-RV32: # %bb.0:
-; CHECK-RV32-NEXT: lh a2, 0(a1)
-; CHECK-RV32-NEXT: lh a3, 4(a1)
-; CHECK-RV32-NEXT: lh a4, 8(a1)
-; CHECK-RV32-NEXT: lh a5, 12(a1)
-; CHECK-RV32-NEXT: lh a6, 28(a1)
-; CHECK-RV32-NEXT: lh a7, 24(a1)
-; CHECK-RV32-NEXT: lh t0, 20(a1)
-; CHECK-RV32-NEXT: lh a1, 16(a1)
+; CHECK-RV32-NEXT: lh a2, 16(a1)
+; CHECK-RV32-NEXT: lh a3, 20(a1)
+; CHECK-RV32-NEXT: lh a4, 24(a1)
+; CHECK-RV32-NEXT: lh a5, 28(a1)
+; CHECK-RV32-NEXT: lh a6, 0(a1)
+; CHECK-RV32-NEXT: lh a7, 4(a1)
+; CHECK-RV32-NEXT: lh t0, 8(a1)
+; CHECK-RV32-NEXT: lh a1, 12(a1)
; CHECK-RV32-NEXT: ntl.s1
-; CHECK-RV32-NEXT: sh a6, 14(a0)
+; CHECK-RV32-NEXT: sh a5, 14(a0)
; CHECK-RV32-NEXT: ntl.s1
-; CHECK-RV32-NEXT: sh a7, 12(a0)
+; CHECK-RV32-NEXT: sh a4, 12(a0)
; CHECK-RV32-NEXT: ntl.s1
-; CHECK-RV32-NEXT: sh t0, 10(a0)
+; CHECK-RV32-NEXT: sh a3, 10(a0)
; CHECK-RV32-NEXT: ntl.s1
-; CHECK-RV32-NEXT: sh a1, 8(a0)
+; CHECK-RV32-NEXT: sh a2, 8(a0)
; CHECK-RV32-NEXT: ntl.s1
-; CHECK-RV32-NEXT: sh a5, 6(a0)
+; CHECK-RV32-NEXT: sh a1, 6(a0)
; CHECK-RV32-NEXT: ntl.s1
-; CHECK-RV32-NEXT: sh a4, 4(a0)
+; CHECK-RV32-NEXT: sh t0, 4(a0)
; CHECK-RV32-NEXT: ntl.s1
-; CHECK-RV32-NEXT: sh a3, 2(a0)
+; CHECK-RV32-NEXT: sh a7, 2(a0)
; CHECK-RV32-NEXT: ntl.s1
-; CHECK-RV32-NEXT: sh a2, 0(a0)
+; CHECK-RV32-NEXT: sh a6, 0(a0)
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64C-LABEL: test_nontemporal_S1_store_v8i16:
; CHECK-RV64C: # %bb.0:
+; CHECK-RV64C-NEXT: lh a7, 32(a1)
+; CHECK-RV64C-NEXT: lh a3, 40(a1)
+; CHECK-RV64C-NEXT: lh a4, 48(a1)
+; CHECK-RV64C-NEXT: lh a5, 56(a1)
; CHECK-RV64C-NEXT: lh a6, 0(a1)
-; CHECK-RV64C-NEXT: lh a7, 8(a1)
-; CHECK-RV64C-NEXT: lh t0, 16(a1)
-; CHECK-RV64C-NEXT: lh a5, 24(a1)
-; CHECK-RV64C-NEXT: lh a2, 56(a1)
-; CHECK-RV64C-NEXT: lh a3, 48(a1)
-; CHECK-RV64C-NEXT: lh a4, 40(a1)
-; CHECK-RV64C-NEXT: lh a1, 32(a1)
+; CHECK-RV64C-NEXT: lh t0, 8(a1)
+; CHECK-RV64C-NEXT: lh a2, 16(a1)
+; CHECK-RV64C-NEXT: lh a1, 24(a1)
; CHECK-RV64C-NEXT: c.ntl.s1
-; CHECK-RV64C-NEXT: sh a2, 14(a0)
+; CHECK-RV64C-NEXT: sh a5, 14(a0)
; CHECK-RV64C-NEXT: c.ntl.s1
-; CHECK-RV64C-NEXT: sh a3, 12(a0)
+; CHECK-RV64C-NEXT: sh a4, 12(a0)
; CHECK-RV64C-NEXT: c.ntl.s1
-; CHECK-RV64C-NEXT: sh a4, 10(a0)
+; CHECK-RV64C-NEXT: sh a3, 10(a0)
; CHECK-RV64C-NEXT: c.ntl.s1
-; CHECK-RV64C-NEXT: sh a1, 8(a0)
+; CHECK-RV64C-NEXT: sh a7, 8(a0)
; CHECK-RV64C-NEXT: c.ntl.s1
-; CHECK-RV64C-NEXT: sh a5, 6(a0)
+; CHECK-RV64C-NEXT: sh a1, 6(a0)
; CHECK-RV64C-NEXT: c.ntl.s1
-; CHECK-RV64C-NEXT: sh t0, 4(a0)
+; CHECK-RV64C-NEXT: sh a2, 4(a0)
; CHECK-RV64C-NEXT: c.ntl.s1
-; CHECK-RV64C-NEXT: sh a7, 2(a0)
+; CHECK-RV64C-NEXT: sh t0, 2(a0)
; CHECK-RV64C-NEXT: c.ntl.s1
; CHECK-RV64C-NEXT: sh a6, 0(a0)
; CHECK-RV64C-NEXT: ret
;
; CHECK-RV32C-LABEL: test_nontemporal_S1_store_v8i16:
; CHECK-RV32C: # %bb.0:
+; CHECK-RV32C-NEXT: lh a7, 16(a1)
+; CHECK-RV32C-NEXT: lh a3, 20(a1)
+; CHECK-RV32C-NEXT: lh a4, 24(a1)
+; CHECK-RV32C-NEXT: lh a5, 28(a1)
; CHECK-RV32C-NEXT: lh a6, 0(a1)
-; CHECK-RV32C-NEXT: lh a7, 4(a1)
-; CHECK-RV32C-NEXT: lh t0, 8(a1)
-; CHECK-RV32C-NEXT: lh a5, 12(a1)
-; CHECK-RV32C-NEXT: lh a2, 28(a1)
-; CHECK-RV32C-NEXT: lh a3, 24(a1)
-; CHECK-RV32C-NEXT: lh a4, 20(a1)
-; CHECK-RV32C-NEXT: lh a1, 16(a1)
+; CHECK-RV32C-NEXT: lh t0, 4(a1)
+; CHECK-RV32C-NEXT: lh a2, 8(a1)
+; CHECK-RV32C-NEXT: lh a1, 12(a1)
; CHECK-RV32C-NEXT: c.ntl.s1
-; CHECK-RV32C-NEXT: sh a2, 14(a0)
+; CHECK-RV32C-NEXT: sh a5, 14(a0)
; CHECK-RV32C-NEXT: c.ntl.s1
-; CHECK-RV32C-NEXT: sh a3, 12(a0)
+; CHECK-RV32C-NEXT: sh a4, 12(a0)
; CHECK-RV32C-NEXT: c.ntl.s1
-; CHECK-RV32C-NEXT: sh a4, 10(a0)
+; CHECK-RV32C-NEXT: sh a3, 10(a0)
; CHECK-RV32C-NEXT: c.ntl.s1
-; CHECK-RV32C-NEXT: sh a1, 8(a0)
+; CHECK-RV32C-NEXT: sh a7, 8(a0)
; CHECK-RV32C-NEXT: c.ntl.s1
-; CHECK-RV32C-NEXT: sh a5, 6(a0)
+; CHECK-RV32C-NEXT: sh a1, 6(a0)
; CHECK-RV32C-NEXT: c.ntl.s1
-; CHECK-RV32C-NEXT: sh t0, 4(a0)
+; CHECK-RV32C-NEXT: sh a2, 4(a0)
; CHECK-RV32C-NEXT: c.ntl.s1
-; CHECK-RV32C-NEXT: sh a7, 2(a0)
+; CHECK-RV32C-NEXT: sh t0, 2(a0)
; CHECK-RV32C-NEXT: c.ntl.s1
; CHECK-RV32C-NEXT: sh a6, 0(a0)
; CHECK-RV32C-NEXT: ret
@@ -6571,30 +6571,30 @@ define void @test_nontemporal_ALL_store_v16i8(ptr %p, <16 x i8> %v) {
; CHECK-RV64-NEXT: lbu a7, 40(a1)
; CHECK-RV64-NEXT: lbu t0, 48(a1)
; CHECK-RV64-NEXT: lbu t1, 56(a1)
-; CHECK-RV64-NEXT: lbu t2, 64(a1)
-; CHECK-RV64-NEXT: lbu t3, 72(a1)
-; CHECK-RV64-NEXT: lbu t4, 80(a1)
-; CHECK-RV64-NEXT: lbu t5, 88(a1)
-; CHECK-RV64-NEXT: lbu t6, 120(a1)
-; CHECK-RV64-NEXT: lbu s0, 112(a1)
-; CHECK-RV64-NEXT: lbu s1, 104(a1)
-; CHECK-RV64-NEXT: lbu a1, 96(a1)
+; CHECK-RV64-NEXT: lbu t2, 96(a1)
+; CHECK-RV64-NEXT: lbu t3, 104(a1)
+; CHECK-RV64-NEXT: lbu t4, 112(a1)
+; CHECK-RV64-NEXT: lbu t5, 120(a1)
+; CHECK-RV64-NEXT: lbu t6, 64(a1)
+; CHECK-RV64-NEXT: lbu s0, 72(a1)
+; CHECK-RV64-NEXT: lbu s1, 80(a1)
+; CHECK-RV64-NEXT: lbu a1, 88(a1)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sb t6, 15(a0)
+; CHECK-RV64-NEXT: sb t5, 15(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sb s0, 14(a0)
+; CHECK-RV64-NEXT: sb t4, 14(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sb s1, 13(a0)
+; CHECK-RV64-NEXT: sb t3, 13(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sb a1, 12(a0)
+; CHECK-RV64-NEXT: sb t2, 12(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sb t5, 11(a0)
+; CHECK-RV64-NEXT: sb a1, 11(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sb t4, 10(a0)
+; CHECK-RV64-NEXT: sb s1, 10(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sb t3, 9(a0)
+; CHECK-RV64-NEXT: sb s0, 9(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sb t2, 8(a0)
+; CHECK-RV64-NEXT: sb t6, 8(a0)
; CHECK-RV64-NEXT: ntl.all
; CHECK-RV64-NEXT: sb t1, 7(a0)
; CHECK-RV64-NEXT: ntl.all
@@ -6632,30 +6632,30 @@ define void @test_nontemporal_ALL_store_v16i8(ptr %p, <16 x i8> %v) {
; CHECK-RV32-NEXT: lbu a7, 20(a1)
; CHECK-RV32-NEXT: lbu t0, 24(a1)
; CHECK-RV32-NEXT: lbu t1, 28(a1)
-; CHECK-RV32-NEXT: lbu t2, 32(a1)
-; CHECK-RV32-NEXT: lbu t3, 36(a1)
-; CHECK-RV32-NEXT: lbu t4, 40(a1)
-; CHECK-RV32-NEXT: lbu t5, 44(a1)
-; CHECK-RV32-NEXT: lbu t6, 60(a1)
-; CHECK-RV32-NEXT: lbu s0, 56(a1)
-; CHECK-RV32-NEXT: lbu s1, 52(a1)
-; CHECK-RV32-NEXT: lbu a1, 48(a1)
+; CHECK-RV32-NEXT: lbu t2, 48(a1)
+; CHECK-RV32-NEXT: lbu t3, 52(a1)
+; CHECK-RV32-NEXT: lbu t4, 56(a1)
+; CHECK-RV32-NEXT: lbu t5, 60(a1)
+; CHECK-RV32-NEXT: lbu t6, 32(a1)
+; CHECK-RV32-NEXT: lbu s0, 36(a1)
+; CHECK-RV32-NEXT: lbu s1, 40(a1)
+; CHECK-RV32-NEXT: lbu a1, 44(a1)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sb t6, 15(a0)
+; CHECK-RV32-NEXT: sb t5, 15(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sb s0, 14(a0)
+; CHECK-RV32-NEXT: sb t4, 14(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sb s1, 13(a0)
+; CHECK-RV32-NEXT: sb t3, 13(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sb a1, 12(a0)
+; CHECK-RV32-NEXT: sb t2, 12(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sb t5, 11(a0)
+; CHECK-RV32-NEXT: sb a1, 11(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sb t4, 10(a0)
+; CHECK-RV32-NEXT: sb s1, 10(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sb t3, 9(a0)
+; CHECK-RV32-NEXT: sb s0, 9(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sb t2, 8(a0)
+; CHECK-RV32-NEXT: sb t6, 8(a0)
; CHECK-RV32-NEXT: ntl.all
; CHECK-RV32-NEXT: sb t1, 7(a0)
; CHECK-RV32-NEXT: ntl.all
@@ -6693,28 +6693,28 @@ define void @test_nontemporal_ALL_store_v16i8(ptr %p, <16 x i8> %v) {
; CHECK-RV64C-NEXT: lbu t3, 40(a1)
; CHECK-RV64C-NEXT: lbu t4, 48(a1)
; CHECK-RV64C-NEXT: lbu t5, 56(a1)
+; CHECK-RV64C-NEXT: lbu a2, 96(a1)
+; CHECK-RV64C-NEXT: lbu a3, 104(a1)
+; CHECK-RV64C-NEXT: lbu a4, 112(a1)
+; CHECK-RV64C-NEXT: lbu a5, 120(a1)
; CHECK-RV64C-NEXT: lbu t6, 64(a1)
-; CHECK-RV64C-NEXT: lbu a3, 72(a1)
-; CHECK-RV64C-NEXT: lbu a4, 80(a1)
-; CHECK-RV64C-NEXT: lbu a5, 88(a1)
-; CHECK-RV64C-NEXT: lbu a2, 120(a1)
-; CHECK-RV64C-NEXT: lbu s0, 112(a1)
-; CHECK-RV64C-NEXT: lbu s1, 104(a1)
-; CHECK-RV64C-NEXT: lbu a1, 96(a1)
+; CHECK-RV64C-NEXT: lbu s0, 72(a1)
+; CHECK-RV64C-NEXT: lbu s1, 80(a1)
+; CHECK-RV64C-NEXT: lbu a1, 88(a1)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sb a2, 15(a0)
+; CHECK-RV64C-NEXT: sb a5, 15(a0)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sb s0, 14(a0)
+; CHECK-RV64C-NEXT: sb a4, 14(a0)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sb s1, 13(a0)
+; CHECK-RV64C-NEXT: sb a3, 13(a0)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sb a1, 12(a0)
+; CHECK-RV64C-NEXT: sb a2, 12(a0)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sb a5, 11(a0)
+; CHECK-RV64C-NEXT: sb a1, 11(a0)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sb a4, 10(a0)
+; CHECK-RV64C-NEXT: sb s1, 10(a0)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sb a3, 9(a0)
+; CHECK-RV64C-NEXT: sb s0, 9(a0)
; CHECK-RV64C-NEXT: c.ntl.all
; CHECK-RV64C-NEXT: sb t6, 8(a0)
; CHECK-RV64C-NEXT: c.ntl.all
@@ -6754,28 +6754,28 @@ define void @test_nontemporal_ALL_store_v16i8(ptr %p, <16 x i8> %v) {
; CHECK-RV32C-NEXT: lbu t3, 20(a1)
; CHECK-RV32C-NEXT: lbu t4, 24(a1)
; CHECK-RV32C-NEXT: lbu t5, 28(a1)
+; CHECK-RV32C-NEXT: lbu a2, 48(a1)
+; CHECK-RV32C-NEXT: lbu a3, 52(a1)
+; CHECK-RV32C-NEXT: lbu a4, 56(a1)
+; CHECK-RV32C-NEXT: lbu a5, 60(a1)
; CHECK-RV32C-NEXT: lbu t6, 32(a1)
-; CHECK-RV32C-NEXT: lbu a3, 36(a1)
-; CHECK-RV32C-NEXT: lbu a4, 40(a1)
-; CHECK-RV32C-NEXT: lbu a5, 44(a1)
-; CHECK-RV32C-NEXT: lbu a2, 60(a1)
-; CHECK-RV32C-NEXT: lbu s0, 56(a1)
-; CHECK-RV32C-NEXT: lbu s1, 52(a1)
-; CHECK-RV32C-NEXT: lbu a1, 48(a1)
+; CHECK-RV32C-NEXT: lbu s0, 36(a1)
+; CHECK-RV32C-NEXT: lbu s1, 40(a1)
+; CHECK-RV32C-NEXT: lbu a1, 44(a1)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sb a2, 15(a0)
+; CHECK-RV32C-NEXT: sb a5, 15(a0)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sb s0, 14(a0)
+; CHECK-RV32C-NEXT: sb a4, 14(a0)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sb s1, 13(a0)
+; CHECK-RV32C-NEXT: sb a3, 13(a0)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sb a1, 12(a0)
+; CHECK-RV32C-NEXT: sb a2, 12(a0)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sb a5, 11(a0)
+; CHECK-RV32C-NEXT: sb a1, 11(a0)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sb a4, 10(a0)
+; CHECK-RV32C-NEXT: sb s1, 10(a0)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sb a3, 9(a0)
+; CHECK-RV32C-NEXT: sb s0, 9(a0)
; CHECK-RV32C-NEXT: c.ntl.all
; CHECK-RV32C-NEXT: sb t6, 8(a0)
; CHECK-RV32C-NEXT: c.ntl.all
@@ -6819,112 +6819,112 @@ define void @test_nontemporal_ALL_store_v16i8(ptr %p, <16 x i8> %v) {
define void @test_nontemporal_ALL_store_v8i16(ptr %p, <8 x i16> %v) {
; CHECK-RV64-LABEL: test_nontemporal_ALL_store_v8i16:
; CHECK-RV64: # %bb.0:
-; CHECK-RV64-NEXT: lh a2, 0(a1)
-; CHECK-RV64-NEXT: lh a3, 8(a1)
-; CHECK-RV64-NEXT: lh a4, 16(a1)
-; CHECK-RV64-NEXT: lh a5, 24(a1)
-; CHECK-RV64-NEXT: lh a6, 56(a1)
-; CHECK-RV64-NEXT: lh a7, 48(a1)
-; CHECK-RV64-NEXT: lh t0, 40(a1)
-; CHECK-RV64-NEXT: lh a1, 32(a1)
+; CHECK-RV64-NEXT: lh a2, 32(a1)
+; CHECK-RV64-NEXT: lh a3, 40(a1)
+; CHECK-RV64-NEXT: lh a4, 48(a1)
+; CHECK-RV64-NEXT: lh a5, 56(a1)
+; CHECK-RV64-NEXT: lh a6, 0(a1)
+; CHECK-RV64-NEXT: lh a7, 8(a1)
+; CHECK-RV64-NEXT: lh t0, 16(a1)
+; CHECK-RV64-NEXT: lh a1, 24(a1)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sh a6, 14(a0)
+; CHECK-RV64-NEXT: sh a5, 14(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sh a7, 12(a0)
+; CHECK-RV64-NEXT: sh a4, 12(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sh t0, 10(a0)
+; CHECK-RV64-NEXT: sh a3, 10(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sh a1, 8(a0)
+; CHECK-RV64-NEXT: sh a2, 8(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sh a5, 6(a0)
+; CHECK-RV64-NEXT: sh a1, 6(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sh a4, 4(a0)
+; CHECK-RV64-NEXT: sh t0, 4(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sh a3, 2(a0)
+; CHECK-RV64-NEXT: sh a7, 2(a0)
; CHECK-RV64-NEXT: ntl.all
-; CHECK-RV64-NEXT: sh a2, 0(a0)
+; CHECK-RV64-NEXT: sh a6, 0(a0)
; CHECK-RV64-NEXT: ret
;
; CHECK-RV32-LABEL: test_nontemporal_ALL_store_v8i16:
; CHECK-RV32: # %bb.0:
-; CHECK-RV32-NEXT: lh a2, 0(a1)
-; CHECK-RV32-NEXT: lh a3, 4(a1)
-; CHECK-RV32-NEXT: lh a4, 8(a1)
-; CHECK-RV32-NEXT: lh a5, 12(a1)
-; CHECK-RV32-NEXT: lh a6, 28(a1)
-; CHECK-RV32-NEXT: lh a7, 24(a1)
-; CHECK-RV32-NEXT: lh t0, 20(a1)
-; CHECK-RV32-NEXT: lh a1, 16(a1)
+; CHECK-RV32-NEXT: lh a2, 16(a1)
+; CHECK-RV32-NEXT: lh a3, 20(a1)
+; CHECK-RV32-NEXT: lh a4, 24(a1)
+; CHECK-RV32-NEXT: lh a5, 28(a1)
+; CHECK-RV32-NEXT: lh a6, 0(a1)
+; CHECK-RV32-NEXT: lh a7, 4(a1)
+; CHECK-RV32-NEXT: lh t0, 8(a1)
+; CHECK-RV32-NEXT: lh a1, 12(a1)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sh a6, 14(a0)
+; CHECK-RV32-NEXT: sh a5, 14(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sh a7, 12(a0)
+; CHECK-RV32-NEXT: sh a4, 12(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sh t0, 10(a0)
+; CHECK-RV32-NEXT: sh a3, 10(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sh a1, 8(a0)
+; CHECK-RV32-NEXT: sh a2, 8(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sh a5, 6(a0)
+; CHECK-RV32-NEXT: sh a1, 6(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sh a4, 4(a0)
+; CHECK-RV32-NEXT: sh t0, 4(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sh a3, 2(a0)
+; CHECK-RV32-NEXT: sh a7, 2(a0)
; CHECK-RV32-NEXT: ntl.all
-; CHECK-RV32-NEXT: sh a2, 0(a0)
+; CHECK-RV32-NEXT: sh a6, 0(a0)
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64C-LABEL: test_nontemporal_ALL_store_v8i16:
; CHECK-RV64C: # %bb.0:
+; CHECK-RV64C-NEXT: lh a7, 32(a1)
+; CHECK-RV64C-NEXT: lh a3, 40(a1)
+; CHECK-RV64C-NEXT: lh a4, 48(a1)
+; CHECK-RV64C-NEXT: lh a5, 56(a1)
; CHECK-RV64C-NEXT: lh a6, 0(a1)
-; CHECK-RV64C-NEXT: lh a7, 8(a1)
-; CHECK-RV64C-NEXT: lh t0, 16(a1)
-; CHECK-RV64C-NEXT: lh a5, 24(a1)
-; CHECK-RV64C-NEXT: lh a2, 56(a1)
-; CHECK-RV64C-NEXT: lh a3, 48(a1)
-; CHECK-RV64C-NEXT: lh a4, 40(a1)
-; CHECK-RV64C-NEXT: lh a1, 32(a1)
+; CHECK-RV64C-NEXT: lh t0, 8(a1)
+; CHECK-RV64C-NEXT: lh a2, 16(a1)
+; CHECK-RV64C-NEXT: lh a1, 24(a1)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sh a2, 14(a0)
+; CHECK-RV64C-NEXT: sh a5, 14(a0)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sh a3, 12(a0)
+; CHECK-RV64C-NEXT: sh a4, 12(a0)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sh a4, 10(a0)
+; CHECK-RV64C-NEXT: sh a3, 10(a0)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sh a1, 8(a0)
+; CHECK-RV64C-NEXT: sh a7, 8(a0)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sh a5, 6(a0)
+; CHECK-RV64C-NEXT: sh a1, 6(a0)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sh t0, 4(a0)
+; CHECK-RV64C-NEXT: sh a2, 4(a0)
; CHECK-RV64C-NEXT: c.ntl.all
-; CHECK-RV64C-NEXT: sh a7, 2(a0)
+; CHECK-RV64C-NEXT: sh t0, 2(a0)
; CHECK-RV64C-NEXT: c.ntl.all
; CHECK-RV64C-NEXT: sh a6, 0(a0)
; CHECK-RV64C-NEXT: ret
;
; CHECK-RV32C-LABEL: test_nontemporal_ALL_store_v8i16:
; CHECK-RV32C: # %bb.0:
+; CHECK-RV32C-NEXT: lh a7, 16(a1)
+; CHECK-RV32C-NEXT: lh a3, 20(a1)
+; CHECK-RV32C-NEXT: lh a4, 24(a1)
+; CHECK-RV32C-NEXT: lh a5, 28(a1)
; CHECK-RV32C-NEXT: lh a6, 0(a1)
-; CHECK-RV32C-NEXT: lh a7, 4(a1)
-; CHECK-RV32C-NEXT: lh t0, 8(a1)
-; CHECK-RV32C-NEXT: lh a5, 12(a1)
-; CHECK-RV32C-NEXT: lh a2, 28(a1)
-; CHECK-RV32C-NEXT: lh a3, 24(a1)
-; CHECK-RV32C-NEXT: lh a4, 20(a1)
-; CHECK-RV32C-NEXT: lh a1, 16(a1)
+; CHECK-RV32C-NEXT: lh t0, 4(a1)
+; CHECK-RV32C-NEXT: lh a2, 8(a1)
+; CHECK-RV32C-NEXT: lh a1, 12(a1)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sh a2, 14(a0)
+; CHECK-RV32C-NEXT: sh a5, 14(a0)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sh a3, 12(a0)
+; CHECK-RV32C-NEXT: sh a4, 12(a0)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sh a4, 10(a0)
+; CHECK-RV32C-NEXT: sh a3, 10(a0)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sh a1, 8(a0)
+; CHECK-RV32C-NEXT: sh a7, 8(a0)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sh a5, 6(a0)
+; CHECK-RV32C-NEXT: sh a1, 6(a0)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sh t0, 4(a0)
+; CHECK-RV32C-NEXT: sh a2, 4(a0)
; CHECK-RV32C-NEXT: c.ntl.all
-; CHECK-RV32C-NEXT: sh a7, 2(a0)
+; CHECK-RV32C-NEXT: sh t0, 2(a0)
; CHECK-RV32C-NEXT: c.ntl.all
; CHECK-RV32C-NEXT: sh a6, 0(a0)
; CHECK-RV32C-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll b/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll
index 4bb65f376218f1..fe602b5b8fc2bc 100644
--- a/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll
@@ -1241,8 +1241,8 @@ define i64 @foo2(ptr %p) {
define void @PR41129(ptr %p64) {
; RV32-LABEL: PR41129:
; RV32: # %bb.0: # %entry
-; RV32-NEXT: lw a2, 4(a0)
; RV32-NEXT: lw a1, 0(a0)
+; RV32-NEXT: lw a2, 4(a0)
; RV32-NEXT: or a3, a1, a2
; RV32-NEXT: beqz a3, .LBB37_2
; RV32-NEXT: # %bb.1: # %false
diff --git a/llvm/test/CodeGen/RISCV/push-pop-popret.ll b/llvm/test/CodeGen/RISCV/push-pop-popret.ll
index 7548faaae61f47..85c2997e268a94 100644
--- a/llvm/test/CodeGen/RISCV/push-pop-popret.ll
+++ b/llvm/test/CodeGen/RISCV/push-pop-popret.ll
@@ -1117,26 +1117,26 @@ define void @many_args(i32, i32, i32, i32, i32, i32, i32, i32, i32) nounwind {
; RV32IZCMP-NEXT: lw t3, 20(a5)
; RV32IZCMP-NEXT: lw t4, 24(a5)
; RV32IZCMP-NEXT: lw t5, 28(a5)
-; RV32IZCMP-NEXT: lw t6, 32(a5)
-; RV32IZCMP-NEXT: lw s2, 36(a5)
-; RV32IZCMP-NEXT: lw s3, 40(a5)
-; RV32IZCMP-NEXT: lw s4, 44(a5)
-; RV32IZCMP-NEXT: lw a1, 48(a5)
-; RV32IZCMP-NEXT: lw s0, 52(a5)
-; RV32IZCMP-NEXT: lw s1, 68(a5)
-; RV32IZCMP-NEXT: lw a2, 64(a5)
-; RV32IZCMP-NEXT: lw a3, 60(a5)
-; RV32IZCMP-NEXT: lw a4, 56(a5)
-; RV32IZCMP-NEXT: sw s1, 68(a5)
-; RV32IZCMP-NEXT: sw a2, 64(a5)
-; RV32IZCMP-NEXT: sw a3, 60(a5)
-; RV32IZCMP-NEXT: sw a4, 56(a5)
-; RV32IZCMP-NEXT: sw s0, 52(a5)
-; RV32IZCMP-NEXT: sw a1, 48(a5)
-; RV32IZCMP-NEXT: sw s4, 44(a5)
-; RV32IZCMP-NEXT: sw s3, 40(a5)
-; RV32IZCMP-NEXT: sw s2, 36(a5)
-; RV32IZCMP-NEXT: sw t6, 32(a5)
+; RV32IZCMP-NEXT: lw t6, 48(a5)
+; RV32IZCMP-NEXT: lw s2, 52(a5)
+; RV32IZCMP-NEXT: lw a3, 56(a5)
+; RV32IZCMP-NEXT: lw a4, 60(a5)
+; RV32IZCMP-NEXT: lw a1, 64(a5)
+; RV32IZCMP-NEXT: lw s0, 68(a5)
+; RV32IZCMP-NEXT: lw s3, 32(a5)
+; RV32IZCMP-NEXT: lw s4, 36(a5)
+; RV32IZCMP-NEXT: lw s1, 40(a5)
+; RV32IZCMP-NEXT: lw a2, 44(a5)
+; RV32IZCMP-NEXT: sw s0, 68(a5)
+; RV32IZCMP-NEXT: sw a1, 64(a5)
+; RV32IZCMP-NEXT: sw a4, 60(a5)
+; RV32IZCMP-NEXT: sw a3, 56(a5)
+; RV32IZCMP-NEXT: sw s2, 52(a5)
+; RV32IZCMP-NEXT: sw t6, 48(a5)
+; RV32IZCMP-NEXT: sw a2, 44(a5)
+; RV32IZCMP-NEXT: sw s1, 40(a5)
+; RV32IZCMP-NEXT: sw s4, 36(a5)
+; RV32IZCMP-NEXT: sw s3, 32(a5)
; RV32IZCMP-NEXT: sw t5, 28(a5)
; RV32IZCMP-NEXT: sw t4, 24(a5)
; RV32IZCMP-NEXT: sw t3, 20(a5)
@@ -1160,26 +1160,26 @@ define void @many_args(i32, i32, i32, i32, i32, i32, i32, i32, i32) nounwind {
; RV64IZCMP-NEXT: lw t3, 20(a5)
; RV64IZCMP-NEXT: lw t4, 24(a5)
; RV64IZCMP-NEXT: lw t5, 28(a5)
-; RV64IZCMP-NEXT: lw t6, 32(a5)
-; RV64IZCMP-NEXT: lw s2, 36(a5)
-; RV64IZCMP-NEXT: lw s3, 40(a5)
-; RV64IZCMP-NEXT: lw s4, 44(a5)
-; RV64IZCMP-NEXT: lw a1, 48(a5)
-; RV64IZCMP-NEXT: lw s0, 52(a5)
-; RV64IZCMP-NEXT: lw s1, 68(a5)
-; RV64IZCMP-NEXT: lw a2, 64(a5)
-; RV64IZCMP-NEXT: lw a3, 60(a5)
-; RV64IZCMP-NEXT: lw a4, 56(a5)
-; RV64IZCMP-NEXT: sw s1, 68(a5)
-; RV64IZCMP-NEXT: sw a2, 64(a5)
-; RV64IZCMP-NEXT: sw a3, 60(a5)
-; RV64IZCMP-NEXT: sw a4, 56(a5)
-; RV64IZCMP-NEXT: sw s0, 52(a5)
-; RV64IZCMP-NEXT: sw a1, 48(a5)
-; RV64IZCMP-NEXT: sw s4, 44(a5)
-; RV64IZCMP-NEXT: sw s3, 40(a5)
-; RV64IZCMP-NEXT: sw s2, 36(a5)
-; RV64IZCMP-NEXT: sw t6, 32(a5)
+; RV64IZCMP-NEXT: lw t6, 48(a5)
+; RV64IZCMP-NEXT: lw s2, 52(a5)
+; RV64IZCMP-NEXT: lw a3, 56(a5)
+; RV64IZCMP-NEXT: lw a4, 60(a5)
+; RV64IZCMP-NEXT: lw a1, 64(a5)
+; RV64IZCMP-NEXT: lw s0, 68(a5)
+; RV64IZCMP-NEXT: lw s3, 32(a5)
+; RV64IZCMP-NEXT: lw s4, 36(a5)
+; RV64IZCMP-NEXT: lw s1, 40(a5)
+; RV64IZCMP-NEXT: lw a2, 44(a5)
+; RV64IZCMP-NEXT: sw s0, 68(a5)
+; RV64IZCMP-NEXT: sw a1, 64(a5)
+; RV64IZCMP-NEXT: sw a4, 60(a5)
+; RV64IZCMP-NEXT: sw a3, 56(a5)
+; RV64IZCMP-NEXT: sw s2, 52(a5)
+; RV64IZCMP-NEXT: sw t6, 48(a5)
+; RV64IZCMP-NEXT: sw a2, 44(a5)
+; RV64IZCMP-NEXT: sw s1, 40(a5)
+; RV64IZCMP-NEXT: sw s4, 36(a5)
+; RV64IZCMP-NEXT: sw s3, 32(a5)
; RV64IZCMP-NEXT: sw t5, 28(a5)
; RV64IZCMP-NEXT: sw t4, 24(a5)
; RV64IZCMP-NEXT: sw t3, 20(a5)
@@ -1203,26 +1203,26 @@ define void @many_args(i32, i32, i32, i32, i32, i32, i32, i32, i32) nounwind {
; RV32IZCMP-SR-NEXT: lw t3, 20(a5)
; RV32IZCMP-SR-NEXT: lw t4, 24(a5)
; RV32IZCMP-SR-NEXT: lw t5, 28(a5)
-; RV32IZCMP-SR-NEXT: lw t6, 32(a5)
-; RV32IZCMP-SR-NEXT: lw s2, 36(a5)
-; RV32IZCMP-SR-NEXT: lw s3, 40(a5)
-; RV32IZCMP-SR-NEXT: lw s4, 44(a5)
-; RV32IZCMP-SR-NEXT: lw a1, 48(a5)
-; RV32IZCMP-SR-NEXT: lw s0, 52(a5)
-; RV32IZCMP-SR-NEXT: lw s1, 68(a5)
-; RV32IZCMP-SR-NEXT: lw a2, 64(a5)
-; RV32IZCMP-SR-NEXT: lw a3, 60(a5)
-; RV32IZCMP-SR-NEXT: lw a4, 56(a5)
-; RV32IZCMP-SR-NEXT: sw s1, 68(a5)
-; RV32IZCMP-SR-NEXT: sw a2, 64(a5)
-; RV32IZCMP-SR-NEXT: sw a3, 60(a5)
-; RV32IZCMP-SR-NEXT: sw a4, 56(a5)
-; RV32IZCMP-SR-NEXT: sw s0, 52(a5)
-; RV32IZCMP-SR-NEXT: sw a1, 48(a5)
-; RV32IZCMP-SR-NEXT: sw s4, 44(a5)
-; RV32IZCMP-SR-NEXT: sw s3, 40(a5)
-; RV32IZCMP-SR-NEXT: sw s2, 36(a5)
-; RV32IZCMP-SR-NEXT: sw t6, 32(a5)
+; RV32IZCMP-SR-NEXT: lw t6, 48(a5)
+; RV32IZCMP-SR-NEXT: lw s2, 52(a5)
+; RV32IZCMP-SR-NEXT: lw a3, 56(a5)
+; RV32IZCMP-SR-NEXT: lw a4, 60(a5)
+; RV32IZCMP-SR-NEXT: lw a1, 64(a5)
+; RV32IZCMP-SR-NEXT: lw s0, 68(a5)
+; RV32IZCMP-SR-NEXT: lw s3, 32(a5)
+; RV32IZCMP-SR-NEXT: lw s4, 36(a5)
+; RV32IZCMP-SR-NEXT: lw s1, 40(a5)
+; RV32IZCMP-SR-NEXT: lw a2, 44(a5)
+; RV32IZCMP-SR-NEXT: sw s0, 68(a5)
+; RV32IZCMP-SR-NEXT: sw a1, 64(a5)
+; RV32IZCMP-SR-NEXT: sw a4, 60(a5)
+; RV32IZCMP-SR-NEXT: sw a3, 56(a5)
+; RV32IZCMP-SR-NEXT: sw s2, 52(a5)
+; RV32IZCMP-SR-NEXT: sw t6, 48(a5)
+; RV32IZCMP-SR-NEXT: sw a2, 44(a5)
+; RV32IZCMP-SR-NEXT: sw s1, 40(a5)
+; RV32IZCMP-SR-NEXT: sw s4, 36(a5)
+; RV32IZCMP-SR-NEXT: sw s3, 32(a5)
; RV32IZCMP-SR-NEXT: sw t5, 28(a5)
; RV32IZCMP-SR-NEXT: sw t4, 24(a5)
; RV32IZCMP-SR-NEXT: sw t3, 20(a5)
@@ -1246,26 +1246,26 @@ define void @many_args(i32, i32, i32, i32, i32, i32, i32, i32, i32) nounwind {
; RV64IZCMP-SR-NEXT: lw t3, 20(a5)
; RV64IZCMP-SR-NEXT: lw t4, 24(a5)
; RV64IZCMP-SR-NEXT: lw t5, 28(a5)
-; RV64IZCMP-SR-NEXT: lw t6, 32(a5)
-; RV64IZCMP-SR-NEXT: lw s2, 36(a5)
-; RV64IZCMP-SR-NEXT: lw s3, 40(a5)
-; RV64IZCMP-SR-NEXT: lw s4, 44(a5)
-; RV64IZCMP-SR-NEXT: lw a1, 48(a5)
-; RV64IZCMP-SR-NEXT: lw s0, 52(a5)
-; RV64IZCMP-SR-NEXT: lw s1, 68(a5)
-; RV64IZCMP-SR-NEXT: lw a2, 64(a5)
-; RV64IZCMP-SR-NEXT: lw a3, 60(a5)
-; RV64IZCMP-SR-NEXT: lw a4, 56(a5)
-; RV64IZCMP-SR-NEXT: sw s1, 68(a5)
-; RV64IZCMP-SR-NEXT: sw a2, 64(a5)
-; RV64IZCMP-SR-NEXT: sw a3, 60(a5)
-; RV64IZCMP-SR-NEXT: sw a4, 56(a5)
-; RV64IZCMP-SR-NEXT: sw s0, 52(a5)
-; RV64IZCMP-SR-NEXT: sw a1, 48(a5)
-; RV64IZCMP-SR-NEXT: sw s4, 44(a5)
-; RV64IZCMP-SR-NEXT: sw s3, 40(a5)
-; RV64IZCMP-SR-NEXT: sw s2, 36(a5)
-; RV64IZCMP-SR-NEXT: sw t6, 32(a5)
+; RV64IZCMP-SR-NEXT: lw t6, 48(a5)
+; RV64IZCMP-SR-NEXT: lw s2, 52(a5)
+; RV64IZCMP-SR-NEXT: lw a3, 56(a5)
+; RV64IZCMP-SR-NEXT: lw a4, 60(a5)
+; RV64IZCMP-SR-NEXT: lw a1, 64(a5)
+; RV64IZCMP-SR-NEXT: lw s0, 68(a5)
+; RV64IZCMP-SR-NEXT: lw s3, 32(a5)
+; RV64IZCMP-SR-NEXT: lw s4, 36(a5)
+; RV64IZCMP-SR-NEXT: lw s1, 40(a5)
+; RV64IZCMP-SR-NEXT: lw a2, 44(a5)
+; RV64IZCMP-SR-NEXT: sw s0, 68(a5)
+; RV64IZCMP-SR-NEXT: sw a1, 64(a5)
+; RV64IZCMP-SR-NEXT: sw a4, 60(a5)
+; RV64IZCMP-SR-NEXT: sw a3, 56(a5)
+; RV64IZCMP-SR-NEXT: sw s2, 52(a5)
+; RV64IZCMP-SR-NEXT: sw t6, 48(a5)
+; RV64IZCMP-SR-NEXT: sw a2, 44(a5)
+; RV64IZCMP-SR-NEXT: sw s1, 40(a5)
+; RV64IZCMP-SR-NEXT: sw s4, 36(a5)
+; RV64IZCMP-SR-NEXT: sw s3, 32(a5)
; RV64IZCMP-SR-NEXT: sw t5, 28(a5)
; RV64IZCMP-SR-NEXT: sw t4, 24(a5)
; RV64IZCMP-SR-NEXT: sw t3, 20(a5)
@@ -1294,26 +1294,26 @@ define void @many_args(i32, i32, i32, i32, i32, i32, i32, i32, i32) nounwind {
; RV32I-NEXT: lw a7, 20(a5)
; RV32I-NEXT: lw t0, 24(a5)
; RV32I-NEXT: lw t1, 28(a5)
-; RV32I-NEXT: lw t2, 32(a5)
-; RV32I-NEXT: lw t3, 36(a5)
-; RV32I-NEXT: lw t4, 40(a5)
-; RV32I-NEXT: lw t5, 44(a5)
-; RV32I-NEXT: lw t6, 48(a5)
-; RV32I-NEXT: lw s0, 52(a5)
-; RV32I-NEXT: lw s1, 68(a5)
-; RV32I-NEXT: lw s2, 64(a5)
-; RV32I-NEXT: lw s3, 60(a5)
-; RV32I-NEXT: lw s4, 56(a5)
-; RV32I-NEXT: sw s1, 68(a5)
-; RV32I-NEXT: sw s2, 64(a5)
-; RV32I-NEXT: sw s3, 60(a5)
-; RV32I-NEXT: sw s4, 56(a5)
-; RV32I-NEXT: sw s0, 52(a5)
-; RV32I-NEXT: sw t6, 48(a5)
-; RV32I-NEXT: sw t5, 44(a5)
-; RV32I-NEXT: sw t4, 40(a5)
-; RV32I-NEXT: sw t3, 36(a5)
-; RV32I-NEXT: sw t2, 32(a5)
+; RV32I-NEXT: lw t2, 48(a5)
+; RV32I-NEXT: lw t3, 52(a5)
+; RV32I-NEXT: lw t4, 56(a5)
+; RV32I-NEXT: lw t5, 60(a5)
+; RV32I-NEXT: lw t6, 64(a5)
+; RV32I-NEXT: lw s0, 68(a5)
+; RV32I-NEXT: lw s1, 32(a5)
+; RV32I-NEXT: lw s2, 36(a5)
+; RV32I-NEXT: lw s3, 40(a5)
+; RV32I-NEXT: lw s4, 44(a5)
+; RV32I-NEXT: sw s0, 68(a5)
+; RV32I-NEXT: sw t6, 64(a5)
+; RV32I-NEXT: sw t5, 60(a5)
+; RV32I-NEXT: sw t4, 56(a5)
+; RV32I-NEXT: sw t3, 52(a5)
+; RV32I-NEXT: sw t2, 48(a5)
+; RV32I-NEXT: sw s4, 44(a5)
+; RV32I-NEXT: sw s3, 40(a5)
+; RV32I-NEXT: sw s2, 36(a5)
+; RV32I-NEXT: sw s1, 32(a5)
; RV32I-NEXT: sw t1, 28(a5)
; RV32I-NEXT: sw t0, 24(a5)
; RV32I-NEXT: sw a7, 20(a5)
@@ -1348,26 +1348,26 @@ define void @many_args(i32, i32, i32, i32, i32, i32, i32, i32, i32) nounwind {
; RV64I-NEXT: lw a7, 20(a5)
; RV64I-NEXT: lw t0, 24(a5)
; RV64I-NEXT: lw t1, 28(a5)
-; RV64I-NEXT: lw t2, 32(a5)
-; RV64I-NEXT: lw t3, 36(a5)
-; RV64I-NEXT: lw t4, 40(a5)
-; RV64I-NEXT: lw t5, 44(a5)
-; RV64I-NEXT: lw t6, 48(a5)
-; RV64I-NEXT: lw s0, 52(a5)
-; RV64I-NEXT: lw s1, 68(a5)
-; RV64I-NEXT: lw s2, 64(a5)
-; RV64I-NEXT: lw s3, 60(a5)
-; RV64I-NEXT: lw s4, 56(a5)
-; RV64I-NEXT: sw s1, 68(a5)
-; RV64I-NEXT: sw s2, 64(a5)
-; RV64I-NEXT: sw s3, 60(a5)
-; RV64I-NEXT: sw s4, 56(a5)
-; RV64I-NEXT: sw s0, 52(a5)
-; RV64I-NEXT: sw t6, 48(a5)
-; RV64I-NEXT: sw t5, 44(a5)
-; RV64I-NEXT: sw t4, 40(a5)
-; RV64I-NEXT: sw t3, 36(a5)
-; RV64I-NEXT: sw t2, 32(a5)
+; RV64I-NEXT: lw t2, 48(a5)
+; RV64I-NEXT: lw t3, 52(a5)
+; RV64I-NEXT: lw t4, 56(a5)
+; RV64I-NEXT: lw t5, 60(a5)
+; RV64I-NEXT: lw t6, 64(a5)
+; RV64I-NEXT: lw s0, 68(a5)
+; RV64I-NEXT: lw s1, 32(a5)
+; RV64I-NEXT: lw s2, 36(a5)
+; RV64I-NEXT: lw s3, 40(a5)
+; RV64I-NEXT: lw s4, 44(a5)
+; RV64I-NEXT: sw s0, 68(a5)
+; RV64I-NEXT: sw t6, 64(a5)
+; RV64I-NEXT: sw t5, 60(a5)
+; RV64I-NEXT: sw t4, 56(a5)
+; RV64I-NEXT: sw t3, 52(a5)
+; RV64I-NEXT: sw t2, 48(a5)
+; RV64I-NEXT: sw s4, 44(a5)
+; RV64I-NEXT: sw s3, 40(a5)
+; RV64I-NEXT: sw s2, 36(a5)
+; RV64I-NEXT: sw s1, 32(a5)
; RV64I-NEXT: sw t1, 28(a5)
; RV64I-NEXT: sw t0, 24(a5)
; RV64I-NEXT: sw a7, 20(a5)
@@ -1813,16 +1813,16 @@ define void @callee_with_irq() nounwind "interrupt"="user" {
; RV32IZCMP-NEXT: sw t4, 44(sp) # 4-byte Folded Spill
; RV32IZCMP-NEXT: sw t5, 40(sp) # 4-byte Folded Spill
; RV32IZCMP-NEXT: sw t6, 36(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT: lui a6, %hi(var_test_irq)
-; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq)(a6)
+; RV32IZCMP-NEXT: lui t0, %hi(var_test_irq)
+; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq)(t0)
; RV32IZCMP-NEXT: sw a0, 32(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq+4)(a6)
+; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq+4)(t0)
; RV32IZCMP-NEXT: sw a0, 28(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq+8)(a6)
+; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq+8)(t0)
; RV32IZCMP-NEXT: sw a0, 24(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq+12)(a6)
+; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq+12)(t0)
; RV32IZCMP-NEXT: sw a0, 20(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT: addi a5, a6, %lo(var_test_irq)
+; RV32IZCMP-NEXT: addi a5, t0, %lo(var_test_irq)
; RV32IZCMP-NEXT: lw a0, 16(a5)
; RV32IZCMP-NEXT: sw a0, 16(sp) # 4-byte Folded Spill
; RV32IZCMP-NEXT: lw a0, 20(a5)
@@ -1845,22 +1845,22 @@ define void @callee_with_irq() nounwind "interrupt"="user" {
; RV32IZCMP-NEXT: lw t3, 84(a5)
; RV32IZCMP-NEXT: lw t2, 88(a5)
; RV32IZCMP-NEXT: lw t1, 92(a5)
-; RV32IZCMP-NEXT: lw t0, 96(a5)
-; RV32IZCMP-NEXT: lw s0, 100(a5)
-; RV32IZCMP-NEXT: lw a7, 104(a5)
-; RV32IZCMP-NEXT: lw a4, 108(a5)
+; RV32IZCMP-NEXT: lw a7, 112(a5)
+; RV32IZCMP-NEXT: lw s0, 116(a5)
+; RV32IZCMP-NEXT: lw a3, 120(a5)
; RV32IZCMP-NEXT: lw a0, 124(a5)
-; RV32IZCMP-NEXT: lw a1, 120(a5)
-; RV32IZCMP-NEXT: lw a2, 116(a5)
-; RV32IZCMP-NEXT: lw a3, 112(a5)
+; RV32IZCMP-NEXT: lw a6, 96(a5)
+; RV32IZCMP-NEXT: lw a4, 100(a5)
+; RV32IZCMP-NEXT: lw a2, 104(a5)
+; RV32IZCMP-NEXT: lw a1, 108(a5)
; RV32IZCMP-NEXT: sw a0, 124(a5)
-; RV32IZCMP-NEXT: sw a1, 120(a5)
-; RV32IZCMP-NEXT: sw a2, 116(a5)
-; RV32IZCMP-NEXT: sw a3, 112(a5)
-; RV32IZCMP-NEXT: sw a4, 108(a5)
-; RV32IZCMP-NEXT: sw a7, 104(a5)
-; RV32IZCMP-NEXT: sw s0, 100(a5)
-; RV32IZCMP-NEXT: sw t0, 96(a5)
+; RV32IZCMP-NEXT: sw a3, 120(a5)
+; RV32IZCMP-NEXT: sw s0, 116(a5)
+; RV32IZCMP-NEXT: sw a7, 112(a5)
+; RV32IZCMP-NEXT: sw a1, 108(a5)
+; RV32IZCMP-NEXT: sw a2, 104(a5)
+; RV32IZCMP-NEXT: sw a4, 100(a5)
+; RV32IZCMP-NEXT: sw a6, 96(a5)
; RV32IZCMP-NEXT: sw t1, 92(a5)
; RV32IZCMP-NEXT: sw t2, 88(a5)
; RV32IZCMP-NEXT: sw t3, 84(a5)
@@ -1884,13 +1884,13 @@ define void @callee_with_irq() nounwind "interrupt"="user" {
; RV32IZCMP-NEXT: lw a0, 16(sp) # 4-byte Folded Reload
; RV32IZCMP-NEXT: sw a0, 16(a5)
; RV32IZCMP-NEXT: lw a0, 20(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq+12)(a6)
+; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq+12)(t0)
; RV32IZCMP-NEXT: lw a0, 24(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq+8)(a6)
+; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq+8)(t0)
; RV32IZCMP-NEXT: lw a0, 28(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq+4)(a6)
+; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq+4)(t0)
; RV32IZCMP-NEXT: lw a0, 32(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq)(a6)
+; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq)(t0)
; RV32IZCMP-NEXT: lw t0, 92(sp) # 4-byte Folded Reload
; RV32IZCMP-NEXT: lw t1, 88(sp) # 4-byte Folded Reload
; RV32IZCMP-NEXT: lw t2, 84(sp) # 4-byte Folded Reload
@@ -1929,16 +1929,16 @@ define void @callee_with_irq() nounwind "interrupt"="user" {
; RV64IZCMP-NEXT: sd t4, 72(sp) # 8-byte Folded Spill
; RV64IZCMP-NEXT: sd t5, 64(sp) # 8-byte Folded Spill
; RV64IZCMP-NEXT: sd t6, 56(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT: lui a6, %hi(var_test_irq)
-; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq)(a6)
+; RV64IZCMP-NEXT: lui t0, %hi(var_test_irq)
+; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq)(t0)
; RV64IZCMP-NEXT: sd a0, 48(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq+4)(a6)
+; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq+4)(t0)
; RV64IZCMP-NEXT: sd a0, 40(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq+8)(a6)
+; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq+8)(t0)
; RV64IZCMP-NEXT: sd a0, 32(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq+12)(a6)
+; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq+12)(t0)
; RV64IZCMP-NEXT: sd a0, 24(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT: addi a5, a6, %lo(var_test_irq)
+; RV64IZCMP-NEXT: addi a5, t0, %lo(var_test_irq)
; RV64IZCMP-NEXT: lw a0, 16(a5)
; RV64IZCMP-NEXT: sd a0, 16(sp) # 8-byte Folded Spill
; RV64IZCMP-NEXT: lw a0, 20(a5)
@@ -1961,22 +1961,22 @@ define void @callee_with_irq() nounwind "interrupt"="user" {
; RV64IZCMP-NEXT: lw t3, 84(a5)
; RV64IZCMP-NEXT: lw t2, 88(a5)
; RV64IZCMP-NEXT: lw t1, 92(a5)
-; RV64IZCMP-NEXT: lw t0, 96(a5)
-; RV64IZCMP-NEXT: lw s0, 100(a5)
-; RV64IZCMP-NEXT: lw a7, 104(a5)
-; RV64IZCMP-NEXT: lw a4, 108(a5)
+; RV64IZCMP-NEXT: lw a7, 112(a5)
+; RV64IZCMP-NEXT: lw s0, 116(a5)
+; RV64IZCMP-NEXT: lw a3, 120(a5)
; RV64IZCMP-NEXT: lw a0, 124(a5)
-; RV64IZCMP-NEXT: lw a1, 120(a5)
-; RV64IZCMP-NEXT: lw a2, 116(a5)
-; RV64IZCMP-NEXT: lw a3, 112(a5)
+; RV64IZCMP-NEXT: lw a6, 96(a5)
+; RV64IZCMP-NEXT: lw a4, 100(a5)
+; RV64IZCMP-NEXT: lw a2, 104(a5)
+; RV64IZCMP-NEXT: lw a1, 108(a5)
; RV64IZCMP-NEXT: sw a0, 124(a5)
-; RV64IZCMP-NEXT: sw a1, 120(a5)
-; RV64IZCMP-NEXT: sw a2, 116(a5)
-; RV64IZCMP-NEXT: sw a3, 112(a5)
-; RV64IZCMP-NEXT: sw a4, 108(a5)
-; RV64IZCMP-NEXT: sw a7, 104(a5)
-; RV64IZCMP-NEXT: sw s0, 100(a5)
-; RV64IZCMP-NEXT: sw t0, 96(a5)
+; RV64IZCMP-NEXT: sw a3, 120(a5)
+; RV64IZCMP-NEXT: sw s0, 116(a5)
+; RV64IZCMP-NEXT: sw a7, 112(a5)
+; RV64IZCMP-NEXT: sw a1, 108(a5)
+; RV64IZCMP-NEXT: sw a2, 104(a5)
+; RV64IZCMP-NEXT: sw a4, 100(a5)
+; RV64IZCMP-NEXT: sw a6, 96(a5)
; RV64IZCMP-NEXT: sw t1, 92(a5)
; RV64IZCMP-NEXT: sw t2, 88(a5)
; RV64IZCMP-NEXT: sw t3, 84(a5)
@@ -2000,13 +2000,13 @@ define void @callee_with_irq() nounwind "interrupt"="user" {
; RV64IZCMP-NEXT: ld a0, 16(sp) # 8-byte Folded Reload
; RV64IZCMP-NEXT: sw a0, 16(a5)
; RV64IZCMP-NEXT: ld a0, 24(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq+12)(a6)
+; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq+12)(t0)
; RV64IZCMP-NEXT: ld a0, 32(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq+8)(a6)
+; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq+8)(t0)
; RV64IZCMP-NEXT: ld a0, 40(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq+4)(a6)
+; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq+4)(t0)
; RV64IZCMP-NEXT: ld a0, 48(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq)(a6)
+; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq)(t0)
; RV64IZCMP-NEXT: ld t0, 168(sp) # 8-byte Folded Reload
; RV64IZCMP-NEXT: ld t1, 160(sp) # 8-byte Folded Reload
; RV64IZCMP-NEXT: ld t2, 152(sp) # 8-byte Folded Reload
@@ -2045,16 +2045,16 @@ define void @callee_with_irq() nounwind "interrupt"="user" {
; RV32IZCMP-SR-NEXT: sw t4, 44(sp) # 4-byte Folded Spill
; RV32IZCMP-SR-NEXT: sw t5, 40(sp) # 4-byte Folded Spill
; RV32IZCMP-SR-NEXT: sw t6, 36(sp) # 4-byte Folded Spill
-; RV32IZCMP-SR-NEXT: lui a6, %hi(var_test_irq)
-; RV32IZCMP-SR-NEXT: lw a0, %lo(var_test_irq)(a6)
+; RV32IZCMP-SR-NEXT: lui t0, %hi(var_test_irq)
+; RV32IZCMP-SR-NEXT: lw a0, %lo(var_test_irq)(t0)
; RV32IZCMP-SR-NEXT: sw a0, 32(sp) # 4-byte Folded Spill
-; RV32IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+4)(a6)
+; RV32IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+4)(t0)
; RV32IZCMP-SR-NEXT: sw a0, 28(sp) # 4-byte Folded Spill
-; RV32IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+8)(a6)
+; RV32IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+8)(t0)
; RV32IZCMP-SR-NEXT: sw a0, 24(sp) # 4-byte Folded Spill
-; RV32IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+12)(a6)
+; RV32IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+12)(t0)
; RV32IZCMP-SR-NEXT: sw a0, 20(sp) # 4-byte Folded Spill
-; RV32IZCMP-SR-NEXT: addi a5, a6, %lo(var_test_irq)
+; RV32IZCMP-SR-NEXT: addi a5, t0, %lo(var_test_irq)
; RV32IZCMP-SR-NEXT: lw a0, 16(a5)
; RV32IZCMP-SR-NEXT: sw a0, 16(sp) # 4-byte Folded Spill
; RV32IZCMP-SR-NEXT: lw a0, 20(a5)
@@ -2077,22 +2077,22 @@ define void @callee_with_irq() nounwind "interrupt"="user" {
; RV32IZCMP-SR-NEXT: lw t3, 84(a5)
; RV32IZCMP-SR-NEXT: lw t2, 88(a5)
; RV32IZCMP-SR-NEXT: lw t1, 92(a5)
-; RV32IZCMP-SR-NEXT: lw t0, 96(a5)
-; RV32IZCMP-SR-NEXT: lw s0, 100(a5)
-; RV32IZCMP-SR-NEXT: lw a7, 104(a5)
-; RV32IZCMP-SR-NEXT: lw a4, 108(a5)
+; RV32IZCMP-SR-NEXT: lw a7, 112(a5)
+; RV32IZCMP-SR-NEXT: lw s0, 116(a5)
+; RV32IZCMP-SR-NEXT: lw a3, 120(a5)
; RV32IZCMP-SR-NEXT: lw a0, 124(a5)
-; RV32IZCMP-SR-NEXT: lw a1, 120(a5)
-; RV32IZCMP-SR-NEXT: lw a2, 116(a5)
-; RV32IZCMP-SR-NEXT: lw a3, 112(a5)
+; RV32IZCMP-SR-NEXT: lw a6, 96(a5)
+; RV32IZCMP-SR-NEXT: lw a4, 100(a5)
+; RV32IZCMP-SR-NEXT: lw a2, 104(a5)
+; RV32IZCMP-SR-NEXT: lw a1, 108(a5)
; RV32IZCMP-SR-NEXT: sw a0, 124(a5)
-; RV32IZCMP-SR-NEXT: sw a1, 120(a5)
-; RV32IZCMP-SR-NEXT: sw a2, 116(a5)
-; RV32IZCMP-SR-NEXT: sw a3, 112(a5)
-; RV32IZCMP-SR-NEXT: sw a4, 108(a5)
-; RV32IZCMP-SR-NEXT: sw a7, 104(a5)
-; RV32IZCMP-SR-NEXT: sw s0, 100(a5)
-; RV32IZCMP-SR-NEXT: sw t0, 96(a5)
+; RV32IZCMP-SR-NEXT: sw a3, 120(a5)
+; RV32IZCMP-SR-NEXT: sw s0, 116(a5)
+; RV32IZCMP-SR-NEXT: sw a7, 112(a5)
+; RV32IZCMP-SR-NEXT: sw a1, 108(a5)
+; RV32IZCMP-SR-NEXT: sw a2, 104(a5)
+; RV32IZCMP-SR-NEXT: sw a4, 100(a5)
+; RV32IZCMP-SR-NEXT: sw a6, 96(a5)
; RV32IZCMP-SR-NEXT: sw t1, 92(a5)
; RV32IZCMP-SR-NEXT: sw t2, 88(a5)
; RV32IZCMP-SR-NEXT: sw t3, 84(a5)
@@ -2116,13 +2116,13 @@ define void @callee_with_irq() nounwind "interrupt"="user" {
; RV32IZCMP-SR-NEXT: lw a0, 16(sp) # 4-byte Folded Reload
; RV32IZCMP-SR-NEXT: sw a0, 16(a5)
; RV32IZCMP-SR-NEXT: lw a0, 20(sp) # 4-byte Folded Reload
-; RV32IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+12)(a6)
+; RV32IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+12)(t0)
; RV32IZCMP-SR-NEXT: lw a0, 24(sp) # 4-byte Folded Reload
-; RV32IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+8)(a6)
+; RV32IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+8)(t0)
; RV32IZCMP-SR-NEXT: lw a0, 28(sp) # 4-byte Folded Reload
-; RV32IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+4)(a6)
+; RV32IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+4)(t0)
; RV32IZCMP-SR-NEXT: lw a0, 32(sp) # 4-byte Folded Reload
-; RV32IZCMP-SR-NEXT: sw a0, %lo(var_test_irq)(a6)
+; RV32IZCMP-SR-NEXT: sw a0, %lo(var_test_irq)(t0)
; RV32IZCMP-SR-NEXT: lw t0, 92(sp) # 4-byte Folded Reload
; RV32IZCMP-SR-NEXT: lw t1, 88(sp) # 4-byte Folded Reload
; RV32IZCMP-SR-NEXT: lw t2, 84(sp) # 4-byte Folded Reload
@@ -2161,16 +2161,16 @@ define void @callee_with_irq() nounwind "interrupt"="user" {
; RV64IZCMP-SR-NEXT: sd t4, 72(sp) # 8-byte Folded Spill
; RV64IZCMP-SR-NEXT: sd t5, 64(sp) # 8-byte Folded Spill
; RV64IZCMP-SR-NEXT: sd t6, 56(sp) # 8-byte Folded Spill
-; RV64IZCMP-SR-NEXT: lui a6, %hi(var_test_irq)
-; RV64IZCMP-SR-NEXT: lw a0, %lo(var_test_irq)(a6)
+; RV64IZCMP-SR-NEXT: lui t0, %hi(var_test_irq)
+; RV64IZCMP-SR-NEXT: lw a0, %lo(var_test_irq)(t0)
; RV64IZCMP-SR-NEXT: sd a0, 48(sp) # 8-byte Folded Spill
-; RV64IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+4)(a6)
+; RV64IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+4)(t0)
; RV64IZCMP-SR-NEXT: sd a0, 40(sp) # 8-byte Folded Spill
-; RV64IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+8)(a6)
+; RV64IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+8)(t0)
; RV64IZCMP-SR-NEXT: sd a0, 32(sp) # 8-byte Folded Spill
-; RV64IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+12)(a6)
+; RV64IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+12)(t0)
; RV64IZCMP-SR-NEXT: sd a0, 24(sp) # 8-byte Folded Spill
-; RV64IZCMP-SR-NEXT: addi a5, a6, %lo(var_test_irq)
+; RV64IZCMP-SR-NEXT: addi a5, t0, %lo(var_test_irq)
; RV64IZCMP-SR-NEXT: lw a0, 16(a5)
; RV64IZCMP-SR-NEXT: sd a0, 16(sp) # 8-byte Folded Spill
; RV64IZCMP-SR-NEXT: lw a0, 20(a5)
@@ -2193,22 +2193,22 @@ define void @callee_with_irq() nounwind "interrupt"="user" {
; RV64IZCMP-SR-NEXT: lw t3, 84(a5)
; RV64IZCMP-SR-NEXT: lw t2, 88(a5)
; RV64IZCMP-SR-NEXT: lw t1, 92(a5)
-; RV64IZCMP-SR-NEXT: lw t0, 96(a5)
-; RV64IZCMP-SR-NEXT: lw s0, 100(a5)
-; RV64IZCMP-SR-NEXT: lw a7, 104(a5)
-; RV64IZCMP-SR-NEXT: lw a4, 108(a5)
+; RV64IZCMP-SR-NEXT: lw a7, 112(a5)
+; RV64IZCMP-SR-NEXT: lw s0, 116(a5)
+; RV64IZCMP-SR-NEXT: lw a3, 120(a5)
; RV64IZCMP-SR-NEXT: lw a0, 124(a5)
-; RV64IZCMP-SR-NEXT: lw a1, 120(a5)
-; RV64IZCMP-SR-NEXT: lw a2, 116(a5)
-; RV64IZCMP-SR-NEXT: lw a3, 112(a5)
+; RV64IZCMP-SR-NEXT: lw a6, 96(a5)
+; RV64IZCMP-SR-NEXT: lw a4, 100(a5)
+; RV64IZCMP-SR-NEXT: lw a2, 104(a5)
+; RV64IZCMP-SR-NEXT: lw a1, 108(a5)
; RV64IZCMP-SR-NEXT: sw a0, 124(a5)
-; RV64IZCMP-SR-NEXT: sw a1, 120(a5)
-; RV64IZCMP-SR-NEXT: sw a2, 116(a5)
-; RV64IZCMP-SR-NEXT: sw a3, 112(a5)
-; RV64IZCMP-SR-NEXT: sw a4, 108(a5)
-; RV64IZCMP-SR-NEXT: sw a7, 104(a5)
-; RV64IZCMP-SR-NEXT: sw s0, 100(a5)
-; RV64IZCMP-SR-NEXT: sw t0, 96(a5)
+; RV64IZCMP-SR-NEXT: sw a3, 120(a5)
+; RV64IZCMP-SR-NEXT: sw s0, 116(a5)
+; RV64IZCMP-SR-NEXT: sw a7, 112(a5)
+; RV64IZCMP-SR-NEXT: sw a1, 108(a5)
+; RV64IZCMP-SR-NEXT: sw a2, 104(a5)
+; RV64IZCMP-SR-NEXT: sw a4, 100(a5)
+; RV64IZCMP-SR-NEXT: sw a6, 96(a5)
; RV64IZCMP-SR-NEXT: sw t1, 92(a5)
; RV64IZCMP-SR-NEXT: sw t2, 88(a5)
; RV64IZCMP-SR-NEXT: sw t3, 84(a5)
@@ -2232,13 +2232,13 @@ define void @callee_with_irq() nounwind "interrupt"="user" {
; RV64IZCMP-SR-NEXT: ld a0, 16(sp) # 8-byte Folded Reload
; RV64IZCMP-SR-NEXT: sw a0, 16(a5)
; RV64IZCMP-SR-NEXT: ld a0, 24(sp) # 8-byte Folded Reload
-; RV64IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+12)(a6)
+; RV64IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+12)(t0)
; RV64IZCMP-SR-NEXT: ld a0, 32(sp) # 8-byte Folded Reload
-; RV64IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+8)(a6)
+; RV64IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+8)(t0)
; RV64IZCMP-SR-NEXT: ld a0, 40(sp) # 8-byte Folded Reload
-; RV64IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+4)(a6)
+; RV64IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+4)(t0)
; RV64IZCMP-SR-NEXT: ld a0, 48(sp) # 8-byte Folded Reload
-; RV64IZCMP-SR-NEXT: sw a0, %lo(var_test_irq)(a6)
+; RV64IZCMP-SR-NEXT: sw a0, %lo(var_test_irq)(t0)
; RV64IZCMP-SR-NEXT: ld t0, 168(sp) # 8-byte Folded Reload
; RV64IZCMP-SR-NEXT: ld t1, 160(sp) # 8-byte Folded Reload
; RV64IZCMP-SR-NEXT: ld t2, 152(sp) # 8-byte Folded Reload
@@ -2289,16 +2289,16 @@ define void @callee_with_irq() nounwind "interrupt"="user" {
; RV32I-NEXT: sw t4, 40(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw t5, 36(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw t6, 32(sp) # 4-byte Folded Spill
-; RV32I-NEXT: lui a6, %hi(var_test_irq)
-; RV32I-NEXT: lw a0, %lo(var_test_irq)(a6)
+; RV32I-NEXT: lui a7, %hi(var_test_irq)
+; RV32I-NEXT: lw a0, %lo(var_test_irq)(a7)
; RV32I-NEXT: sw a0, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: lw a0, %lo(var_test_irq+4)(a6)
+; RV32I-NEXT: lw a0, %lo(var_test_irq+4)(a7)
; RV32I-NEXT: sw a0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: lw a0, %lo(var_test_irq+8)(a6)
+; RV32I-NEXT: lw a0, %lo(var_test_irq+8)(a7)
; RV32I-NEXT: sw a0, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: lw a0, %lo(var_test_irq+12)(a6)
+; RV32I-NEXT: lw a0, %lo(var_test_irq+12)(a7)
; RV32I-NEXT: sw a0, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: addi a5, a6, %lo(var_test_irq)
+; RV32I-NEXT: addi a5, a7, %lo(var_test_irq)
; RV32I-NEXT: lw a0, 16(a5)
; RV32I-NEXT: sw a0, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: lw a0, 20(a5)
@@ -2321,22 +2321,22 @@ define void @callee_with_irq() nounwind "interrupt"="user" {
; RV32I-NEXT: lw s8, 84(a5)
; RV32I-NEXT: lw s9, 88(a5)
; RV32I-NEXT: lw s10, 92(a5)
-; RV32I-NEXT: lw s11, 96(a5)
-; RV32I-NEXT: lw ra, 100(a5)
-; RV32I-NEXT: lw a7, 104(a5)
-; RV32I-NEXT: lw a4, 108(a5)
+; RV32I-NEXT: lw s11, 112(a5)
+; RV32I-NEXT: lw ra, 116(a5)
+; RV32I-NEXT: lw a3, 120(a5)
; RV32I-NEXT: lw a0, 124(a5)
-; RV32I-NEXT: lw a1, 120(a5)
-; RV32I-NEXT: lw a2, 116(a5)
-; RV32I-NEXT: lw a3, 112(a5)
+; RV32I-NEXT: lw a6, 96(a5)
+; RV32I-NEXT: lw a4, 100(a5)
+; RV32I-NEXT: lw a2, 104(a5)
+; RV32I-NEXT: lw a1, 108(a5)
; RV32I-NEXT: sw a0, 124(a5)
-; RV32I-NEXT: sw a1, 120(a5)
-; RV32I-NEXT: sw a2, 116(a5)
-; RV32I-NEXT: sw a3, 112(a5)
-; RV32I-NEXT: sw a4, 108(a5)
-; RV32I-NEXT: sw a7, 104(a5)
-; RV32I-NEXT: sw ra, 100(a5)
-; RV32I-NEXT: sw s11, 96(a5)
+; RV32I-NEXT: sw a3, 120(a5)
+; RV32I-NEXT: sw ra, 116(a5)
+; RV32I-NEXT: sw s11, 112(a5)
+; RV32I-NEXT: sw a1, 108(a5)
+; RV32I-NEXT: sw a2, 104(a5)
+; RV32I-NEXT: sw a4, 100(a5)
+; RV32I-NEXT: sw a6, 96(a5)
; RV32I-NEXT: sw s10, 92(a5)
; RV32I-NEXT: sw s9, 88(a5)
; RV32I-NEXT: sw s8, 84(a5)
@@ -2360,13 +2360,13 @@ define void @callee_with_irq() nounwind "interrupt"="user" {
; RV32I-NEXT: lw a0, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: sw a0, 16(a5)
; RV32I-NEXT: lw a0, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: sw a0, %lo(var_test_irq+12)(a6)
+; RV32I-NEXT: sw a0, %lo(var_test_irq+12)(a7)
; RV32I-NEXT: lw a0, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: sw a0, %lo(var_test_irq+8)(a6)
+; RV32I-NEXT: sw a0, %lo(var_test_irq+8)(a7)
; RV32I-NEXT: lw a0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: sw a0, %lo(var_test_irq+4)(a6)
+; RV32I-NEXT: sw a0, %lo(var_test_irq+4)(a7)
; RV32I-NEXT: lw a0, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT: sw a0, %lo(var_test_irq)(a6)
+; RV32I-NEXT: sw a0, %lo(var_test_irq)(a7)
; RV32I-NEXT: lw ra, 140(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw t0, 136(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw t1, 132(sp) # 4-byte Folded Reload
@@ -2429,16 +2429,16 @@ define void @callee_with_irq() nounwind "interrupt"="user" {
; RV64I-NEXT: sd t4, 64(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd t5, 56(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd t6, 48(sp) # 8-byte Folded Spill
-; RV64I-NEXT: lui a6, %hi(var_test_irq)
-; RV64I-NEXT: lw a0, %lo(var_test_irq)(a6)
+; RV64I-NEXT: lui a7, %hi(var_test_irq)
+; RV64I-NEXT: lw a0, %lo(var_test_irq)(a7)
; RV64I-NEXT: sd a0, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: lw a0, %lo(var_test_irq+4)(a6)
+; RV64I-NEXT: lw a0, %lo(var_test_irq+4)(a7)
; RV64I-NEXT: sd a0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: lw a0, %lo(var_test_irq+8)(a6)
+; RV64I-NEXT: lw a0, %lo(var_test_irq+8)(a7)
; RV64I-NEXT: sd a0, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: lw a0, %lo(var_test_irq+12)(a6)
+; RV64I-NEXT: lw a0, %lo(var_test_irq+12)(a7)
; RV64I-NEXT: sd a0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT: addi a5, a6, %lo(var_test_irq)
+; RV64I-NEXT: addi a5, a7, %lo(var_test_irq)
; RV64I-NEXT: lw a0, 16(a5)
; RV64I-NEXT: sd a0, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: lw a0, 20(a5)
@@ -2461,22 +2461,22 @@ define void @callee_with_irq() nounwind "interrupt"="user" {
; RV64I-NEXT: lw s8, 84(a5)
; RV64I-NEXT: lw s9, 88(a5)
; RV64I-NEXT: lw s10, 92(a5)
-; RV64I-NEXT: lw s11, 96(a5)
-; RV64I-NEXT: lw ra, 100(a5)
-; RV64I-NEXT: lw a7, 104(a5)
-; RV64I-NEXT: lw a4, 108(a5)
+; RV64I-NEXT: lw s11, 112(a5)
+; RV64I-NEXT: lw ra, 116(a5)
+; RV64I-NEXT: lw a3, 120(a5)
; RV64I-NEXT: lw a0, 124(a5)
-; RV64I-NEXT: lw a1, 120(a5)
-; RV64I-NEXT: lw a2, 116(a5)
-; RV64I-NEXT: lw a3, 112(a5)
+; RV64I-NEXT: lw a6, 96(a5)
+; RV64I-NEXT: lw a4, 100(a5)
+; RV64I-NEXT: lw a2, 104(a5)
+; RV64I-NEXT: lw a1, 108(a5)
; RV64I-NEXT: sw a0, 124(a5)
-; RV64I-NEXT: sw a1, 120(a5)
-; RV64I-NEXT: sw a2, 116(a5)
-; RV64I-NEXT: sw a3, 112(a5)
-; RV64I-NEXT: sw a4, 108(a5)
-; RV64I-NEXT: sw a7, 104(a5)
-; RV64I-NEXT: sw ra, 100(a5)
-; RV64I-NEXT: sw s11, 96(a5)
+; RV64I-NEXT: sw a3, 120(a5)
+; RV64I-NEXT: sw ra, 116(a5)
+; RV64I-NEXT: sw s11, 112(a5)
+; RV64I-NEXT: sw a1, 108(a5)
+; RV64I-NEXT: sw a2, 104(a5)
+; RV64I-NEXT: sw a4, 100(a5)
+; RV64I-NEXT: sw a6, 96(a5)
; RV64I-NEXT: sw s10, 92(a5)
; RV64I-NEXT: sw s9, 88(a5)
; RV64I-NEXT: sw s8, 84(a5)
@@ -2500,13 +2500,13 @@ define void @callee_with_irq() nounwind "interrupt"="user" {
; RV64I-NEXT: ld a0, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: sw a0, 16(a5)
; RV64I-NEXT: ld a0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT: sw a0, %lo(var_test_irq+12)(a6)
+; RV64I-NEXT: sw a0, %lo(var_test_irq+12)(a7)
; RV64I-NEXT: ld a0, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: sw a0, %lo(var_test_irq+8)(a6)
+; RV64I-NEXT: sw a0, %lo(var_test_irq+8)(a7)
; RV64I-NEXT: ld a0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: sw a0, %lo(var_test_irq+4)(a6)
+; RV64I-NEXT: sw a0, %lo(var_test_irq+4)(a7)
; RV64I-NEXT: ld a0, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: sw a0, %lo(var_test_irq)(a6)
+; RV64I-NEXT: sw a0, %lo(var_test_irq)(a7)
; RV64I-NEXT: ld ra, 264(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld t0, 256(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld t1, 248(sp) # 8-byte Folded Reload
@@ -2546,16 +2546,16 @@ define void @callee_no_irq() nounwind{
; RV32IZCMP-LABEL: callee_no_irq:
; RV32IZCMP: # %bb.0:
; RV32IZCMP-NEXT: cm.push {ra, s0-s11}, -96
-; RV32IZCMP-NEXT: lui a6, %hi(var_test_irq)
-; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq)(a6)
+; RV32IZCMP-NEXT: lui t0, %hi(var_test_irq)
+; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq)(t0)
; RV32IZCMP-NEXT: sw a0, 28(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq+4)(a6)
+; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq+4)(t0)
; RV32IZCMP-NEXT: sw a0, 24(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq+8)(a6)
+; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq+8)(t0)
; RV32IZCMP-NEXT: sw a0, 20(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq+12)(a6)
+; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq+12)(t0)
; RV32IZCMP-NEXT: sw a0, 16(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT: addi a5, a6, %lo(var_test_irq)
+; RV32IZCMP-NEXT: addi a5, t0, %lo(var_test_irq)
; RV32IZCMP-NEXT: lw a0, 16(a5)
; RV32IZCMP-NEXT: sw a0, 12(sp) # 4-byte Folded Spill
; RV32IZCMP-NEXT: lw a0, 20(a5)
@@ -2578,22 +2578,22 @@ define void @callee_no_irq() nounwind{
; RV32IZCMP-NEXT: lw t3, 84(a5)
; RV32IZCMP-NEXT: lw t2, 88(a5)
; RV32IZCMP-NEXT: lw t1, 92(a5)
-; RV32IZCMP-NEXT: lw t0, 96(a5)
-; RV32IZCMP-NEXT: lw s0, 100(a5)
-; RV32IZCMP-NEXT: lw a7, 104(a5)
-; RV32IZCMP-NEXT: lw a4, 108(a5)
+; RV32IZCMP-NEXT: lw a7, 112(a5)
+; RV32IZCMP-NEXT: lw s0, 116(a5)
+; RV32IZCMP-NEXT: lw a3, 120(a5)
; RV32IZCMP-NEXT: lw a0, 124(a5)
-; RV32IZCMP-NEXT: lw a1, 120(a5)
-; RV32IZCMP-NEXT: lw a2, 116(a5)
-; RV32IZCMP-NEXT: lw a3, 112(a5)
+; RV32IZCMP-NEXT: lw a6, 96(a5)
+; RV32IZCMP-NEXT: lw a4, 100(a5)
+; RV32IZCMP-NEXT: lw a2, 104(a5)
+; RV32IZCMP-NEXT: lw a1, 108(a5)
; RV32IZCMP-NEXT: sw a0, 124(a5)
-; RV32IZCMP-NEXT: sw a1, 120(a5)
-; RV32IZCMP-NEXT: sw a2, 116(a5)
-; RV32IZCMP-NEXT: sw a3, 112(a5)
-; RV32IZCMP-NEXT: sw a4, 108(a5)
-; RV32IZCMP-NEXT: sw a7, 104(a5)
-; RV32IZCMP-NEXT: sw s0, 100(a5)
-; RV32IZCMP-NEXT: sw t0, 96(a5)
+; RV32IZCMP-NEXT: sw a3, 120(a5)
+; RV32IZCMP-NEXT: sw s0, 116(a5)
+; RV32IZCMP-NEXT: sw a7, 112(a5)
+; RV32IZCMP-NEXT: sw a1, 108(a5)
+; RV32IZCMP-NEXT: sw a2, 104(a5)
+; RV32IZCMP-NEXT: sw a4, 100(a5)
+; RV32IZCMP-NEXT: sw a6, 96(a5)
; RV32IZCMP-NEXT: sw t1, 92(a5)
; RV32IZCMP-NEXT: sw t2, 88(a5)
; RV32IZCMP-NEXT: sw t3, 84(a5)
@@ -2617,28 +2617,28 @@ define void @callee_no_irq() nounwind{
; RV32IZCMP-NEXT: lw a0, 12(sp) # 4-byte Folded Reload
; RV32IZCMP-NEXT: sw a0, 16(a5)
; RV32IZCMP-NEXT: lw a0, 16(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq+12)(a6)
+; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq+12)(t0)
; RV32IZCMP-NEXT: lw a0, 20(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq+8)(a6)
+; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq+8)(t0)
; RV32IZCMP-NEXT: lw a0, 24(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq+4)(a6)
+; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq+4)(t0)
; RV32IZCMP-NEXT: lw a0, 28(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq)(a6)
+; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq)(t0)
; RV32IZCMP-NEXT: cm.popret {ra, s0-s11}, 96
;
; RV64IZCMP-LABEL: callee_no_irq:
; RV64IZCMP: # %bb.0:
; RV64IZCMP-NEXT: cm.push {ra, s0-s11}, -160
-; RV64IZCMP-NEXT: lui a6, %hi(var_test_irq)
-; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq)(a6)
+; RV64IZCMP-NEXT: lui t0, %hi(var_test_irq)
+; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq)(t0)
; RV64IZCMP-NEXT: sd a0, 40(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq+4)(a6)
+; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq+4)(t0)
; RV64IZCMP-NEXT: sd a0, 32(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq+8)(a6)
+; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq+8)(t0)
; RV64IZCMP-NEXT: sd a0, 24(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq+12)(a6)
+; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq+12)(t0)
; RV64IZCMP-NEXT: sd a0, 16(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT: addi a5, a6, %lo(var_test_irq)
+; RV64IZCMP-NEXT: addi a5, t0, %lo(var_test_irq)
; RV64IZCMP-NEXT: lw a0, 16(a5)
; RV64IZCMP-NEXT: sd a0, 8(sp) # 8-byte Folded Spill
; RV64IZCMP-NEXT: lw a0, 20(a5)
@@ -2661,22 +2661,22 @@ define void @callee_no_irq() nounwind{
; RV64IZCMP-NEXT: lw t3, 84(a5)
; RV64IZCMP-NEXT: lw t2, 88(a5)
; RV64IZCMP-NEXT: lw t1, 92(a5)
-; RV64IZCMP-NEXT: lw t0, 96(a5)
-; RV64IZCMP-NEXT: lw s0, 100(a5)
-; RV64IZCMP-NEXT: lw a7, 104(a5)
-; RV64IZCMP-NEXT: lw a4, 108(a5)
+; RV64IZCMP-NEXT: lw a7, 112(a5)
+; RV64IZCMP-NEXT: lw s0, 116(a5)
+; RV64IZCMP-NEXT: lw a3, 120(a5)
; RV64IZCMP-NEXT: lw a0, 124(a5)
-; RV64IZCMP-NEXT: lw a1, 120(a5)
-; RV64IZCMP-NEXT: lw a2, 116(a5)
-; RV64IZCMP-NEXT: lw a3, 112(a5)
+; RV64IZCMP-NEXT: lw a6, 96(a5)
+; RV64IZCMP-NEXT: lw a4, 100(a5)
+; RV64IZCMP-NEXT: lw a2, 104(a5)
+; RV64IZCMP-NEXT: lw a1, 108(a5)
; RV64IZCMP-NEXT: sw a0, 124(a5)
-; RV64IZCMP-NEXT: sw a1, 120(a5)
-; RV64IZCMP-NEXT: sw a2, 116(a5)
-; RV64IZCMP-NEXT: sw a3, 112(a5)
-; RV64IZCMP-NEXT: sw a4, 108(a5)
-; RV64IZCMP-NEXT: sw a7, 104(a5)
-; RV64IZCMP-NEXT: sw s0, 100(a5)
-; RV64IZCMP-NEXT: sw t0, 96(a5)
+; RV64IZCMP-NEXT: sw a3, 120(a5)
+; RV64IZCMP-NEXT: sw s0, 116(a5)
+; RV64IZCMP-NEXT: sw a7, 112(a5)
+; RV64IZCMP-NEXT: sw a1, 108(a5)
+; RV64IZCMP-NEXT: sw a2, 104(a5)
+; RV64IZCMP-NEXT: sw a4, 100(a5)
+; RV64IZCMP-NEXT: sw a6, 96(a5)
; RV64IZCMP-NEXT: sw t1, 92(a5)
; RV64IZCMP-NEXT: sw t2, 88(a5)
; RV64IZCMP-NEXT: sw t3, 84(a5)
@@ -2700,28 +2700,28 @@ define void @callee_no_irq() nounwind{
; RV64IZCMP-NEXT: ld a0, 8(sp) # 8-byte Folded Reload
; RV64IZCMP-NEXT: sw a0, 16(a5)
; RV64IZCMP-NEXT: ld a0, 16(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq+12)(a6)
+; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq+12)(t0)
; RV64IZCMP-NEXT: ld a0, 24(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq+8)(a6)
+; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq+8)(t0)
; RV64IZCMP-NEXT: ld a0, 32(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq+4)(a6)
+; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq+4)(t0)
; RV64IZCMP-NEXT: ld a0, 40(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq)(a6)
+; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq)(t0)
; RV64IZCMP-NEXT: cm.popret {ra, s0-s11}, 160
;
; RV32IZCMP-SR-LABEL: callee_no_irq:
; RV32IZCMP-SR: # %bb.0:
; RV32IZCMP-SR-NEXT: cm.push {ra, s0-s11}, -96
-; RV32IZCMP-SR-NEXT: lui a6, %hi(var_test_irq)
-; RV32IZCMP-SR-NEXT: lw a0, %lo(var_test_irq)(a6)
+; RV32IZCMP-SR-NEXT: lui t0, %hi(var_test_irq)
+; RV32IZCMP-SR-NEXT: lw a0, %lo(var_test_irq)(t0)
; RV32IZCMP-SR-NEXT: sw a0, 28(sp) # 4-byte Folded Spill
-; RV32IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+4)(a6)
+; RV32IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+4)(t0)
; RV32IZCMP-SR-NEXT: sw a0, 24(sp) # 4-byte Folded Spill
-; RV32IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+8)(a6)
+; RV32IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+8)(t0)
; RV32IZCMP-SR-NEXT: sw a0, 20(sp) # 4-byte Folded Spill
-; RV32IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+12)(a6)
+; RV32IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+12)(t0)
; RV32IZCMP-SR-NEXT: sw a0, 16(sp) # 4-byte Folded Spill
-; RV32IZCMP-SR-NEXT: addi a5, a6, %lo(var_test_irq)
+; RV32IZCMP-SR-NEXT: addi a5, t0, %lo(var_test_irq)
; RV32IZCMP-SR-NEXT: lw a0, 16(a5)
; RV32IZCMP-SR-NEXT: sw a0, 12(sp) # 4-byte Folded Spill
; RV32IZCMP-SR-NEXT: lw a0, 20(a5)
@@ -2744,22 +2744,22 @@ define void @callee_no_irq() nounwind{
; RV32IZCMP-SR-NEXT: lw t3, 84(a5)
; RV32IZCMP-SR-NEXT: lw t2, 88(a5)
; RV32IZCMP-SR-NEXT: lw t1, 92(a5)
-; RV32IZCMP-SR-NEXT: lw t0, 96(a5)
-; RV32IZCMP-SR-NEXT: lw s0, 100(a5)
-; RV32IZCMP-SR-NEXT: lw a7, 104(a5)
-; RV32IZCMP-SR-NEXT: lw a4, 108(a5)
+; RV32IZCMP-SR-NEXT: lw a7, 112(a5)
+; RV32IZCMP-SR-NEXT: lw s0, 116(a5)
+; RV32IZCMP-SR-NEXT: lw a3, 120(a5)
; RV32IZCMP-SR-NEXT: lw a0, 124(a5)
-; RV32IZCMP-SR-NEXT: lw a1, 120(a5)
-; RV32IZCMP-SR-NEXT: lw a2, 116(a5)
-; RV32IZCMP-SR-NEXT: lw a3, 112(a5)
+; RV32IZCMP-SR-NEXT: lw a6, 96(a5)
+; RV32IZCMP-SR-NEXT: lw a4, 100(a5)
+; RV32IZCMP-SR-NEXT: lw a2, 104(a5)
+; RV32IZCMP-SR-NEXT: lw a1, 108(a5)
; RV32IZCMP-SR-NEXT: sw a0, 124(a5)
-; RV32IZCMP-SR-NEXT: sw a1, 120(a5)
-; RV32IZCMP-SR-NEXT: sw a2, 116(a5)
-; RV32IZCMP-SR-NEXT: sw a3, 112(a5)
-; RV32IZCMP-SR-NEXT: sw a4, 108(a5)
-; RV32IZCMP-SR-NEXT: sw a7, 104(a5)
-; RV32IZCMP-SR-NEXT: sw s0, 100(a5)
-; RV32IZCMP-SR-NEXT: sw t0, 96(a5)
+; RV32IZCMP-SR-NEXT: sw a3, 120(a5)
+; RV32IZCMP-SR-NEXT: sw s0, 116(a5)
+; RV32IZCMP-SR-NEXT: sw a7, 112(a5)
+; RV32IZCMP-SR-NEXT: sw a1, 108(a5)
+; RV32IZCMP-SR-NEXT: sw a2, 104(a5)
+; RV32IZCMP-SR-NEXT: sw a4, 100(a5)
+; RV32IZCMP-SR-NEXT: sw a6, 96(a5)
; RV32IZCMP-SR-NEXT: sw t1, 92(a5)
; RV32IZCMP-SR-NEXT: sw t2, 88(a5)
; RV32IZCMP-SR-NEXT: sw t3, 84(a5)
@@ -2783,28 +2783,28 @@ define void @callee_no_irq() nounwind{
; RV32IZCMP-SR-NEXT: lw a0, 12(sp) # 4-byte Folded Reload
; RV32IZCMP-SR-NEXT: sw a0, 16(a5)
; RV32IZCMP-SR-NEXT: lw a0, 16(sp) # 4-byte Folded Reload
-; RV32IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+12)(a6)
+; RV32IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+12)(t0)
; RV32IZCMP-SR-NEXT: lw a0, 20(sp) # 4-byte Folded Reload
-; RV32IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+8)(a6)
+; RV32IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+8)(t0)
; RV32IZCMP-SR-NEXT: lw a0, 24(sp) # 4-byte Folded Reload
-; RV32IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+4)(a6)
+; RV32IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+4)(t0)
; RV32IZCMP-SR-NEXT: lw a0, 28(sp) # 4-byte Folded Reload
-; RV32IZCMP-SR-NEXT: sw a0, %lo(var_test_irq)(a6)
+; RV32IZCMP-SR-NEXT: sw a0, %lo(var_test_irq)(t0)
; RV32IZCMP-SR-NEXT: cm.popret {ra, s0-s11}, 96
;
; RV64IZCMP-SR-LABEL: callee_no_irq:
; RV64IZCMP-SR: # %bb.0:
; RV64IZCMP-SR-NEXT: cm.push {ra, s0-s11}, -160
-; RV64IZCMP-SR-NEXT: lui a6, %hi(var_test_irq)
-; RV64IZCMP-SR-NEXT: lw a0, %lo(var_test_irq)(a6)
+; RV64IZCMP-SR-NEXT: lui t0, %hi(var_test_irq)
+; RV64IZCMP-SR-NEXT: lw a0, %lo(var_test_irq)(t0)
; RV64IZCMP-SR-NEXT: sd a0, 40(sp) # 8-byte Folded Spill
-; RV64IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+4)(a6)
+; RV64IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+4)(t0)
; RV64IZCMP-SR-NEXT: sd a0, 32(sp) # 8-byte Folded Spill
-; RV64IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+8)(a6)
+; RV64IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+8)(t0)
; RV64IZCMP-SR-NEXT: sd a0, 24(sp) # 8-byte Folded Spill
-; RV64IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+12)(a6)
+; RV64IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+12)(t0)
; RV64IZCMP-SR-NEXT: sd a0, 16(sp) # 8-byte Folded Spill
-; RV64IZCMP-SR-NEXT: addi a5, a6, %lo(var_test_irq)
+; RV64IZCMP-SR-NEXT: addi a5, t0, %lo(var_test_irq)
; RV64IZCMP-SR-NEXT: lw a0, 16(a5)
; RV64IZCMP-SR-NEXT: sd a0, 8(sp) # 8-byte Folded Spill
; RV64IZCMP-SR-NEXT: lw a0, 20(a5)
@@ -2827,22 +2827,22 @@ define void @callee_no_irq() nounwind{
; RV64IZCMP-SR-NEXT: lw t3, 84(a5)
; RV64IZCMP-SR-NEXT: lw t2, 88(a5)
; RV64IZCMP-SR-NEXT: lw t1, 92(a5)
-; RV64IZCMP-SR-NEXT: lw t0, 96(a5)
-; RV64IZCMP-SR-NEXT: lw s0, 100(a5)
-; RV64IZCMP-SR-NEXT: lw a7, 104(a5)
-; RV64IZCMP-SR-NEXT: lw a4, 108(a5)
+; RV64IZCMP-SR-NEXT: lw a7, 112(a5)
+; RV64IZCMP-SR-NEXT: lw s0, 116(a5)
+; RV64IZCMP-SR-NEXT: lw a3, 120(a5)
; RV64IZCMP-SR-NEXT: lw a0, 124(a5)
-; RV64IZCMP-SR-NEXT: lw a1, 120(a5)
-; RV64IZCMP-SR-NEXT: lw a2, 116(a5)
-; RV64IZCMP-SR-NEXT: lw a3, 112(a5)
+; RV64IZCMP-SR-NEXT: lw a6, 96(a5)
+; RV64IZCMP-SR-NEXT: lw a4, 100(a5)
+; RV64IZCMP-SR-NEXT: lw a2, 104(a5)
+; RV64IZCMP-SR-NEXT: lw a1, 108(a5)
; RV64IZCMP-SR-NEXT: sw a0, 124(a5)
-; RV64IZCMP-SR-NEXT: sw a1, 120(a5)
-; RV64IZCMP-SR-NEXT: sw a2, 116(a5)
-; RV64IZCMP-SR-NEXT: sw a3, 112(a5)
-; RV64IZCMP-SR-NEXT: sw a4, 108(a5)
-; RV64IZCMP-SR-NEXT: sw a7, 104(a5)
-; RV64IZCMP-SR-NEXT: sw s0, 100(a5)
-; RV64IZCMP-SR-NEXT: sw t0, 96(a5)
+; RV64IZCMP-SR-NEXT: sw a3, 120(a5)
+; RV64IZCMP-SR-NEXT: sw s0, 116(a5)
+; RV64IZCMP-SR-NEXT: sw a7, 112(a5)
+; RV64IZCMP-SR-NEXT: sw a1, 108(a5)
+; RV64IZCMP-SR-NEXT: sw a2, 104(a5)
+; RV64IZCMP-SR-NEXT: sw a4, 100(a5)
+; RV64IZCMP-SR-NEXT: sw a6, 96(a5)
; RV64IZCMP-SR-NEXT: sw t1, 92(a5)
; RV64IZCMP-SR-NEXT: sw t2, 88(a5)
; RV64IZCMP-SR-NEXT: sw t3, 84(a5)
@@ -2866,13 +2866,13 @@ define void @callee_no_irq() nounwind{
; RV64IZCMP-SR-NEXT: ld a0, 8(sp) # 8-byte Folded Reload
; RV64IZCMP-SR-NEXT: sw a0, 16(a5)
; RV64IZCMP-SR-NEXT: ld a0, 16(sp) # 8-byte Folded Reload
-; RV64IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+12)(a6)
+; RV64IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+12)(t0)
; RV64IZCMP-SR-NEXT: ld a0, 24(sp) # 8-byte Folded Reload
-; RV64IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+8)(a6)
+; RV64IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+8)(t0)
; RV64IZCMP-SR-NEXT: ld a0, 32(sp) # 8-byte Folded Reload
-; RV64IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+4)(a6)
+; RV64IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+4)(t0)
; RV64IZCMP-SR-NEXT: ld a0, 40(sp) # 8-byte Folded Reload
-; RV64IZCMP-SR-NEXT: sw a0, %lo(var_test_irq)(a6)
+; RV64IZCMP-SR-NEXT: sw a0, %lo(var_test_irq)(t0)
; RV64IZCMP-SR-NEXT: cm.popret {ra, s0-s11}, 160
;
; RV32I-LABEL: callee_no_irq:
@@ -2891,16 +2891,16 @@ define void @callee_no_irq() nounwind{
; RV32I-NEXT: sw s9, 36(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s10, 32(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s11, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT: lui a6, %hi(var_test_irq)
-; RV32I-NEXT: lw a0, %lo(var_test_irq)(a6)
+; RV32I-NEXT: lui a7, %hi(var_test_irq)
+; RV32I-NEXT: lw a0, %lo(var_test_irq)(a7)
; RV32I-NEXT: sw a0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT: lw a0, %lo(var_test_irq+4)(a6)
+; RV32I-NEXT: lw a0, %lo(var_test_irq+4)(a7)
; RV32I-NEXT: sw a0, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT: lw a0, %lo(var_test_irq+8)(a6)
+; RV32I-NEXT: lw a0, %lo(var_test_irq+8)(a7)
; RV32I-NEXT: sw a0, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT: lw a0, %lo(var_test_irq+12)(a6)
+; RV32I-NEXT: lw a0, %lo(var_test_irq+12)(a7)
; RV32I-NEXT: sw a0, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: addi a5, a6, %lo(var_test_irq)
+; RV32I-NEXT: addi a5, a7, %lo(var_test_irq)
; RV32I-NEXT: lw a0, 16(a5)
; RV32I-NEXT: sw a0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: lw a0, 20(a5)
@@ -2923,22 +2923,22 @@ define void @callee_no_irq() nounwind{
; RV32I-NEXT: lw s8, 84(a5)
; RV32I-NEXT: lw s9, 88(a5)
; RV32I-NEXT: lw s10, 92(a5)
-; RV32I-NEXT: lw s11, 96(a5)
-; RV32I-NEXT: lw ra, 100(a5)
-; RV32I-NEXT: lw a7, 104(a5)
-; RV32I-NEXT: lw a4, 108(a5)
+; RV32I-NEXT: lw s11, 112(a5)
+; RV32I-NEXT: lw ra, 116(a5)
+; RV32I-NEXT: lw a3, 120(a5)
; RV32I-NEXT: lw a0, 124(a5)
-; RV32I-NEXT: lw a1, 120(a5)
-; RV32I-NEXT: lw a2, 116(a5)
-; RV32I-NEXT: lw a3, 112(a5)
+; RV32I-NEXT: lw a6, 96(a5)
+; RV32I-NEXT: lw a4, 100(a5)
+; RV32I-NEXT: lw a2, 104(a5)
+; RV32I-NEXT: lw a1, 108(a5)
; RV32I-NEXT: sw a0, 124(a5)
-; RV32I-NEXT: sw a1, 120(a5)
-; RV32I-NEXT: sw a2, 116(a5)
-; RV32I-NEXT: sw a3, 112(a5)
-; RV32I-NEXT: sw a4, 108(a5)
-; RV32I-NEXT: sw a7, 104(a5)
-; RV32I-NEXT: sw ra, 100(a5)
-; RV32I-NEXT: sw s11, 96(a5)
+; RV32I-NEXT: sw a3, 120(a5)
+; RV32I-NEXT: sw ra, 116(a5)
+; RV32I-NEXT: sw s11, 112(a5)
+; RV32I-NEXT: sw a1, 108(a5)
+; RV32I-NEXT: sw a2, 104(a5)
+; RV32I-NEXT: sw a4, 100(a5)
+; RV32I-NEXT: sw a6, 96(a5)
; RV32I-NEXT: sw s10, 92(a5)
; RV32I-NEXT: sw s9, 88(a5)
; RV32I-NEXT: sw s8, 84(a5)
@@ -2962,13 +2962,13 @@ define void @callee_no_irq() nounwind{
; RV32I-NEXT: lw a0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: sw a0, 16(a5)
; RV32I-NEXT: lw a0, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: sw a0, %lo(var_test_irq+12)(a6)
+; RV32I-NEXT: sw a0, %lo(var_test_irq+12)(a7)
; RV32I-NEXT: lw a0, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT: sw a0, %lo(var_test_irq+8)(a6)
+; RV32I-NEXT: sw a0, %lo(var_test_irq+8)(a7)
; RV32I-NEXT: lw a0, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT: sw a0, %lo(var_test_irq+4)(a6)
+; RV32I-NEXT: sw a0, %lo(var_test_irq+4)(a7)
; RV32I-NEXT: lw a0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT: sw a0, %lo(var_test_irq)(a6)
+; RV32I-NEXT: sw a0, %lo(var_test_irq)(a7)
; RV32I-NEXT: lw ra, 76(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 72(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 68(sp) # 4-byte Folded Reload
@@ -3001,16 +3001,16 @@ define void @callee_no_irq() nounwind{
; RV64I-NEXT: sd s9, 72(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s10, 64(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s11, 56(sp) # 8-byte Folded Spill
-; RV64I-NEXT: lui a6, %hi(var_test_irq)
-; RV64I-NEXT: lw a0, %lo(var_test_irq)(a6)
+; RV64I-NEXT: lui a7, %hi(var_test_irq)
+; RV64I-NEXT: lw a0, %lo(var_test_irq)(a7)
; RV64I-NEXT: sd a0, 48(sp) # 8-byte Folded Spill
-; RV64I-NEXT: lw a0, %lo(var_test_irq+4)(a6)
+; RV64I-NEXT: lw a0, %lo(var_test_irq+4)(a7)
; RV64I-NEXT: sd a0, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT: lw a0, %lo(var_test_irq+8)(a6)
+; RV64I-NEXT: lw a0, %lo(var_test_irq+8)(a7)
; RV64I-NEXT: sd a0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT: lw a0, %lo(var_test_irq+12)(a6)
+; RV64I-NEXT: lw a0, %lo(var_test_irq+12)(a7)
; RV64I-NEXT: sd a0, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT: addi a5, a6, %lo(var_test_irq)
+; RV64I-NEXT: addi a5, a7, %lo(var_test_irq)
; RV64I-NEXT: lw a0, 16(a5)
; RV64I-NEXT: sd a0, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: lw a0, 20(a5)
@@ -3033,22 +3033,22 @@ define void @callee_no_irq() nounwind{
; RV64I-NEXT: lw s8, 84(a5)
; RV64I-NEXT: lw s9, 88(a5)
; RV64I-NEXT: lw s10, 92(a5)
-; RV64I-NEXT: lw s11, 96(a5)
-; RV64I-NEXT: lw ra, 100(a5)
-; RV64I-NEXT: lw a7, 104(a5)
-; RV64I-NEXT: lw a4, 108(a5)
+; RV64I-NEXT: lw s11, 112(a5)
+; RV64I-NEXT: lw ra, 116(a5)
+; RV64I-NEXT: lw a3, 120(a5)
; RV64I-NEXT: lw a0, 124(a5)
-; RV64I-NEXT: lw a1, 120(a5)
-; RV64I-NEXT: lw a2, 116(a5)
-; RV64I-NEXT: lw a3, 112(a5)
+; RV64I-NEXT: lw a6, 96(a5)
+; RV64I-NEXT: lw a4, 100(a5)
+; RV64I-NEXT: lw a2, 104(a5)
+; RV64I-NEXT: lw a1, 108(a5)
; RV64I-NEXT: sw a0, 124(a5)
-; RV64I-NEXT: sw a1, 120(a5)
-; RV64I-NEXT: sw a2, 116(a5)
-; RV64I-NEXT: sw a3, 112(a5)
-; RV64I-NEXT: sw a4, 108(a5)
-; RV64I-NEXT: sw a7, 104(a5)
-; RV64I-NEXT: sw ra, 100(a5)
-; RV64I-NEXT: sw s11, 96(a5)
+; RV64I-NEXT: sw a3, 120(a5)
+; RV64I-NEXT: sw ra, 116(a5)
+; RV64I-NEXT: sw s11, 112(a5)
+; RV64I-NEXT: sw a1, 108(a5)
+; RV64I-NEXT: sw a2, 104(a5)
+; RV64I-NEXT: sw a4, 100(a5)
+; RV64I-NEXT: sw a6, 96(a5)
; RV64I-NEXT: sw s10, 92(a5)
; RV64I-NEXT: sw s9, 88(a5)
; RV64I-NEXT: sw s8, 84(a5)
@@ -3072,13 +3072,13 @@ define void @callee_no_irq() nounwind{
; RV64I-NEXT: ld a0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT: sw a0, 16(a5)
; RV64I-NEXT: ld a0, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: sw a0, %lo(var_test_irq+12)(a6)
+; RV64I-NEXT: sw a0, %lo(var_test_irq+12)(a7)
; RV64I-NEXT: ld a0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT: sw a0, %lo(var_test_irq+8)(a6)
+; RV64I-NEXT: sw a0, %lo(var_test_irq+8)(a7)
; RV64I-NEXT: ld a0, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT: sw a0, %lo(var_test_irq+4)(a6)
+; RV64I-NEXT: sw a0, %lo(var_test_irq+4)(a7)
; RV64I-NEXT: ld a0, 48(sp) # 8-byte Folded Reload
-; RV64I-NEXT: sw a0, %lo(var_test_irq)(a6)
+; RV64I-NEXT: sw a0, %lo(var_test_irq)(a7)
; RV64I-NEXT: ld ra, 152(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 144(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 136(sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/reduction-formation.ll b/llvm/test/CodeGen/RISCV/reduction-formation.ll
index 6b4dc0cd3699e3..ced3a38ab5ea00 100644
--- a/llvm/test/CodeGen/RISCV/reduction-formation.ll
+++ b/llvm/test/CodeGen/RISCV/reduction-formation.ll
@@ -8,24 +8,24 @@
define i32 @reduce_sum_4xi32(<4 x i32> %v) {
; RV32-LABEL: reduce_sum_4xi32:
; RV32: # %bb.0:
-; RV32-NEXT: lw a1, 12(a0)
+; RV32-NEXT: lw a1, 0(a0)
; RV32-NEXT: lw a2, 4(a0)
-; RV32-NEXT: lw a3, 0(a0)
-; RV32-NEXT: lw a0, 8(a0)
-; RV32-NEXT: add a2, a3, a2
-; RV32-NEXT: add a0, a0, a1
-; RV32-NEXT: add a0, a2, a0
+; RV32-NEXT: lw a3, 8(a0)
+; RV32-NEXT: lw a0, 12(a0)
+; RV32-NEXT: add a1, a1, a2
+; RV32-NEXT: add a0, a3, a0
+; RV32-NEXT: add a0, a1, a0
; RV32-NEXT: ret
;
; RV64-LABEL: reduce_sum_4xi32:
; RV64: # %bb.0:
-; RV64-NEXT: lw a1, 24(a0)
+; RV64-NEXT: lw a1, 0(a0)
; RV64-NEXT: lw a2, 8(a0)
-; RV64-NEXT: lw a3, 0(a0)
-; RV64-NEXT: lw a0, 16(a0)
-; RV64-NEXT: add a2, a3, a2
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: addw a0, a2, a0
+; RV64-NEXT: lw a3, 16(a0)
+; RV64-NEXT: lw a0, 24(a0)
+; RV64-NEXT: add a1, a1, a2
+; RV64-NEXT: add a0, a3, a0
+; RV64-NEXT: addw a0, a1, a0
; RV64-NEXT: ret
%e0 = extractelement <4 x i32> %v, i32 0
%e1 = extractelement <4 x i32> %v, i32 1
@@ -40,24 +40,24 @@ define i32 @reduce_sum_4xi32(<4 x i32> %v) {
define i32 @reduce_xor_4xi32(<4 x i32> %v) {
; RV32-LABEL: reduce_xor_4xi32:
; RV32: # %bb.0:
-; RV32-NEXT: lw a1, 12(a0)
+; RV32-NEXT: lw a1, 0(a0)
; RV32-NEXT: lw a2, 4(a0)
-; RV32-NEXT: lw a3, 0(a0)
-; RV32-NEXT: lw a0, 8(a0)
-; RV32-NEXT: xor a2, a3, a2
-; RV32-NEXT: xor a0, a0, a1
-; RV32-NEXT: xor a0, a2, a0
+; RV32-NEXT: lw a3, 8(a0)
+; RV32-NEXT: lw a0, 12(a0)
+; RV32-NEXT: xor a1, a1, a2
+; RV32-NEXT: xor a0, a3, a0
+; RV32-NEXT: xor a0, a1, a0
; RV32-NEXT: ret
;
; RV64-LABEL: reduce_xor_4xi32:
; RV64: # %bb.0:
-; RV64-NEXT: ld a1, 24(a0)
+; RV64-NEXT: ld a1, 0(a0)
; RV64-NEXT: ld a2, 8(a0)
-; RV64-NEXT: ld a3, 0(a0)
-; RV64-NEXT: ld a0, 16(a0)
-; RV64-NEXT: xor a2, a3, a2
-; RV64-NEXT: xor a0, a0, a1
-; RV64-NEXT: xor a0, a2, a0
+; RV64-NEXT: ld a3, 16(a0)
+; RV64-NEXT: ld a0, 24(a0)
+; RV64-NEXT: xor a1, a1, a2
+; RV64-NEXT: xor a0, a3, a0
+; RV64-NEXT: xor a0, a1, a0
; RV64-NEXT: ret
%e0 = extractelement <4 x i32> %v, i32 0
%e1 = extractelement <4 x i32> %v, i32 1
@@ -72,24 +72,24 @@ define i32 @reduce_xor_4xi32(<4 x i32> %v) {
define i32 @reduce_or_4xi32(<4 x i32> %v) {
; RV32-LABEL: reduce_or_4xi32:
; RV32: # %bb.0:
-; RV32-NEXT: lw a1, 12(a0)
+; RV32-NEXT: lw a1, 0(a0)
; RV32-NEXT: lw a2, 4(a0)
-; RV32-NEXT: lw a3, 0(a0)
-; RV32-NEXT: lw a0, 8(a0)
-; RV32-NEXT: or a2, a3, a2
-; RV32-NEXT: or a0, a0, a1
-; RV32-NEXT: or a0, a2, a0
+; RV32-NEXT: lw a3, 8(a0)
+; RV32-NEXT: lw a0, 12(a0)
+; RV32-NEXT: or a1, a1, a2
+; RV32-NEXT: or a0, a3, a0
+; RV32-NEXT: or a0, a1, a0
; RV32-NEXT: ret
;
; RV64-LABEL: reduce_or_4xi32:
; RV64: # %bb.0:
-; RV64-NEXT: ld a1, 24(a0)
+; RV64-NEXT: ld a1, 0(a0)
; RV64-NEXT: ld a2, 8(a0)
-; RV64-NEXT: ld a3, 0(a0)
-; RV64-NEXT: ld a0, 16(a0)
-; RV64-NEXT: or a2, a3, a2
-; RV64-NEXT: or a0, a0, a1
-; RV64-NEXT: or a0, a2, a0
+; RV64-NEXT: ld a3, 16(a0)
+; RV64-NEXT: ld a0, 24(a0)
+; RV64-NEXT: or a1, a1, a2
+; RV64-NEXT: or a0, a3, a0
+; RV64-NEXT: or a0, a1, a0
; RV64-NEXT: ret
%e0 = extractelement <4 x i32> %v, i32 0
%e1 = extractelement <4 x i32> %v, i32 1
diff --git a/llvm/test/CodeGen/RISCV/rv32zbb.ll b/llvm/test/CodeGen/RISCV/rv32zbb.ll
index fa320f53cec6ce..e24b1b41645cdf 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbb.ll
@@ -683,9 +683,9 @@ define <2 x i64> @ctpop_v2i64(<2 x i64> %a) nounwind {
; RV32I-LABEL: ctpop_v2i64:
; RV32I: # %bb.0:
; RV32I-NEXT: lw a3, 4(a1)
+; RV32I-NEXT: lw a4, 0(a1)
; RV32I-NEXT: lw a2, 8(a1)
-; RV32I-NEXT: lw a4, 12(a1)
-; RV32I-NEXT: lw a1, 0(a1)
+; RV32I-NEXT: lw a1, 12(a1)
; RV32I-NEXT: srli a5, a3, 1
; RV32I-NEXT: lui a6, 349525
; RV32I-NEXT: addi a6, a6, 1365
@@ -707,37 +707,37 @@ define <2 x i64> @ctpop_v2i64(<2 x i64> %a) nounwind {
; RV32I-NEXT: slli t0, a3, 16
; RV32I-NEXT: add a3, a3, t0
; RV32I-NEXT: srli a3, a3, 24
-; RV32I-NEXT: srli t0, a1, 1
+; RV32I-NEXT: srli t0, a4, 1
; RV32I-NEXT: and t0, t0, a6
-; RV32I-NEXT: sub a1, a1, t0
-; RV32I-NEXT: and t0, a1, a5
+; RV32I-NEXT: sub a4, a4, t0
+; RV32I-NEXT: and t0, a4, a5
+; RV32I-NEXT: srli a4, a4, 2
+; RV32I-NEXT: and a4, a4, a5
+; RV32I-NEXT: add a4, t0, a4
+; RV32I-NEXT: srli t0, a4, 4
+; RV32I-NEXT: add a4, a4, t0
+; RV32I-NEXT: and a4, a4, a7
+; RV32I-NEXT: slli t0, a4, 8
+; RV32I-NEXT: add a4, a4, t0
+; RV32I-NEXT: slli t0, a4, 16
+; RV32I-NEXT: add a4, a4, t0
+; RV32I-NEXT: srli a4, a4, 24
+; RV32I-NEXT: add a3, a4, a3
+; RV32I-NEXT: srli a4, a1, 1
+; RV32I-NEXT: and a4, a4, a6
+; RV32I-NEXT: sub a1, a1, a4
+; RV32I-NEXT: and a4, a1, a5
; RV32I-NEXT: srli a1, a1, 2
; RV32I-NEXT: and a1, a1, a5
-; RV32I-NEXT: add a1, t0, a1
-; RV32I-NEXT: srli t0, a1, 4
-; RV32I-NEXT: add a1, a1, t0
+; RV32I-NEXT: add a1, a4, a1
+; RV32I-NEXT: srli a4, a1, 4
+; RV32I-NEXT: add a1, a1, a4
; RV32I-NEXT: and a1, a1, a7
-; RV32I-NEXT: slli t0, a1, 8
-; RV32I-NEXT: add a1, a1, t0
-; RV32I-NEXT: slli t0, a1, 16
-; RV32I-NEXT: add a1, a1, t0
+; RV32I-NEXT: slli a4, a1, 8
+; RV32I-NEXT: add a1, a1, a4
+; RV32I-NEXT: slli a4, a1, 16
+; RV32I-NEXT: add a1, a1, a4
; RV32I-NEXT: srli a1, a1, 24
-; RV32I-NEXT: add a1, a1, a3
-; RV32I-NEXT: srli a3, a4, 1
-; RV32I-NEXT: and a3, a3, a6
-; RV32I-NEXT: sub a4, a4, a3
-; RV32I-NEXT: and a3, a4, a5
-; RV32I-NEXT: srli a4, a4, 2
-; RV32I-NEXT: and a4, a4, a5
-; RV32I-NEXT: add a3, a3, a4
-; RV32I-NEXT: srli a4, a3, 4
-; RV32I-NEXT: add a3, a3, a4
-; RV32I-NEXT: and a3, a3, a7
-; RV32I-NEXT: slli a4, a3, 8
-; RV32I-NEXT: add a3, a3, a4
-; RV32I-NEXT: slli a4, a3, 16
-; RV32I-NEXT: add a3, a3, a4
-; RV32I-NEXT: srli a3, a3, 24
; RV32I-NEXT: srli a4, a2, 1
; RV32I-NEXT: and a4, a4, a6
; RV32I-NEXT: sub a2, a2, a4
@@ -753,11 +753,11 @@ define <2 x i64> @ctpop_v2i64(<2 x i64> %a) nounwind {
; RV32I-NEXT: slli a4, a2, 16
; RV32I-NEXT: add a2, a2, a4
; RV32I-NEXT: srli a2, a2, 24
-; RV32I-NEXT: add a2, a2, a3
+; RV32I-NEXT: add a1, a2, a1
; RV32I-NEXT: sw zero, 12(a0)
; RV32I-NEXT: sw zero, 4(a0)
-; RV32I-NEXT: sw a2, 8(a0)
-; RV32I-NEXT: sw a1, 0(a0)
+; RV32I-NEXT: sw a1, 8(a0)
+; RV32I-NEXT: sw a3, 0(a0)
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: ctpop_v2i64:
@@ -785,21 +785,21 @@ define <2 x i1> @ctpop_v2i64_ult_two(<2 x i64> %a) nounwind {
; RV32I-LABEL: ctpop_v2i64_ult_two:
; RV32I: # %bb.0:
; RV32I-NEXT: lw a1, 0(a0)
-; RV32I-NEXT: lw a2, 12(a0)
+; RV32I-NEXT: lw a2, 4(a0)
; RV32I-NEXT: lw a3, 8(a0)
-; RV32I-NEXT: lw a0, 4(a0)
-; RV32I-NEXT: addi a4, a1, -1
-; RV32I-NEXT: and a4, a1, a4
+; RV32I-NEXT: lw a4, 12(a0)
+; RV32I-NEXT: addi a0, a1, -1
+; RV32I-NEXT: and a0, a1, a0
; RV32I-NEXT: seqz a1, a1
-; RV32I-NEXT: sub a1, a0, a1
-; RV32I-NEXT: and a0, a0, a1
-; RV32I-NEXT: or a0, a4, a0
+; RV32I-NEXT: sub a1, a2, a1
+; RV32I-NEXT: and a1, a2, a1
+; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: seqz a0, a0
; RV32I-NEXT: addi a1, a3, -1
; RV32I-NEXT: and a1, a3, a1
-; RV32I-NEXT: seqz a3, a3
-; RV32I-NEXT: sub a3, a2, a3
-; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: seqz a2, a3
+; RV32I-NEXT: sub a2, a4, a2
+; RV32I-NEXT: and a2, a4, a2
; RV32I-NEXT: or a1, a1, a2
; RV32I-NEXT: seqz a1, a1
; RV32I-NEXT: ret
@@ -828,21 +828,21 @@ define <2 x i1> @ctpop_v2i64_ugt_one(<2 x i64> %a) nounwind {
; RV32I-LABEL: ctpop_v2i64_ugt_one:
; RV32I: # %bb.0:
; RV32I-NEXT: lw a1, 0(a0)
-; RV32I-NEXT: lw a2, 12(a0)
+; RV32I-NEXT: lw a2, 4(a0)
; RV32I-NEXT: lw a3, 8(a0)
-; RV32I-NEXT: lw a0, 4(a0)
-; RV32I-NEXT: addi a4, a1, -1
-; RV32I-NEXT: and a4, a1, a4
+; RV32I-NEXT: lw a4, 12(a0)
+; RV32I-NEXT: addi a0, a1, -1
+; RV32I-NEXT: and a0, a1, a0
; RV32I-NEXT: seqz a1, a1
-; RV32I-NEXT: sub a1, a0, a1
-; RV32I-NEXT: and a0, a0, a1
-; RV32I-NEXT: or a0, a4, a0
+; RV32I-NEXT: sub a1, a2, a1
+; RV32I-NEXT: and a1, a2, a1
+; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: snez a0, a0
; RV32I-NEXT: addi a1, a3, -1
; RV32I-NEXT: and a1, a3, a1
-; RV32I-NEXT: seqz a3, a3
-; RV32I-NEXT: sub a3, a2, a3
-; RV32I-NEXT: and a2, a2, a3
+; RV32I-NEXT: seqz a2, a3
+; RV32I-NEXT: sub a2, a4, a2
+; RV32I-NEXT: and a2, a4, a2
; RV32I-NEXT: or a1, a1, a2
; RV32I-NEXT: snez a1, a1
; RV32I-NEXT: ret
@@ -873,15 +873,15 @@ define <2 x i1> @ctpop_v2i64_eq_one(<2 x i64> %a) nounwind {
; RV32I-LABEL: ctpop_v2i64_eq_one:
; RV32I: # %bb.0:
; RV32I-NEXT: mv a1, a0
-; RV32I-NEXT: lw a2, 12(a0)
-; RV32I-NEXT: lw a0, 4(a0)
-; RV32I-NEXT: lw a3, 0(a1)
-; RV32I-NEXT: beqz a0, .LBB22_3
+; RV32I-NEXT: lw a0, 0(a0)
+; RV32I-NEXT: lw a3, 4(a1)
+; RV32I-NEXT: lw a2, 12(a1)
+; RV32I-NEXT: beqz a3, .LBB22_3
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: seqz a3, a3
-; RV32I-NEXT: sub a3, a0, a3
-; RV32I-NEXT: xor a0, a0, a3
-; RV32I-NEXT: sltu a0, a3, a0
+; RV32I-NEXT: seqz a0, a0
+; RV32I-NEXT: sub a0, a3, a0
+; RV32I-NEXT: xor a3, a3, a0
+; RV32I-NEXT: sltu a0, a0, a3
; RV32I-NEXT: lw a1, 8(a1)
; RV32I-NEXT: bnez a2, .LBB22_4
; RV32I-NEXT: .LBB22_2:
@@ -890,9 +890,9 @@ define <2 x i1> @ctpop_v2i64_eq_one(<2 x i64> %a) nounwind {
; RV32I-NEXT: sltu a1, a2, a1
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB22_3:
-; RV32I-NEXT: addi a0, a3, -1
-; RV32I-NEXT: xor a3, a3, a0
-; RV32I-NEXT: sltu a0, a0, a3
+; RV32I-NEXT: addi a3, a0, -1
+; RV32I-NEXT: xor a0, a0, a3
+; RV32I-NEXT: sltu a0, a3, a0
; RV32I-NEXT: lw a1, 8(a1)
; RV32I-NEXT: beqz a2, .LBB22_2
; RV32I-NEXT: .LBB22_4:
@@ -927,20 +927,20 @@ define <2 x i1> @ctpop_v2i64_eq_one(<2 x i64> %a) nounwind {
define <2 x i1> @ctpop_v2i64_ne_one(<2 x i64> %a) nounwind {
; RV32I-LABEL: ctpop_v2i64_ne_one:
; RV32I: # %bb.0:
+; RV32I-NEXT: lw a2, 0(a0)
+; RV32I-NEXT: lw a3, 4(a0)
; RV32I-NEXT: lw a1, 12(a0)
-; RV32I-NEXT: lw a2, 4(a0)
-; RV32I-NEXT: lw a3, 0(a0)
-; RV32I-NEXT: beqz a2, .LBB23_2
+; RV32I-NEXT: beqz a3, .LBB23_2
; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: seqz a3, a3
-; RV32I-NEXT: sub a3, a2, a3
-; RV32I-NEXT: xor a2, a2, a3
-; RV32I-NEXT: sltu a2, a3, a2
-; RV32I-NEXT: j .LBB23_3
-; RV32I-NEXT: .LBB23_2:
-; RV32I-NEXT: addi a2, a3, -1
+; RV32I-NEXT: seqz a2, a2
+; RV32I-NEXT: sub a2, a3, a2
; RV32I-NEXT: xor a3, a3, a2
; RV32I-NEXT: sltu a2, a2, a3
+; RV32I-NEXT: j .LBB23_3
+; RV32I-NEXT: .LBB23_2:
+; RV32I-NEXT: addi a3, a2, -1
+; RV32I-NEXT: xor a2, a2, a3
+; RV32I-NEXT: sltu a2, a3, a2
; RV32I-NEXT: .LBB23_3:
; RV32I-NEXT: lw a3, 8(a0)
; RV32I-NEXT: xori a0, a2, 1
diff --git a/llvm/test/CodeGen/RISCV/rv64i-shift-sext.ll b/llvm/test/CodeGen/RISCV/rv64i-shift-sext.ll
index f38aa71fb158d0..6c4466796aeedd 100644
--- a/llvm/test/CodeGen/RISCV/rv64i-shift-sext.ll
+++ b/llvm/test/CodeGen/RISCV/rv64i-shift-sext.ll
@@ -177,12 +177,12 @@ define i8 @test13(ptr %0, i64 %1) {
; RV64I-NEXT: li a2, 1
; RV64I-NEXT: subw a2, a2, a1
; RV64I-NEXT: add a2, a0, a2
-; RV64I-NEXT: lbu a2, 0(a2)
; RV64I-NEXT: li a3, 2
; RV64I-NEXT: subw a3, a3, a1
; RV64I-NEXT: add a0, a0, a3
+; RV64I-NEXT: lbu a1, 0(a2)
; RV64I-NEXT: lbu a0, 0(a0)
-; RV64I-NEXT: add a0, a2, a0
+; RV64I-NEXT: add a0, a1, a0
; RV64I-NEXT: ret
%3 = mul i64 %1, -4294967296
%4 = add i64 %3, 4294967296 ; 1 << 32
diff --git a/llvm/test/CodeGen/RISCV/rvv/expand-no-v.ll b/llvm/test/CodeGen/RISCV/rvv/expand-no-v.ll
index d34c10798f4821..92b88054a1d3bc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/expand-no-v.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/expand-no-v.ll
@@ -8,14 +8,14 @@ declare i32 @llvm.vp.reduce.add.v4i32(i32, <4 x i32>, <4 x i1>, i32)
define i32 @vpreduce_add_v4i32(i32 %s, <4 x i32> %v, <4 x i1> %m, i32 %evl) {
; RV32-LABEL: vpreduce_add_v4i32:
; RV32: # %bb.0:
-; RV32-NEXT: lw a4, 4(a1)
-; RV32-NEXT: lw a5, 12(a1)
+; RV32-NEXT: lw a4, 0(a1)
+; RV32-NEXT: lw a5, 4(a1)
; RV32-NEXT: lw a6, 8(a1)
-; RV32-NEXT: lw a1, 0(a1)
+; RV32-NEXT: lw a1, 12(a1)
; RV32-NEXT: lw a7, 0(a2)
-; RV32-NEXT: lw t0, 8(a2)
-; RV32-NEXT: lw t1, 12(a2)
-; RV32-NEXT: lw a2, 4(a2)
+; RV32-NEXT: lw t0, 4(a2)
+; RV32-NEXT: lw t1, 8(a2)
+; RV32-NEXT: lw a2, 12(a2)
; RV32-NEXT: snez t2, a3
; RV32-NEXT: sltiu t3, a3, 3
; RV32-NEXT: xori t3, t3, 1
@@ -23,34 +23,34 @@ define i32 @vpreduce_add_v4i32(i32 %s, <4 x i32> %v, <4 x i1> %m, i32 %evl) {
; RV32-NEXT: xori t4, t4, 1
; RV32-NEXT: sltiu a3, a3, 2
; RV32-NEXT: xori a3, a3, 1
-; RV32-NEXT: and a2, a3, a2
-; RV32-NEXT: and a3, t4, t1
-; RV32-NEXT: and t0, t3, t0
+; RV32-NEXT: and a3, a3, t0
+; RV32-NEXT: and a2, t4, a2
+; RV32-NEXT: and t0, t3, t1
; RV32-NEXT: and a7, t2, a7
; RV32-NEXT: neg a7, a7
-; RV32-NEXT: and a1, a7, a1
+; RV32-NEXT: and a4, a7, a4
; RV32-NEXT: neg a7, t0
; RV32-NEXT: and a6, a7, a6
-; RV32-NEXT: neg a3, a3
-; RV32-NEXT: and a3, a3, a5
; RV32-NEXT: neg a2, a2
-; RV32-NEXT: and a2, a2, a4
-; RV32-NEXT: add a2, a2, a3
-; RV32-NEXT: add a1, a1, a6
-; RV32-NEXT: add a1, a1, a2
+; RV32-NEXT: and a1, a2, a1
+; RV32-NEXT: neg a2, a3
+; RV32-NEXT: and a2, a2, a5
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: add a4, a4, a6
+; RV32-NEXT: add a1, a4, a1
; RV32-NEXT: add a0, a1, a0
; RV32-NEXT: ret
;
; RV64-LABEL: vpreduce_add_v4i32:
; RV64: # %bb.0:
-; RV64-NEXT: lw a4, 8(a1)
-; RV64-NEXT: lw a5, 24(a1)
+; RV64-NEXT: lw a4, 0(a1)
+; RV64-NEXT: lw a5, 8(a1)
; RV64-NEXT: lw a6, 16(a1)
-; RV64-NEXT: lw a1, 0(a1)
+; RV64-NEXT: lw a1, 24(a1)
; RV64-NEXT: ld a7, 0(a2)
-; RV64-NEXT: ld t0, 16(a2)
-; RV64-NEXT: ld t1, 24(a2)
-; RV64-NEXT: ld a2, 8(a2)
+; RV64-NEXT: ld t0, 8(a2)
+; RV64-NEXT: ld t1, 16(a2)
+; RV64-NEXT: ld a2, 24(a2)
; RV64-NEXT: sext.w a3, a3
; RV64-NEXT: snez t2, a3
; RV64-NEXT: sltiu t3, a3, 3
@@ -59,21 +59,21 @@ define i32 @vpreduce_add_v4i32(i32 %s, <4 x i32> %v, <4 x i1> %m, i32 %evl) {
; RV64-NEXT: xori t4, t4, 1
; RV64-NEXT: sltiu a3, a3, 2
; RV64-NEXT: xori a3, a3, 1
-; RV64-NEXT: and a2, a3, a2
-; RV64-NEXT: and a3, t4, t1
-; RV64-NEXT: and t0, t3, t0
+; RV64-NEXT: and a3, a3, t0
+; RV64-NEXT: and a2, t4, a2
+; RV64-NEXT: and t0, t3, t1
; RV64-NEXT: and a7, t2, a7
; RV64-NEXT: negw a7, a7
-; RV64-NEXT: and a1, a7, a1
+; RV64-NEXT: and a4, a7, a4
; RV64-NEXT: negw a7, t0
; RV64-NEXT: and a6, a7, a6
-; RV64-NEXT: negw a3, a3
-; RV64-NEXT: and a3, a3, a5
; RV64-NEXT: negw a2, a2
-; RV64-NEXT: and a2, a2, a4
-; RV64-NEXT: add a2, a2, a3
-; RV64-NEXT: add a1, a1, a6
-; RV64-NEXT: add a1, a1, a2
+; RV64-NEXT: and a1, a2, a1
+; RV64-NEXT: negw a2, a3
+; RV64-NEXT: and a2, a2, a5
+; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: add a4, a4, a6
+; RV64-NEXT: add a1, a4, a1
; RV64-NEXT: addw a0, a1, a0
; RV64-NEXT: ret
%r = call i32 @llvm.vp.reduce.add.v4i32(i32 %s, <4 x i32> %v, <4 x i1> %m, i32 %evl)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-i8-index-cornercase.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-i8-index-cornercase.ll
index 309ca1f964287a..f1cfb6748fd619 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-i8-index-cornercase.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-i8-index-cornercase.ll
@@ -121,13 +121,13 @@ define <512 x i8> @two_source(<512 x i8> %a, <512 x i8> %b) {
; CHECK-NEXT: lbu a3, 985(sp)
; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, ma
; CHECK-NEXT: vslideup.vx v8, v24, a2
+; CHECK-NEXT: lbu a1, 1012(sp)
; CHECK-NEXT: vmv.s.x v24, a3
-; CHECK-NEXT: li a1, 478
-; CHECK-NEXT: li a2, 477
-; CHECK-NEXT: lbu a3, 1012(sp)
-; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, ma
-; CHECK-NEXT: vslideup.vx v8, v24, a2
-; CHECK-NEXT: vmv.s.x v24, a3
+; CHECK-NEXT: li a2, 478
+; CHECK-NEXT: li a3, 477
+; CHECK-NEXT: vsetvli zero, a2, e8, m8, tu, ma
+; CHECK-NEXT: vslideup.vx v8, v24, a3
+; CHECK-NEXT: vmv.s.x v24, a1
; CHECK-NEXT: li a1, 501
; CHECK-NEXT: li a2, 500
; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-elen.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-elen.ll
index 8ed19ddb1af5cf..81e20a29881630 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-elen.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-elen.ll
@@ -26,38 +26,38 @@ define void @add_v4i32(ptr %x, ptr %y) {
define void @add_v2i64(ptr %x, ptr %y) {
; RV32-LABEL: add_v2i64:
; RV32: # %bb.0:
-; RV32-NEXT: lw a2, 8(a0)
-; RV32-NEXT: lw a3, 12(a0)
+; RV32-NEXT: lw a2, 0(a1)
+; RV32-NEXT: lw a3, 4(a1)
; RV32-NEXT: lw a4, 0(a0)
; RV32-NEXT: lw a5, 4(a0)
-; RV32-NEXT: lw a6, 4(a1)
-; RV32-NEXT: lw a7, 0(a1)
+; RV32-NEXT: lw a6, 8(a0)
+; RV32-NEXT: lw a7, 12(a0)
; RV32-NEXT: lw t0, 8(a1)
; RV32-NEXT: lw a1, 12(a1)
-; RV32-NEXT: add a5, a5, a6
-; RV32-NEXT: add a7, a4, a7
-; RV32-NEXT: sltu a4, a7, a4
-; RV32-NEXT: add a4, a5, a4
-; RV32-NEXT: add a1, a3, a1
-; RV32-NEXT: add t0, a2, t0
-; RV32-NEXT: sltu a2, t0, a2
-; RV32-NEXT: add a1, a1, a2
+; RV32-NEXT: add a3, a5, a3
+; RV32-NEXT: add a2, a4, a2
+; RV32-NEXT: sltu a4, a2, a4
+; RV32-NEXT: add a3, a3, a4
+; RV32-NEXT: add a1, a7, a1
+; RV32-NEXT: add t0, a6, t0
+; RV32-NEXT: sltu a4, t0, a6
+; RV32-NEXT: add a1, a1, a4
; RV32-NEXT: sw t0, 8(a0)
-; RV32-NEXT: sw a7, 0(a0)
+; RV32-NEXT: sw a2, 0(a0)
; RV32-NEXT: sw a1, 12(a0)
-; RV32-NEXT: sw a4, 4(a0)
+; RV32-NEXT: sw a3, 4(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: add_v2i64:
; RV64: # %bb.0:
-; RV64-NEXT: ld a2, 8(a0)
-; RV64-NEXT: ld a3, 0(a0)
+; RV64-NEXT: ld a2, 0(a0)
+; RV64-NEXT: ld a3, 8(a0)
; RV64-NEXT: ld a4, 0(a1)
; RV64-NEXT: ld a1, 8(a1)
-; RV64-NEXT: add a3, a3, a4
-; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: add a2, a2, a4
+; RV64-NEXT: add a1, a3, a1
; RV64-NEXT: sd a1, 8(a0)
-; RV64-NEXT: sd a3, 0(a0)
+; RV64-NEXT: sd a2, 0(a0)
; RV64-NEXT: ret
%a = load <2 x i64>, ptr %x
%b = load <2 x i64>, ptr %y
@@ -134,14 +134,14 @@ define void @fadd_v4f32(ptr %x, ptr %y) {
define void @fadd_v2f64(ptr %x, ptr %y) {
; CHECK-LABEL: fadd_v2f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: fld fa5, 8(a0)
-; CHECK-NEXT: fld fa4, 0(a0)
+; CHECK-NEXT: fld fa5, 0(a0)
+; CHECK-NEXT: fld fa4, 8(a0)
; CHECK-NEXT: fld fa3, 0(a1)
; CHECK-NEXT: fld fa2, 8(a1)
-; CHECK-NEXT: fadd.d fa4, fa4, fa3
-; CHECK-NEXT: fadd.d fa5, fa5, fa2
-; CHECK-NEXT: fsd fa5, 8(a0)
-; CHECK-NEXT: fsd fa4, 0(a0)
+; CHECK-NEXT: fadd.d fa5, fa5, fa3
+; CHECK-NEXT: fadd.d fa4, fa4, fa2
+; CHECK-NEXT: fsd fa4, 8(a0)
+; CHECK-NEXT: fsd fa5, 0(a0)
; CHECK-NEXT: ret
%a = load <2 x double>, ptr %x
%b = load <2 x double>, ptr %y
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
index cbea842e28f0f2..43cee6610e7872 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -1398,37 +1398,37 @@ define <16 x i8> @buildvec_v16i8_loads_contigous(ptr %p) {
; RV32VB-NEXT: slli a3, a3, 16
; RV32VB-NEXT: slli a4, a4, 24
; RV32VB-NEXT: or a3, a4, a3
+; RV32VB-NEXT: lbu a2, 4(a0)
+; RV32VB-NEXT: lbu a4, 5(a0)
; RV32VB-NEXT: or a1, a1, a3
-; RV32VB-NEXT: lbu a2, 5(a0)
-; RV32VB-NEXT: lbu a3, 4(a0)
-; RV32VB-NEXT: lbu a4, 6(a0)
+; RV32VB-NEXT: lbu a3, 6(a0)
; RV32VB-NEXT: lbu a5, 7(a0)
-; RV32VB-NEXT: slli a2, a2, 8
-; RV32VB-NEXT: or a2, a3, a2
-; RV32VB-NEXT: slli a4, a4, 16
-; RV32VB-NEXT: slli a5, a5, 24
-; RV32VB-NEXT: or a4, a5, a4
+; RV32VB-NEXT: slli a4, a4, 8
; RV32VB-NEXT: or a2, a2, a4
-; RV32VB-NEXT: lbu a3, 9(a0)
+; RV32VB-NEXT: slli a3, a3, 16
+; RV32VB-NEXT: slli a5, a5, 24
+; RV32VB-NEXT: or a3, a5, a3
; RV32VB-NEXT: lbu a4, 8(a0)
-; RV32VB-NEXT: lbu a5, 10(a0)
+; RV32VB-NEXT: lbu a5, 9(a0)
+; RV32VB-NEXT: or a2, a2, a3
+; RV32VB-NEXT: lbu a3, 10(a0)
; RV32VB-NEXT: lbu a6, 11(a0)
-; RV32VB-NEXT: slli a3, a3, 8
-; RV32VB-NEXT: or a3, a4, a3
-; RV32VB-NEXT: slli a5, a5, 16
+; RV32VB-NEXT: slli a5, a5, 8
+; RV32VB-NEXT: or a4, a4, a5
+; RV32VB-NEXT: slli a3, a3, 16
; RV32VB-NEXT: slli a6, a6, 24
-; RV32VB-NEXT: or a4, a6, a5
-; RV32VB-NEXT: or a3, a3, a4
-; RV32VB-NEXT: lbu a4, 13(a0)
+; RV32VB-NEXT: or a3, a6, a3
; RV32VB-NEXT: lbu a5, 12(a0)
-; RV32VB-NEXT: lbu a6, 14(a0)
+; RV32VB-NEXT: lbu a6, 13(a0)
+; RV32VB-NEXT: or a3, a4, a3
+; RV32VB-NEXT: lbu a4, 14(a0)
; RV32VB-NEXT: lbu a0, 15(a0)
-; RV32VB-NEXT: slli a4, a4, 8
-; RV32VB-NEXT: or a4, a5, a4
-; RV32VB-NEXT: slli a6, a6, 16
+; RV32VB-NEXT: slli a6, a6, 8
+; RV32VB-NEXT: or a5, a5, a6
+; RV32VB-NEXT: slli a4, a4, 16
; RV32VB-NEXT: slli a0, a0, 24
-; RV32VB-NEXT: or a0, a0, a6
-; RV32VB-NEXT: or a0, a4, a0
+; RV32VB-NEXT: or a0, a0, a4
+; RV32VB-NEXT: or a0, a5, a0
; RV32VB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32VB-NEXT: vmv.v.x v8, a1
; RV32VB-NEXT: vslide1down.vx v8, v8, a2
@@ -1443,27 +1443,27 @@ define <16 x i8> @buildvec_v16i8_loads_contigous(ptr %p) {
; RV32VB-PACK-NEXT: lbu a3, 2(a0)
; RV32VB-PACK-NEXT: lbu a4, 3(a0)
; RV32VB-PACK-NEXT: packh a1, a1, a2
-; RV32VB-PACK-NEXT: packh a2, a3, a4
-; RV32VB-PACK-NEXT: pack a1, a1, a2
; RV32VB-PACK-NEXT: lbu a2, 4(a0)
-; RV32VB-PACK-NEXT: lbu a3, 5(a0)
-; RV32VB-PACK-NEXT: lbu a4, 6(a0)
-; RV32VB-PACK-NEXT: lbu a5, 7(a0)
-; RV32VB-PACK-NEXT: lbu a6, 8(a0)
-; RV32VB-PACK-NEXT: lbu a7, 9(a0)
-; RV32VB-PACK-NEXT: packh a2, a2, a3
-; RV32VB-PACK-NEXT: packh a3, a4, a5
-; RV32VB-PACK-NEXT: pack a2, a2, a3
+; RV32VB-PACK-NEXT: lbu a5, 5(a0)
+; RV32VB-PACK-NEXT: lbu a6, 6(a0)
+; RV32VB-PACK-NEXT: lbu a7, 7(a0)
+; RV32VB-PACK-NEXT: packh a3, a3, a4
+; RV32VB-PACK-NEXT: pack a1, a1, a3
+; RV32VB-PACK-NEXT: packh a2, a2, a5
; RV32VB-PACK-NEXT: packh a3, a6, a7
-; RV32VB-PACK-NEXT: lbu a4, 10(a0)
-; RV32VB-PACK-NEXT: lbu a5, 11(a0)
-; RV32VB-PACK-NEXT: lbu a6, 12(a0)
+; RV32VB-PACK-NEXT: lbu a4, 8(a0)
+; RV32VB-PACK-NEXT: lbu a5, 9(a0)
+; RV32VB-PACK-NEXT: pack a2, a2, a3
+; RV32VB-PACK-NEXT: lbu a3, 10(a0)
+; RV32VB-PACK-NEXT: lbu a6, 11(a0)
+; RV32VB-PACK-NEXT: packh a4, a4, a5
+; RV32VB-PACK-NEXT: lbu a5, 12(a0)
; RV32VB-PACK-NEXT: lbu a7, 13(a0)
; RV32VB-PACK-NEXT: lbu t0, 14(a0)
; RV32VB-PACK-NEXT: lbu a0, 15(a0)
-; RV32VB-PACK-NEXT: packh a4, a4, a5
-; RV32VB-PACK-NEXT: pack a3, a3, a4
-; RV32VB-PACK-NEXT: packh a4, a6, a7
+; RV32VB-PACK-NEXT: packh a3, a3, a6
+; RV32VB-PACK-NEXT: pack a3, a4, a3
+; RV32VB-PACK-NEXT: packh a4, a5, a7
; RV32VB-PACK-NEXT: packh a0, t0, a0
; RV32VB-PACK-NEXT: pack a0, a4, a0
; RV32VB-PACK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
@@ -1532,34 +1532,34 @@ define <16 x i8> @buildvec_v16i8_loads_contigous(ptr %p) {
; RVA22U64-NEXT: slli a3, a3, 16
; RVA22U64-NEXT: slli a4, a4, 24
; RVA22U64-NEXT: or a3, a3, a4
-; RVA22U64-NEXT: lbu a2, 4(a0)
; RVA22U64-NEXT: or a1, a1, a3
+; RVA22U64-NEXT: lbu a2, 4(a0)
; RVA22U64-NEXT: lbu a3, 5(a0)
; RVA22U64-NEXT: lbu a4, 6(a0)
-; RVA22U64-NEXT: slli a2, a2, 32
; RVA22U64-NEXT: lbu a5, 7(a0)
+; RVA22U64-NEXT: slli a2, a2, 32
; RVA22U64-NEXT: slli a3, a3, 40
; RVA22U64-NEXT: or a2, a2, a3
; RVA22U64-NEXT: slli a4, a4, 48
; RVA22U64-NEXT: slli a5, a5, 56
; RVA22U64-NEXT: or a4, a4, a5
; RVA22U64-NEXT: or a2, a2, a4
-; RVA22U64-NEXT: or a1, a1, a2
-; RVA22U64-NEXT: lbu a2, 9(a0)
; RVA22U64-NEXT: lbu a3, 8(a0)
-; RVA22U64-NEXT: lbu a4, 10(a0)
+; RVA22U64-NEXT: lbu a4, 9(a0)
+; RVA22U64-NEXT: or a1, a1, a2
+; RVA22U64-NEXT: lbu a2, 10(a0)
; RVA22U64-NEXT: lbu a5, 11(a0)
-; RVA22U64-NEXT: slli a2, a2, 8
-; RVA22U64-NEXT: or a2, a2, a3
-; RVA22U64-NEXT: slli a4, a4, 16
+; RVA22U64-NEXT: slli a4, a4, 8
+; RVA22U64-NEXT: or a3, a3, a4
+; RVA22U64-NEXT: slli a2, a2, 16
; RVA22U64-NEXT: slli a5, a5, 24
-; RVA22U64-NEXT: or a4, a4, a5
+; RVA22U64-NEXT: or a2, a2, a5
+; RVA22U64-NEXT: or a2, a2, a3
; RVA22U64-NEXT: lbu a3, 12(a0)
-; RVA22U64-NEXT: or a2, a2, a4
; RVA22U64-NEXT: lbu a4, 13(a0)
; RVA22U64-NEXT: lbu a5, 14(a0)
-; RVA22U64-NEXT: slli a3, a3, 32
; RVA22U64-NEXT: lbu a0, 15(a0)
+; RVA22U64-NEXT: slli a3, a3, 32
; RVA22U64-NEXT: slli a4, a4, 40
; RVA22U64-NEXT: or a3, a3, a4
; RVA22U64-NEXT: slli a5, a5, 48
@@ -1576,34 +1576,34 @@ define <16 x i8> @buildvec_v16i8_loads_contigous(ptr %p) {
; RVA22U64-PACK: # %bb.0:
; RVA22U64-PACK-NEXT: lbu a1, 0(a0)
; RVA22U64-PACK-NEXT: lbu a2, 1(a0)
-; RVA22U64-PACK-NEXT: lbu a3, 2(a0)
+; RVA22U64-PACK-NEXT: lbu a6, 2(a0)
; RVA22U64-PACK-NEXT: lbu a4, 3(a0)
-; RVA22U64-PACK-NEXT: packh a1, a1, a2
-; RVA22U64-PACK-NEXT: packh a2, a3, a4
-; RVA22U64-PACK-NEXT: lbu a3, 4(a0)
-; RVA22U64-PACK-NEXT: lbu a4, 5(a0)
-; RVA22U64-PACK-NEXT: packw a6, a1, a2
-; RVA22U64-PACK-NEXT: lbu a2, 6(a0)
-; RVA22U64-PACK-NEXT: lbu a5, 7(a0)
-; RVA22U64-PACK-NEXT: packh a3, a3, a4
-; RVA22U64-PACK-NEXT: lbu a4, 8(a0)
-; RVA22U64-PACK-NEXT: lbu a1, 9(a0)
+; RVA22U64-PACK-NEXT: packh a7, a1, a2
+; RVA22U64-PACK-NEXT: lbu a2, 4(a0)
+; RVA22U64-PACK-NEXT: lbu a5, 5(a0)
+; RVA22U64-PACK-NEXT: lbu a3, 6(a0)
+; RVA22U64-PACK-NEXT: lbu a1, 7(a0)
+; RVA22U64-PACK-NEXT: packh a4, a6, a4
+; RVA22U64-PACK-NEXT: packw a4, a7, a4
; RVA22U64-PACK-NEXT: packh a2, a2, a5
-; RVA22U64-PACK-NEXT: packw a2, a3, a2
-; RVA22U64-PACK-NEXT: pack a6, a6, a2
-; RVA22U64-PACK-NEXT: packh a7, a4, a1
-; RVA22U64-PACK-NEXT: lbu a3, 10(a0)
-; RVA22U64-PACK-NEXT: lbu a4, 11(a0)
-; RVA22U64-PACK-NEXT: lbu a5, 12(a0)
-; RVA22U64-PACK-NEXT: lbu a2, 13(a0)
-; RVA22U64-PACK-NEXT: lbu a1, 14(a0)
+; RVA22U64-PACK-NEXT: packh a1, a3, a1
+; RVA22U64-PACK-NEXT: packw a1, a2, a1
+; RVA22U64-PACK-NEXT: lbu a2, 8(a0)
+; RVA22U64-PACK-NEXT: lbu a3, 9(a0)
+; RVA22U64-PACK-NEXT: pack a6, a4, a1
+; RVA22U64-PACK-NEXT: lbu a7, 10(a0)
+; RVA22U64-PACK-NEXT: lbu a5, 11(a0)
+; RVA22U64-PACK-NEXT: packh a2, a2, a3
+; RVA22U64-PACK-NEXT: lbu a3, 12(a0)
+; RVA22U64-PACK-NEXT: lbu a1, 13(a0)
+; RVA22U64-PACK-NEXT: lbu a4, 14(a0)
; RVA22U64-PACK-NEXT: lbu a0, 15(a0)
-; RVA22U64-PACK-NEXT: packh a3, a3, a4
-; RVA22U64-PACK-NEXT: packw a3, a7, a3
-; RVA22U64-PACK-NEXT: packh a2, a5, a2
-; RVA22U64-PACK-NEXT: packh a0, a1, a0
-; RVA22U64-PACK-NEXT: packw a0, a2, a0
-; RVA22U64-PACK-NEXT: pack a0, a3, a0
+; RVA22U64-PACK-NEXT: packh a5, a7, a5
+; RVA22U64-PACK-NEXT: packw a2, a2, a5
+; RVA22U64-PACK-NEXT: packh a1, a3, a1
+; RVA22U64-PACK-NEXT: packh a0, a4, a0
+; RVA22U64-PACK-NEXT: packw a0, a1, a0
+; RVA22U64-PACK-NEXT: pack a0, a2, a0
; RVA22U64-PACK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RVA22U64-PACK-NEXT: vmv.v.x v8, a6
; RVA22U64-PACK-NEXT: vslide1down.vx v8, v8, a0
@@ -1720,39 +1720,39 @@ define <16 x i8> @buildvec_v16i8_loads_gather(ptr %p) {
; RV32-ONLY-NEXT: lbu a2, 1(a0)
; RV32-ONLY-NEXT: lbu a3, 22(a0)
; RV32-ONLY-NEXT: lbu a4, 31(a0)
-; RV32-ONLY-NEXT: lbu a5, 44(a0)
-; RV32-ONLY-NEXT: lbu a6, 55(a0)
-; RV32-ONLY-NEXT: lbu a7, 623(a0)
+; RV32-ONLY-NEXT: lbu a5, 623(a0)
+; RV32-ONLY-NEXT: lbu a6, 44(a0)
+; RV32-ONLY-NEXT: lbu a7, 55(a0)
; RV32-ONLY-NEXT: lbu t0, 75(a0)
; RV32-ONLY-NEXT: lbu t1, 82(a0)
-; RV32-ONLY-NEXT: lbu t2, 93(a0)
-; RV32-ONLY-NEXT: lbu t3, 105(a0)
-; RV32-ONLY-NEXT: lbu t4, 161(a0)
-; RV32-ONLY-NEXT: lbu t5, 124(a0)
-; RV32-ONLY-NEXT: lbu t6, 163(a0)
-; RV32-ONLY-NEXT: lbu s0, 144(a0)
-; RV32-ONLY-NEXT: lbu a0, 154(a0)
+; RV32-ONLY-NEXT: lbu t2, 154(a0)
+; RV32-ONLY-NEXT: lbu t3, 161(a0)
+; RV32-ONLY-NEXT: lbu t4, 163(a0)
+; RV32-ONLY-NEXT: lbu t5, 93(a0)
+; RV32-ONLY-NEXT: lbu t6, 105(a0)
+; RV32-ONLY-NEXT: lbu s0, 124(a0)
+; RV32-ONLY-NEXT: lbu a0, 144(a0)
; RV32-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV32-ONLY-NEXT: vmv.v.x v8, a1
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a2
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a3
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a4
-; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a5
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a6
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a7
+; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a5
; RV32-ONLY-NEXT: vslide1down.vx v9, v8, t0
; RV32-ONLY-NEXT: vmv.v.x v8, t1
-; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t2
-; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t3
-; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t4
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t5
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t6
+; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t3
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, s0
-; RV32-ONLY-NEXT: li a1, 255
+; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t4
+; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a0
+; RV32-ONLY-NEXT: li a0, 255
; RV32-ONLY-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV32-ONLY-NEXT: vmv.s.x v0, a1
+; RV32-ONLY-NEXT: vmv.s.x v0, a0
; RV32-ONLY-NEXT: vsetvli zero, zero, e8, m1, ta, mu
-; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a0
+; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t2
; RV32-ONLY-NEXT: vslidedown.vi v8, v9, 8, v0.t
; RV32-ONLY-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32-ONLY-NEXT: addi sp, sp, 16
@@ -1770,36 +1770,36 @@ define <16 x i8> @buildvec_v16i8_loads_gather(ptr %p) {
; RV32VB-NEXT: slli a4, a4, 24
; RV32VB-NEXT: or a3, a4, a3
; RV32VB-NEXT: or a1, a1, a3
-; RV32VB-NEXT: lbu a2, 55(a0)
-; RV32VB-NEXT: lbu a3, 44(a0)
+; RV32VB-NEXT: lbu a2, 44(a0)
+; RV32VB-NEXT: lbu a3, 55(a0)
; RV32VB-NEXT: lbu a4, 623(a0)
; RV32VB-NEXT: lbu a5, 75(a0)
-; RV32VB-NEXT: slli a2, a2, 8
-; RV32VB-NEXT: or a2, a3, a2
+; RV32VB-NEXT: lbu a6, 82(a0)
+; RV32VB-NEXT: slli a3, a3, 8
+; RV32VB-NEXT: or a2, a2, a3
; RV32VB-NEXT: slli a4, a4, 16
; RV32VB-NEXT: slli a5, a5, 24
; RV32VB-NEXT: or a4, a5, a4
; RV32VB-NEXT: or a2, a2, a4
; RV32VB-NEXT: lbu a3, 93(a0)
-; RV32VB-NEXT: lbu a4, 82(a0)
-; RV32VB-NEXT: lbu a5, 105(a0)
-; RV32VB-NEXT: lbu a6, 161(a0)
+; RV32VB-NEXT: lbu a4, 105(a0)
+; RV32VB-NEXT: lbu a5, 124(a0)
+; RV32VB-NEXT: lbu a7, 144(a0)
; RV32VB-NEXT: slli a3, a3, 8
-; RV32VB-NEXT: or a3, a4, a3
-; RV32VB-NEXT: slli a5, a5, 16
-; RV32VB-NEXT: slli a6, a6, 24
-; RV32VB-NEXT: or a4, a6, a5
+; RV32VB-NEXT: lbu t0, 154(a0)
+; RV32VB-NEXT: lbu t1, 161(a0)
+; RV32VB-NEXT: or a3, a6, a3
+; RV32VB-NEXT: slli a4, a4, 16
+; RV32VB-NEXT: lbu a0, 163(a0)
+; RV32VB-NEXT: slli t1, t1, 24
+; RV32VB-NEXT: or a4, t1, a4
; RV32VB-NEXT: or a3, a3, a4
-; RV32VB-NEXT: lbu a4, 163(a0)
-; RV32VB-NEXT: lbu a5, 124(a0)
-; RV32VB-NEXT: lbu a6, 144(a0)
-; RV32VB-NEXT: lbu a0, 154(a0)
-; RV32VB-NEXT: slli a4, a4, 8
-; RV32VB-NEXT: or a4, a5, a4
-; RV32VB-NEXT: slli a6, a6, 16
-; RV32VB-NEXT: slli a0, a0, 24
-; RV32VB-NEXT: or a0, a0, a6
-; RV32VB-NEXT: or a0, a4, a0
+; RV32VB-NEXT: slli a0, a0, 8
+; RV32VB-NEXT: or a0, a5, a0
+; RV32VB-NEXT: slli a7, a7, 16
+; RV32VB-NEXT: slli t0, t0, 24
+; RV32VB-NEXT: or a4, t0, a7
+; RV32VB-NEXT: or a0, a0, a4
; RV32VB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32VB-NEXT: vmv.v.x v8, a1
; RV32VB-NEXT: vslide1down.vx v8, v8, a2
@@ -1815,32 +1815,32 @@ define <16 x i8> @buildvec_v16i8_loads_gather(ptr %p) {
; RV32VB-PACK-NEXT: lbu a4, 31(a0)
; RV32VB-PACK-NEXT: packh a1, a1, a2
; RV32VB-PACK-NEXT: packh a2, a3, a4
+; RV32VB-PACK-NEXT: lbu a3, 623(a0)
+; RV32VB-PACK-NEXT: lbu a4, 44(a0)
+; RV32VB-PACK-NEXT: lbu a5, 55(a0)
+; RV32VB-PACK-NEXT: lbu a6, 75(a0)
; RV32VB-PACK-NEXT: pack a1, a1, a2
-; RV32VB-PACK-NEXT: lbu a2, 44(a0)
-; RV32VB-PACK-NEXT: lbu a3, 55(a0)
-; RV32VB-PACK-NEXT: lbu a4, 623(a0)
-; RV32VB-PACK-NEXT: lbu a5, 75(a0)
-; RV32VB-PACK-NEXT: lbu a6, 82(a0)
-; RV32VB-PACK-NEXT: lbu a7, 93(a0)
-; RV32VB-PACK-NEXT: packh a2, a2, a3
-; RV32VB-PACK-NEXT: packh a3, a4, a5
-; RV32VB-PACK-NEXT: pack a2, a2, a3
-; RV32VB-PACK-NEXT: packh a3, a6, a7
-; RV32VB-PACK-NEXT: lbu a4, 105(a0)
-; RV32VB-PACK-NEXT: lbu a5, 161(a0)
-; RV32VB-PACK-NEXT: lbu a6, 124(a0)
-; RV32VB-PACK-NEXT: lbu a7, 163(a0)
-; RV32VB-PACK-NEXT: lbu t0, 144(a0)
-; RV32VB-PACK-NEXT: lbu a0, 154(a0)
+; RV32VB-PACK-NEXT: lbu a2, 82(a0)
; RV32VB-PACK-NEXT: packh a4, a4, a5
-; RV32VB-PACK-NEXT: pack a3, a3, a4
-; RV32VB-PACK-NEXT: packh a4, a6, a7
-; RV32VB-PACK-NEXT: packh a0, t0, a0
-; RV32VB-PACK-NEXT: pack a0, a4, a0
+; RV32VB-PACK-NEXT: packh a3, a3, a6
+; RV32VB-PACK-NEXT: pack a3, a4, a3
+; RV32VB-PACK-NEXT: lbu a4, 154(a0)
+; RV32VB-PACK-NEXT: lbu a5, 161(a0)
+; RV32VB-PACK-NEXT: lbu a6, 163(a0)
+; RV32VB-PACK-NEXT: lbu a7, 93(a0)
+; RV32VB-PACK-NEXT: lbu t0, 105(a0)
+; RV32VB-PACK-NEXT: lbu t1, 124(a0)
+; RV32VB-PACK-NEXT: lbu a0, 144(a0)
+; RV32VB-PACK-NEXT: packh a2, a2, a7
+; RV32VB-PACK-NEXT: packh a5, t0, a5
+; RV32VB-PACK-NEXT: pack a2, a2, a5
+; RV32VB-PACK-NEXT: packh a5, t1, a6
+; RV32VB-PACK-NEXT: packh a0, a0, a4
+; RV32VB-PACK-NEXT: pack a0, a5, a0
; RV32VB-PACK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32VB-PACK-NEXT: vmv.v.x v8, a1
-; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a2
; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a3
+; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a2
; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a0
; RV32VB-PACK-NEXT: ret
;
@@ -1854,39 +1854,39 @@ define <16 x i8> @buildvec_v16i8_loads_gather(ptr %p) {
; RV64V-ONLY-NEXT: lbu a2, 1(a0)
; RV64V-ONLY-NEXT: lbu a3, 22(a0)
; RV64V-ONLY-NEXT: lbu a4, 31(a0)
-; RV64V-ONLY-NEXT: lbu a5, 44(a0)
-; RV64V-ONLY-NEXT: lbu a6, 55(a0)
-; RV64V-ONLY-NEXT: lbu a7, 623(a0)
+; RV64V-ONLY-NEXT: lbu a5, 623(a0)
+; RV64V-ONLY-NEXT: lbu a6, 44(a0)
+; RV64V-ONLY-NEXT: lbu a7, 55(a0)
; RV64V-ONLY-NEXT: lbu t0, 75(a0)
; RV64V-ONLY-NEXT: lbu t1, 82(a0)
-; RV64V-ONLY-NEXT: lbu t2, 93(a0)
-; RV64V-ONLY-NEXT: lbu t3, 105(a0)
-; RV64V-ONLY-NEXT: lbu t4, 161(a0)
-; RV64V-ONLY-NEXT: lbu t5, 124(a0)
-; RV64V-ONLY-NEXT: lbu t6, 163(a0)
-; RV64V-ONLY-NEXT: lbu s0, 144(a0)
-; RV64V-ONLY-NEXT: lbu a0, 154(a0)
+; RV64V-ONLY-NEXT: lbu t2, 154(a0)
+; RV64V-ONLY-NEXT: lbu t3, 161(a0)
+; RV64V-ONLY-NEXT: lbu t4, 163(a0)
+; RV64V-ONLY-NEXT: lbu t5, 93(a0)
+; RV64V-ONLY-NEXT: lbu t6, 105(a0)
+; RV64V-ONLY-NEXT: lbu s0, 124(a0)
+; RV64V-ONLY-NEXT: lbu a0, 144(a0)
; RV64V-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV64V-ONLY-NEXT: vmv.v.x v8, a1
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a4
-; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a5
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a6
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a7
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a5
; RV64V-ONLY-NEXT: vslide1down.vx v9, v8, t0
; RV64V-ONLY-NEXT: vmv.v.x v8, t1
-; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t2
-; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t3
-; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t4
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t5
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t6
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t3
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, s0
-; RV64V-ONLY-NEXT: li a1, 255
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t4
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a0
+; RV64V-ONLY-NEXT: li a0, 255
; RV64V-ONLY-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV64V-ONLY-NEXT: vmv.s.x v0, a1
+; RV64V-ONLY-NEXT: vmv.s.x v0, a0
; RV64V-ONLY-NEXT: vsetvli zero, zero, e8, m1, ta, mu
-; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a0
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t2
; RV64V-ONLY-NEXT: vslidedown.vi v8, v9, 8, v0.t
; RV64V-ONLY-NEXT: ld s0, 8(sp) # 8-byte Folded Reload
; RV64V-ONLY-NEXT: addi sp, sp, 16
@@ -1903,43 +1903,43 @@ define <16 x i8> @buildvec_v16i8_loads_gather(ptr %p) {
; RVA22U64-NEXT: slli a3, a3, 16
; RVA22U64-NEXT: slli a4, a4, 24
; RVA22U64-NEXT: or a3, a3, a4
-; RVA22U64-NEXT: lbu a2, 44(a0)
; RVA22U64-NEXT: or a1, a1, a3
-; RVA22U64-NEXT: lbu a3, 55(a0)
-; RVA22U64-NEXT: lbu a4, 623(a0)
-; RVA22U64-NEXT: slli a2, a2, 32
+; RVA22U64-NEXT: lbu a2, 623(a0)
+; RVA22U64-NEXT: lbu a3, 44(a0)
+; RVA22U64-NEXT: lbu a4, 55(a0)
; RVA22U64-NEXT: lbu a5, 75(a0)
-; RVA22U64-NEXT: slli a3, a3, 40
-; RVA22U64-NEXT: or a2, a2, a3
-; RVA22U64-NEXT: slli a4, a4, 48
+; RVA22U64-NEXT: lbu a6, 82(a0)
+; RVA22U64-NEXT: slli a3, a3, 32
+; RVA22U64-NEXT: slli a4, a4, 40
+; RVA22U64-NEXT: or a3, a3, a4
+; RVA22U64-NEXT: slli a2, a2, 48
; RVA22U64-NEXT: slli a5, a5, 56
-; RVA22U64-NEXT: or a4, a4, a5
-; RVA22U64-NEXT: or a2, a2, a4
-; RVA22U64-NEXT: or a1, a1, a2
+; RVA22U64-NEXT: or a2, a2, a5
+; RVA22U64-NEXT: or a2, a2, a3
+; RVA22U64-NEXT: or a7, a1, a2
; RVA22U64-NEXT: lbu a2, 93(a0)
-; RVA22U64-NEXT: lbu a3, 82(a0)
-; RVA22U64-NEXT: lbu a4, 105(a0)
-; RVA22U64-NEXT: lbu a5, 161(a0)
+; RVA22U64-NEXT: lbu t0, 105(a0)
+; RVA22U64-NEXT: lbu a4, 124(a0)
+; RVA22U64-NEXT: lbu a5, 144(a0)
; RVA22U64-NEXT: slli a2, a2, 8
+; RVA22U64-NEXT: lbu a1, 154(a0)
+; RVA22U64-NEXT: lbu a3, 161(a0)
+; RVA22U64-NEXT: or a2, a6, a2
+; RVA22U64-NEXT: slli t0, t0, 16
+; RVA22U64-NEXT: lbu a0, 163(a0)
+; RVA22U64-NEXT: slli a3, a3, 24
+; RVA22U64-NEXT: or a3, a3, t0
; RVA22U64-NEXT: or a2, a2, a3
-; RVA22U64-NEXT: slli a4, a4, 16
-; RVA22U64-NEXT: slli a5, a5, 24
-; RVA22U64-NEXT: or a4, a4, a5
-; RVA22U64-NEXT: lbu a3, 124(a0)
-; RVA22U64-NEXT: or a2, a2, a4
-; RVA22U64-NEXT: lbu a4, 163(a0)
-; RVA22U64-NEXT: lbu a5, 144(a0)
-; RVA22U64-NEXT: slli a3, a3, 32
-; RVA22U64-NEXT: lbu a0, 154(a0)
-; RVA22U64-NEXT: slli a4, a4, 40
-; RVA22U64-NEXT: or a3, a3, a4
+; RVA22U64-NEXT: slli a4, a4, 32
+; RVA22U64-NEXT: slli a0, a0, 40
+; RVA22U64-NEXT: or a0, a0, a4
; RVA22U64-NEXT: slli a5, a5, 48
-; RVA22U64-NEXT: slli a0, a0, 56
-; RVA22U64-NEXT: or a0, a0, a5
-; RVA22U64-NEXT: or a0, a0, a3
+; RVA22U64-NEXT: slli a1, a1, 56
+; RVA22U64-NEXT: or a1, a1, a5
+; RVA22U64-NEXT: or a0, a0, a1
; RVA22U64-NEXT: or a0, a0, a2
; RVA22U64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RVA22U64-NEXT: vmv.v.x v8, a1
+; RVA22U64-NEXT: vmv.v.x v8, a7
; RVA22U64-NEXT: vslide1down.vx v8, v8, a0
; RVA22U64-NEXT: ret
;
@@ -1949,34 +1949,34 @@ define <16 x i8> @buildvec_v16i8_loads_gather(ptr %p) {
; RVA22U64-PACK-NEXT: lbu a2, 1(a0)
; RVA22U64-PACK-NEXT: lbu a3, 22(a0)
; RVA22U64-PACK-NEXT: lbu a4, 31(a0)
-; RVA22U64-PACK-NEXT: packh a1, a1, a2
+; RVA22U64-PACK-NEXT: packh a6, a1, a2
; RVA22U64-PACK-NEXT: packh a2, a3, a4
-; RVA22U64-PACK-NEXT: lbu a3, 44(a0)
-; RVA22U64-PACK-NEXT: lbu a4, 55(a0)
-; RVA22U64-PACK-NEXT: packw a6, a1, a2
-; RVA22U64-PACK-NEXT: lbu a2, 623(a0)
-; RVA22U64-PACK-NEXT: lbu a5, 75(a0)
-; RVA22U64-PACK-NEXT: packh a3, a3, a4
-; RVA22U64-PACK-NEXT: lbu a4, 82(a0)
-; RVA22U64-PACK-NEXT: lbu a1, 93(a0)
-; RVA22U64-PACK-NEXT: packh a2, a2, a5
-; RVA22U64-PACK-NEXT: packw a2, a3, a2
-; RVA22U64-PACK-NEXT: pack a6, a6, a2
-; RVA22U64-PACK-NEXT: packh a7, a4, a1
-; RVA22U64-PACK-NEXT: lbu a3, 105(a0)
-; RVA22U64-PACK-NEXT: lbu a4, 161(a0)
-; RVA22U64-PACK-NEXT: lbu a5, 124(a0)
-; RVA22U64-PACK-NEXT: lbu a2, 163(a0)
-; RVA22U64-PACK-NEXT: lbu a1, 144(a0)
-; RVA22U64-PACK-NEXT: lbu a0, 154(a0)
-; RVA22U64-PACK-NEXT: packh a3, a3, a4
-; RVA22U64-PACK-NEXT: packw a3, a7, a3
-; RVA22U64-PACK-NEXT: packh a2, a5, a2
-; RVA22U64-PACK-NEXT: packh a0, a1, a0
+; RVA22U64-PACK-NEXT: lbu a3, 623(a0)
+; RVA22U64-PACK-NEXT: lbu a4, 44(a0)
+; RVA22U64-PACK-NEXT: lbu a5, 55(a0)
+; RVA22U64-PACK-NEXT: lbu a1, 75(a0)
+; RVA22U64-PACK-NEXT: packw a2, a6, a2
+; RVA22U64-PACK-NEXT: lbu a6, 82(a0)
+; RVA22U64-PACK-NEXT: packh a4, a4, a5
+; RVA22U64-PACK-NEXT: packh a1, a3, a1
+; RVA22U64-PACK-NEXT: packw a1, a4, a1
+; RVA22U64-PACK-NEXT: pack a7, a2, a1
+; RVA22U64-PACK-NEXT: lbu t0, 154(a0)
+; RVA22U64-PACK-NEXT: lbu a3, 161(a0)
+; RVA22U64-PACK-NEXT: lbu a4, 163(a0)
+; RVA22U64-PACK-NEXT: lbu a5, 93(a0)
+; RVA22U64-PACK-NEXT: lbu a1, 105(a0)
+; RVA22U64-PACK-NEXT: lbu a2, 124(a0)
+; RVA22U64-PACK-NEXT: lbu a0, 144(a0)
+; RVA22U64-PACK-NEXT: packh a5, a6, a5
+; RVA22U64-PACK-NEXT: packh a1, a1, a3
+; RVA22U64-PACK-NEXT: packw a1, a5, a1
+; RVA22U64-PACK-NEXT: packh a2, a2, a4
+; RVA22U64-PACK-NEXT: packh a0, a0, t0
; RVA22U64-PACK-NEXT: packw a0, a2, a0
-; RVA22U64-PACK-NEXT: pack a0, a3, a0
+; RVA22U64-PACK-NEXT: pack a0, a1, a0
; RVA22U64-PACK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RVA22U64-PACK-NEXT: vmv.v.x v8, a6
+; RVA22U64-PACK-NEXT: vmv.v.x v8, a7
; RVA22U64-PACK-NEXT: vslide1down.vx v8, v8, a0
; RVA22U64-PACK-NEXT: ret
;
@@ -1990,39 +1990,39 @@ define <16 x i8> @buildvec_v16i8_loads_gather(ptr %p) {
; RV64ZVE32-NEXT: lbu a2, 1(a0)
; RV64ZVE32-NEXT: lbu a3, 22(a0)
; RV64ZVE32-NEXT: lbu a4, 31(a0)
-; RV64ZVE32-NEXT: lbu a5, 44(a0)
-; RV64ZVE32-NEXT: lbu a6, 55(a0)
-; RV64ZVE32-NEXT: lbu a7, 623(a0)
+; RV64ZVE32-NEXT: lbu a5, 623(a0)
+; RV64ZVE32-NEXT: lbu a6, 44(a0)
+; RV64ZVE32-NEXT: lbu a7, 55(a0)
; RV64ZVE32-NEXT: lbu t0, 75(a0)
; RV64ZVE32-NEXT: lbu t1, 82(a0)
-; RV64ZVE32-NEXT: lbu t2, 93(a0)
-; RV64ZVE32-NEXT: lbu t3, 105(a0)
-; RV64ZVE32-NEXT: lbu t4, 161(a0)
-; RV64ZVE32-NEXT: lbu t5, 124(a0)
-; RV64ZVE32-NEXT: lbu t6, 163(a0)
-; RV64ZVE32-NEXT: lbu s0, 144(a0)
-; RV64ZVE32-NEXT: lbu a0, 154(a0)
+; RV64ZVE32-NEXT: lbu t2, 154(a0)
+; RV64ZVE32-NEXT: lbu t3, 161(a0)
+; RV64ZVE32-NEXT: lbu t4, 163(a0)
+; RV64ZVE32-NEXT: lbu t5, 93(a0)
+; RV64ZVE32-NEXT: lbu t6, 105(a0)
+; RV64ZVE32-NEXT: lbu s0, 124(a0)
+; RV64ZVE32-NEXT: lbu a0, 144(a0)
; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV64ZVE32-NEXT: vmv.v.x v8, a1
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a4
-; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a5
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a6
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a7
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a5
; RV64ZVE32-NEXT: vslide1down.vx v9, v8, t0
; RV64ZVE32-NEXT: vmv.v.x v8, t1
-; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t2
-; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t3
-; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t4
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t5
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t6
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t3
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, s0
-; RV64ZVE32-NEXT: li a1, 255
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t4
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a0
+; RV64ZVE32-NEXT: li a0, 255
; RV64ZVE32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV64ZVE32-NEXT: vmv.s.x v0, a1
+; RV64ZVE32-NEXT: vmv.s.x v0, a0
; RV64ZVE32-NEXT: vsetvli zero, zero, e8, m1, ta, mu
-; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a0
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t2
; RV64ZVE32-NEXT: vslidedown.vi v8, v9, 8, v0.t
; RV64ZVE32-NEXT: ld s0, 8(sp) # 8-byte Folded Reload
; RV64ZVE32-NEXT: addi sp, sp, 16
@@ -2085,20 +2085,20 @@ define <16 x i8> @buildvec_v16i8_undef_low_half(ptr %p) {
; RV32-ONLY-NEXT: lbu a1, 82(a0)
; RV32-ONLY-NEXT: lbu a2, 93(a0)
; RV32-ONLY-NEXT: lbu a3, 105(a0)
-; RV32-ONLY-NEXT: lbu a4, 161(a0)
-; RV32-ONLY-NEXT: lbu a5, 124(a0)
-; RV32-ONLY-NEXT: lbu a6, 163(a0)
-; RV32-ONLY-NEXT: lbu a7, 144(a0)
-; RV32-ONLY-NEXT: lbu a0, 154(a0)
+; RV32-ONLY-NEXT: lbu a4, 124(a0)
+; RV32-ONLY-NEXT: lbu a5, 144(a0)
+; RV32-ONLY-NEXT: lbu a6, 154(a0)
+; RV32-ONLY-NEXT: lbu a7, 161(a0)
+; RV32-ONLY-NEXT: lbu a0, 163(a0)
; RV32-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV32-ONLY-NEXT: vmv.v.x v8, a1
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a2
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a3
+; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a7
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a4
+; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a0
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a5
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a6
-; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a7
-; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a0
; RV32-ONLY-NEXT: ret
;
; RV32VB-LABEL: buildvec_v16i8_undef_low_half:
@@ -2106,23 +2106,23 @@ define <16 x i8> @buildvec_v16i8_undef_low_half(ptr %p) {
; RV32VB-NEXT: lbu a1, 93(a0)
; RV32VB-NEXT: lbu a2, 82(a0)
; RV32VB-NEXT: lbu a3, 105(a0)
-; RV32VB-NEXT: lbu a4, 161(a0)
+; RV32VB-NEXT: lbu a4, 124(a0)
; RV32VB-NEXT: slli a1, a1, 8
+; RV32VB-NEXT: lbu a5, 144(a0)
+; RV32VB-NEXT: lbu a6, 154(a0)
+; RV32VB-NEXT: lbu a7, 161(a0)
; RV32VB-NEXT: or a1, a2, a1
; RV32VB-NEXT: slli a3, a3, 16
-; RV32VB-NEXT: slli a4, a4, 24
-; RV32VB-NEXT: or a3, a4, a3
-; RV32VB-NEXT: or a1, a1, a3
-; RV32VB-NEXT: lbu a2, 163(a0)
-; RV32VB-NEXT: lbu a3, 124(a0)
-; RV32VB-NEXT: lbu a4, 144(a0)
-; RV32VB-NEXT: lbu a0, 154(a0)
-; RV32VB-NEXT: slli a2, a2, 8
-; RV32VB-NEXT: or a2, a3, a2
-; RV32VB-NEXT: slli a4, a4, 16
-; RV32VB-NEXT: slli a0, a0, 24
-; RV32VB-NEXT: or a0, a0, a4
-; RV32VB-NEXT: or a0, a2, a0
+; RV32VB-NEXT: lbu a0, 163(a0)
+; RV32VB-NEXT: slli a7, a7, 24
+; RV32VB-NEXT: or a2, a7, a3
+; RV32VB-NEXT: or a1, a1, a2
+; RV32VB-NEXT: slli a0, a0, 8
+; RV32VB-NEXT: or a0, a4, a0
+; RV32VB-NEXT: slli a5, a5, 16
+; RV32VB-NEXT: slli a6, a6, 24
+; RV32VB-NEXT: or a2, a6, a5
+; RV32VB-NEXT: or a0, a0, a2
; RV32VB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32VB-NEXT: vmv.v.i v8, 0
; RV32VB-NEXT: vslide1down.vx v8, v8, zero
@@ -2132,26 +2132,26 @@ define <16 x i8> @buildvec_v16i8_undef_low_half(ptr %p) {
;
; RV32VB-PACK-LABEL: buildvec_v16i8_undef_low_half:
; RV32VB-PACK: # %bb.0:
-; RV32VB-PACK-NEXT: lbu a1, 82(a0)
-; RV32VB-PACK-NEXT: lbu a2, 93(a0)
-; RV32VB-PACK-NEXT: packh a1, a1, a2
-; RV32VB-PACK-NEXT: lbu a2, 105(a0)
+; RV32VB-PACK-NEXT: lbu a1, 144(a0)
+; RV32VB-PACK-NEXT: lbu a2, 154(a0)
; RV32VB-PACK-NEXT: lbu a3, 161(a0)
-; RV32VB-PACK-NEXT: lbu a4, 124(a0)
-; RV32VB-PACK-NEXT: lbu a5, 163(a0)
-; RV32VB-PACK-NEXT: lbu a6, 144(a0)
-; RV32VB-PACK-NEXT: lbu a0, 154(a0)
-; RV32VB-PACK-NEXT: packh a2, a2, a3
-; RV32VB-PACK-NEXT: pack a1, a1, a2
-; RV32VB-PACK-NEXT: packh a2, a4, a5
-; RV32VB-PACK-NEXT: packh a0, a6, a0
-; RV32VB-PACK-NEXT: pack a0, a2, a0
-; RV32VB-PACK-NEXT: packh a2, a0, a0
-; RV32VB-PACK-NEXT: pack a2, a2, a2
+; RV32VB-PACK-NEXT: lbu a4, 82(a0)
+; RV32VB-PACK-NEXT: lbu a5, 93(a0)
+; RV32VB-PACK-NEXT: lbu a6, 105(a0)
+; RV32VB-PACK-NEXT: lbu a7, 124(a0)
+; RV32VB-PACK-NEXT: lbu a0, 163(a0)
+; RV32VB-PACK-NEXT: packh a4, a4, a5
+; RV32VB-PACK-NEXT: packh a3, a6, a3
+; RV32VB-PACK-NEXT: pack a3, a4, a3
+; RV32VB-PACK-NEXT: packh a0, a7, a0
+; RV32VB-PACK-NEXT: packh a1, a1, a2
+; RV32VB-PACK-NEXT: pack a0, a0, a1
+; RV32VB-PACK-NEXT: packh a1, a0, a0
+; RV32VB-PACK-NEXT: pack a1, a1, a1
; RV32VB-PACK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32VB-PACK-NEXT: vmv.v.x v8, a2
-; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a2
+; RV32VB-PACK-NEXT: vmv.v.x v8, a1
; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a1
+; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a3
; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a0
; RV32VB-PACK-NEXT: ret
;
@@ -2160,44 +2160,44 @@ define <16 x i8> @buildvec_v16i8_undef_low_half(ptr %p) {
; RV64V-ONLY-NEXT: lbu a1, 82(a0)
; RV64V-ONLY-NEXT: lbu a2, 93(a0)
; RV64V-ONLY-NEXT: lbu a3, 105(a0)
-; RV64V-ONLY-NEXT: lbu a4, 161(a0)
-; RV64V-ONLY-NEXT: lbu a5, 124(a0)
-; RV64V-ONLY-NEXT: lbu a6, 163(a0)
-; RV64V-ONLY-NEXT: lbu a7, 144(a0)
-; RV64V-ONLY-NEXT: lbu a0, 154(a0)
+; RV64V-ONLY-NEXT: lbu a4, 124(a0)
+; RV64V-ONLY-NEXT: lbu a5, 144(a0)
+; RV64V-ONLY-NEXT: lbu a6, 154(a0)
+; RV64V-ONLY-NEXT: lbu a7, 161(a0)
+; RV64V-ONLY-NEXT: lbu a0, 163(a0)
; RV64V-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV64V-ONLY-NEXT: vmv.v.x v8, a1
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a7
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a4
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a0
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a5
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a6
-; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a7
-; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a0
; RV64V-ONLY-NEXT: ret
;
; RVA22U64-LABEL: buildvec_v16i8_undef_low_half:
; RVA22U64: # %bb.0:
; RVA22U64-NEXT: lbu a1, 93(a0)
-; RVA22U64-NEXT: lbu a2, 82(a0)
-; RVA22U64-NEXT: lbu a3, 105(a0)
-; RVA22U64-NEXT: lbu a4, 161(a0)
+; RVA22U64-NEXT: lbu a6, 82(a0)
+; RVA22U64-NEXT: lbu a7, 105(a0)
+; RVA22U64-NEXT: lbu a4, 124(a0)
; RVA22U64-NEXT: slli a1, a1, 8
-; RVA22U64-NEXT: or a1, a1, a2
-; RVA22U64-NEXT: slli a3, a3, 16
-; RVA22U64-NEXT: slli a4, a4, 24
-; RVA22U64-NEXT: or a3, a3, a4
-; RVA22U64-NEXT: lbu a2, 124(a0)
+; RVA22U64-NEXT: lbu a5, 144(a0)
+; RVA22U64-NEXT: lbu a2, 154(a0)
+; RVA22U64-NEXT: lbu a3, 161(a0)
+; RVA22U64-NEXT: or a1, a6, a1
+; RVA22U64-NEXT: slli a7, a7, 16
+; RVA22U64-NEXT: lbu a0, 163(a0)
+; RVA22U64-NEXT: slli a3, a3, 24
+; RVA22U64-NEXT: or a3, a3, a7
; RVA22U64-NEXT: or a1, a1, a3
-; RVA22U64-NEXT: lbu a3, 163(a0)
-; RVA22U64-NEXT: lbu a4, 144(a0)
-; RVA22U64-NEXT: slli a2, a2, 32
-; RVA22U64-NEXT: lbu a0, 154(a0)
-; RVA22U64-NEXT: slli a3, a3, 40
-; RVA22U64-NEXT: or a2, a2, a3
-; RVA22U64-NEXT: slli a4, a4, 48
-; RVA22U64-NEXT: slli a0, a0, 56
+; RVA22U64-NEXT: slli a4, a4, 32
+; RVA22U64-NEXT: slli a0, a0, 40
; RVA22U64-NEXT: or a0, a0, a4
+; RVA22U64-NEXT: slli a5, a5, 48
+; RVA22U64-NEXT: slli a2, a2, 56
+; RVA22U64-NEXT: or a2, a2, a5
; RVA22U64-NEXT: or a0, a0, a2
; RVA22U64-NEXT: or a0, a0, a1
; RVA22U64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
@@ -2207,21 +2207,21 @@ define <16 x i8> @buildvec_v16i8_undef_low_half(ptr %p) {
;
; RVA22U64-PACK-LABEL: buildvec_v16i8_undef_low_half:
; RVA22U64-PACK: # %bb.0:
-; RVA22U64-PACK-NEXT: lbu a1, 82(a0)
-; RVA22U64-PACK-NEXT: lbu a2, 93(a0)
-; RVA22U64-PACK-NEXT: packh a6, a1, a2
-; RVA22U64-PACK-NEXT: lbu a2, 105(a0)
+; RVA22U64-PACK-NEXT: lbu a6, 144(a0)
+; RVA22U64-PACK-NEXT: lbu a7, 154(a0)
; RVA22U64-PACK-NEXT: lbu a3, 161(a0)
-; RVA22U64-PACK-NEXT: lbu a4, 124(a0)
-; RVA22U64-PACK-NEXT: lbu a5, 163(a0)
-; RVA22U64-PACK-NEXT: lbu a1, 144(a0)
-; RVA22U64-PACK-NEXT: lbu a0, 154(a0)
-; RVA22U64-PACK-NEXT: packh a2, a2, a3
-; RVA22U64-PACK-NEXT: packw a2, a6, a2
-; RVA22U64-PACK-NEXT: packh a3, a4, a5
-; RVA22U64-PACK-NEXT: packh a0, a1, a0
-; RVA22U64-PACK-NEXT: packw a0, a3, a0
-; RVA22U64-PACK-NEXT: pack a0, a2, a0
+; RVA22U64-PACK-NEXT: lbu a4, 82(a0)
+; RVA22U64-PACK-NEXT: lbu a5, 93(a0)
+; RVA22U64-PACK-NEXT: lbu a1, 105(a0)
+; RVA22U64-PACK-NEXT: lbu a2, 124(a0)
+; RVA22U64-PACK-NEXT: lbu a0, 163(a0)
+; RVA22U64-PACK-NEXT: packh a4, a4, a5
+; RVA22U64-PACK-NEXT: packh a1, a1, a3
+; RVA22U64-PACK-NEXT: packw a1, a4, a1
+; RVA22U64-PACK-NEXT: packh a0, a2, a0
+; RVA22U64-PACK-NEXT: packh a2, a6, a7
+; RVA22U64-PACK-NEXT: packw a0, a0, a2
+; RVA22U64-PACK-NEXT: pack a0, a1, a0
; RVA22U64-PACK-NEXT: packh a1, a0, a0
; RVA22U64-PACK-NEXT: packw a1, a1, a1
; RVA22U64-PACK-NEXT: pack a1, a1, a1
@@ -2235,20 +2235,20 @@ define <16 x i8> @buildvec_v16i8_undef_low_half(ptr %p) {
; RV64ZVE32-NEXT: lbu a1, 82(a0)
; RV64ZVE32-NEXT: lbu a2, 93(a0)
; RV64ZVE32-NEXT: lbu a3, 105(a0)
-; RV64ZVE32-NEXT: lbu a4, 161(a0)
-; RV64ZVE32-NEXT: lbu a5, 124(a0)
-; RV64ZVE32-NEXT: lbu a6, 163(a0)
-; RV64ZVE32-NEXT: lbu a7, 144(a0)
-; RV64ZVE32-NEXT: lbu a0, 154(a0)
+; RV64ZVE32-NEXT: lbu a4, 124(a0)
+; RV64ZVE32-NEXT: lbu a5, 144(a0)
+; RV64ZVE32-NEXT: lbu a6, 154(a0)
+; RV64ZVE32-NEXT: lbu a7, 161(a0)
+; RV64ZVE32-NEXT: lbu a0, 163(a0)
; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV64ZVE32-NEXT: vmv.v.x v8, a1
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a7
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a4
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a0
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a5
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a6
-; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a7
-; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a0
; RV64ZVE32-NEXT: ret
%p9 = getelementptr i8, ptr %p, i32 82
%p10 = getelementptr i8, ptr %p, i32 93
@@ -2286,18 +2286,18 @@ define <16 x i8> @buildvec_v16i8_undef_high_half(ptr %p) {
; RV32-ONLY-NEXT: lbu a2, 1(a0)
; RV32-ONLY-NEXT: lbu a3, 22(a0)
; RV32-ONLY-NEXT: lbu a4, 31(a0)
-; RV32-ONLY-NEXT: lbu a5, 44(a0)
-; RV32-ONLY-NEXT: lbu a6, 55(a0)
-; RV32-ONLY-NEXT: lbu a7, 623(a0)
+; RV32-ONLY-NEXT: lbu a5, 623(a0)
+; RV32-ONLY-NEXT: lbu a6, 44(a0)
+; RV32-ONLY-NEXT: lbu a7, 55(a0)
; RV32-ONLY-NEXT: lbu a0, 75(a0)
; RV32-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV32-ONLY-NEXT: vmv.v.x v8, a1
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a2
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a3
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a4
-; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a5
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a6
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a7
+; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a5
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a0
; RV32-ONLY-NEXT: vslidedown.vi v8, v8, 8
; RV32-ONLY-NEXT: ret
@@ -2313,16 +2313,16 @@ define <16 x i8> @buildvec_v16i8_undef_high_half(ptr %p) {
; RV32VB-NEXT: slli a3, a3, 16
; RV32VB-NEXT: slli a4, a4, 24
; RV32VB-NEXT: or a3, a4, a3
+; RV32VB-NEXT: lbu a2, 44(a0)
+; RV32VB-NEXT: lbu a4, 55(a0)
; RV32VB-NEXT: or a1, a1, a3
-; RV32VB-NEXT: lbu a2, 55(a0)
-; RV32VB-NEXT: lbu a3, 44(a0)
-; RV32VB-NEXT: lbu a4, 623(a0)
+; RV32VB-NEXT: lbu a3, 623(a0)
; RV32VB-NEXT: lbu a0, 75(a0)
-; RV32VB-NEXT: slli a2, a2, 8
-; RV32VB-NEXT: or a2, a3, a2
-; RV32VB-NEXT: slli a4, a4, 16
+; RV32VB-NEXT: slli a4, a4, 8
+; RV32VB-NEXT: or a2, a2, a4
+; RV32VB-NEXT: slli a3, a3, 16
; RV32VB-NEXT: slli a0, a0, 24
-; RV32VB-NEXT: or a0, a0, a4
+; RV32VB-NEXT: or a0, a0, a3
; RV32VB-NEXT: or a0, a2, a0
; RV32VB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32VB-NEXT: vmv.v.x v8, a1
@@ -2335,18 +2335,18 @@ define <16 x i8> @buildvec_v16i8_undef_high_half(ptr %p) {
; RV32VB-PACK: # %bb.0:
; RV32VB-PACK-NEXT: lbu a1, 0(a0)
; RV32VB-PACK-NEXT: lbu a2, 1(a0)
+; RV32VB-PACK-NEXT: lbu a3, 22(a0)
+; RV32VB-PACK-NEXT: lbu a4, 31(a0)
; RV32VB-PACK-NEXT: packh a1, a1, a2
-; RV32VB-PACK-NEXT: lbu a2, 22(a0)
-; RV32VB-PACK-NEXT: lbu a3, 31(a0)
-; RV32VB-PACK-NEXT: lbu a4, 44(a0)
-; RV32VB-PACK-NEXT: lbu a5, 55(a0)
-; RV32VB-PACK-NEXT: lbu a6, 623(a0)
+; RV32VB-PACK-NEXT: lbu a2, 623(a0)
+; RV32VB-PACK-NEXT: lbu a5, 44(a0)
+; RV32VB-PACK-NEXT: lbu a6, 55(a0)
; RV32VB-PACK-NEXT: lbu a0, 75(a0)
-; RV32VB-PACK-NEXT: packh a2, a2, a3
-; RV32VB-PACK-NEXT: pack a1, a1, a2
-; RV32VB-PACK-NEXT: packh a2, a4, a5
-; RV32VB-PACK-NEXT: packh a0, a6, a0
-; RV32VB-PACK-NEXT: pack a0, a2, a0
+; RV32VB-PACK-NEXT: packh a3, a3, a4
+; RV32VB-PACK-NEXT: pack a1, a1, a3
+; RV32VB-PACK-NEXT: packh a3, a5, a6
+; RV32VB-PACK-NEXT: packh a0, a2, a0
+; RV32VB-PACK-NEXT: pack a0, a3, a0
; RV32VB-PACK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32VB-PACK-NEXT: vmv.v.x v8, a1
; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a0
@@ -2362,18 +2362,18 @@ define <16 x i8> @buildvec_v16i8_undef_high_half(ptr %p) {
; RV64V-ONLY-NEXT: lbu a2, 1(a0)
; RV64V-ONLY-NEXT: lbu a3, 22(a0)
; RV64V-ONLY-NEXT: lbu a4, 31(a0)
-; RV64V-ONLY-NEXT: lbu a5, 44(a0)
-; RV64V-ONLY-NEXT: lbu a6, 55(a0)
-; RV64V-ONLY-NEXT: lbu a7, 623(a0)
+; RV64V-ONLY-NEXT: lbu a5, 623(a0)
+; RV64V-ONLY-NEXT: lbu a6, 44(a0)
+; RV64V-ONLY-NEXT: lbu a7, 55(a0)
; RV64V-ONLY-NEXT: lbu a0, 75(a0)
; RV64V-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV64V-ONLY-NEXT: vmv.v.x v8, a1
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a4
-; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a5
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a6
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a7
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a5
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a0
; RV64V-ONLY-NEXT: vslidedown.vi v8, v8, 8
; RV64V-ONLY-NEXT: ret
@@ -2389,12 +2389,12 @@ define <16 x i8> @buildvec_v16i8_undef_high_half(ptr %p) {
; RVA22U64-NEXT: slli a3, a3, 16
; RVA22U64-NEXT: slli a4, a4, 24
; RVA22U64-NEXT: or a3, a3, a4
-; RVA22U64-NEXT: lbu a2, 44(a0)
; RVA22U64-NEXT: or a1, a1, a3
+; RVA22U64-NEXT: lbu a2, 44(a0)
; RVA22U64-NEXT: lbu a3, 55(a0)
; RVA22U64-NEXT: lbu a4, 623(a0)
-; RVA22U64-NEXT: slli a2, a2, 32
; RVA22U64-NEXT: lbu a0, 75(a0)
+; RVA22U64-NEXT: slli a2, a2, 32
; RVA22U64-NEXT: slli a3, a3, 40
; RVA22U64-NEXT: or a2, a2, a3
; RVA22U64-NEXT: slli a4, a4, 48
@@ -2411,19 +2411,19 @@ define <16 x i8> @buildvec_v16i8_undef_high_half(ptr %p) {
; RVA22U64-PACK: # %bb.0:
; RVA22U64-PACK-NEXT: lbu a1, 0(a0)
; RVA22U64-PACK-NEXT: lbu a2, 1(a0)
-; RVA22U64-PACK-NEXT: packh a6, a1, a2
-; RVA22U64-PACK-NEXT: lbu a2, 22(a0)
-; RVA22U64-PACK-NEXT: lbu a3, 31(a0)
-; RVA22U64-PACK-NEXT: lbu a4, 44(a0)
-; RVA22U64-PACK-NEXT: lbu a5, 55(a0)
-; RVA22U64-PACK-NEXT: lbu a1, 623(a0)
+; RVA22U64-PACK-NEXT: lbu a6, 22(a0)
+; RVA22U64-PACK-NEXT: lbu a4, 31(a0)
+; RVA22U64-PACK-NEXT: packh a1, a1, a2
+; RVA22U64-PACK-NEXT: lbu a2, 623(a0)
+; RVA22U64-PACK-NEXT: lbu a5, 44(a0)
+; RVA22U64-PACK-NEXT: lbu a3, 55(a0)
; RVA22U64-PACK-NEXT: lbu a0, 75(a0)
-; RVA22U64-PACK-NEXT: packh a2, a2, a3
-; RVA22U64-PACK-NEXT: packw a2, a6, a2
-; RVA22U64-PACK-NEXT: packh a3, a4, a5
-; RVA22U64-PACK-NEXT: packh a0, a1, a0
+; RVA22U64-PACK-NEXT: packh a4, a6, a4
+; RVA22U64-PACK-NEXT: packw a1, a1, a4
+; RVA22U64-PACK-NEXT: packh a3, a5, a3
+; RVA22U64-PACK-NEXT: packh a0, a2, a0
; RVA22U64-PACK-NEXT: packw a0, a3, a0
-; RVA22U64-PACK-NEXT: pack a0, a2, a0
+; RVA22U64-PACK-NEXT: pack a0, a1, a0
; RVA22U64-PACK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RVA22U64-PACK-NEXT: vmv.v.x v8, a0
; RVA22U64-PACK-NEXT: packh a0, a0, a0
@@ -2438,18 +2438,18 @@ define <16 x i8> @buildvec_v16i8_undef_high_half(ptr %p) {
; RV64ZVE32-NEXT: lbu a2, 1(a0)
; RV64ZVE32-NEXT: lbu a3, 22(a0)
; RV64ZVE32-NEXT: lbu a4, 31(a0)
-; RV64ZVE32-NEXT: lbu a5, 44(a0)
-; RV64ZVE32-NEXT: lbu a6, 55(a0)
-; RV64ZVE32-NEXT: lbu a7, 623(a0)
+; RV64ZVE32-NEXT: lbu a5, 623(a0)
+; RV64ZVE32-NEXT: lbu a6, 44(a0)
+; RV64ZVE32-NEXT: lbu a7, 55(a0)
; RV64ZVE32-NEXT: lbu a0, 75(a0)
; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV64ZVE32-NEXT: vmv.v.x v8, a1
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a4
-; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a5
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a6
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a7
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a5
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a0
; RV64ZVE32-NEXT: vslidedown.vi v8, v8, 8
; RV64ZVE32-NEXT: ret
@@ -2484,20 +2484,20 @@ define <16 x i8> @buildvec_v16i8_undef_high_half(ptr %p) {
define <16 x i8> @buildvec_v16i8_undef_edges(ptr %p) {
; RV32-ONLY-LABEL: buildvec_v16i8_undef_edges:
; RV32-ONLY: # %bb.0:
-; RV32-ONLY-NEXT: lbu a1, 31(a0)
-; RV32-ONLY-NEXT: lbu a2, 44(a0)
-; RV32-ONLY-NEXT: lbu a3, 55(a0)
-; RV32-ONLY-NEXT: lbu a4, 623(a0)
+; RV32-ONLY-NEXT: lbu a1, 623(a0)
+; RV32-ONLY-NEXT: lbu a2, 31(a0)
+; RV32-ONLY-NEXT: lbu a3, 44(a0)
+; RV32-ONLY-NEXT: lbu a4, 55(a0)
; RV32-ONLY-NEXT: lbu a5, 75(a0)
; RV32-ONLY-NEXT: lbu a6, 82(a0)
; RV32-ONLY-NEXT: lbu a7, 93(a0)
; RV32-ONLY-NEXT: lbu t0, 105(a0)
; RV32-ONLY-NEXT: lbu a0, 161(a0)
; RV32-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV32-ONLY-NEXT: vmv.v.x v8, a1
-; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a2
+; RV32-ONLY-NEXT: vmv.v.x v8, a2
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a3
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a4
+; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a1
; RV32-ONLY-NEXT: vslide1down.vx v9, v8, a5
; RV32-ONLY-NEXT: vmv.v.x v8, a6
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a7
@@ -2513,58 +2513,58 @@ define <16 x i8> @buildvec_v16i8_undef_edges(ptr %p) {
;
; RV32VB-LABEL: buildvec_v16i8_undef_edges:
; RV32VB: # %bb.0:
-; RV32VB-NEXT: lbu a1, 31(a0)
-; RV32VB-NEXT: lbu a2, 55(a0)
+; RV32VB-NEXT: lbu a1, 55(a0)
+; RV32VB-NEXT: lbu a2, 31(a0)
; RV32VB-NEXT: lbu a3, 44(a0)
; RV32VB-NEXT: lbu a4, 623(a0)
; RV32VB-NEXT: lbu a5, 75(a0)
-; RV32VB-NEXT: slli a2, a2, 8
-; RV32VB-NEXT: or a2, a3, a2
+; RV32VB-NEXT: slli a1, a1, 8
+; RV32VB-NEXT: or a1, a3, a1
; RV32VB-NEXT: slli a4, a4, 16
; RV32VB-NEXT: slli a5, a5, 24
-; RV32VB-NEXT: lbu a3, 93(a0)
; RV32VB-NEXT: or a4, a5, a4
-; RV32VB-NEXT: or a2, a2, a4
-; RV32VB-NEXT: lbu a4, 82(a0)
-; RV32VB-NEXT: slli a3, a3, 8
-; RV32VB-NEXT: lbu a5, 105(a0)
+; RV32VB-NEXT: lbu a3, 82(a0)
+; RV32VB-NEXT: lbu a5, 93(a0)
+; RV32VB-NEXT: or a1, a1, a4
+; RV32VB-NEXT: lbu a4, 105(a0)
; RV32VB-NEXT: lbu a0, 161(a0)
-; RV32VB-NEXT: or a3, a4, a3
-; RV32VB-NEXT: slli a1, a1, 24
-; RV32VB-NEXT: slli a5, a5, 16
+; RV32VB-NEXT: slli a5, a5, 8
+; RV32VB-NEXT: or a3, a3, a5
+; RV32VB-NEXT: slli a2, a2, 24
+; RV32VB-NEXT: slli a4, a4, 16
; RV32VB-NEXT: slli a0, a0, 24
-; RV32VB-NEXT: or a0, a0, a5
+; RV32VB-NEXT: or a0, a0, a4
; RV32VB-NEXT: or a0, a3, a0
; RV32VB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32VB-NEXT: vmv.v.x v8, a1
-; RV32VB-NEXT: vslide1down.vx v8, v8, a2
+; RV32VB-NEXT: vmv.v.x v8, a2
+; RV32VB-NEXT: vslide1down.vx v8, v8, a1
; RV32VB-NEXT: vslide1down.vx v8, v8, a0
; RV32VB-NEXT: vslide1down.vx v8, v8, zero
; RV32VB-NEXT: ret
;
; RV32VB-PACK-LABEL: buildvec_v16i8_undef_edges:
; RV32VB-PACK: # %bb.0:
-; RV32VB-PACK-NEXT: lbu a1, 31(a0)
-; RV32VB-PACK-NEXT: lbu a2, 44(a0)
-; RV32VB-PACK-NEXT: lbu a3, 55(a0)
-; RV32VB-PACK-NEXT: lbu a4, 623(a0)
+; RV32VB-PACK-NEXT: lbu a1, 623(a0)
+; RV32VB-PACK-NEXT: lbu a2, 31(a0)
+; RV32VB-PACK-NEXT: lbu a3, 44(a0)
+; RV32VB-PACK-NEXT: lbu a4, 55(a0)
; RV32VB-PACK-NEXT: lbu a5, 75(a0)
-; RV32VB-PACK-NEXT: packh a1, a0, a1
-; RV32VB-PACK-NEXT: packh a2, a2, a3
-; RV32VB-PACK-NEXT: packh a3, a4, a5
+; RV32VB-PACK-NEXT: packh a2, a0, a2
+; RV32VB-PACK-NEXT: packh a3, a3, a4
+; RV32VB-PACK-NEXT: packh a1, a1, a5
; RV32VB-PACK-NEXT: lbu a4, 82(a0)
; RV32VB-PACK-NEXT: lbu a5, 93(a0)
-; RV32VB-PACK-NEXT: pack a2, a2, a3
+; RV32VB-PACK-NEXT: pack a1, a3, a1
; RV32VB-PACK-NEXT: lbu a3, 105(a0)
; RV32VB-PACK-NEXT: lbu a0, 161(a0)
; RV32VB-PACK-NEXT: packh a4, a4, a5
; RV32VB-PACK-NEXT: packh a5, a0, a0
-; RV32VB-PACK-NEXT: pack a1, a5, a1
+; RV32VB-PACK-NEXT: pack a2, a5, a2
; RV32VB-PACK-NEXT: packh a0, a3, a0
; RV32VB-PACK-NEXT: pack a0, a4, a0
; RV32VB-PACK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32VB-PACK-NEXT: vmv.v.x v8, a1
-; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a2
+; RV32VB-PACK-NEXT: vmv.v.x v8, a2
+; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a1
; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a0
; RV32VB-PACK-NEXT: pack a0, a5, a5
; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a0
@@ -2572,20 +2572,20 @@ define <16 x i8> @buildvec_v16i8_undef_edges(ptr %p) {
;
; RV64V-ONLY-LABEL: buildvec_v16i8_undef_edges:
; RV64V-ONLY: # %bb.0:
-; RV64V-ONLY-NEXT: lbu a1, 31(a0)
-; RV64V-ONLY-NEXT: lbu a2, 44(a0)
-; RV64V-ONLY-NEXT: lbu a3, 55(a0)
-; RV64V-ONLY-NEXT: lbu a4, 623(a0)
+; RV64V-ONLY-NEXT: lbu a1, 623(a0)
+; RV64V-ONLY-NEXT: lbu a2, 31(a0)
+; RV64V-ONLY-NEXT: lbu a3, 44(a0)
+; RV64V-ONLY-NEXT: lbu a4, 55(a0)
; RV64V-ONLY-NEXT: lbu a5, 75(a0)
; RV64V-ONLY-NEXT: lbu a6, 82(a0)
; RV64V-ONLY-NEXT: lbu a7, 93(a0)
; RV64V-ONLY-NEXT: lbu t0, 105(a0)
; RV64V-ONLY-NEXT: lbu a0, 161(a0)
; RV64V-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV64V-ONLY-NEXT: vmv.v.x v8, a1
-; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2
+; RV64V-ONLY-NEXT: vmv.v.x v8, a2
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a4
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a1
; RV64V-ONLY-NEXT: vslide1down.vx v9, v8, a5
; RV64V-ONLY-NEXT: vmv.v.x v8, a6
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a7
@@ -2601,30 +2601,30 @@ define <16 x i8> @buildvec_v16i8_undef_edges(ptr %p) {
;
; RVA22U64-LABEL: buildvec_v16i8_undef_edges:
; RVA22U64: # %bb.0:
-; RVA22U64-NEXT: lbu a1, 44(a0)
-; RVA22U64-NEXT: lbu a2, 55(a0)
-; RVA22U64-NEXT: lbu a3, 31(a0)
+; RVA22U64-NEXT: lbu a1, 31(a0)
+; RVA22U64-NEXT: lbu a2, 44(a0)
+; RVA22U64-NEXT: lbu a3, 55(a0)
; RVA22U64-NEXT: lbu a4, 623(a0)
-; RVA22U64-NEXT: slli a1, a1, 32
-; RVA22U64-NEXT: slli a2, a2, 40
; RVA22U64-NEXT: lbu a5, 75(a0)
-; RVA22U64-NEXT: or a1, a1, a2
-; RVA22U64-NEXT: slli a3, a3, 24
+; RVA22U64-NEXT: slli a2, a2, 32
+; RVA22U64-NEXT: slli a3, a3, 40
+; RVA22U64-NEXT: or a2, a2, a3
+; RVA22U64-NEXT: slli a1, a1, 24
; RVA22U64-NEXT: slli a4, a4, 48
; RVA22U64-NEXT: slli a5, a5, 56
; RVA22U64-NEXT: or a4, a4, a5
-; RVA22U64-NEXT: or a1, a1, a4
-; RVA22U64-NEXT: add.uw a1, a3, a1
-; RVA22U64-NEXT: lbu a2, 93(a0)
+; RVA22U64-NEXT: or a2, a2, a4
; RVA22U64-NEXT: lbu a3, 82(a0)
-; RVA22U64-NEXT: lbu a4, 105(a0)
+; RVA22U64-NEXT: lbu a4, 93(a0)
+; RVA22U64-NEXT: add.uw a1, a1, a2
+; RVA22U64-NEXT: lbu a2, 105(a0)
; RVA22U64-NEXT: lbu a0, 161(a0)
-; RVA22U64-NEXT: slli a2, a2, 8
-; RVA22U64-NEXT: or a2, a2, a3
-; RVA22U64-NEXT: slli a4, a4, 16
+; RVA22U64-NEXT: slli a4, a4, 8
+; RVA22U64-NEXT: or a3, a3, a4
+; RVA22U64-NEXT: slli a2, a2, 16
; RVA22U64-NEXT: slli a0, a0, 24
-; RVA22U64-NEXT: or a0, a0, a4
; RVA22U64-NEXT: or a0, a0, a2
+; RVA22U64-NEXT: or a0, a0, a3
; RVA22U64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RVA22U64-NEXT: vmv.v.x v8, a1
; RVA22U64-NEXT: vslide1down.vx v8, v8, a0
@@ -2632,48 +2632,48 @@ define <16 x i8> @buildvec_v16i8_undef_edges(ptr %p) {
;
; RVA22U64-PACK-LABEL: buildvec_v16i8_undef_edges:
; RVA22U64-PACK: # %bb.0:
-; RVA22U64-PACK-NEXT: lbu a1, 31(a0)
-; RVA22U64-PACK-NEXT: lbu a2, 44(a0)
-; RVA22U64-PACK-NEXT: lbu a3, 55(a0)
-; RVA22U64-PACK-NEXT: lbu a4, 623(a0)
+; RVA22U64-PACK-NEXT: lbu a1, 623(a0)
+; RVA22U64-PACK-NEXT: lbu a2, 31(a0)
+; RVA22U64-PACK-NEXT: lbu a3, 44(a0)
+; RVA22U64-PACK-NEXT: lbu a4, 55(a0)
; RVA22U64-PACK-NEXT: lbu a5, 75(a0)
-; RVA22U64-PACK-NEXT: packh a6, a0, a1
-; RVA22U64-PACK-NEXT: packh a1, a0, a0
-; RVA22U64-PACK-NEXT: packh a2, a2, a3
-; RVA22U64-PACK-NEXT: packh a3, a4, a5
-; RVA22U64-PACK-NEXT: packw a7, a2, a3
+; RVA22U64-PACK-NEXT: packh a6, a0, a2
+; RVA22U64-PACK-NEXT: packh a2, a0, a0
+; RVA22U64-PACK-NEXT: packh a3, a3, a4
+; RVA22U64-PACK-NEXT: packh a1, a1, a5
+; RVA22U64-PACK-NEXT: packw a7, a3, a1
; RVA22U64-PACK-NEXT: lbu a3, 82(a0)
; RVA22U64-PACK-NEXT: lbu a4, 93(a0)
; RVA22U64-PACK-NEXT: lbu a5, 105(a0)
; RVA22U64-PACK-NEXT: lbu a0, 161(a0)
-; RVA22U64-PACK-NEXT: packw a2, a1, a6
-; RVA22U64-PACK-NEXT: pack a2, a2, a7
+; RVA22U64-PACK-NEXT: packw a1, a2, a6
+; RVA22U64-PACK-NEXT: pack a1, a1, a7
; RVA22U64-PACK-NEXT: packh a3, a3, a4
; RVA22U64-PACK-NEXT: packh a0, a5, a0
; RVA22U64-PACK-NEXT: packw a0, a3, a0
; RVA22U64-PACK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RVA22U64-PACK-NEXT: vmv.v.x v8, a2
-; RVA22U64-PACK-NEXT: packw a1, a1, a1
+; RVA22U64-PACK-NEXT: vmv.v.x v8, a1
+; RVA22U64-PACK-NEXT: packw a1, a2, a2
; RVA22U64-PACK-NEXT: pack a0, a0, a1
; RVA22U64-PACK-NEXT: vslide1down.vx v8, v8, a0
; RVA22U64-PACK-NEXT: ret
;
; RV64ZVE32-LABEL: buildvec_v16i8_undef_edges:
; RV64ZVE32: # %bb.0:
-; RV64ZVE32-NEXT: lbu a1, 31(a0)
-; RV64ZVE32-NEXT: lbu a2, 44(a0)
-; RV64ZVE32-NEXT: lbu a3, 55(a0)
-; RV64ZVE32-NEXT: lbu a4, 623(a0)
+; RV64ZVE32-NEXT: lbu a1, 623(a0)
+; RV64ZVE32-NEXT: lbu a2, 31(a0)
+; RV64ZVE32-NEXT: lbu a3, 44(a0)
+; RV64ZVE32-NEXT: lbu a4, 55(a0)
; RV64ZVE32-NEXT: lbu a5, 75(a0)
; RV64ZVE32-NEXT: lbu a6, 82(a0)
; RV64ZVE32-NEXT: lbu a7, 93(a0)
; RV64ZVE32-NEXT: lbu t0, 105(a0)
; RV64ZVE32-NEXT: lbu a0, 161(a0)
; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV64ZVE32-NEXT: vmv.v.x v8, a1
-; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2
+; RV64ZVE32-NEXT: vmv.v.x v8, a2
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a4
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a1
; RV64ZVE32-NEXT: vslide1down.vx v9, v8, a5
; RV64ZVE32-NEXT: vmv.v.x v8, a6
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a7
@@ -2757,30 +2757,30 @@ define <16 x i8> @buildvec_v16i8_loads_undef_scattered(ptr %p) {
; RV32VB: # %bb.0:
; RV32VB-NEXT: lbu a1, 1(a0)
; RV32VB-NEXT: lbu a2, 0(a0)
-; RV32VB-NEXT: slli a1, a1, 8
; RV32VB-NEXT: lbu a3, 55(a0)
; RV32VB-NEXT: lbu a4, 44(a0)
+; RV32VB-NEXT: slli a1, a1, 8
; RV32VB-NEXT: or a1, a2, a1
-; RV32VB-NEXT: lbu a2, 75(a0)
; RV32VB-NEXT: slli a3, a3, 8
; RV32VB-NEXT: or a3, a4, a3
-; RV32VB-NEXT: lbu a4, 93(a0)
+; RV32VB-NEXT: lbu a2, 75(a0)
+; RV32VB-NEXT: lbu a4, 82(a0)
+; RV32VB-NEXT: lbu a5, 93(a0)
+; RV32VB-NEXT: lbu a6, 124(a0)
; RV32VB-NEXT: slli a2, a2, 24
; RV32VB-NEXT: or a2, a3, a2
-; RV32VB-NEXT: lbu a3, 82(a0)
-; RV32VB-NEXT: slli a4, a4, 8
-; RV32VB-NEXT: lbu a5, 144(a0)
-; RV32VB-NEXT: lbu a6, 154(a0)
-; RV32VB-NEXT: or a3, a3, a4
-; RV32VB-NEXT: lbu a0, 124(a0)
-; RV32VB-NEXT: slli a5, a5, 16
-; RV32VB-NEXT: slli a6, a6, 24
-; RV32VB-NEXT: or a4, a6, a5
-; RV32VB-NEXT: or a0, a0, a4
+; RV32VB-NEXT: lbu a3, 144(a0)
+; RV32VB-NEXT: lbu a0, 154(a0)
+; RV32VB-NEXT: slli a5, a5, 8
+; RV32VB-NEXT: or a4, a4, a5
+; RV32VB-NEXT: slli a3, a3, 16
+; RV32VB-NEXT: slli a0, a0, 24
+; RV32VB-NEXT: or a0, a0, a3
+; RV32VB-NEXT: or a0, a6, a0
; RV32VB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32VB-NEXT: vmv.v.x v8, a1
; RV32VB-NEXT: vslide1down.vx v8, v8, a2
-; RV32VB-NEXT: vslide1down.vx v8, v8, a3
+; RV32VB-NEXT: vslide1down.vx v8, v8, a4
; RV32VB-NEXT: vslide1down.vx v8, v8, a0
; RV32VB-NEXT: ret
;
@@ -2790,20 +2790,20 @@ define <16 x i8> @buildvec_v16i8_loads_undef_scattered(ptr %p) {
; RV32VB-PACK-NEXT: lbu a2, 1(a0)
; RV32VB-PACK-NEXT: lbu a3, 44(a0)
; RV32VB-PACK-NEXT: lbu a4, 55(a0)
-; RV32VB-PACK-NEXT: lbu a5, 75(a0)
; RV32VB-PACK-NEXT: packh a1, a1, a2
; RV32VB-PACK-NEXT: packh a2, a3, a4
-; RV32VB-PACK-NEXT: packh a3, a0, a5
+; RV32VB-PACK-NEXT: lbu a3, 75(a0)
; RV32VB-PACK-NEXT: lbu a4, 82(a0)
; RV32VB-PACK-NEXT: lbu a5, 93(a0)
-; RV32VB-PACK-NEXT: lbu a6, 144(a0)
-; RV32VB-PACK-NEXT: lbu a7, 154(a0)
-; RV32VB-PACK-NEXT: lbu a0, 124(a0)
+; RV32VB-PACK-NEXT: lbu a6, 124(a0)
+; RV32VB-PACK-NEXT: lbu a7, 144(a0)
+; RV32VB-PACK-NEXT: lbu a0, 154(a0)
+; RV32VB-PACK-NEXT: packh a3, a0, a3
; RV32VB-PACK-NEXT: pack a2, a2, a3
; RV32VB-PACK-NEXT: packh a3, a4, a5
-; RV32VB-PACK-NEXT: packh a4, a6, a7
-; RV32VB-PACK-NEXT: packh a0, a0, a0
-; RV32VB-PACK-NEXT: pack a0, a0, a4
+; RV32VB-PACK-NEXT: packh a0, a7, a0
+; RV32VB-PACK-NEXT: packh a4, a6, a0
+; RV32VB-PACK-NEXT: pack a0, a4, a0
; RV32VB-PACK-NEXT: packh a4, a0, a0
; RV32VB-PACK-NEXT: pack a1, a1, a4
; RV32VB-PACK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
@@ -2852,32 +2852,32 @@ define <16 x i8> @buildvec_v16i8_loads_undef_scattered(ptr %p) {
; RVA22U64: # %bb.0:
; RVA22U64-NEXT: lbu a1, 1(a0)
; RVA22U64-NEXT: lbu a2, 0(a0)
-; RVA22U64-NEXT: slli a1, a1, 8
; RVA22U64-NEXT: lbu a3, 44(a0)
; RVA22U64-NEXT: lbu a4, 55(a0)
-; RVA22U64-NEXT: or a1, a1, a2
-; RVA22U64-NEXT: lbu a2, 75(a0)
+; RVA22U64-NEXT: slli a1, a1, 8
+; RVA22U64-NEXT: or a6, a2, a1
; RVA22U64-NEXT: slli a3, a3, 32
; RVA22U64-NEXT: slli a4, a4, 40
; RVA22U64-NEXT: or a3, a3, a4
+; RVA22U64-NEXT: lbu a2, 75(a0)
+; RVA22U64-NEXT: lbu a4, 82(a0)
+; RVA22U64-NEXT: lbu a5, 93(a0)
+; RVA22U64-NEXT: lbu a1, 124(a0)
; RVA22U64-NEXT: slli a2, a2, 56
-; RVA22U64-NEXT: lbu a4, 93(a0)
; RVA22U64-NEXT: or a2, a2, a3
-; RVA22U64-NEXT: or a1, a1, a2
-; RVA22U64-NEXT: lbu a2, 82(a0)
-; RVA22U64-NEXT: slli a4, a4, 8
+; RVA22U64-NEXT: or a2, a6, a2
; RVA22U64-NEXT: lbu a3, 144(a0)
-; RVA22U64-NEXT: lbu a5, 154(a0)
-; RVA22U64-NEXT: or a2, a2, a4
-; RVA22U64-NEXT: lbu a0, 124(a0)
+; RVA22U64-NEXT: lbu a0, 154(a0)
+; RVA22U64-NEXT: slli a5, a5, 8
+; RVA22U64-NEXT: or a4, a4, a5
; RVA22U64-NEXT: slli a3, a3, 48
-; RVA22U64-NEXT: slli a5, a5, 56
-; RVA22U64-NEXT: or a3, a3, a5
-; RVA22U64-NEXT: slli a0, a0, 32
+; RVA22U64-NEXT: slli a0, a0, 56
; RVA22U64-NEXT: or a0, a0, a3
-; RVA22U64-NEXT: or a0, a0, a2
+; RVA22U64-NEXT: slli a1, a1, 32
+; RVA22U64-NEXT: or a0, a0, a1
+; RVA22U64-NEXT: or a0, a0, a4
; RVA22U64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RVA22U64-NEXT: vmv.v.x v8, a1
+; RVA22U64-NEXT: vmv.v.x v8, a2
; RVA22U64-NEXT: vslide1down.vx v8, v8, a0
; RVA22U64-NEXT: ret
;
@@ -2887,27 +2887,27 @@ define <16 x i8> @buildvec_v16i8_loads_undef_scattered(ptr %p) {
; RVA22U64-PACK-NEXT: lbu a2, 1(a0)
; RVA22U64-PACK-NEXT: lbu a3, 44(a0)
; RVA22U64-PACK-NEXT: lbu a4, 55(a0)
-; RVA22U64-PACK-NEXT: lbu a5, 75(a0)
-; RVA22U64-PACK-NEXT: packh a1, a1, a2
+; RVA22U64-PACK-NEXT: packh a6, a1, a2
; RVA22U64-PACK-NEXT: packh a2, a3, a4
-; RVA22U64-PACK-NEXT: packh a3, a0, a5
-; RVA22U64-PACK-NEXT: packw a6, a2, a3
-; RVA22U64-PACK-NEXT: packh a3, a0, a0
-; RVA22U64-PACK-NEXT: packw a7, a1, a3
-; RVA22U64-PACK-NEXT: lbu a4, 82(a0)
+; RVA22U64-PACK-NEXT: lbu a3, 75(a0)
+; RVA22U64-PACK-NEXT: lbu a7, 82(a0)
; RVA22U64-PACK-NEXT: lbu a5, 93(a0)
-; RVA22U64-PACK-NEXT: lbu a2, 144(a0)
-; RVA22U64-PACK-NEXT: lbu a1, 154(a0)
-; RVA22U64-PACK-NEXT: lbu a0, 124(a0)
-; RVA22U64-PACK-NEXT: pack a6, a7, a6
-; RVA22U64-PACK-NEXT: packh a4, a4, a5
-; RVA22U64-PACK-NEXT: packh a1, a2, a1
-; RVA22U64-PACK-NEXT: packh a0, a0, a0
-; RVA22U64-PACK-NEXT: packw a0, a0, a1
-; RVA22U64-PACK-NEXT: packw a1, a4, a3
-; RVA22U64-PACK-NEXT: pack a0, a1, a0
+; RVA22U64-PACK-NEXT: lbu t0, 124(a0)
+; RVA22U64-PACK-NEXT: packh a3, a0, a3
+; RVA22U64-PACK-NEXT: packw a2, a2, a3
+; RVA22U64-PACK-NEXT: packh a3, a0, a0
+; RVA22U64-PACK-NEXT: lbu a4, 144(a0)
+; RVA22U64-PACK-NEXT: lbu a0, 154(a0)
+; RVA22U64-PACK-NEXT: packw a1, a6, a3
+; RVA22U64-PACK-NEXT: pack a1, a1, a2
+; RVA22U64-PACK-NEXT: packh a2, a7, a5
+; RVA22U64-PACK-NEXT: packh a0, a4, a0
+; RVA22U64-PACK-NEXT: packh a4, t0, a0
+; RVA22U64-PACK-NEXT: packw a0, a4, a0
+; RVA22U64-PACK-NEXT: packw a2, a2, a3
+; RVA22U64-PACK-NEXT: pack a0, a2, a0
; RVA22U64-PACK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RVA22U64-PACK-NEXT: vmv.v.x v8, a6
+; RVA22U64-PACK-NEXT: vmv.v.x v8, a1
; RVA22U64-PACK-NEXT: vslide1down.vx v8, v8, a0
; RVA22U64-PACK-NEXT: ret
;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access-zve32x.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access-zve32x.ll
index 82e0760d593c26..af46849ae08719 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access-zve32x.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access-zve32x.ll
@@ -7,25 +7,25 @@
define <4 x i1> @load_large_vector(ptr %p) {
; ZVE32X-LABEL: load_large_vector:
; ZVE32X: # %bb.0:
-; ZVE32X-NEXT: ld a1, 80(a0)
-; ZVE32X-NEXT: ld a2, 72(a0)
-; ZVE32X-NEXT: ld a3, 56(a0)
+; ZVE32X-NEXT: ld a1, 0(a0)
+; ZVE32X-NEXT: ld a2, 8(a0)
+; ZVE32X-NEXT: ld a3, 24(a0)
; ZVE32X-NEXT: ld a4, 32(a0)
-; ZVE32X-NEXT: ld a5, 24(a0)
-; ZVE32X-NEXT: ld a6, 48(a0)
-; ZVE32X-NEXT: ld a7, 8(a0)
-; ZVE32X-NEXT: ld a0, 0(a0)
-; ZVE32X-NEXT: xor a4, a5, a4
-; ZVE32X-NEXT: snez a4, a4
+; ZVE32X-NEXT: ld a5, 48(a0)
+; ZVE32X-NEXT: ld a6, 56(a0)
+; ZVE32X-NEXT: ld a7, 72(a0)
+; ZVE32X-NEXT: ld a0, 80(a0)
+; ZVE32X-NEXT: xor a3, a3, a4
+; ZVE32X-NEXT: snez a3, a3
; ZVE32X-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; ZVE32X-NEXT: vmv.s.x v8, a4
+; ZVE32X-NEXT: vmv.s.x v8, a3
; ZVE32X-NEXT: vand.vi v8, v8, 1
; ZVE32X-NEXT: vmsne.vi v0, v8, 0
; ZVE32X-NEXT: vmv.s.x v9, zero
; ZVE32X-NEXT: vmerge.vim v8, v9, 1, v0
-; ZVE32X-NEXT: xor a0, a0, a7
-; ZVE32X-NEXT: snez a0, a0
-; ZVE32X-NEXT: vmv.s.x v10, a0
+; ZVE32X-NEXT: xor a1, a1, a2
+; ZVE32X-NEXT: snez a1, a1
+; ZVE32X-NEXT: vmv.s.x v10, a1
; ZVE32X-NEXT: vand.vi v10, v10, 1
; ZVE32X-NEXT: vmsne.vi v0, v10, 0
; ZVE32X-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
@@ -35,9 +35,9 @@ define <4 x i1> @load_large_vector(ptr %p) {
; ZVE32X-NEXT: vslideup.vi v11, v8, 1
; ZVE32X-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; ZVE32X-NEXT: vmsne.vi v0, v11, 0
-; ZVE32X-NEXT: xor a0, a6, a3
-; ZVE32X-NEXT: snez a0, a0
-; ZVE32X-NEXT: vmv.s.x v8, a0
+; ZVE32X-NEXT: xor a1, a5, a6
+; ZVE32X-NEXT: snez a1, a1
+; ZVE32X-NEXT: vmv.s.x v8, a1
; ZVE32X-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; ZVE32X-NEXT: vand.vi v8, v8, 1
; ZVE32X-NEXT: vmsne.vi v8, v8, 0
@@ -50,8 +50,8 @@ define <4 x i1> @load_large_vector(ptr %p) {
; ZVE32X-NEXT: vslideup.vi v11, v8, 2
; ZVE32X-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; ZVE32X-NEXT: vmsne.vi v0, v11, 0
-; ZVE32X-NEXT: xor a1, a2, a1
-; ZVE32X-NEXT: snez a0, a1
+; ZVE32X-NEXT: xor a0, a7, a0
+; ZVE32X-NEXT: snez a0, a0
; ZVE32X-NEXT: vmv.s.x v8, a0
; ZVE32X-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; ZVE32X-NEXT: vand.vi v8, v8, 1
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll
index a90ee3ebb87668..43184a28ba3238 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll
@@ -777,24 +777,24 @@ define <8 x iXLen> @lrint_v8f64(<8 x double> %x) {
; RV32-NEXT: vfmv.f.s fa5, v10
; RV32-NEXT: fcvt.w.d a2, fa5
; RV32-NEXT: vslidedown.vi v8, v8, 3
+; RV32-NEXT: vfmv.f.s fa5, v8
+; RV32-NEXT: fcvt.w.d a3, fa5
; RV32-NEXT: fld fa5, 32(sp)
-; RV32-NEXT: vfmv.f.s fa4, v8
-; RV32-NEXT: fld fa3, 40(sp)
-; RV32-NEXT: fcvt.w.d a3, fa4
+; RV32-NEXT: fld fa4, 40(sp)
+; RV32-NEXT: fld fa3, 48(sp)
+; RV32-NEXT: fld fa2, 56(sp)
; RV32-NEXT: fcvt.w.d a4, fa5
+; RV32-NEXT: fcvt.w.d a5, fa4
+; RV32-NEXT: fcvt.w.d a6, fa3
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vmv.v.x v8, a1
-; RV32-NEXT: fcvt.w.d a1, fa3
-; RV32-NEXT: fld fa5, 48(sp)
; RV32-NEXT: vslide1down.vx v8, v8, a0
; RV32-NEXT: vslide1down.vx v8, v8, a2
; RV32-NEXT: vslide1down.vx v8, v8, a3
-; RV32-NEXT: fcvt.w.d a0, fa5
-; RV32-NEXT: fld fa5, 56(sp)
; RV32-NEXT: vslide1down.vx v8, v8, a4
-; RV32-NEXT: vslide1down.vx v8, v8, a1
-; RV32-NEXT: vslide1down.vx v8, v8, a0
-; RV32-NEXT: fcvt.w.d a0, fa5
+; RV32-NEXT: vslide1down.vx v8, v8, a5
+; RV32-NEXT: vslide1down.vx v8, v8, a6
+; RV32-NEXT: fcvt.w.d a0, fa2
; RV32-NEXT: vslide1down.vx v8, v8, a0
; RV32-NEXT: addi sp, s0, -128
; RV32-NEXT: lw ra, 124(sp) # 4-byte Folded Reload
@@ -827,24 +827,24 @@ define <8 x iXLen> @lrint_v8f64(<8 x double> %x) {
; RV64-i32-NEXT: vfmv.f.s fa5, v10
; RV64-i32-NEXT: fcvt.l.d a2, fa5
; RV64-i32-NEXT: vslidedown.vi v8, v8, 3
+; RV64-i32-NEXT: vfmv.f.s fa5, v8
+; RV64-i32-NEXT: fcvt.l.d a3, fa5
; RV64-i32-NEXT: fld fa5, 32(sp)
-; RV64-i32-NEXT: vfmv.f.s fa4, v8
-; RV64-i32-NEXT: fld fa3, 40(sp)
-; RV64-i32-NEXT: fcvt.l.d a3, fa4
+; RV64-i32-NEXT: fld fa4, 40(sp)
+; RV64-i32-NEXT: fld fa3, 48(sp)
+; RV64-i32-NEXT: fld fa2, 56(sp)
; RV64-i32-NEXT: fcvt.l.d a4, fa5
+; RV64-i32-NEXT: fcvt.l.d a5, fa4
+; RV64-i32-NEXT: fcvt.l.d a6, fa3
; RV64-i32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64-i32-NEXT: vmv.v.x v8, a1
-; RV64-i32-NEXT: fcvt.l.d a1, fa3
-; RV64-i32-NEXT: fld fa5, 48(sp)
; RV64-i32-NEXT: vslide1down.vx v8, v8, a0
; RV64-i32-NEXT: vslide1down.vx v8, v8, a2
; RV64-i32-NEXT: vslide1down.vx v8, v8, a3
-; RV64-i32-NEXT: fcvt.l.d a0, fa5
-; RV64-i32-NEXT: fld fa5, 56(sp)
; RV64-i32-NEXT: vslide1down.vx v8, v8, a4
-; RV64-i32-NEXT: vslide1down.vx v8, v8, a1
-; RV64-i32-NEXT: vslide1down.vx v8, v8, a0
-; RV64-i32-NEXT: fcvt.l.d a0, fa5
+; RV64-i32-NEXT: vslide1down.vx v8, v8, a5
+; RV64-i32-NEXT: vslide1down.vx v8, v8, a6
+; RV64-i32-NEXT: fcvt.l.d a0, fa2
; RV64-i32-NEXT: vslide1down.vx v8, v8, a0
; RV64-i32-NEXT: addi sp, s0, -128
; RV64-i32-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
index 24a5bd154c64f5..9cd38056364494 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
@@ -3525,9 +3525,9 @@ define <1 x i64> @mgather_v1i64(<1 x ptr> %ptrs, <1 x i1> %m, <1 x i64> %passthr
; RV32ZVE32F-NEXT: bnez a2, .LBB42_2
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV32ZVE32F-NEXT: vmv.x.s a0, v8
-; RV32ZVE32F-NEXT: lw a1, 4(a0)
-; RV32ZVE32F-NEXT: lw a0, 0(a0)
+; RV32ZVE32F-NEXT: vmv.x.s a1, v8
+; RV32ZVE32F-NEXT: lw a0, 0(a1)
+; RV32ZVE32F-NEXT: lw a1, 4(a1)
; RV32ZVE32F-NEXT: .LBB42_2: # %else
; RV32ZVE32F-NEXT: ret
;
@@ -3571,30 +3571,30 @@ define <2 x i64> @mgather_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i64> %passthr
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
-; RV32ZVE32F-NEXT: lw a2, 4(a3)
-; RV32ZVE32F-NEXT: lw a3, 0(a3)
+; RV32ZVE32F-NEXT: lw a2, 0(a3)
+; RV32ZVE32F-NEXT: lw a3, 4(a3)
; RV32ZVE32F-NEXT: andi a4, a4, 2
; RV32ZVE32F-NEXT: bnez a4, .LBB43_4
; RV32ZVE32F-NEXT: .LBB43_2:
-; RV32ZVE32F-NEXT: lw a4, 12(a1)
-; RV32ZVE32F-NEXT: lw a1, 8(a1)
+; RV32ZVE32F-NEXT: lw a4, 8(a1)
+; RV32ZVE32F-NEXT: lw a1, 12(a1)
; RV32ZVE32F-NEXT: j .LBB43_5
; RV32ZVE32F-NEXT: .LBB43_3:
-; RV32ZVE32F-NEXT: lw a2, 4(a1)
-; RV32ZVE32F-NEXT: lw a3, 0(a1)
+; RV32ZVE32F-NEXT: lw a2, 0(a1)
+; RV32ZVE32F-NEXT: lw a3, 4(a1)
; RV32ZVE32F-NEXT: andi a4, a4, 2
; RV32ZVE32F-NEXT: beqz a4, .LBB43_2
; RV32ZVE32F-NEXT: .LBB43_4: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
-; RV32ZVE32F-NEXT: lw a4, 4(a1)
-; RV32ZVE32F-NEXT: lw a1, 0(a1)
+; RV32ZVE32F-NEXT: lw a4, 0(a1)
+; RV32ZVE32F-NEXT: lw a1, 4(a1)
; RV32ZVE32F-NEXT: .LBB43_5: # %else2
-; RV32ZVE32F-NEXT: sw a3, 0(a0)
-; RV32ZVE32F-NEXT: sw a2, 4(a0)
-; RV32ZVE32F-NEXT: sw a1, 8(a0)
-; RV32ZVE32F-NEXT: sw a4, 12(a0)
+; RV32ZVE32F-NEXT: sw a2, 0(a0)
+; RV32ZVE32F-NEXT: sw a3, 4(a0)
+; RV32ZVE32F-NEXT: sw a4, 8(a0)
+; RV32ZVE32F-NEXT: sw a1, 12(a0)
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v2i64:
@@ -3644,60 +3644,60 @@ define <4 x i64> @mgather_v4i64(<4 x ptr> %ptrs, <4 x i1> %m, <4 x i64> %passthr
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
-; RV32ZVE32F-NEXT: lw a2, 4(a3)
-; RV32ZVE32F-NEXT: lw a3, 0(a3)
+; RV32ZVE32F-NEXT: lw a2, 0(a3)
+; RV32ZVE32F-NEXT: lw a3, 4(a3)
; RV32ZVE32F-NEXT: andi a4, a6, 2
; RV32ZVE32F-NEXT: bnez a4, .LBB44_6
; RV32ZVE32F-NEXT: .LBB44_2:
-; RV32ZVE32F-NEXT: lw a4, 12(a1)
-; RV32ZVE32F-NEXT: lw a5, 8(a1)
+; RV32ZVE32F-NEXT: lw a4, 8(a1)
+; RV32ZVE32F-NEXT: lw a5, 12(a1)
; RV32ZVE32F-NEXT: andi a7, a6, 4
; RV32ZVE32F-NEXT: bnez a7, .LBB44_7
; RV32ZVE32F-NEXT: .LBB44_3:
-; RV32ZVE32F-NEXT: lw a7, 20(a1)
-; RV32ZVE32F-NEXT: lw t0, 16(a1)
+; RV32ZVE32F-NEXT: lw a7, 16(a1)
+; RV32ZVE32F-NEXT: lw t0, 20(a1)
; RV32ZVE32F-NEXT: andi a6, a6, 8
; RV32ZVE32F-NEXT: bnez a6, .LBB44_8
; RV32ZVE32F-NEXT: .LBB44_4:
-; RV32ZVE32F-NEXT: lw a6, 28(a1)
-; RV32ZVE32F-NEXT: lw a1, 24(a1)
+; RV32ZVE32F-NEXT: lw a6, 24(a1)
+; RV32ZVE32F-NEXT: lw a1, 28(a1)
; RV32ZVE32F-NEXT: j .LBB44_9
; RV32ZVE32F-NEXT: .LBB44_5:
-; RV32ZVE32F-NEXT: lw a2, 4(a1)
-; RV32ZVE32F-NEXT: lw a3, 0(a1)
+; RV32ZVE32F-NEXT: lw a2, 0(a1)
+; RV32ZVE32F-NEXT: lw a3, 4(a1)
; RV32ZVE32F-NEXT: andi a4, a6, 2
; RV32ZVE32F-NEXT: beqz a4, .LBB44_2
; RV32ZVE32F-NEXT: .LBB44_6: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v9
-; RV32ZVE32F-NEXT: lw a4, 4(a5)
-; RV32ZVE32F-NEXT: lw a5, 0(a5)
+; RV32ZVE32F-NEXT: lw a4, 0(a5)
+; RV32ZVE32F-NEXT: lw a5, 4(a5)
; RV32ZVE32F-NEXT: andi a7, a6, 4
; RV32ZVE32F-NEXT: beqz a7, .LBB44_3
; RV32ZVE32F-NEXT: .LBB44_7: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s t0, v9
-; RV32ZVE32F-NEXT: lw a7, 4(t0)
-; RV32ZVE32F-NEXT: lw t0, 0(t0)
+; RV32ZVE32F-NEXT: lw a7, 0(t0)
+; RV32ZVE32F-NEXT: lw t0, 4(t0)
; RV32ZVE32F-NEXT: andi a6, a6, 8
; RV32ZVE32F-NEXT: beqz a6, .LBB44_4
; RV32ZVE32F-NEXT: .LBB44_8: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
-; RV32ZVE32F-NEXT: lw a6, 4(a1)
-; RV32ZVE32F-NEXT: lw a1, 0(a1)
+; RV32ZVE32F-NEXT: lw a6, 0(a1)
+; RV32ZVE32F-NEXT: lw a1, 4(a1)
; RV32ZVE32F-NEXT: .LBB44_9: # %else8
-; RV32ZVE32F-NEXT: sw a3, 0(a0)
-; RV32ZVE32F-NEXT: sw a2, 4(a0)
-; RV32ZVE32F-NEXT: sw a5, 8(a0)
-; RV32ZVE32F-NEXT: sw a4, 12(a0)
-; RV32ZVE32F-NEXT: sw t0, 16(a0)
-; RV32ZVE32F-NEXT: sw a7, 20(a0)
-; RV32ZVE32F-NEXT: sw a1, 24(a0)
-; RV32ZVE32F-NEXT: sw a6, 28(a0)
+; RV32ZVE32F-NEXT: sw a2, 0(a0)
+; RV32ZVE32F-NEXT: sw a3, 4(a0)
+; RV32ZVE32F-NEXT: sw a4, 8(a0)
+; RV32ZVE32F-NEXT: sw a5, 12(a0)
+; RV32ZVE32F-NEXT: sw a7, 16(a0)
+; RV32ZVE32F-NEXT: sw t0, 20(a0)
+; RV32ZVE32F-NEXT: sw a6, 24(a0)
+; RV32ZVE32F-NEXT: sw a1, 28(a0)
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v4i64:
@@ -3775,18 +3775,18 @@ define <4 x i64> @mgather_truemask_v4i64(<4 x ptr> %ptrs, <4 x i64> %passthru) {
; RV32ZVE32F-NEXT: lw a3, 4(a3)
; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a5, v9
-; RV32ZVE32F-NEXT: lw a6, 0(a5)
-; RV32ZVE32F-NEXT: lw a5, 4(a5)
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3
-; RV32ZVE32F-NEXT: vmv.x.s a7, v8
-; RV32ZVE32F-NEXT: lw t0, 4(a7)
-; RV32ZVE32F-NEXT: lw a7, 0(a7)
+; RV32ZVE32F-NEXT: vmv.x.s a6, v8
+; RV32ZVE32F-NEXT: lw a7, 0(a6)
+; RV32ZVE32F-NEXT: lw a6, 4(a6)
+; RV32ZVE32F-NEXT: lw t0, 0(a5)
+; RV32ZVE32F-NEXT: lw a5, 4(a5)
; RV32ZVE32F-NEXT: sw a1, 4(a0)
; RV32ZVE32F-NEXT: sw a2, 0(a0)
-; RV32ZVE32F-NEXT: sw t0, 28(a0)
+; RV32ZVE32F-NEXT: sw a6, 28(a0)
; RV32ZVE32F-NEXT: sw a7, 24(a0)
; RV32ZVE32F-NEXT: sw a5, 20(a0)
-; RV32ZVE32F-NEXT: sw a6, 16(a0)
+; RV32ZVE32F-NEXT: sw t0, 16(a0)
; RV32ZVE32F-NEXT: sw a3, 12(a0)
; RV32ZVE32F-NEXT: sw a4, 8(a0)
; RV32ZVE32F-NEXT: ret
@@ -3823,22 +3823,22 @@ define <4 x i64> @mgather_falsemask_v4i64(<4 x ptr> %ptrs, <4 x i64> %passthru)
;
; RV32ZVE32F-LABEL: mgather_falsemask_v4i64:
; RV32ZVE32F: # %bb.0:
-; RV32ZVE32F-NEXT: lw a2, 0(a1)
-; RV32ZVE32F-NEXT: lw a3, 4(a1)
-; RV32ZVE32F-NEXT: lw a4, 8(a1)
-; RV32ZVE32F-NEXT: lw a5, 12(a1)
-; RV32ZVE32F-NEXT: lw a6, 28(a1)
-; RV32ZVE32F-NEXT: lw a7, 24(a1)
-; RV32ZVE32F-NEXT: lw t0, 20(a1)
-; RV32ZVE32F-NEXT: lw a1, 16(a1)
-; RV32ZVE32F-NEXT: sw a6, 28(a0)
-; RV32ZVE32F-NEXT: sw a7, 24(a0)
-; RV32ZVE32F-NEXT: sw t0, 20(a0)
-; RV32ZVE32F-NEXT: sw a1, 16(a0)
-; RV32ZVE32F-NEXT: sw a5, 12(a0)
-; RV32ZVE32F-NEXT: sw a4, 8(a0)
-; RV32ZVE32F-NEXT: sw a3, 4(a0)
-; RV32ZVE32F-NEXT: sw a2, 0(a0)
+; RV32ZVE32F-NEXT: lw a2, 16(a1)
+; RV32ZVE32F-NEXT: lw a3, 20(a1)
+; RV32ZVE32F-NEXT: lw a4, 24(a1)
+; RV32ZVE32F-NEXT: lw a5, 28(a1)
+; RV32ZVE32F-NEXT: lw a6, 0(a1)
+; RV32ZVE32F-NEXT: lw a7, 4(a1)
+; RV32ZVE32F-NEXT: lw t0, 8(a1)
+; RV32ZVE32F-NEXT: lw a1, 12(a1)
+; RV32ZVE32F-NEXT: sw a5, 28(a0)
+; RV32ZVE32F-NEXT: sw a4, 24(a0)
+; RV32ZVE32F-NEXT: sw a3, 20(a0)
+; RV32ZVE32F-NEXT: sw a2, 16(a0)
+; RV32ZVE32F-NEXT: sw a1, 12(a0)
+; RV32ZVE32F-NEXT: sw t0, 8(a0)
+; RV32ZVE32F-NEXT: sw a7, 4(a0)
+; RV32ZVE32F-NEXT: sw a6, 0(a0)
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_falsemask_v4i64:
@@ -3882,77 +3882,77 @@ define <8 x i64> @mgather_v8i64(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i64> %passthr
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
-; RV32ZVE32F-NEXT: lw a2, 4(a3)
-; RV32ZVE32F-NEXT: lw a3, 0(a3)
+; RV32ZVE32F-NEXT: lw a2, 0(a3)
+; RV32ZVE32F-NEXT: lw a3, 4(a3)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: bnez a4, .LBB47_8
; RV32ZVE32F-NEXT: .LBB47_2:
-; RV32ZVE32F-NEXT: lw a4, 12(a1)
-; RV32ZVE32F-NEXT: lw a5, 8(a1)
+; RV32ZVE32F-NEXT: lw a4, 8(a1)
+; RV32ZVE32F-NEXT: lw a5, 12(a1)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: bnez a6, .LBB47_9
; RV32ZVE32F-NEXT: .LBB47_3:
-; RV32ZVE32F-NEXT: lw a6, 20(a1)
-; RV32ZVE32F-NEXT: lw a7, 16(a1)
+; RV32ZVE32F-NEXT: lw a6, 16(a1)
+; RV32ZVE32F-NEXT: lw a7, 20(a1)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: bnez t1, .LBB47_10
; RV32ZVE32F-NEXT: .LBB47_4:
-; RV32ZVE32F-NEXT: lw t1, 28(a1)
-; RV32ZVE32F-NEXT: lw t2, 24(a1)
+; RV32ZVE32F-NEXT: lw t1, 24(a1)
+; RV32ZVE32F-NEXT: lw t2, 28(a1)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: bnez t3, .LBB47_11
; RV32ZVE32F-NEXT: .LBB47_5:
-; RV32ZVE32F-NEXT: lw t3, 36(a1)
-; RV32ZVE32F-NEXT: lw t4, 32(a1)
+; RV32ZVE32F-NEXT: lw t3, 32(a1)
+; RV32ZVE32F-NEXT: lw t4, 36(a1)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: bnez t5, .LBB47_12
; RV32ZVE32F-NEXT: .LBB47_6:
-; RV32ZVE32F-NEXT: lw t5, 44(a1)
-; RV32ZVE32F-NEXT: lw t6, 40(a1)
+; RV32ZVE32F-NEXT: lw t5, 40(a1)
+; RV32ZVE32F-NEXT: lw t6, 44(a1)
; RV32ZVE32F-NEXT: j .LBB47_13
; RV32ZVE32F-NEXT: .LBB47_7:
-; RV32ZVE32F-NEXT: lw a2, 4(a1)
-; RV32ZVE32F-NEXT: lw a3, 0(a1)
+; RV32ZVE32F-NEXT: lw a2, 0(a1)
+; RV32ZVE32F-NEXT: lw a3, 4(a1)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: beqz a4, .LBB47_2
; RV32ZVE32F-NEXT: .LBB47_8: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v10
-; RV32ZVE32F-NEXT: lw a4, 4(a5)
-; RV32ZVE32F-NEXT: lw a5, 0(a5)
+; RV32ZVE32F-NEXT: lw a4, 0(a5)
+; RV32ZVE32F-NEXT: lw a5, 4(a5)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: beqz a6, .LBB47_3
; RV32ZVE32F-NEXT: .LBB47_9: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a7, v10
-; RV32ZVE32F-NEXT: lw a6, 4(a7)
-; RV32ZVE32F-NEXT: lw a7, 0(a7)
+; RV32ZVE32F-NEXT: lw a6, 0(a7)
+; RV32ZVE32F-NEXT: lw a7, 4(a7)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: beqz t1, .LBB47_4
; RV32ZVE32F-NEXT: .LBB47_10: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s t2, v10
-; RV32ZVE32F-NEXT: lw t1, 4(t2)
-; RV32ZVE32F-NEXT: lw t2, 0(t2)
+; RV32ZVE32F-NEXT: lw t1, 0(t2)
+; RV32ZVE32F-NEXT: lw t2, 4(t2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: beqz t3, .LBB47_5
; RV32ZVE32F-NEXT: .LBB47_11: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s t4, v10
-; RV32ZVE32F-NEXT: lw t3, 4(t4)
-; RV32ZVE32F-NEXT: lw t4, 0(t4)
+; RV32ZVE32F-NEXT: lw t3, 0(t4)
+; RV32ZVE32F-NEXT: lw t4, 4(t4)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: beqz t5, .LBB47_6
; RV32ZVE32F-NEXT: .LBB47_12: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s t6, v10
-; RV32ZVE32F-NEXT: lw t5, 4(t6)
-; RV32ZVE32F-NEXT: lw t6, 0(t6)
+; RV32ZVE32F-NEXT: lw t5, 0(t6)
+; RV32ZVE32F-NEXT: lw t6, 4(t6)
; RV32ZVE32F-NEXT: .LBB47_13: # %else14
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
@@ -3966,42 +3966,42 @@ define <8 x i64> @mgather_v8i64(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i64> %passthr
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s s1, v10
-; RV32ZVE32F-NEXT: lw s0, 4(s1)
-; RV32ZVE32F-NEXT: lw s1, 0(s1)
+; RV32ZVE32F-NEXT: lw s0, 0(s1)
+; RV32ZVE32F-NEXT: lw s1, 4(s1)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: bnez t0, .LBB47_17
; RV32ZVE32F-NEXT: .LBB47_15:
-; RV32ZVE32F-NEXT: lw t0, 60(a1)
-; RV32ZVE32F-NEXT: lw a1, 56(a1)
+; RV32ZVE32F-NEXT: lw t0, 56(a1)
+; RV32ZVE32F-NEXT: lw a1, 60(a1)
; RV32ZVE32F-NEXT: j .LBB47_18
; RV32ZVE32F-NEXT: .LBB47_16:
-; RV32ZVE32F-NEXT: lw s0, 52(a1)
-; RV32ZVE32F-NEXT: lw s1, 48(a1)
+; RV32ZVE32F-NEXT: lw s0, 48(a1)
+; RV32ZVE32F-NEXT: lw s1, 52(a1)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: beqz t0, .LBB47_15
; RV32ZVE32F-NEXT: .LBB47_17: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
-; RV32ZVE32F-NEXT: lw t0, 4(a1)
-; RV32ZVE32F-NEXT: lw a1, 0(a1)
+; RV32ZVE32F-NEXT: lw t0, 0(a1)
+; RV32ZVE32F-NEXT: lw a1, 4(a1)
; RV32ZVE32F-NEXT: .LBB47_18: # %else20
-; RV32ZVE32F-NEXT: sw a3, 0(a0)
-; RV32ZVE32F-NEXT: sw a2, 4(a0)
-; RV32ZVE32F-NEXT: sw a5, 8(a0)
-; RV32ZVE32F-NEXT: sw a4, 12(a0)
-; RV32ZVE32F-NEXT: sw a7, 16(a0)
-; RV32ZVE32F-NEXT: sw a6, 20(a0)
-; RV32ZVE32F-NEXT: sw t2, 24(a0)
-; RV32ZVE32F-NEXT: sw t1, 28(a0)
-; RV32ZVE32F-NEXT: sw t4, 32(a0)
-; RV32ZVE32F-NEXT: sw t3, 36(a0)
-; RV32ZVE32F-NEXT: sw t6, 40(a0)
-; RV32ZVE32F-NEXT: sw t5, 44(a0)
-; RV32ZVE32F-NEXT: sw s1, 48(a0)
-; RV32ZVE32F-NEXT: sw s0, 52(a0)
-; RV32ZVE32F-NEXT: sw a1, 56(a0)
-; RV32ZVE32F-NEXT: sw t0, 60(a0)
+; RV32ZVE32F-NEXT: sw a2, 0(a0)
+; RV32ZVE32F-NEXT: sw a3, 4(a0)
+; RV32ZVE32F-NEXT: sw a4, 8(a0)
+; RV32ZVE32F-NEXT: sw a5, 12(a0)
+; RV32ZVE32F-NEXT: sw a6, 16(a0)
+; RV32ZVE32F-NEXT: sw a7, 20(a0)
+; RV32ZVE32F-NEXT: sw t1, 24(a0)
+; RV32ZVE32F-NEXT: sw t2, 28(a0)
+; RV32ZVE32F-NEXT: sw t3, 32(a0)
+; RV32ZVE32F-NEXT: sw t4, 36(a0)
+; RV32ZVE32F-NEXT: sw t5, 40(a0)
+; RV32ZVE32F-NEXT: sw t6, 44(a0)
+; RV32ZVE32F-NEXT: sw s0, 48(a0)
+; RV32ZVE32F-NEXT: sw s1, 52(a0)
+; RV32ZVE32F-NEXT: sw t0, 56(a0)
+; RV32ZVE32F-NEXT: sw a1, 60(a0)
; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: addi sp, sp, 16
@@ -4129,77 +4129,77 @@ define <8 x i64> @mgather_baseidx_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 x i1>
; RV32ZVE32F-NEXT: beqz a3, .LBB48_7
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
-; RV32ZVE32F-NEXT: lw a1, 4(a3)
-; RV32ZVE32F-NEXT: lw a3, 0(a3)
+; RV32ZVE32F-NEXT: lw a1, 0(a3)
+; RV32ZVE32F-NEXT: lw a3, 4(a3)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: bnez a4, .LBB48_8
; RV32ZVE32F-NEXT: .LBB48_2:
-; RV32ZVE32F-NEXT: lw a4, 12(a2)
-; RV32ZVE32F-NEXT: lw a5, 8(a2)
+; RV32ZVE32F-NEXT: lw a4, 8(a2)
+; RV32ZVE32F-NEXT: lw a5, 12(a2)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: bnez a6, .LBB48_9
; RV32ZVE32F-NEXT: .LBB48_3:
-; RV32ZVE32F-NEXT: lw a6, 20(a2)
-; RV32ZVE32F-NEXT: lw a7, 16(a2)
+; RV32ZVE32F-NEXT: lw a6, 16(a2)
+; RV32ZVE32F-NEXT: lw a7, 20(a2)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: bnez t1, .LBB48_10
; RV32ZVE32F-NEXT: .LBB48_4:
-; RV32ZVE32F-NEXT: lw t1, 28(a2)
-; RV32ZVE32F-NEXT: lw t2, 24(a2)
+; RV32ZVE32F-NEXT: lw t1, 24(a2)
+; RV32ZVE32F-NEXT: lw t2, 28(a2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: bnez t3, .LBB48_11
; RV32ZVE32F-NEXT: .LBB48_5:
-; RV32ZVE32F-NEXT: lw t3, 36(a2)
-; RV32ZVE32F-NEXT: lw t4, 32(a2)
+; RV32ZVE32F-NEXT: lw t3, 32(a2)
+; RV32ZVE32F-NEXT: lw t4, 36(a2)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: bnez t5, .LBB48_12
; RV32ZVE32F-NEXT: .LBB48_6:
-; RV32ZVE32F-NEXT: lw t5, 44(a2)
-; RV32ZVE32F-NEXT: lw t6, 40(a2)
+; RV32ZVE32F-NEXT: lw t5, 40(a2)
+; RV32ZVE32F-NEXT: lw t6, 44(a2)
; RV32ZVE32F-NEXT: j .LBB48_13
; RV32ZVE32F-NEXT: .LBB48_7:
-; RV32ZVE32F-NEXT: lw a1, 4(a2)
-; RV32ZVE32F-NEXT: lw a3, 0(a2)
+; RV32ZVE32F-NEXT: lw a1, 0(a2)
+; RV32ZVE32F-NEXT: lw a3, 4(a2)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: beqz a4, .LBB48_2
; RV32ZVE32F-NEXT: .LBB48_8: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v10
-; RV32ZVE32F-NEXT: lw a4, 4(a5)
-; RV32ZVE32F-NEXT: lw a5, 0(a5)
+; RV32ZVE32F-NEXT: lw a4, 0(a5)
+; RV32ZVE32F-NEXT: lw a5, 4(a5)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: beqz a6, .LBB48_3
; RV32ZVE32F-NEXT: .LBB48_9: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a7, v10
-; RV32ZVE32F-NEXT: lw a6, 4(a7)
-; RV32ZVE32F-NEXT: lw a7, 0(a7)
+; RV32ZVE32F-NEXT: lw a6, 0(a7)
+; RV32ZVE32F-NEXT: lw a7, 4(a7)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: beqz t1, .LBB48_4
; RV32ZVE32F-NEXT: .LBB48_10: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s t2, v10
-; RV32ZVE32F-NEXT: lw t1, 4(t2)
-; RV32ZVE32F-NEXT: lw t2, 0(t2)
+; RV32ZVE32F-NEXT: lw t1, 0(t2)
+; RV32ZVE32F-NEXT: lw t2, 4(t2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: beqz t3, .LBB48_5
; RV32ZVE32F-NEXT: .LBB48_11: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s t4, v10
-; RV32ZVE32F-NEXT: lw t3, 4(t4)
-; RV32ZVE32F-NEXT: lw t4, 0(t4)
+; RV32ZVE32F-NEXT: lw t3, 0(t4)
+; RV32ZVE32F-NEXT: lw t4, 4(t4)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: beqz t5, .LBB48_6
; RV32ZVE32F-NEXT: .LBB48_12: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s t6, v10
-; RV32ZVE32F-NEXT: lw t5, 4(t6)
-; RV32ZVE32F-NEXT: lw t6, 0(t6)
+; RV32ZVE32F-NEXT: lw t5, 0(t6)
+; RV32ZVE32F-NEXT: lw t6, 4(t6)
; RV32ZVE32F-NEXT: .LBB48_13: # %else14
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
@@ -4213,42 +4213,42 @@ define <8 x i64> @mgather_baseidx_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 x i1>
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s s1, v10
-; RV32ZVE32F-NEXT: lw s0, 4(s1)
-; RV32ZVE32F-NEXT: lw s1, 0(s1)
+; RV32ZVE32F-NEXT: lw s0, 0(s1)
+; RV32ZVE32F-NEXT: lw s1, 4(s1)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: bnez t0, .LBB48_17
; RV32ZVE32F-NEXT: .LBB48_15:
-; RV32ZVE32F-NEXT: lw t0, 60(a2)
-; RV32ZVE32F-NEXT: lw a2, 56(a2)
+; RV32ZVE32F-NEXT: lw t0, 56(a2)
+; RV32ZVE32F-NEXT: lw a2, 60(a2)
; RV32ZVE32F-NEXT: j .LBB48_18
; RV32ZVE32F-NEXT: .LBB48_16:
-; RV32ZVE32F-NEXT: lw s0, 52(a2)
-; RV32ZVE32F-NEXT: lw s1, 48(a2)
+; RV32ZVE32F-NEXT: lw s0, 48(a2)
+; RV32ZVE32F-NEXT: lw s1, 52(a2)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: beqz t0, .LBB48_15
; RV32ZVE32F-NEXT: .LBB48_17: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
-; RV32ZVE32F-NEXT: lw t0, 4(a2)
-; RV32ZVE32F-NEXT: lw a2, 0(a2)
+; RV32ZVE32F-NEXT: lw t0, 0(a2)
+; RV32ZVE32F-NEXT: lw a2, 4(a2)
; RV32ZVE32F-NEXT: .LBB48_18: # %else20
-; RV32ZVE32F-NEXT: sw a3, 0(a0)
-; RV32ZVE32F-NEXT: sw a1, 4(a0)
-; RV32ZVE32F-NEXT: sw a5, 8(a0)
-; RV32ZVE32F-NEXT: sw a4, 12(a0)
-; RV32ZVE32F-NEXT: sw a7, 16(a0)
-; RV32ZVE32F-NEXT: sw a6, 20(a0)
-; RV32ZVE32F-NEXT: sw t2, 24(a0)
-; RV32ZVE32F-NEXT: sw t1, 28(a0)
-; RV32ZVE32F-NEXT: sw t4, 32(a0)
-; RV32ZVE32F-NEXT: sw t3, 36(a0)
-; RV32ZVE32F-NEXT: sw t6, 40(a0)
-; RV32ZVE32F-NEXT: sw t5, 44(a0)
-; RV32ZVE32F-NEXT: sw s1, 48(a0)
-; RV32ZVE32F-NEXT: sw s0, 52(a0)
-; RV32ZVE32F-NEXT: sw a2, 56(a0)
-; RV32ZVE32F-NEXT: sw t0, 60(a0)
+; RV32ZVE32F-NEXT: sw a1, 0(a0)
+; RV32ZVE32F-NEXT: sw a3, 4(a0)
+; RV32ZVE32F-NEXT: sw a4, 8(a0)
+; RV32ZVE32F-NEXT: sw a5, 12(a0)
+; RV32ZVE32F-NEXT: sw a6, 16(a0)
+; RV32ZVE32F-NEXT: sw a7, 20(a0)
+; RV32ZVE32F-NEXT: sw t1, 24(a0)
+; RV32ZVE32F-NEXT: sw t2, 28(a0)
+; RV32ZVE32F-NEXT: sw t3, 32(a0)
+; RV32ZVE32F-NEXT: sw t4, 36(a0)
+; RV32ZVE32F-NEXT: sw t5, 40(a0)
+; RV32ZVE32F-NEXT: sw t6, 44(a0)
+; RV32ZVE32F-NEXT: sw s0, 48(a0)
+; RV32ZVE32F-NEXT: sw s1, 52(a0)
+; RV32ZVE32F-NEXT: sw t0, 56(a0)
+; RV32ZVE32F-NEXT: sw a2, 60(a0)
; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: addi sp, sp, 16
@@ -4403,77 +4403,77 @@ define <8 x i64> @mgather_baseidx_sext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8
; RV32ZVE32F-NEXT: beqz a3, .LBB49_7
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
-; RV32ZVE32F-NEXT: lw a1, 4(a3)
-; RV32ZVE32F-NEXT: lw a3, 0(a3)
+; RV32ZVE32F-NEXT: lw a1, 0(a3)
+; RV32ZVE32F-NEXT: lw a3, 4(a3)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: bnez a4, .LBB49_8
; RV32ZVE32F-NEXT: .LBB49_2:
-; RV32ZVE32F-NEXT: lw a4, 12(a2)
-; RV32ZVE32F-NEXT: lw a5, 8(a2)
+; RV32ZVE32F-NEXT: lw a4, 8(a2)
+; RV32ZVE32F-NEXT: lw a5, 12(a2)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: bnez a6, .LBB49_9
; RV32ZVE32F-NEXT: .LBB49_3:
-; RV32ZVE32F-NEXT: lw a6, 20(a2)
-; RV32ZVE32F-NEXT: lw a7, 16(a2)
+; RV32ZVE32F-NEXT: lw a6, 16(a2)
+; RV32ZVE32F-NEXT: lw a7, 20(a2)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: bnez t1, .LBB49_10
; RV32ZVE32F-NEXT: .LBB49_4:
-; RV32ZVE32F-NEXT: lw t1, 28(a2)
-; RV32ZVE32F-NEXT: lw t2, 24(a2)
+; RV32ZVE32F-NEXT: lw t1, 24(a2)
+; RV32ZVE32F-NEXT: lw t2, 28(a2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: bnez t3, .LBB49_11
; RV32ZVE32F-NEXT: .LBB49_5:
-; RV32ZVE32F-NEXT: lw t3, 36(a2)
-; RV32ZVE32F-NEXT: lw t4, 32(a2)
+; RV32ZVE32F-NEXT: lw t3, 32(a2)
+; RV32ZVE32F-NEXT: lw t4, 36(a2)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: bnez t5, .LBB49_12
; RV32ZVE32F-NEXT: .LBB49_6:
-; RV32ZVE32F-NEXT: lw t5, 44(a2)
-; RV32ZVE32F-NEXT: lw t6, 40(a2)
+; RV32ZVE32F-NEXT: lw t5, 40(a2)
+; RV32ZVE32F-NEXT: lw t6, 44(a2)
; RV32ZVE32F-NEXT: j .LBB49_13
; RV32ZVE32F-NEXT: .LBB49_7:
-; RV32ZVE32F-NEXT: lw a1, 4(a2)
-; RV32ZVE32F-NEXT: lw a3, 0(a2)
+; RV32ZVE32F-NEXT: lw a1, 0(a2)
+; RV32ZVE32F-NEXT: lw a3, 4(a2)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: beqz a4, .LBB49_2
; RV32ZVE32F-NEXT: .LBB49_8: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v10
-; RV32ZVE32F-NEXT: lw a4, 4(a5)
-; RV32ZVE32F-NEXT: lw a5, 0(a5)
+; RV32ZVE32F-NEXT: lw a4, 0(a5)
+; RV32ZVE32F-NEXT: lw a5, 4(a5)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: beqz a6, .LBB49_3
; RV32ZVE32F-NEXT: .LBB49_9: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a7, v10
-; RV32ZVE32F-NEXT: lw a6, 4(a7)
-; RV32ZVE32F-NEXT: lw a7, 0(a7)
+; RV32ZVE32F-NEXT: lw a6, 0(a7)
+; RV32ZVE32F-NEXT: lw a7, 4(a7)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: beqz t1, .LBB49_4
; RV32ZVE32F-NEXT: .LBB49_10: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s t2, v10
-; RV32ZVE32F-NEXT: lw t1, 4(t2)
-; RV32ZVE32F-NEXT: lw t2, 0(t2)
+; RV32ZVE32F-NEXT: lw t1, 0(t2)
+; RV32ZVE32F-NEXT: lw t2, 4(t2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: beqz t3, .LBB49_5
; RV32ZVE32F-NEXT: .LBB49_11: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s t4, v10
-; RV32ZVE32F-NEXT: lw t3, 4(t4)
-; RV32ZVE32F-NEXT: lw t4, 0(t4)
+; RV32ZVE32F-NEXT: lw t3, 0(t4)
+; RV32ZVE32F-NEXT: lw t4, 4(t4)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: beqz t5, .LBB49_6
; RV32ZVE32F-NEXT: .LBB49_12: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s t6, v10
-; RV32ZVE32F-NEXT: lw t5, 4(t6)
-; RV32ZVE32F-NEXT: lw t6, 0(t6)
+; RV32ZVE32F-NEXT: lw t5, 0(t6)
+; RV32ZVE32F-NEXT: lw t6, 4(t6)
; RV32ZVE32F-NEXT: .LBB49_13: # %else14
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
@@ -4487,42 +4487,42 @@ define <8 x i64> @mgather_baseidx_sext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s s1, v10
-; RV32ZVE32F-NEXT: lw s0, 4(s1)
-; RV32ZVE32F-NEXT: lw s1, 0(s1)
+; RV32ZVE32F-NEXT: lw s0, 0(s1)
+; RV32ZVE32F-NEXT: lw s1, 4(s1)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: bnez t0, .LBB49_17
; RV32ZVE32F-NEXT: .LBB49_15:
-; RV32ZVE32F-NEXT: lw t0, 60(a2)
-; RV32ZVE32F-NEXT: lw a2, 56(a2)
+; RV32ZVE32F-NEXT: lw t0, 56(a2)
+; RV32ZVE32F-NEXT: lw a2, 60(a2)
; RV32ZVE32F-NEXT: j .LBB49_18
; RV32ZVE32F-NEXT: .LBB49_16:
-; RV32ZVE32F-NEXT: lw s0, 52(a2)
-; RV32ZVE32F-NEXT: lw s1, 48(a2)
+; RV32ZVE32F-NEXT: lw s0, 48(a2)
+; RV32ZVE32F-NEXT: lw s1, 52(a2)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: beqz t0, .LBB49_15
; RV32ZVE32F-NEXT: .LBB49_17: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
-; RV32ZVE32F-NEXT: lw t0, 4(a2)
-; RV32ZVE32F-NEXT: lw a2, 0(a2)
+; RV32ZVE32F-NEXT: lw t0, 0(a2)
+; RV32ZVE32F-NEXT: lw a2, 4(a2)
; RV32ZVE32F-NEXT: .LBB49_18: # %else20
-; RV32ZVE32F-NEXT: sw a3, 0(a0)
-; RV32ZVE32F-NEXT: sw a1, 4(a0)
-; RV32ZVE32F-NEXT: sw a5, 8(a0)
-; RV32ZVE32F-NEXT: sw a4, 12(a0)
-; RV32ZVE32F-NEXT: sw a7, 16(a0)
-; RV32ZVE32F-NEXT: sw a6, 20(a0)
-; RV32ZVE32F-NEXT: sw t2, 24(a0)
-; RV32ZVE32F-NEXT: sw t1, 28(a0)
-; RV32ZVE32F-NEXT: sw t4, 32(a0)
-; RV32ZVE32F-NEXT: sw t3, 36(a0)
-; RV32ZVE32F-NEXT: sw t6, 40(a0)
-; RV32ZVE32F-NEXT: sw t5, 44(a0)
-; RV32ZVE32F-NEXT: sw s1, 48(a0)
-; RV32ZVE32F-NEXT: sw s0, 52(a0)
-; RV32ZVE32F-NEXT: sw a2, 56(a0)
-; RV32ZVE32F-NEXT: sw t0, 60(a0)
+; RV32ZVE32F-NEXT: sw a1, 0(a0)
+; RV32ZVE32F-NEXT: sw a3, 4(a0)
+; RV32ZVE32F-NEXT: sw a4, 8(a0)
+; RV32ZVE32F-NEXT: sw a5, 12(a0)
+; RV32ZVE32F-NEXT: sw a6, 16(a0)
+; RV32ZVE32F-NEXT: sw a7, 20(a0)
+; RV32ZVE32F-NEXT: sw t1, 24(a0)
+; RV32ZVE32F-NEXT: sw t2, 28(a0)
+; RV32ZVE32F-NEXT: sw t3, 32(a0)
+; RV32ZVE32F-NEXT: sw t4, 36(a0)
+; RV32ZVE32F-NEXT: sw t5, 40(a0)
+; RV32ZVE32F-NEXT: sw t6, 44(a0)
+; RV32ZVE32F-NEXT: sw s0, 48(a0)
+; RV32ZVE32F-NEXT: sw s1, 52(a0)
+; RV32ZVE32F-NEXT: sw t0, 56(a0)
+; RV32ZVE32F-NEXT: sw a2, 60(a0)
; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: addi sp, sp, 16
@@ -4679,77 +4679,77 @@ define <8 x i64> @mgather_baseidx_zext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8
; RV32ZVE32F-NEXT: beqz a3, .LBB50_7
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
-; RV32ZVE32F-NEXT: lw a1, 4(a3)
-; RV32ZVE32F-NEXT: lw a3, 0(a3)
+; RV32ZVE32F-NEXT: lw a1, 0(a3)
+; RV32ZVE32F-NEXT: lw a3, 4(a3)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: bnez a4, .LBB50_8
; RV32ZVE32F-NEXT: .LBB50_2:
-; RV32ZVE32F-NEXT: lw a4, 12(a2)
-; RV32ZVE32F-NEXT: lw a5, 8(a2)
+; RV32ZVE32F-NEXT: lw a4, 8(a2)
+; RV32ZVE32F-NEXT: lw a5, 12(a2)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: bnez a6, .LBB50_9
; RV32ZVE32F-NEXT: .LBB50_3:
-; RV32ZVE32F-NEXT: lw a6, 20(a2)
-; RV32ZVE32F-NEXT: lw a7, 16(a2)
+; RV32ZVE32F-NEXT: lw a6, 16(a2)
+; RV32ZVE32F-NEXT: lw a7, 20(a2)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: bnez t1, .LBB50_10
; RV32ZVE32F-NEXT: .LBB50_4:
-; RV32ZVE32F-NEXT: lw t1, 28(a2)
-; RV32ZVE32F-NEXT: lw t2, 24(a2)
+; RV32ZVE32F-NEXT: lw t1, 24(a2)
+; RV32ZVE32F-NEXT: lw t2, 28(a2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: bnez t3, .LBB50_11
; RV32ZVE32F-NEXT: .LBB50_5:
-; RV32ZVE32F-NEXT: lw t3, 36(a2)
-; RV32ZVE32F-NEXT: lw t4, 32(a2)
+; RV32ZVE32F-NEXT: lw t3, 32(a2)
+; RV32ZVE32F-NEXT: lw t4, 36(a2)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: bnez t5, .LBB50_12
; RV32ZVE32F-NEXT: .LBB50_6:
-; RV32ZVE32F-NEXT: lw t5, 44(a2)
-; RV32ZVE32F-NEXT: lw t6, 40(a2)
+; RV32ZVE32F-NEXT: lw t5, 40(a2)
+; RV32ZVE32F-NEXT: lw t6, 44(a2)
; RV32ZVE32F-NEXT: j .LBB50_13
; RV32ZVE32F-NEXT: .LBB50_7:
-; RV32ZVE32F-NEXT: lw a1, 4(a2)
-; RV32ZVE32F-NEXT: lw a3, 0(a2)
+; RV32ZVE32F-NEXT: lw a1, 0(a2)
+; RV32ZVE32F-NEXT: lw a3, 4(a2)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: beqz a4, .LBB50_2
; RV32ZVE32F-NEXT: .LBB50_8: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v10
-; RV32ZVE32F-NEXT: lw a4, 4(a5)
-; RV32ZVE32F-NEXT: lw a5, 0(a5)
+; RV32ZVE32F-NEXT: lw a4, 0(a5)
+; RV32ZVE32F-NEXT: lw a5, 4(a5)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: beqz a6, .LBB50_3
; RV32ZVE32F-NEXT: .LBB50_9: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a7, v10
-; RV32ZVE32F-NEXT: lw a6, 4(a7)
-; RV32ZVE32F-NEXT: lw a7, 0(a7)
+; RV32ZVE32F-NEXT: lw a6, 0(a7)
+; RV32ZVE32F-NEXT: lw a7, 4(a7)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: beqz t1, .LBB50_4
; RV32ZVE32F-NEXT: .LBB50_10: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s t2, v10
-; RV32ZVE32F-NEXT: lw t1, 4(t2)
-; RV32ZVE32F-NEXT: lw t2, 0(t2)
+; RV32ZVE32F-NEXT: lw t1, 0(t2)
+; RV32ZVE32F-NEXT: lw t2, 4(t2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: beqz t3, .LBB50_5
; RV32ZVE32F-NEXT: .LBB50_11: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s t4, v10
-; RV32ZVE32F-NEXT: lw t3, 4(t4)
-; RV32ZVE32F-NEXT: lw t4, 0(t4)
+; RV32ZVE32F-NEXT: lw t3, 0(t4)
+; RV32ZVE32F-NEXT: lw t4, 4(t4)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: beqz t5, .LBB50_6
; RV32ZVE32F-NEXT: .LBB50_12: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s t6, v10
-; RV32ZVE32F-NEXT: lw t5, 4(t6)
-; RV32ZVE32F-NEXT: lw t6, 0(t6)
+; RV32ZVE32F-NEXT: lw t5, 0(t6)
+; RV32ZVE32F-NEXT: lw t6, 4(t6)
; RV32ZVE32F-NEXT: .LBB50_13: # %else14
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
@@ -4763,42 +4763,42 @@ define <8 x i64> @mgather_baseidx_zext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s s1, v10
-; RV32ZVE32F-NEXT: lw s0, 4(s1)
-; RV32ZVE32F-NEXT: lw s1, 0(s1)
+; RV32ZVE32F-NEXT: lw s0, 0(s1)
+; RV32ZVE32F-NEXT: lw s1, 4(s1)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: bnez t0, .LBB50_17
; RV32ZVE32F-NEXT: .LBB50_15:
-; RV32ZVE32F-NEXT: lw t0, 60(a2)
-; RV32ZVE32F-NEXT: lw a2, 56(a2)
+; RV32ZVE32F-NEXT: lw t0, 56(a2)
+; RV32ZVE32F-NEXT: lw a2, 60(a2)
; RV32ZVE32F-NEXT: j .LBB50_18
; RV32ZVE32F-NEXT: .LBB50_16:
-; RV32ZVE32F-NEXT: lw s0, 52(a2)
-; RV32ZVE32F-NEXT: lw s1, 48(a2)
+; RV32ZVE32F-NEXT: lw s0, 48(a2)
+; RV32ZVE32F-NEXT: lw s1, 52(a2)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: beqz t0, .LBB50_15
; RV32ZVE32F-NEXT: .LBB50_17: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
-; RV32ZVE32F-NEXT: lw t0, 4(a2)
-; RV32ZVE32F-NEXT: lw a2, 0(a2)
+; RV32ZVE32F-NEXT: lw t0, 0(a2)
+; RV32ZVE32F-NEXT: lw a2, 4(a2)
; RV32ZVE32F-NEXT: .LBB50_18: # %else20
-; RV32ZVE32F-NEXT: sw a3, 0(a0)
-; RV32ZVE32F-NEXT: sw a1, 4(a0)
-; RV32ZVE32F-NEXT: sw a5, 8(a0)
-; RV32ZVE32F-NEXT: sw a4, 12(a0)
-; RV32ZVE32F-NEXT: sw a7, 16(a0)
-; RV32ZVE32F-NEXT: sw a6, 20(a0)
-; RV32ZVE32F-NEXT: sw t2, 24(a0)
-; RV32ZVE32F-NEXT: sw t1, 28(a0)
-; RV32ZVE32F-NEXT: sw t4, 32(a0)
-; RV32ZVE32F-NEXT: sw t3, 36(a0)
-; RV32ZVE32F-NEXT: sw t6, 40(a0)
-; RV32ZVE32F-NEXT: sw t5, 44(a0)
-; RV32ZVE32F-NEXT: sw s1, 48(a0)
-; RV32ZVE32F-NEXT: sw s0, 52(a0)
-; RV32ZVE32F-NEXT: sw a2, 56(a0)
-; RV32ZVE32F-NEXT: sw t0, 60(a0)
+; RV32ZVE32F-NEXT: sw a1, 0(a0)
+; RV32ZVE32F-NEXT: sw a3, 4(a0)
+; RV32ZVE32F-NEXT: sw a4, 8(a0)
+; RV32ZVE32F-NEXT: sw a5, 12(a0)
+; RV32ZVE32F-NEXT: sw a6, 16(a0)
+; RV32ZVE32F-NEXT: sw a7, 20(a0)
+; RV32ZVE32F-NEXT: sw t1, 24(a0)
+; RV32ZVE32F-NEXT: sw t2, 28(a0)
+; RV32ZVE32F-NEXT: sw t3, 32(a0)
+; RV32ZVE32F-NEXT: sw t4, 36(a0)
+; RV32ZVE32F-NEXT: sw t5, 40(a0)
+; RV32ZVE32F-NEXT: sw t6, 44(a0)
+; RV32ZVE32F-NEXT: sw s0, 48(a0)
+; RV32ZVE32F-NEXT: sw s1, 52(a0)
+; RV32ZVE32F-NEXT: sw t0, 56(a0)
+; RV32ZVE32F-NEXT: sw a2, 60(a0)
; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: addi sp, sp, 16
@@ -4962,77 +4962,77 @@ define <8 x i64> @mgather_baseidx_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x i
; RV32ZVE32F-NEXT: beqz a3, .LBB51_7
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
-; RV32ZVE32F-NEXT: lw a1, 4(a3)
-; RV32ZVE32F-NEXT: lw a3, 0(a3)
+; RV32ZVE32F-NEXT: lw a1, 0(a3)
+; RV32ZVE32F-NEXT: lw a3, 4(a3)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: bnez a4, .LBB51_8
; RV32ZVE32F-NEXT: .LBB51_2:
-; RV32ZVE32F-NEXT: lw a4, 12(a2)
-; RV32ZVE32F-NEXT: lw a5, 8(a2)
+; RV32ZVE32F-NEXT: lw a4, 8(a2)
+; RV32ZVE32F-NEXT: lw a5, 12(a2)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: bnez a6, .LBB51_9
; RV32ZVE32F-NEXT: .LBB51_3:
-; RV32ZVE32F-NEXT: lw a6, 20(a2)
-; RV32ZVE32F-NEXT: lw a7, 16(a2)
+; RV32ZVE32F-NEXT: lw a6, 16(a2)
+; RV32ZVE32F-NEXT: lw a7, 20(a2)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: bnez t1, .LBB51_10
; RV32ZVE32F-NEXT: .LBB51_4:
-; RV32ZVE32F-NEXT: lw t1, 28(a2)
-; RV32ZVE32F-NEXT: lw t2, 24(a2)
+; RV32ZVE32F-NEXT: lw t1, 24(a2)
+; RV32ZVE32F-NEXT: lw t2, 28(a2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: bnez t3, .LBB51_11
; RV32ZVE32F-NEXT: .LBB51_5:
-; RV32ZVE32F-NEXT: lw t3, 36(a2)
-; RV32ZVE32F-NEXT: lw t4, 32(a2)
+; RV32ZVE32F-NEXT: lw t3, 32(a2)
+; RV32ZVE32F-NEXT: lw t4, 36(a2)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: bnez t5, .LBB51_12
; RV32ZVE32F-NEXT: .LBB51_6:
-; RV32ZVE32F-NEXT: lw t5, 44(a2)
-; RV32ZVE32F-NEXT: lw t6, 40(a2)
+; RV32ZVE32F-NEXT: lw t5, 40(a2)
+; RV32ZVE32F-NEXT: lw t6, 44(a2)
; RV32ZVE32F-NEXT: j .LBB51_13
; RV32ZVE32F-NEXT: .LBB51_7:
-; RV32ZVE32F-NEXT: lw a1, 4(a2)
-; RV32ZVE32F-NEXT: lw a3, 0(a2)
+; RV32ZVE32F-NEXT: lw a1, 0(a2)
+; RV32ZVE32F-NEXT: lw a3, 4(a2)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: beqz a4, .LBB51_2
; RV32ZVE32F-NEXT: .LBB51_8: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v10
-; RV32ZVE32F-NEXT: lw a4, 4(a5)
-; RV32ZVE32F-NEXT: lw a5, 0(a5)
+; RV32ZVE32F-NEXT: lw a4, 0(a5)
+; RV32ZVE32F-NEXT: lw a5, 4(a5)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: beqz a6, .LBB51_3
; RV32ZVE32F-NEXT: .LBB51_9: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a7, v10
-; RV32ZVE32F-NEXT: lw a6, 4(a7)
-; RV32ZVE32F-NEXT: lw a7, 0(a7)
+; RV32ZVE32F-NEXT: lw a6, 0(a7)
+; RV32ZVE32F-NEXT: lw a7, 4(a7)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: beqz t1, .LBB51_4
; RV32ZVE32F-NEXT: .LBB51_10: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s t2, v10
-; RV32ZVE32F-NEXT: lw t1, 4(t2)
-; RV32ZVE32F-NEXT: lw t2, 0(t2)
+; RV32ZVE32F-NEXT: lw t1, 0(t2)
+; RV32ZVE32F-NEXT: lw t2, 4(t2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: beqz t3, .LBB51_5
; RV32ZVE32F-NEXT: .LBB51_11: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s t4, v10
-; RV32ZVE32F-NEXT: lw t3, 4(t4)
-; RV32ZVE32F-NEXT: lw t4, 0(t4)
+; RV32ZVE32F-NEXT: lw t3, 0(t4)
+; RV32ZVE32F-NEXT: lw t4, 4(t4)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: beqz t5, .LBB51_6
; RV32ZVE32F-NEXT: .LBB51_12: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s t6, v10
-; RV32ZVE32F-NEXT: lw t5, 4(t6)
-; RV32ZVE32F-NEXT: lw t6, 0(t6)
+; RV32ZVE32F-NEXT: lw t5, 0(t6)
+; RV32ZVE32F-NEXT: lw t6, 4(t6)
; RV32ZVE32F-NEXT: .LBB51_13: # %else14
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
@@ -5046,42 +5046,42 @@ define <8 x i64> @mgather_baseidx_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x i
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s s1, v10
-; RV32ZVE32F-NEXT: lw s0, 4(s1)
-; RV32ZVE32F-NEXT: lw s1, 0(s1)
+; RV32ZVE32F-NEXT: lw s0, 0(s1)
+; RV32ZVE32F-NEXT: lw s1, 4(s1)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: bnez t0, .LBB51_17
; RV32ZVE32F-NEXT: .LBB51_15:
-; RV32ZVE32F-NEXT: lw t0, 60(a2)
-; RV32ZVE32F-NEXT: lw a2, 56(a2)
+; RV32ZVE32F-NEXT: lw t0, 56(a2)
+; RV32ZVE32F-NEXT: lw a2, 60(a2)
; RV32ZVE32F-NEXT: j .LBB51_18
; RV32ZVE32F-NEXT: .LBB51_16:
-; RV32ZVE32F-NEXT: lw s0, 52(a2)
-; RV32ZVE32F-NEXT: lw s1, 48(a2)
+; RV32ZVE32F-NEXT: lw s0, 48(a2)
+; RV32ZVE32F-NEXT: lw s1, 52(a2)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: beqz t0, .LBB51_15
; RV32ZVE32F-NEXT: .LBB51_17: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
-; RV32ZVE32F-NEXT: lw t0, 4(a2)
-; RV32ZVE32F-NEXT: lw a2, 0(a2)
+; RV32ZVE32F-NEXT: lw t0, 0(a2)
+; RV32ZVE32F-NEXT: lw a2, 4(a2)
; RV32ZVE32F-NEXT: .LBB51_18: # %else20
-; RV32ZVE32F-NEXT: sw a3, 0(a0)
-; RV32ZVE32F-NEXT: sw a1, 4(a0)
-; RV32ZVE32F-NEXT: sw a5, 8(a0)
-; RV32ZVE32F-NEXT: sw a4, 12(a0)
-; RV32ZVE32F-NEXT: sw a7, 16(a0)
-; RV32ZVE32F-NEXT: sw a6, 20(a0)
-; RV32ZVE32F-NEXT: sw t2, 24(a0)
-; RV32ZVE32F-NEXT: sw t1, 28(a0)
-; RV32ZVE32F-NEXT: sw t4, 32(a0)
-; RV32ZVE32F-NEXT: sw t3, 36(a0)
-; RV32ZVE32F-NEXT: sw t6, 40(a0)
-; RV32ZVE32F-NEXT: sw t5, 44(a0)
-; RV32ZVE32F-NEXT: sw s1, 48(a0)
-; RV32ZVE32F-NEXT: sw s0, 52(a0)
-; RV32ZVE32F-NEXT: sw a2, 56(a0)
-; RV32ZVE32F-NEXT: sw t0, 60(a0)
+; RV32ZVE32F-NEXT: sw a1, 0(a0)
+; RV32ZVE32F-NEXT: sw a3, 4(a0)
+; RV32ZVE32F-NEXT: sw a4, 8(a0)
+; RV32ZVE32F-NEXT: sw a5, 12(a0)
+; RV32ZVE32F-NEXT: sw a6, 16(a0)
+; RV32ZVE32F-NEXT: sw a7, 20(a0)
+; RV32ZVE32F-NEXT: sw t1, 24(a0)
+; RV32ZVE32F-NEXT: sw t2, 28(a0)
+; RV32ZVE32F-NEXT: sw t3, 32(a0)
+; RV32ZVE32F-NEXT: sw t4, 36(a0)
+; RV32ZVE32F-NEXT: sw t5, 40(a0)
+; RV32ZVE32F-NEXT: sw t6, 44(a0)
+; RV32ZVE32F-NEXT: sw s0, 48(a0)
+; RV32ZVE32F-NEXT: sw s1, 52(a0)
+; RV32ZVE32F-NEXT: sw t0, 56(a0)
+; RV32ZVE32F-NEXT: sw a2, 60(a0)
; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: addi sp, sp, 16
@@ -5237,77 +5237,77 @@ define <8 x i64> @mgather_baseidx_sext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <
; RV32ZVE32F-NEXT: beqz a3, .LBB52_7
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
-; RV32ZVE32F-NEXT: lw a1, 4(a3)
-; RV32ZVE32F-NEXT: lw a3, 0(a3)
+; RV32ZVE32F-NEXT: lw a1, 0(a3)
+; RV32ZVE32F-NEXT: lw a3, 4(a3)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: bnez a4, .LBB52_8
; RV32ZVE32F-NEXT: .LBB52_2:
-; RV32ZVE32F-NEXT: lw a4, 12(a2)
-; RV32ZVE32F-NEXT: lw a5, 8(a2)
+; RV32ZVE32F-NEXT: lw a4, 8(a2)
+; RV32ZVE32F-NEXT: lw a5, 12(a2)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: bnez a6, .LBB52_9
; RV32ZVE32F-NEXT: .LBB52_3:
-; RV32ZVE32F-NEXT: lw a6, 20(a2)
-; RV32ZVE32F-NEXT: lw a7, 16(a2)
+; RV32ZVE32F-NEXT: lw a6, 16(a2)
+; RV32ZVE32F-NEXT: lw a7, 20(a2)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: bnez t1, .LBB52_10
; RV32ZVE32F-NEXT: .LBB52_4:
-; RV32ZVE32F-NEXT: lw t1, 28(a2)
-; RV32ZVE32F-NEXT: lw t2, 24(a2)
+; RV32ZVE32F-NEXT: lw t1, 24(a2)
+; RV32ZVE32F-NEXT: lw t2, 28(a2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: bnez t3, .LBB52_11
; RV32ZVE32F-NEXT: .LBB52_5:
-; RV32ZVE32F-NEXT: lw t3, 36(a2)
-; RV32ZVE32F-NEXT: lw t4, 32(a2)
+; RV32ZVE32F-NEXT: lw t3, 32(a2)
+; RV32ZVE32F-NEXT: lw t4, 36(a2)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: bnez t5, .LBB52_12
; RV32ZVE32F-NEXT: .LBB52_6:
-; RV32ZVE32F-NEXT: lw t5, 44(a2)
-; RV32ZVE32F-NEXT: lw t6, 40(a2)
+; RV32ZVE32F-NEXT: lw t5, 40(a2)
+; RV32ZVE32F-NEXT: lw t6, 44(a2)
; RV32ZVE32F-NEXT: j .LBB52_13
; RV32ZVE32F-NEXT: .LBB52_7:
-; RV32ZVE32F-NEXT: lw a1, 4(a2)
-; RV32ZVE32F-NEXT: lw a3, 0(a2)
+; RV32ZVE32F-NEXT: lw a1, 0(a2)
+; RV32ZVE32F-NEXT: lw a3, 4(a2)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: beqz a4, .LBB52_2
; RV32ZVE32F-NEXT: .LBB52_8: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v10
-; RV32ZVE32F-NEXT: lw a4, 4(a5)
-; RV32ZVE32F-NEXT: lw a5, 0(a5)
+; RV32ZVE32F-NEXT: lw a4, 0(a5)
+; RV32ZVE32F-NEXT: lw a5, 4(a5)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: beqz a6, .LBB52_3
; RV32ZVE32F-NEXT: .LBB52_9: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a7, v10
-; RV32ZVE32F-NEXT: lw a6, 4(a7)
-; RV32ZVE32F-NEXT: lw a7, 0(a7)
+; RV32ZVE32F-NEXT: lw a6, 0(a7)
+; RV32ZVE32F-NEXT: lw a7, 4(a7)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: beqz t1, .LBB52_4
; RV32ZVE32F-NEXT: .LBB52_10: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s t2, v10
-; RV32ZVE32F-NEXT: lw t1, 4(t2)
-; RV32ZVE32F-NEXT: lw t2, 0(t2)
+; RV32ZVE32F-NEXT: lw t1, 0(t2)
+; RV32ZVE32F-NEXT: lw t2, 4(t2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: beqz t3, .LBB52_5
; RV32ZVE32F-NEXT: .LBB52_11: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s t4, v10
-; RV32ZVE32F-NEXT: lw t3, 4(t4)
-; RV32ZVE32F-NEXT: lw t4, 0(t4)
+; RV32ZVE32F-NEXT: lw t3, 0(t4)
+; RV32ZVE32F-NEXT: lw t4, 4(t4)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: beqz t5, .LBB52_6
; RV32ZVE32F-NEXT: .LBB52_12: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s t6, v10
-; RV32ZVE32F-NEXT: lw t5, 4(t6)
-; RV32ZVE32F-NEXT: lw t6, 0(t6)
+; RV32ZVE32F-NEXT: lw t5, 0(t6)
+; RV32ZVE32F-NEXT: lw t6, 4(t6)
; RV32ZVE32F-NEXT: .LBB52_13: # %else14
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
@@ -5321,42 +5321,42 @@ define <8 x i64> @mgather_baseidx_sext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s s1, v10
-; RV32ZVE32F-NEXT: lw s0, 4(s1)
-; RV32ZVE32F-NEXT: lw s1, 0(s1)
+; RV32ZVE32F-NEXT: lw s0, 0(s1)
+; RV32ZVE32F-NEXT: lw s1, 4(s1)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: bnez t0, .LBB52_17
; RV32ZVE32F-NEXT: .LBB52_15:
-; RV32ZVE32F-NEXT: lw t0, 60(a2)
-; RV32ZVE32F-NEXT: lw a2, 56(a2)
+; RV32ZVE32F-NEXT: lw t0, 56(a2)
+; RV32ZVE32F-NEXT: lw a2, 60(a2)
; RV32ZVE32F-NEXT: j .LBB52_18
; RV32ZVE32F-NEXT: .LBB52_16:
-; RV32ZVE32F-NEXT: lw s0, 52(a2)
-; RV32ZVE32F-NEXT: lw s1, 48(a2)
+; RV32ZVE32F-NEXT: lw s0, 48(a2)
+; RV32ZVE32F-NEXT: lw s1, 52(a2)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: beqz t0, .LBB52_15
; RV32ZVE32F-NEXT: .LBB52_17: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
-; RV32ZVE32F-NEXT: lw t0, 4(a2)
-; RV32ZVE32F-NEXT: lw a2, 0(a2)
+; RV32ZVE32F-NEXT: lw t0, 0(a2)
+; RV32ZVE32F-NEXT: lw a2, 4(a2)
; RV32ZVE32F-NEXT: .LBB52_18: # %else20
-; RV32ZVE32F-NEXT: sw a3, 0(a0)
-; RV32ZVE32F-NEXT: sw a1, 4(a0)
-; RV32ZVE32F-NEXT: sw a5, 8(a0)
-; RV32ZVE32F-NEXT: sw a4, 12(a0)
-; RV32ZVE32F-NEXT: sw a7, 16(a0)
-; RV32ZVE32F-NEXT: sw a6, 20(a0)
-; RV32ZVE32F-NEXT: sw t2, 24(a0)
-; RV32ZVE32F-NEXT: sw t1, 28(a0)
-; RV32ZVE32F-NEXT: sw t4, 32(a0)
-; RV32ZVE32F-NEXT: sw t3, 36(a0)
-; RV32ZVE32F-NEXT: sw t6, 40(a0)
-; RV32ZVE32F-NEXT: sw t5, 44(a0)
-; RV32ZVE32F-NEXT: sw s1, 48(a0)
-; RV32ZVE32F-NEXT: sw s0, 52(a0)
-; RV32ZVE32F-NEXT: sw a2, 56(a0)
-; RV32ZVE32F-NEXT: sw t0, 60(a0)
+; RV32ZVE32F-NEXT: sw a1, 0(a0)
+; RV32ZVE32F-NEXT: sw a3, 4(a0)
+; RV32ZVE32F-NEXT: sw a4, 8(a0)
+; RV32ZVE32F-NEXT: sw a5, 12(a0)
+; RV32ZVE32F-NEXT: sw a6, 16(a0)
+; RV32ZVE32F-NEXT: sw a7, 20(a0)
+; RV32ZVE32F-NEXT: sw t1, 24(a0)
+; RV32ZVE32F-NEXT: sw t2, 28(a0)
+; RV32ZVE32F-NEXT: sw t3, 32(a0)
+; RV32ZVE32F-NEXT: sw t4, 36(a0)
+; RV32ZVE32F-NEXT: sw t5, 40(a0)
+; RV32ZVE32F-NEXT: sw t6, 44(a0)
+; RV32ZVE32F-NEXT: sw s0, 48(a0)
+; RV32ZVE32F-NEXT: sw s1, 52(a0)
+; RV32ZVE32F-NEXT: sw t0, 56(a0)
+; RV32ZVE32F-NEXT: sw a2, 60(a0)
; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: addi sp, sp, 16
@@ -5514,77 +5514,77 @@ define <8 x i64> @mgather_baseidx_zext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <
; RV32ZVE32F-NEXT: beqz a3, .LBB53_7
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
-; RV32ZVE32F-NEXT: lw a1, 4(a3)
-; RV32ZVE32F-NEXT: lw a3, 0(a3)
+; RV32ZVE32F-NEXT: lw a1, 0(a3)
+; RV32ZVE32F-NEXT: lw a3, 4(a3)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: bnez a4, .LBB53_8
; RV32ZVE32F-NEXT: .LBB53_2:
-; RV32ZVE32F-NEXT: lw a4, 12(a2)
-; RV32ZVE32F-NEXT: lw a5, 8(a2)
+; RV32ZVE32F-NEXT: lw a4, 8(a2)
+; RV32ZVE32F-NEXT: lw a5, 12(a2)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: bnez a6, .LBB53_9
; RV32ZVE32F-NEXT: .LBB53_3:
-; RV32ZVE32F-NEXT: lw a6, 20(a2)
-; RV32ZVE32F-NEXT: lw a7, 16(a2)
+; RV32ZVE32F-NEXT: lw a6, 16(a2)
+; RV32ZVE32F-NEXT: lw a7, 20(a2)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: bnez t1, .LBB53_10
; RV32ZVE32F-NEXT: .LBB53_4:
-; RV32ZVE32F-NEXT: lw t1, 28(a2)
-; RV32ZVE32F-NEXT: lw t2, 24(a2)
+; RV32ZVE32F-NEXT: lw t1, 24(a2)
+; RV32ZVE32F-NEXT: lw t2, 28(a2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: bnez t3, .LBB53_11
; RV32ZVE32F-NEXT: .LBB53_5:
-; RV32ZVE32F-NEXT: lw t3, 36(a2)
-; RV32ZVE32F-NEXT: lw t4, 32(a2)
+; RV32ZVE32F-NEXT: lw t3, 32(a2)
+; RV32ZVE32F-NEXT: lw t4, 36(a2)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: bnez t5, .LBB53_12
; RV32ZVE32F-NEXT: .LBB53_6:
-; RV32ZVE32F-NEXT: lw t5, 44(a2)
-; RV32ZVE32F-NEXT: lw t6, 40(a2)
+; RV32ZVE32F-NEXT: lw t5, 40(a2)
+; RV32ZVE32F-NEXT: lw t6, 44(a2)
; RV32ZVE32F-NEXT: j .LBB53_13
; RV32ZVE32F-NEXT: .LBB53_7:
-; RV32ZVE32F-NEXT: lw a1, 4(a2)
-; RV32ZVE32F-NEXT: lw a3, 0(a2)
+; RV32ZVE32F-NEXT: lw a1, 0(a2)
+; RV32ZVE32F-NEXT: lw a3, 4(a2)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: beqz a4, .LBB53_2
; RV32ZVE32F-NEXT: .LBB53_8: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v10
-; RV32ZVE32F-NEXT: lw a4, 4(a5)
-; RV32ZVE32F-NEXT: lw a5, 0(a5)
+; RV32ZVE32F-NEXT: lw a4, 0(a5)
+; RV32ZVE32F-NEXT: lw a5, 4(a5)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: beqz a6, .LBB53_3
; RV32ZVE32F-NEXT: .LBB53_9: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a7, v10
-; RV32ZVE32F-NEXT: lw a6, 4(a7)
-; RV32ZVE32F-NEXT: lw a7, 0(a7)
+; RV32ZVE32F-NEXT: lw a6, 0(a7)
+; RV32ZVE32F-NEXT: lw a7, 4(a7)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: beqz t1, .LBB53_4
; RV32ZVE32F-NEXT: .LBB53_10: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s t2, v10
-; RV32ZVE32F-NEXT: lw t1, 4(t2)
-; RV32ZVE32F-NEXT: lw t2, 0(t2)
+; RV32ZVE32F-NEXT: lw t1, 0(t2)
+; RV32ZVE32F-NEXT: lw t2, 4(t2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: beqz t3, .LBB53_5
; RV32ZVE32F-NEXT: .LBB53_11: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s t4, v10
-; RV32ZVE32F-NEXT: lw t3, 4(t4)
-; RV32ZVE32F-NEXT: lw t4, 0(t4)
+; RV32ZVE32F-NEXT: lw t3, 0(t4)
+; RV32ZVE32F-NEXT: lw t4, 4(t4)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: beqz t5, .LBB53_6
; RV32ZVE32F-NEXT: .LBB53_12: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s t6, v10
-; RV32ZVE32F-NEXT: lw t5, 4(t6)
-; RV32ZVE32F-NEXT: lw t6, 0(t6)
+; RV32ZVE32F-NEXT: lw t5, 0(t6)
+; RV32ZVE32F-NEXT: lw t6, 4(t6)
; RV32ZVE32F-NEXT: .LBB53_13: # %else14
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
@@ -5598,42 +5598,42 @@ define <8 x i64> @mgather_baseidx_zext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s s1, v10
-; RV32ZVE32F-NEXT: lw s0, 4(s1)
-; RV32ZVE32F-NEXT: lw s1, 0(s1)
+; RV32ZVE32F-NEXT: lw s0, 0(s1)
+; RV32ZVE32F-NEXT: lw s1, 4(s1)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: bnez t0, .LBB53_17
; RV32ZVE32F-NEXT: .LBB53_15:
-; RV32ZVE32F-NEXT: lw t0, 60(a2)
-; RV32ZVE32F-NEXT: lw a2, 56(a2)
+; RV32ZVE32F-NEXT: lw t0, 56(a2)
+; RV32ZVE32F-NEXT: lw a2, 60(a2)
; RV32ZVE32F-NEXT: j .LBB53_18
; RV32ZVE32F-NEXT: .LBB53_16:
-; RV32ZVE32F-NEXT: lw s0, 52(a2)
-; RV32ZVE32F-NEXT: lw s1, 48(a2)
+; RV32ZVE32F-NEXT: lw s0, 48(a2)
+; RV32ZVE32F-NEXT: lw s1, 52(a2)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: beqz t0, .LBB53_15
; RV32ZVE32F-NEXT: .LBB53_17: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
-; RV32ZVE32F-NEXT: lw t0, 4(a2)
-; RV32ZVE32F-NEXT: lw a2, 0(a2)
+; RV32ZVE32F-NEXT: lw t0, 0(a2)
+; RV32ZVE32F-NEXT: lw a2, 4(a2)
; RV32ZVE32F-NEXT: .LBB53_18: # %else20
-; RV32ZVE32F-NEXT: sw a3, 0(a0)
-; RV32ZVE32F-NEXT: sw a1, 4(a0)
-; RV32ZVE32F-NEXT: sw a5, 8(a0)
-; RV32ZVE32F-NEXT: sw a4, 12(a0)
-; RV32ZVE32F-NEXT: sw a7, 16(a0)
-; RV32ZVE32F-NEXT: sw a6, 20(a0)
-; RV32ZVE32F-NEXT: sw t2, 24(a0)
-; RV32ZVE32F-NEXT: sw t1, 28(a0)
-; RV32ZVE32F-NEXT: sw t4, 32(a0)
-; RV32ZVE32F-NEXT: sw t3, 36(a0)
-; RV32ZVE32F-NEXT: sw t6, 40(a0)
-; RV32ZVE32F-NEXT: sw t5, 44(a0)
-; RV32ZVE32F-NEXT: sw s1, 48(a0)
-; RV32ZVE32F-NEXT: sw s0, 52(a0)
-; RV32ZVE32F-NEXT: sw a2, 56(a0)
-; RV32ZVE32F-NEXT: sw t0, 60(a0)
+; RV32ZVE32F-NEXT: sw a1, 0(a0)
+; RV32ZVE32F-NEXT: sw a3, 4(a0)
+; RV32ZVE32F-NEXT: sw a4, 8(a0)
+; RV32ZVE32F-NEXT: sw a5, 12(a0)
+; RV32ZVE32F-NEXT: sw a6, 16(a0)
+; RV32ZVE32F-NEXT: sw a7, 20(a0)
+; RV32ZVE32F-NEXT: sw t1, 24(a0)
+; RV32ZVE32F-NEXT: sw t2, 28(a0)
+; RV32ZVE32F-NEXT: sw t3, 32(a0)
+; RV32ZVE32F-NEXT: sw t4, 36(a0)
+; RV32ZVE32F-NEXT: sw t5, 40(a0)
+; RV32ZVE32F-NEXT: sw t6, 44(a0)
+; RV32ZVE32F-NEXT: sw s0, 48(a0)
+; RV32ZVE32F-NEXT: sw s1, 52(a0)
+; RV32ZVE32F-NEXT: sw t0, 56(a0)
+; RV32ZVE32F-NEXT: sw a2, 60(a0)
; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: addi sp, sp, 16
@@ -5798,77 +5798,77 @@ define <8 x i64> @mgather_baseidx_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <8 x i
; RV32ZVE32F-NEXT: beqz a3, .LBB54_7
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
-; RV32ZVE32F-NEXT: lw a1, 4(a3)
-; RV32ZVE32F-NEXT: lw a3, 0(a3)
+; RV32ZVE32F-NEXT: lw a1, 0(a3)
+; RV32ZVE32F-NEXT: lw a3, 4(a3)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: bnez a4, .LBB54_8
; RV32ZVE32F-NEXT: .LBB54_2:
-; RV32ZVE32F-NEXT: lw a4, 12(a2)
-; RV32ZVE32F-NEXT: lw a5, 8(a2)
+; RV32ZVE32F-NEXT: lw a4, 8(a2)
+; RV32ZVE32F-NEXT: lw a5, 12(a2)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: bnez a6, .LBB54_9
; RV32ZVE32F-NEXT: .LBB54_3:
-; RV32ZVE32F-NEXT: lw a6, 20(a2)
-; RV32ZVE32F-NEXT: lw a7, 16(a2)
+; RV32ZVE32F-NEXT: lw a6, 16(a2)
+; RV32ZVE32F-NEXT: lw a7, 20(a2)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: bnez t1, .LBB54_10
; RV32ZVE32F-NEXT: .LBB54_4:
-; RV32ZVE32F-NEXT: lw t1, 28(a2)
-; RV32ZVE32F-NEXT: lw t2, 24(a2)
+; RV32ZVE32F-NEXT: lw t1, 24(a2)
+; RV32ZVE32F-NEXT: lw t2, 28(a2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: bnez t3, .LBB54_11
; RV32ZVE32F-NEXT: .LBB54_5:
-; RV32ZVE32F-NEXT: lw t3, 36(a2)
-; RV32ZVE32F-NEXT: lw t4, 32(a2)
+; RV32ZVE32F-NEXT: lw t3, 32(a2)
+; RV32ZVE32F-NEXT: lw t4, 36(a2)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: bnez t5, .LBB54_12
; RV32ZVE32F-NEXT: .LBB54_6:
-; RV32ZVE32F-NEXT: lw t5, 44(a2)
-; RV32ZVE32F-NEXT: lw t6, 40(a2)
+; RV32ZVE32F-NEXT: lw t5, 40(a2)
+; RV32ZVE32F-NEXT: lw t6, 44(a2)
; RV32ZVE32F-NEXT: j .LBB54_13
; RV32ZVE32F-NEXT: .LBB54_7:
-; RV32ZVE32F-NEXT: lw a1, 4(a2)
-; RV32ZVE32F-NEXT: lw a3, 0(a2)
+; RV32ZVE32F-NEXT: lw a1, 0(a2)
+; RV32ZVE32F-NEXT: lw a3, 4(a2)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: beqz a4, .LBB54_2
; RV32ZVE32F-NEXT: .LBB54_8: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v10
-; RV32ZVE32F-NEXT: lw a4, 4(a5)
-; RV32ZVE32F-NEXT: lw a5, 0(a5)
+; RV32ZVE32F-NEXT: lw a4, 0(a5)
+; RV32ZVE32F-NEXT: lw a5, 4(a5)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: beqz a6, .LBB54_3
; RV32ZVE32F-NEXT: .LBB54_9: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a7, v10
-; RV32ZVE32F-NEXT: lw a6, 4(a7)
-; RV32ZVE32F-NEXT: lw a7, 0(a7)
+; RV32ZVE32F-NEXT: lw a6, 0(a7)
+; RV32ZVE32F-NEXT: lw a7, 4(a7)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: beqz t1, .LBB54_4
; RV32ZVE32F-NEXT: .LBB54_10: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s t2, v10
-; RV32ZVE32F-NEXT: lw t1, 4(t2)
-; RV32ZVE32F-NEXT: lw t2, 0(t2)
+; RV32ZVE32F-NEXT: lw t1, 0(t2)
+; RV32ZVE32F-NEXT: lw t2, 4(t2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: beqz t3, .LBB54_5
; RV32ZVE32F-NEXT: .LBB54_11: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s t4, v10
-; RV32ZVE32F-NEXT: lw t3, 4(t4)
-; RV32ZVE32F-NEXT: lw t4, 0(t4)
+; RV32ZVE32F-NEXT: lw t3, 0(t4)
+; RV32ZVE32F-NEXT: lw t4, 4(t4)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: beqz t5, .LBB54_6
; RV32ZVE32F-NEXT: .LBB54_12: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s t6, v10
-; RV32ZVE32F-NEXT: lw t5, 4(t6)
-; RV32ZVE32F-NEXT: lw t6, 0(t6)
+; RV32ZVE32F-NEXT: lw t5, 0(t6)
+; RV32ZVE32F-NEXT: lw t6, 4(t6)
; RV32ZVE32F-NEXT: .LBB54_13: # %else14
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
@@ -5882,42 +5882,42 @@ define <8 x i64> @mgather_baseidx_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <8 x i
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s s1, v10
-; RV32ZVE32F-NEXT: lw s0, 4(s1)
-; RV32ZVE32F-NEXT: lw s1, 0(s1)
+; RV32ZVE32F-NEXT: lw s0, 0(s1)
+; RV32ZVE32F-NEXT: lw s1, 4(s1)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: bnez t0, .LBB54_17
; RV32ZVE32F-NEXT: .LBB54_15:
-; RV32ZVE32F-NEXT: lw t0, 60(a2)
-; RV32ZVE32F-NEXT: lw a2, 56(a2)
+; RV32ZVE32F-NEXT: lw t0, 56(a2)
+; RV32ZVE32F-NEXT: lw a2, 60(a2)
; RV32ZVE32F-NEXT: j .LBB54_18
; RV32ZVE32F-NEXT: .LBB54_16:
-; RV32ZVE32F-NEXT: lw s0, 52(a2)
-; RV32ZVE32F-NEXT: lw s1, 48(a2)
+; RV32ZVE32F-NEXT: lw s0, 48(a2)
+; RV32ZVE32F-NEXT: lw s1, 52(a2)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: beqz t0, .LBB54_15
; RV32ZVE32F-NEXT: .LBB54_17: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
-; RV32ZVE32F-NEXT: lw t0, 4(a2)
-; RV32ZVE32F-NEXT: lw a2, 0(a2)
+; RV32ZVE32F-NEXT: lw t0, 0(a2)
+; RV32ZVE32F-NEXT: lw a2, 4(a2)
; RV32ZVE32F-NEXT: .LBB54_18: # %else20
-; RV32ZVE32F-NEXT: sw a3, 0(a0)
-; RV32ZVE32F-NEXT: sw a1, 4(a0)
-; RV32ZVE32F-NEXT: sw a5, 8(a0)
-; RV32ZVE32F-NEXT: sw a4, 12(a0)
-; RV32ZVE32F-NEXT: sw a7, 16(a0)
-; RV32ZVE32F-NEXT: sw a6, 20(a0)
-; RV32ZVE32F-NEXT: sw t2, 24(a0)
-; RV32ZVE32F-NEXT: sw t1, 28(a0)
-; RV32ZVE32F-NEXT: sw t4, 32(a0)
-; RV32ZVE32F-NEXT: sw t3, 36(a0)
-; RV32ZVE32F-NEXT: sw t6, 40(a0)
-; RV32ZVE32F-NEXT: sw t5, 44(a0)
-; RV32ZVE32F-NEXT: sw s1, 48(a0)
-; RV32ZVE32F-NEXT: sw s0, 52(a0)
-; RV32ZVE32F-NEXT: sw a2, 56(a0)
-; RV32ZVE32F-NEXT: sw t0, 60(a0)
+; RV32ZVE32F-NEXT: sw a1, 0(a0)
+; RV32ZVE32F-NEXT: sw a3, 4(a0)
+; RV32ZVE32F-NEXT: sw a4, 8(a0)
+; RV32ZVE32F-NEXT: sw a5, 12(a0)
+; RV32ZVE32F-NEXT: sw a6, 16(a0)
+; RV32ZVE32F-NEXT: sw a7, 20(a0)
+; RV32ZVE32F-NEXT: sw t1, 24(a0)
+; RV32ZVE32F-NEXT: sw t2, 28(a0)
+; RV32ZVE32F-NEXT: sw t3, 32(a0)
+; RV32ZVE32F-NEXT: sw t4, 36(a0)
+; RV32ZVE32F-NEXT: sw t5, 40(a0)
+; RV32ZVE32F-NEXT: sw t6, 44(a0)
+; RV32ZVE32F-NEXT: sw s0, 48(a0)
+; RV32ZVE32F-NEXT: sw s1, 52(a0)
+; RV32ZVE32F-NEXT: sw t0, 56(a0)
+; RV32ZVE32F-NEXT: sw a2, 60(a0)
; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: addi sp, sp, 16
@@ -6071,77 +6071,77 @@ define <8 x i64> @mgather_baseidx_sext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <
; RV32ZVE32F-NEXT: beqz a3, .LBB55_7
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
-; RV32ZVE32F-NEXT: lw a1, 4(a3)
-; RV32ZVE32F-NEXT: lw a3, 0(a3)
+; RV32ZVE32F-NEXT: lw a1, 0(a3)
+; RV32ZVE32F-NEXT: lw a3, 4(a3)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: bnez a4, .LBB55_8
; RV32ZVE32F-NEXT: .LBB55_2:
-; RV32ZVE32F-NEXT: lw a4, 12(a2)
-; RV32ZVE32F-NEXT: lw a5, 8(a2)
+; RV32ZVE32F-NEXT: lw a4, 8(a2)
+; RV32ZVE32F-NEXT: lw a5, 12(a2)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: bnez a6, .LBB55_9
; RV32ZVE32F-NEXT: .LBB55_3:
-; RV32ZVE32F-NEXT: lw a6, 20(a2)
-; RV32ZVE32F-NEXT: lw a7, 16(a2)
+; RV32ZVE32F-NEXT: lw a6, 16(a2)
+; RV32ZVE32F-NEXT: lw a7, 20(a2)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: bnez t1, .LBB55_10
; RV32ZVE32F-NEXT: .LBB55_4:
-; RV32ZVE32F-NEXT: lw t1, 28(a2)
-; RV32ZVE32F-NEXT: lw t2, 24(a2)
+; RV32ZVE32F-NEXT: lw t1, 24(a2)
+; RV32ZVE32F-NEXT: lw t2, 28(a2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: bnez t3, .LBB55_11
; RV32ZVE32F-NEXT: .LBB55_5:
-; RV32ZVE32F-NEXT: lw t3, 36(a2)
-; RV32ZVE32F-NEXT: lw t4, 32(a2)
+; RV32ZVE32F-NEXT: lw t3, 32(a2)
+; RV32ZVE32F-NEXT: lw t4, 36(a2)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: bnez t5, .LBB55_12
; RV32ZVE32F-NEXT: .LBB55_6:
-; RV32ZVE32F-NEXT: lw t5, 44(a2)
-; RV32ZVE32F-NEXT: lw t6, 40(a2)
+; RV32ZVE32F-NEXT: lw t5, 40(a2)
+; RV32ZVE32F-NEXT: lw t6, 44(a2)
; RV32ZVE32F-NEXT: j .LBB55_13
; RV32ZVE32F-NEXT: .LBB55_7:
-; RV32ZVE32F-NEXT: lw a1, 4(a2)
-; RV32ZVE32F-NEXT: lw a3, 0(a2)
+; RV32ZVE32F-NEXT: lw a1, 0(a2)
+; RV32ZVE32F-NEXT: lw a3, 4(a2)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: beqz a4, .LBB55_2
; RV32ZVE32F-NEXT: .LBB55_8: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v10
-; RV32ZVE32F-NEXT: lw a4, 4(a5)
-; RV32ZVE32F-NEXT: lw a5, 0(a5)
+; RV32ZVE32F-NEXT: lw a4, 0(a5)
+; RV32ZVE32F-NEXT: lw a5, 4(a5)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: beqz a6, .LBB55_3
; RV32ZVE32F-NEXT: .LBB55_9: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a7, v10
-; RV32ZVE32F-NEXT: lw a6, 4(a7)
-; RV32ZVE32F-NEXT: lw a7, 0(a7)
+; RV32ZVE32F-NEXT: lw a6, 0(a7)
+; RV32ZVE32F-NEXT: lw a7, 4(a7)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: beqz t1, .LBB55_4
; RV32ZVE32F-NEXT: .LBB55_10: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s t2, v10
-; RV32ZVE32F-NEXT: lw t1, 4(t2)
-; RV32ZVE32F-NEXT: lw t2, 0(t2)
+; RV32ZVE32F-NEXT: lw t1, 0(t2)
+; RV32ZVE32F-NEXT: lw t2, 4(t2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: beqz t3, .LBB55_5
; RV32ZVE32F-NEXT: .LBB55_11: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s t4, v10
-; RV32ZVE32F-NEXT: lw t3, 4(t4)
-; RV32ZVE32F-NEXT: lw t4, 0(t4)
+; RV32ZVE32F-NEXT: lw t3, 0(t4)
+; RV32ZVE32F-NEXT: lw t4, 4(t4)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: beqz t5, .LBB55_6
; RV32ZVE32F-NEXT: .LBB55_12: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s t6, v10
-; RV32ZVE32F-NEXT: lw t5, 4(t6)
-; RV32ZVE32F-NEXT: lw t6, 0(t6)
+; RV32ZVE32F-NEXT: lw t5, 0(t6)
+; RV32ZVE32F-NEXT: lw t6, 4(t6)
; RV32ZVE32F-NEXT: .LBB55_13: # %else14
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
@@ -6155,42 +6155,42 @@ define <8 x i64> @mgather_baseidx_sext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s s1, v10
-; RV32ZVE32F-NEXT: lw s0, 4(s1)
-; RV32ZVE32F-NEXT: lw s1, 0(s1)
+; RV32ZVE32F-NEXT: lw s0, 0(s1)
+; RV32ZVE32F-NEXT: lw s1, 4(s1)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: bnez t0, .LBB55_17
; RV32ZVE32F-NEXT: .LBB55_15:
-; RV32ZVE32F-NEXT: lw t0, 60(a2)
-; RV32ZVE32F-NEXT: lw a2, 56(a2)
+; RV32ZVE32F-NEXT: lw t0, 56(a2)
+; RV32ZVE32F-NEXT: lw a2, 60(a2)
; RV32ZVE32F-NEXT: j .LBB55_18
; RV32ZVE32F-NEXT: .LBB55_16:
-; RV32ZVE32F-NEXT: lw s0, 52(a2)
-; RV32ZVE32F-NEXT: lw s1, 48(a2)
+; RV32ZVE32F-NEXT: lw s0, 48(a2)
+; RV32ZVE32F-NEXT: lw s1, 52(a2)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: beqz t0, .LBB55_15
; RV32ZVE32F-NEXT: .LBB55_17: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
-; RV32ZVE32F-NEXT: lw t0, 4(a2)
-; RV32ZVE32F-NEXT: lw a2, 0(a2)
+; RV32ZVE32F-NEXT: lw t0, 0(a2)
+; RV32ZVE32F-NEXT: lw a2, 4(a2)
; RV32ZVE32F-NEXT: .LBB55_18: # %else20
-; RV32ZVE32F-NEXT: sw a3, 0(a0)
-; RV32ZVE32F-NEXT: sw a1, 4(a0)
-; RV32ZVE32F-NEXT: sw a5, 8(a0)
-; RV32ZVE32F-NEXT: sw a4, 12(a0)
-; RV32ZVE32F-NEXT: sw a7, 16(a0)
-; RV32ZVE32F-NEXT: sw a6, 20(a0)
-; RV32ZVE32F-NEXT: sw t2, 24(a0)
-; RV32ZVE32F-NEXT: sw t1, 28(a0)
-; RV32ZVE32F-NEXT: sw t4, 32(a0)
-; RV32ZVE32F-NEXT: sw t3, 36(a0)
-; RV32ZVE32F-NEXT: sw t6, 40(a0)
-; RV32ZVE32F-NEXT: sw t5, 44(a0)
-; RV32ZVE32F-NEXT: sw s1, 48(a0)
-; RV32ZVE32F-NEXT: sw s0, 52(a0)
-; RV32ZVE32F-NEXT: sw a2, 56(a0)
-; RV32ZVE32F-NEXT: sw t0, 60(a0)
+; RV32ZVE32F-NEXT: sw a1, 0(a0)
+; RV32ZVE32F-NEXT: sw a3, 4(a0)
+; RV32ZVE32F-NEXT: sw a4, 8(a0)
+; RV32ZVE32F-NEXT: sw a5, 12(a0)
+; RV32ZVE32F-NEXT: sw a6, 16(a0)
+; RV32ZVE32F-NEXT: sw a7, 20(a0)
+; RV32ZVE32F-NEXT: sw t1, 24(a0)
+; RV32ZVE32F-NEXT: sw t2, 28(a0)
+; RV32ZVE32F-NEXT: sw t3, 32(a0)
+; RV32ZVE32F-NEXT: sw t4, 36(a0)
+; RV32ZVE32F-NEXT: sw t5, 40(a0)
+; RV32ZVE32F-NEXT: sw t6, 44(a0)
+; RV32ZVE32F-NEXT: sw s0, 48(a0)
+; RV32ZVE32F-NEXT: sw s1, 52(a0)
+; RV32ZVE32F-NEXT: sw t0, 56(a0)
+; RV32ZVE32F-NEXT: sw a2, 60(a0)
; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: addi sp, sp, 16
@@ -6345,77 +6345,77 @@ define <8 x i64> @mgather_baseidx_zext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <
; RV32ZVE32F-NEXT: beqz a3, .LBB56_7
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
-; RV32ZVE32F-NEXT: lw a1, 4(a3)
-; RV32ZVE32F-NEXT: lw a3, 0(a3)
+; RV32ZVE32F-NEXT: lw a1, 0(a3)
+; RV32ZVE32F-NEXT: lw a3, 4(a3)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: bnez a4, .LBB56_8
; RV32ZVE32F-NEXT: .LBB56_2:
-; RV32ZVE32F-NEXT: lw a4, 12(a2)
-; RV32ZVE32F-NEXT: lw a5, 8(a2)
+; RV32ZVE32F-NEXT: lw a4, 8(a2)
+; RV32ZVE32F-NEXT: lw a5, 12(a2)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: bnez a6, .LBB56_9
; RV32ZVE32F-NEXT: .LBB56_3:
-; RV32ZVE32F-NEXT: lw a6, 20(a2)
-; RV32ZVE32F-NEXT: lw a7, 16(a2)
+; RV32ZVE32F-NEXT: lw a6, 16(a2)
+; RV32ZVE32F-NEXT: lw a7, 20(a2)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: bnez t1, .LBB56_10
; RV32ZVE32F-NEXT: .LBB56_4:
-; RV32ZVE32F-NEXT: lw t1, 28(a2)
-; RV32ZVE32F-NEXT: lw t2, 24(a2)
+; RV32ZVE32F-NEXT: lw t1, 24(a2)
+; RV32ZVE32F-NEXT: lw t2, 28(a2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: bnez t3, .LBB56_11
; RV32ZVE32F-NEXT: .LBB56_5:
-; RV32ZVE32F-NEXT: lw t3, 36(a2)
-; RV32ZVE32F-NEXT: lw t4, 32(a2)
+; RV32ZVE32F-NEXT: lw t3, 32(a2)
+; RV32ZVE32F-NEXT: lw t4, 36(a2)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: bnez t5, .LBB56_12
; RV32ZVE32F-NEXT: .LBB56_6:
-; RV32ZVE32F-NEXT: lw t5, 44(a2)
-; RV32ZVE32F-NEXT: lw t6, 40(a2)
+; RV32ZVE32F-NEXT: lw t5, 40(a2)
+; RV32ZVE32F-NEXT: lw t6, 44(a2)
; RV32ZVE32F-NEXT: j .LBB56_13
; RV32ZVE32F-NEXT: .LBB56_7:
-; RV32ZVE32F-NEXT: lw a1, 4(a2)
-; RV32ZVE32F-NEXT: lw a3, 0(a2)
+; RV32ZVE32F-NEXT: lw a1, 0(a2)
+; RV32ZVE32F-NEXT: lw a3, 4(a2)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: beqz a4, .LBB56_2
; RV32ZVE32F-NEXT: .LBB56_8: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v10
-; RV32ZVE32F-NEXT: lw a4, 4(a5)
-; RV32ZVE32F-NEXT: lw a5, 0(a5)
+; RV32ZVE32F-NEXT: lw a4, 0(a5)
+; RV32ZVE32F-NEXT: lw a5, 4(a5)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: beqz a6, .LBB56_3
; RV32ZVE32F-NEXT: .LBB56_9: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a7, v10
-; RV32ZVE32F-NEXT: lw a6, 4(a7)
-; RV32ZVE32F-NEXT: lw a7, 0(a7)
+; RV32ZVE32F-NEXT: lw a6, 0(a7)
+; RV32ZVE32F-NEXT: lw a7, 4(a7)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: beqz t1, .LBB56_4
; RV32ZVE32F-NEXT: .LBB56_10: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s t2, v10
-; RV32ZVE32F-NEXT: lw t1, 4(t2)
-; RV32ZVE32F-NEXT: lw t2, 0(t2)
+; RV32ZVE32F-NEXT: lw t1, 0(t2)
+; RV32ZVE32F-NEXT: lw t2, 4(t2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: beqz t3, .LBB56_5
; RV32ZVE32F-NEXT: .LBB56_11: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s t4, v10
-; RV32ZVE32F-NEXT: lw t3, 4(t4)
-; RV32ZVE32F-NEXT: lw t4, 0(t4)
+; RV32ZVE32F-NEXT: lw t3, 0(t4)
+; RV32ZVE32F-NEXT: lw t4, 4(t4)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: beqz t5, .LBB56_6
; RV32ZVE32F-NEXT: .LBB56_12: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s t6, v10
-; RV32ZVE32F-NEXT: lw t5, 4(t6)
-; RV32ZVE32F-NEXT: lw t6, 0(t6)
+; RV32ZVE32F-NEXT: lw t5, 0(t6)
+; RV32ZVE32F-NEXT: lw t6, 4(t6)
; RV32ZVE32F-NEXT: .LBB56_13: # %else14
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
@@ -6429,42 +6429,42 @@ define <8 x i64> @mgather_baseidx_zext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s s1, v10
-; RV32ZVE32F-NEXT: lw s0, 4(s1)
-; RV32ZVE32F-NEXT: lw s1, 0(s1)
+; RV32ZVE32F-NEXT: lw s0, 0(s1)
+; RV32ZVE32F-NEXT: lw s1, 4(s1)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: bnez t0, .LBB56_17
; RV32ZVE32F-NEXT: .LBB56_15:
-; RV32ZVE32F-NEXT: lw t0, 60(a2)
-; RV32ZVE32F-NEXT: lw a2, 56(a2)
+; RV32ZVE32F-NEXT: lw t0, 56(a2)
+; RV32ZVE32F-NEXT: lw a2, 60(a2)
; RV32ZVE32F-NEXT: j .LBB56_18
; RV32ZVE32F-NEXT: .LBB56_16:
-; RV32ZVE32F-NEXT: lw s0, 52(a2)
-; RV32ZVE32F-NEXT: lw s1, 48(a2)
+; RV32ZVE32F-NEXT: lw s0, 48(a2)
+; RV32ZVE32F-NEXT: lw s1, 52(a2)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: beqz t0, .LBB56_15
; RV32ZVE32F-NEXT: .LBB56_17: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
-; RV32ZVE32F-NEXT: lw t0, 4(a2)
-; RV32ZVE32F-NEXT: lw a2, 0(a2)
+; RV32ZVE32F-NEXT: lw t0, 0(a2)
+; RV32ZVE32F-NEXT: lw a2, 4(a2)
; RV32ZVE32F-NEXT: .LBB56_18: # %else20
-; RV32ZVE32F-NEXT: sw a3, 0(a0)
-; RV32ZVE32F-NEXT: sw a1, 4(a0)
-; RV32ZVE32F-NEXT: sw a5, 8(a0)
-; RV32ZVE32F-NEXT: sw a4, 12(a0)
-; RV32ZVE32F-NEXT: sw a7, 16(a0)
-; RV32ZVE32F-NEXT: sw a6, 20(a0)
-; RV32ZVE32F-NEXT: sw t2, 24(a0)
-; RV32ZVE32F-NEXT: sw t1, 28(a0)
-; RV32ZVE32F-NEXT: sw t4, 32(a0)
-; RV32ZVE32F-NEXT: sw t3, 36(a0)
-; RV32ZVE32F-NEXT: sw t6, 40(a0)
-; RV32ZVE32F-NEXT: sw t5, 44(a0)
-; RV32ZVE32F-NEXT: sw s1, 48(a0)
-; RV32ZVE32F-NEXT: sw s0, 52(a0)
-; RV32ZVE32F-NEXT: sw a2, 56(a0)
-; RV32ZVE32F-NEXT: sw t0, 60(a0)
+; RV32ZVE32F-NEXT: sw a1, 0(a0)
+; RV32ZVE32F-NEXT: sw a3, 4(a0)
+; RV32ZVE32F-NEXT: sw a4, 8(a0)
+; RV32ZVE32F-NEXT: sw a5, 12(a0)
+; RV32ZVE32F-NEXT: sw a6, 16(a0)
+; RV32ZVE32F-NEXT: sw a7, 20(a0)
+; RV32ZVE32F-NEXT: sw t1, 24(a0)
+; RV32ZVE32F-NEXT: sw t2, 28(a0)
+; RV32ZVE32F-NEXT: sw t3, 32(a0)
+; RV32ZVE32F-NEXT: sw t4, 36(a0)
+; RV32ZVE32F-NEXT: sw t5, 40(a0)
+; RV32ZVE32F-NEXT: sw t6, 44(a0)
+; RV32ZVE32F-NEXT: sw s0, 48(a0)
+; RV32ZVE32F-NEXT: sw s1, 52(a0)
+; RV32ZVE32F-NEXT: sw t0, 56(a0)
+; RV32ZVE32F-NEXT: sw a2, 60(a0)
; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: addi sp, sp, 16
@@ -6617,10 +6617,10 @@ define <8 x i64> @mgather_baseidx_v8i64(ptr %base, <8 x i64> %idxs, <8 x i1> %m,
;
; RV32ZVE32F-LABEL: mgather_baseidx_v8i64:
; RV32ZVE32F: # %bb.0:
-; RV32ZVE32F-NEXT: lw a4, 56(a2)
-; RV32ZVE32F-NEXT: lw a5, 48(a2)
-; RV32ZVE32F-NEXT: lw a6, 40(a2)
-; RV32ZVE32F-NEXT: lw a7, 32(a2)
+; RV32ZVE32F-NEXT: lw a4, 32(a2)
+; RV32ZVE32F-NEXT: lw a5, 40(a2)
+; RV32ZVE32F-NEXT: lw a6, 48(a2)
+; RV32ZVE32F-NEXT: lw a7, 56(a2)
; RV32ZVE32F-NEXT: lw t0, 0(a2)
; RV32ZVE32F-NEXT: lw t1, 8(a2)
; RV32ZVE32F-NEXT: lw t2, 16(a2)
@@ -6630,10 +6630,10 @@ define <8 x i64> @mgather_baseidx_v8i64(ptr %base, <8 x i64> %idxs, <8 x i1> %m,
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t1
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t2
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a2
-; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a7
-; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a6
-; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a5
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a4
+; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a5
+; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a6
+; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a7
; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s t0, v0
@@ -6643,77 +6643,77 @@ define <8 x i64> @mgather_baseidx_v8i64(ptr %base, <8 x i64> %idxs, <8 x i1> %m,
; RV32ZVE32F-NEXT: beqz a2, .LBB57_7
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
-; RV32ZVE32F-NEXT: lw a1, 4(a2)
-; RV32ZVE32F-NEXT: lw a2, 0(a2)
+; RV32ZVE32F-NEXT: lw a1, 0(a2)
+; RV32ZVE32F-NEXT: lw a2, 4(a2)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: bnez a4, .LBB57_8
; RV32ZVE32F-NEXT: .LBB57_2:
-; RV32ZVE32F-NEXT: lw a4, 12(a3)
-; RV32ZVE32F-NEXT: lw a5, 8(a3)
+; RV32ZVE32F-NEXT: lw a4, 8(a3)
+; RV32ZVE32F-NEXT: lw a5, 12(a3)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: bnez a6, .LBB57_9
; RV32ZVE32F-NEXT: .LBB57_3:
-; RV32ZVE32F-NEXT: lw a6, 20(a3)
-; RV32ZVE32F-NEXT: lw a7, 16(a3)
+; RV32ZVE32F-NEXT: lw a6, 16(a3)
+; RV32ZVE32F-NEXT: lw a7, 20(a3)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: bnez t1, .LBB57_10
; RV32ZVE32F-NEXT: .LBB57_4:
-; RV32ZVE32F-NEXT: lw t1, 28(a3)
-; RV32ZVE32F-NEXT: lw t2, 24(a3)
+; RV32ZVE32F-NEXT: lw t1, 24(a3)
+; RV32ZVE32F-NEXT: lw t2, 28(a3)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: bnez t3, .LBB57_11
; RV32ZVE32F-NEXT: .LBB57_5:
-; RV32ZVE32F-NEXT: lw t3, 36(a3)
-; RV32ZVE32F-NEXT: lw t4, 32(a3)
+; RV32ZVE32F-NEXT: lw t3, 32(a3)
+; RV32ZVE32F-NEXT: lw t4, 36(a3)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: bnez t5, .LBB57_12
; RV32ZVE32F-NEXT: .LBB57_6:
-; RV32ZVE32F-NEXT: lw t5, 44(a3)
-; RV32ZVE32F-NEXT: lw t6, 40(a3)
+; RV32ZVE32F-NEXT: lw t5, 40(a3)
+; RV32ZVE32F-NEXT: lw t6, 44(a3)
; RV32ZVE32F-NEXT: j .LBB57_13
; RV32ZVE32F-NEXT: .LBB57_7:
-; RV32ZVE32F-NEXT: lw a1, 4(a3)
-; RV32ZVE32F-NEXT: lw a2, 0(a3)
+; RV32ZVE32F-NEXT: lw a1, 0(a3)
+; RV32ZVE32F-NEXT: lw a2, 4(a3)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: beqz a4, .LBB57_2
; RV32ZVE32F-NEXT: .LBB57_8: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v10
-; RV32ZVE32F-NEXT: lw a4, 4(a5)
-; RV32ZVE32F-NEXT: lw a5, 0(a5)
+; RV32ZVE32F-NEXT: lw a4, 0(a5)
+; RV32ZVE32F-NEXT: lw a5, 4(a5)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: beqz a6, .LBB57_3
; RV32ZVE32F-NEXT: .LBB57_9: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a7, v10
-; RV32ZVE32F-NEXT: lw a6, 4(a7)
-; RV32ZVE32F-NEXT: lw a7, 0(a7)
+; RV32ZVE32F-NEXT: lw a6, 0(a7)
+; RV32ZVE32F-NEXT: lw a7, 4(a7)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: beqz t1, .LBB57_4
; RV32ZVE32F-NEXT: .LBB57_10: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s t2, v10
-; RV32ZVE32F-NEXT: lw t1, 4(t2)
-; RV32ZVE32F-NEXT: lw t2, 0(t2)
+; RV32ZVE32F-NEXT: lw t1, 0(t2)
+; RV32ZVE32F-NEXT: lw t2, 4(t2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: beqz t3, .LBB57_5
; RV32ZVE32F-NEXT: .LBB57_11: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s t4, v10
-; RV32ZVE32F-NEXT: lw t3, 4(t4)
-; RV32ZVE32F-NEXT: lw t4, 0(t4)
+; RV32ZVE32F-NEXT: lw t3, 0(t4)
+; RV32ZVE32F-NEXT: lw t4, 4(t4)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: beqz t5, .LBB57_6
; RV32ZVE32F-NEXT: .LBB57_12: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s t6, v10
-; RV32ZVE32F-NEXT: lw t5, 4(t6)
-; RV32ZVE32F-NEXT: lw t6, 0(t6)
+; RV32ZVE32F-NEXT: lw t5, 0(t6)
+; RV32ZVE32F-NEXT: lw t6, 4(t6)
; RV32ZVE32F-NEXT: .LBB57_13: # %else14
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
@@ -6727,42 +6727,42 @@ define <8 x i64> @mgather_baseidx_v8i64(ptr %base, <8 x i64> %idxs, <8 x i1> %m,
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s s1, v10
-; RV32ZVE32F-NEXT: lw s0, 4(s1)
-; RV32ZVE32F-NEXT: lw s1, 0(s1)
+; RV32ZVE32F-NEXT: lw s0, 0(s1)
+; RV32ZVE32F-NEXT: lw s1, 4(s1)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: bnez t0, .LBB57_17
; RV32ZVE32F-NEXT: .LBB57_15:
-; RV32ZVE32F-NEXT: lw t0, 60(a3)
-; RV32ZVE32F-NEXT: lw a3, 56(a3)
+; RV32ZVE32F-NEXT: lw t0, 56(a3)
+; RV32ZVE32F-NEXT: lw a3, 60(a3)
; RV32ZVE32F-NEXT: j .LBB57_18
; RV32ZVE32F-NEXT: .LBB57_16:
-; RV32ZVE32F-NEXT: lw s0, 52(a3)
-; RV32ZVE32F-NEXT: lw s1, 48(a3)
+; RV32ZVE32F-NEXT: lw s0, 48(a3)
+; RV32ZVE32F-NEXT: lw s1, 52(a3)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: beqz t0, .LBB57_15
; RV32ZVE32F-NEXT: .LBB57_17: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
-; RV32ZVE32F-NEXT: lw t0, 4(a3)
-; RV32ZVE32F-NEXT: lw a3, 0(a3)
+; RV32ZVE32F-NEXT: lw t0, 0(a3)
+; RV32ZVE32F-NEXT: lw a3, 4(a3)
; RV32ZVE32F-NEXT: .LBB57_18: # %else20
-; RV32ZVE32F-NEXT: sw a2, 0(a0)
-; RV32ZVE32F-NEXT: sw a1, 4(a0)
-; RV32ZVE32F-NEXT: sw a5, 8(a0)
-; RV32ZVE32F-NEXT: sw a4, 12(a0)
-; RV32ZVE32F-NEXT: sw a7, 16(a0)
-; RV32ZVE32F-NEXT: sw a6, 20(a0)
-; RV32ZVE32F-NEXT: sw t2, 24(a0)
-; RV32ZVE32F-NEXT: sw t1, 28(a0)
-; RV32ZVE32F-NEXT: sw t4, 32(a0)
-; RV32ZVE32F-NEXT: sw t3, 36(a0)
-; RV32ZVE32F-NEXT: sw t6, 40(a0)
-; RV32ZVE32F-NEXT: sw t5, 44(a0)
-; RV32ZVE32F-NEXT: sw s1, 48(a0)
-; RV32ZVE32F-NEXT: sw s0, 52(a0)
-; RV32ZVE32F-NEXT: sw a3, 56(a0)
-; RV32ZVE32F-NEXT: sw t0, 60(a0)
+; RV32ZVE32F-NEXT: sw a1, 0(a0)
+; RV32ZVE32F-NEXT: sw a2, 4(a0)
+; RV32ZVE32F-NEXT: sw a4, 8(a0)
+; RV32ZVE32F-NEXT: sw a5, 12(a0)
+; RV32ZVE32F-NEXT: sw a6, 16(a0)
+; RV32ZVE32F-NEXT: sw a7, 20(a0)
+; RV32ZVE32F-NEXT: sw t1, 24(a0)
+; RV32ZVE32F-NEXT: sw t2, 28(a0)
+; RV32ZVE32F-NEXT: sw t3, 32(a0)
+; RV32ZVE32F-NEXT: sw t4, 36(a0)
+; RV32ZVE32F-NEXT: sw t5, 40(a0)
+; RV32ZVE32F-NEXT: sw t6, 44(a0)
+; RV32ZVE32F-NEXT: sw s0, 48(a0)
+; RV32ZVE32F-NEXT: sw s1, 52(a0)
+; RV32ZVE32F-NEXT: sw t0, 56(a0)
+; RV32ZVE32F-NEXT: sw a3, 60(a0)
; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: addi sp, sp, 16
@@ -11676,10 +11676,10 @@ define <8 x double> @mgather_baseidx_v8f64(ptr %base, <8 x i64> %idxs, <8 x i1>
;
; RV32ZVE32F-LABEL: mgather_baseidx_v8f64:
; RV32ZVE32F: # %bb.0:
-; RV32ZVE32F-NEXT: lw a3, 56(a2)
-; RV32ZVE32F-NEXT: lw a4, 48(a2)
-; RV32ZVE32F-NEXT: lw a5, 40(a2)
-; RV32ZVE32F-NEXT: lw a6, 32(a2)
+; RV32ZVE32F-NEXT: lw a3, 32(a2)
+; RV32ZVE32F-NEXT: lw a4, 40(a2)
+; RV32ZVE32F-NEXT: lw a5, 48(a2)
+; RV32ZVE32F-NEXT: lw a6, 56(a2)
; RV32ZVE32F-NEXT: lw a7, 0(a2)
; RV32ZVE32F-NEXT: lw t0, 8(a2)
; RV32ZVE32F-NEXT: lw t1, 16(a2)
@@ -11689,10 +11689,10 @@ define <8 x double> @mgather_baseidx_v8f64(ptr %base, <8 x i64> %idxs, <8 x i1>
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t0
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t1
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a2
-; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a6
-; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a5
-; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a4
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a3
+; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a4
+; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a5
+; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a6
; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a2, v0
@@ -12729,54 +12729,54 @@ define <8 x i16> @mgather_strided_unaligned(ptr %base) {
; RV32-NEXT: slli a0, a0, 8
; RV32-NEXT: or a0, a0, a1
; RV32-NEXT: slli a3, a3, 8
-; RV32-NEXT: or a2, a3, a2
; RV32-NEXT: vslidedown.vi v10, v8, 2
; RV32-NEXT: vmv.x.s a1, v10
-; RV32-NEXT: lbu a3, 0(a1)
+; RV32-NEXT: lbu a4, 0(a1)
; RV32-NEXT: lbu a1, 1(a1)
; RV32-NEXT: vslidedown.vi v10, v8, 3
-; RV32-NEXT: vmv.x.s a4, v10
-; RV32-NEXT: lbu a5, 1(a4)
-; RV32-NEXT: lbu a4, 0(a4)
+; RV32-NEXT: vmv.x.s a5, v10
+; RV32-NEXT: lbu a6, 0(a5)
+; RV32-NEXT: lbu a5, 1(a5)
+; RV32-NEXT: or a2, a3, a2
; RV32-NEXT: slli a1, a1, 8
-; RV32-NEXT: or a1, a1, a3
+; RV32-NEXT: or a1, a1, a4
; RV32-NEXT: slli a5, a5, 8
-; RV32-NEXT: or a4, a5, a4
; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32-NEXT: vslidedown.vi v10, v8, 4
; RV32-NEXT: vmv.x.s a3, v10
-; RV32-NEXT: lbu a5, 0(a3)
+; RV32-NEXT: lbu a4, 0(a3)
; RV32-NEXT: lbu a3, 1(a3)
; RV32-NEXT: vslidedown.vi v10, v8, 5
-; RV32-NEXT: vmv.x.s a6, v10
-; RV32-NEXT: lbu a7, 1(a6)
-; RV32-NEXT: lbu a6, 0(a6)
+; RV32-NEXT: vmv.x.s a7, v10
+; RV32-NEXT: lbu t0, 0(a7)
+; RV32-NEXT: lbu a7, 1(a7)
+; RV32-NEXT: or a5, a5, a6
; RV32-NEXT: slli a3, a3, 8
-; RV32-NEXT: or a3, a3, a5
+; RV32-NEXT: or a3, a3, a4
; RV32-NEXT: slli a7, a7, 8
-; RV32-NEXT: or a5, a7, a6
; RV32-NEXT: vslidedown.vi v10, v8, 6
-; RV32-NEXT: vmv.x.s a6, v10
-; RV32-NEXT: lbu a7, 0(a6)
-; RV32-NEXT: lbu a6, 1(a6)
+; RV32-NEXT: vmv.x.s a4, v10
+; RV32-NEXT: lbu a6, 0(a4)
+; RV32-NEXT: lbu a4, 1(a4)
; RV32-NEXT: vslidedown.vi v8, v8, 7
-; RV32-NEXT: vmv.x.s t0, v8
-; RV32-NEXT: lbu t1, 1(t0)
-; RV32-NEXT: lbu t0, 0(t0)
-; RV32-NEXT: slli a6, a6, 8
-; RV32-NEXT: or a6, a6, a7
+; RV32-NEXT: vmv.x.s t1, v8
+; RV32-NEXT: lbu t2, 0(t1)
+; RV32-NEXT: lbu t1, 1(t1)
+; RV32-NEXT: or a7, a7, t0
+; RV32-NEXT: slli a4, a4, 8
+; RV32-NEXT: or a4, a4, a6
; RV32-NEXT: slli t1, t1, 8
-; RV32-NEXT: or a7, t1, t0
+; RV32-NEXT: or a6, t1, t2
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; RV32-NEXT: vmv.v.x v8, a0
; RV32-NEXT: vslide1down.vx v8, v8, a2
; RV32-NEXT: vslide1down.vx v8, v8, a1
-; RV32-NEXT: vslide1down.vx v9, v8, a4
+; RV32-NEXT: vslide1down.vx v9, v8, a5
; RV32-NEXT: vmv.v.x v8, a3
-; RV32-NEXT: vslide1down.vx v8, v8, a5
-; RV32-NEXT: vslide1down.vx v8, v8, a6
-; RV32-NEXT: vmv.v.i v0, 15
; RV32-NEXT: vslide1down.vx v8, v8, a7
+; RV32-NEXT: vslide1down.vx v8, v8, a4
+; RV32-NEXT: vmv.v.i v0, 15
+; RV32-NEXT: vslide1down.vx v8, v8, a6
; RV32-NEXT: vslidedown.vi v8, v9, 4, v0.t
; RV32-NEXT: ret
;
@@ -12805,50 +12805,50 @@ define <8 x i16> @mgather_strided_unaligned(ptr %base) {
; RV64V-NEXT: lbu a2, 0(a2)
; RV64V-NEXT: slli a0, a0, 8
; RV64V-NEXT: or a0, a0, a1
-; RV64V-NEXT: slli a1, a3, 8
-; RV64V-NEXT: or a1, a1, a2
+; RV64V-NEXT: slli a3, a3, 8
; RV64V-NEXT: vsetivli zero, 1, e64, m2, ta, ma
; RV64V-NEXT: vslidedown.vi v12, v8, 2
-; RV64V-NEXT: vmv.x.s a2, v12
-; RV64V-NEXT: lbu a3, 0(a2)
-; RV64V-NEXT: lbu a2, 1(a2)
+; RV64V-NEXT: vmv.x.s a1, v12
+; RV64V-NEXT: lbu a4, 0(a1)
+; RV64V-NEXT: lbu a1, 1(a1)
; RV64V-NEXT: vslidedown.vi v12, v8, 3
-; RV64V-NEXT: vmv.x.s a4, v12
-; RV64V-NEXT: lbu a5, 0(a4)
-; RV64V-NEXT: lbu a4, 1(a4)
-; RV64V-NEXT: mv a6, sp
+; RV64V-NEXT: vmv.x.s a5, v12
+; RV64V-NEXT: lbu a6, 0(a5)
+; RV64V-NEXT: lbu a5, 1(a5)
+; RV64V-NEXT: or a2, a3, a2
+; RV64V-NEXT: slli a1, a1, 8
+; RV64V-NEXT: or a1, a1, a4
+; RV64V-NEXT: slli a5, a5, 8
+; RV64V-NEXT: mv a3, sp
; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, mu
-; RV64V-NEXT: vse64.v v8, (a6)
-; RV64V-NEXT: ld a6, 32(sp)
-; RV64V-NEXT: slli a2, a2, 8
-; RV64V-NEXT: or a2, a2, a3
-; RV64V-NEXT: slli a4, a4, 8
-; RV64V-NEXT: lbu a3, 1(a6)
-; RV64V-NEXT: ld a7, 40(sp)
-; RV64V-NEXT: lbu a6, 0(a6)
-; RV64V-NEXT: or a4, a4, a5
-; RV64V-NEXT: slli a3, a3, 8
-; RV64V-NEXT: lbu a5, 1(a7)
-; RV64V-NEXT: or a3, a3, a6
-; RV64V-NEXT: lbu a6, 0(a7)
+; RV64V-NEXT: vse64.v v8, (a3)
+; RV64V-NEXT: ld a3, 32(sp)
+; RV64V-NEXT: ld a4, 40(sp)
; RV64V-NEXT: ld a7, 48(sp)
-; RV64V-NEXT: slli a5, a5, 8
; RV64V-NEXT: ld t0, 56(sp)
+; RV64V-NEXT: lbu t1, 0(a3)
+; RV64V-NEXT: lbu a3, 1(a3)
+; RV64V-NEXT: lbu t2, 0(a4)
+; RV64V-NEXT: lbu a4, 1(a4)
; RV64V-NEXT: or a5, a5, a6
-; RV64V-NEXT: lbu a6, 1(a7)
-; RV64V-NEXT: lbu a7, 0(a7)
-; RV64V-NEXT: lbu t1, 1(t0)
-; RV64V-NEXT: lbu t0, 0(t0)
-; RV64V-NEXT: slli a6, a6, 8
-; RV64V-NEXT: or a6, a6, a7
-; RV64V-NEXT: slli t1, t1, 8
-; RV64V-NEXT: or a7, t1, t0
+; RV64V-NEXT: slli a3, a3, 8
+; RV64V-NEXT: or a3, a3, t1
+; RV64V-NEXT: slli a4, a4, 8
+; RV64V-NEXT: lbu a6, 0(a7)
+; RV64V-NEXT: lbu a7, 1(a7)
+; RV64V-NEXT: lbu t1, 0(t0)
+; RV64V-NEXT: lbu t0, 1(t0)
+; RV64V-NEXT: or a4, a4, t2
+; RV64V-NEXT: slli a7, a7, 8
+; RV64V-NEXT: or a6, a7, a6
+; RV64V-NEXT: slli t0, t0, 8
+; RV64V-NEXT: or a7, t0, t1
; RV64V-NEXT: vmv.v.x v8, a0
-; RV64V-NEXT: vslide1down.vx v8, v8, a1
; RV64V-NEXT: vslide1down.vx v8, v8, a2
-; RV64V-NEXT: vslide1down.vx v9, v8, a4
+; RV64V-NEXT: vslide1down.vx v8, v8, a1
+; RV64V-NEXT: vslide1down.vx v9, v8, a5
; RV64V-NEXT: vmv.v.x v8, a3
-; RV64V-NEXT: vslide1down.vx v8, v8, a5
+; RV64V-NEXT: vslide1down.vx v8, v8, a4
; RV64V-NEXT: vslide1down.vx v8, v8, a6
; RV64V-NEXT: vmv.v.i v0, 15
; RV64V-NEXT: vslide1down.vx v8, v8, a7
@@ -12868,39 +12868,39 @@ define <8 x i16> @mgather_strided_unaligned(ptr %base) {
; RV64ZVE32F-NEXT: slli a1, a1, 8
; RV64ZVE32F-NEXT: or a1, a1, a2
; RV64ZVE32F-NEXT: slli a3, a3, 8
-; RV64ZVE32F-NEXT: or a3, a3, a4
-; RV64ZVE32F-NEXT: lbu a2, 9(a0)
-; RV64ZVE32F-NEXT: lbu a4, 8(a0)
-; RV64ZVE32F-NEXT: lbu a5, 13(a0)
+; RV64ZVE32F-NEXT: lbu a2, 8(a0)
+; RV64ZVE32F-NEXT: lbu a5, 9(a0)
; RV64ZVE32F-NEXT: lbu a6, 12(a0)
-; RV64ZVE32F-NEXT: slli a2, a2, 8
-; RV64ZVE32F-NEXT: or a2, a2, a4
+; RV64ZVE32F-NEXT: lbu a7, 13(a0)
+; RV64ZVE32F-NEXT: or a3, a3, a4
; RV64ZVE32F-NEXT: slli a5, a5, 8
-; RV64ZVE32F-NEXT: or a4, a5, a6
+; RV64ZVE32F-NEXT: or a2, a5, a2
+; RV64ZVE32F-NEXT: slli a7, a7, 8
+; RV64ZVE32F-NEXT: lbu a4, 16(a0)
; RV64ZVE32F-NEXT: lbu a5, 17(a0)
-; RV64ZVE32F-NEXT: lbu a6, 16(a0)
-; RV64ZVE32F-NEXT: lbu a7, 21(a0)
; RV64ZVE32F-NEXT: lbu t0, 20(a0)
+; RV64ZVE32F-NEXT: lbu t1, 21(a0)
+; RV64ZVE32F-NEXT: or a6, a7, a6
; RV64ZVE32F-NEXT: slli a5, a5, 8
-; RV64ZVE32F-NEXT: or a5, a5, a6
-; RV64ZVE32F-NEXT: slli a7, a7, 8
-; RV64ZVE32F-NEXT: or a6, a7, t0
+; RV64ZVE32F-NEXT: or a4, a5, a4
+; RV64ZVE32F-NEXT: slli t1, t1, 8
+; RV64ZVE32F-NEXT: lbu a5, 24(a0)
; RV64ZVE32F-NEXT: lbu a7, 25(a0)
-; RV64ZVE32F-NEXT: lbu t0, 24(a0)
-; RV64ZVE32F-NEXT: lbu t1, 29(a0)
-; RV64ZVE32F-NEXT: lbu a0, 28(a0)
+; RV64ZVE32F-NEXT: lbu t2, 28(a0)
+; RV64ZVE32F-NEXT: lbu a0, 29(a0)
+; RV64ZVE32F-NEXT: or t0, t1, t0
; RV64ZVE32F-NEXT: slli a7, a7, 8
-; RV64ZVE32F-NEXT: or a7, a7, t0
-; RV64ZVE32F-NEXT: slli t1, t1, 8
-; RV64ZVE32F-NEXT: or a0, t1, a0
+; RV64ZVE32F-NEXT: or a5, a7, a5
+; RV64ZVE32F-NEXT: slli a0, a0, 8
+; RV64ZVE32F-NEXT: or a0, a0, t2
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; RV64ZVE32F-NEXT: vmv.v.x v8, a1
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2
-; RV64ZVE32F-NEXT: vslide1down.vx v9, v8, a4
-; RV64ZVE32F-NEXT: vmv.v.x v8, a5
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a7
+; RV64ZVE32F-NEXT: vslide1down.vx v9, v8, a6
+; RV64ZVE32F-NEXT: vmv.v.x v8, a4
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, t0
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5
; RV64ZVE32F-NEXT: vmv.v.i v0, 15
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t
@@ -13018,24 +13018,24 @@ define <8 x i16> @mgather_reverse_unit_strided_2xSEW(ptr %base) {
;
; RV64ZVE32F-LABEL: mgather_reverse_unit_strided_2xSEW:
; RV64ZVE32F: # %bb.0:
-; RV64ZVE32F-NEXT: lh a1, 28(a0)
-; RV64ZVE32F-NEXT: lh a2, 30(a0)
-; RV64ZVE32F-NEXT: lh a3, 24(a0)
-; RV64ZVE32F-NEXT: lh a4, 26(a0)
-; RV64ZVE32F-NEXT: lh a5, 20(a0)
-; RV64ZVE32F-NEXT: lh a6, 22(a0)
-; RV64ZVE32F-NEXT: lh a7, 16(a0)
-; RV64ZVE32F-NEXT: lh a0, 18(a0)
+; RV64ZVE32F-NEXT: lh a1, 24(a0)
+; RV64ZVE32F-NEXT: lh a2, 26(a0)
+; RV64ZVE32F-NEXT: lh a3, 28(a0)
+; RV64ZVE32F-NEXT: lh a4, 30(a0)
+; RV64ZVE32F-NEXT: lh a5, 16(a0)
+; RV64ZVE32F-NEXT: lh a6, 18(a0)
+; RV64ZVE32F-NEXT: lh a7, 20(a0)
+; RV64ZVE32F-NEXT: lh a0, 22(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu
-; RV64ZVE32F-NEXT: vmv.v.x v8, a1
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3
-; RV64ZVE32F-NEXT: vslide1down.vx v9, v8, a4
-; RV64ZVE32F-NEXT: vmv.v.x v8, a5
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a7
-; RV64ZVE32F-NEXT: vmv.v.i v0, 15
+; RV64ZVE32F-NEXT: vmv.v.x v8, a3
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a4
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1
+; RV64ZVE32F-NEXT: vslide1down.vx v9, v8, a2
+; RV64ZVE32F-NEXT: vmv.v.x v8, a7
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5
+; RV64ZVE32F-NEXT: vmv.v.i v0, 15
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 14, i64 15, i64 12, i64 13, i64 10, i64 11, i64 8, i64 9>
@@ -13063,24 +13063,24 @@ define <8 x i16> @mgather_reverse_strided_2xSEW(ptr %base) {
;
; RV64ZVE32F-LABEL: mgather_reverse_strided_2xSEW:
; RV64ZVE32F: # %bb.0:
-; RV64ZVE32F-NEXT: lh a1, 28(a0)
-; RV64ZVE32F-NEXT: lh a2, 30(a0)
-; RV64ZVE32F-NEXT: lh a3, 20(a0)
-; RV64ZVE32F-NEXT: lh a4, 22(a0)
-; RV64ZVE32F-NEXT: lh a5, 12(a0)
-; RV64ZVE32F-NEXT: lh a6, 14(a0)
-; RV64ZVE32F-NEXT: lh a7, 4(a0)
-; RV64ZVE32F-NEXT: lh a0, 6(a0)
+; RV64ZVE32F-NEXT: lh a1, 20(a0)
+; RV64ZVE32F-NEXT: lh a2, 22(a0)
+; RV64ZVE32F-NEXT: lh a3, 28(a0)
+; RV64ZVE32F-NEXT: lh a4, 30(a0)
+; RV64ZVE32F-NEXT: lh a5, 4(a0)
+; RV64ZVE32F-NEXT: lh a6, 6(a0)
+; RV64ZVE32F-NEXT: lh a7, 12(a0)
+; RV64ZVE32F-NEXT: lh a0, 14(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu
-; RV64ZVE32F-NEXT: vmv.v.x v8, a1
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3
-; RV64ZVE32F-NEXT: vslide1down.vx v9, v8, a4
-; RV64ZVE32F-NEXT: vmv.v.x v8, a5
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a7
-; RV64ZVE32F-NEXT: vmv.v.i v0, 15
+; RV64ZVE32F-NEXT: vmv.v.x v8, a3
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a4
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1
+; RV64ZVE32F-NEXT: vslide1down.vx v9, v8, a2
+; RV64ZVE32F-NEXT: vmv.v.x v8, a7
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5
+; RV64ZVE32F-NEXT: vmv.v.i v0, 15
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 14, i64 15, i64 10, i64 11, i64 6, i64 7, i64 2, i64 3>
@@ -13107,21 +13107,21 @@ define <8 x i16> @mgather_gather_2xSEW(ptr %base) {
;
; RV64ZVE32F-LABEL: mgather_gather_2xSEW:
; RV64ZVE32F: # %bb.0:
-; RV64ZVE32F-NEXT: lh a1, 0(a0)
-; RV64ZVE32F-NEXT: lh a2, 2(a0)
+; RV64ZVE32F-NEXT: lh a1, 8(a0)
+; RV64ZVE32F-NEXT: lh a2, 10(a0)
; RV64ZVE32F-NEXT: lh a3, 16(a0)
; RV64ZVE32F-NEXT: lh a4, 18(a0)
-; RV64ZVE32F-NEXT: lh a5, 8(a0)
-; RV64ZVE32F-NEXT: lh a6, 10(a0)
+; RV64ZVE32F-NEXT: lh a5, 0(a0)
+; RV64ZVE32F-NEXT: lh a6, 2(a0)
; RV64ZVE32F-NEXT: lh a7, 4(a0)
; RV64ZVE32F-NEXT: lh a0, 6(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu
-; RV64ZVE32F-NEXT: vmv.v.x v8, a1
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3
-; RV64ZVE32F-NEXT: vslide1down.vx v9, v8, a4
; RV64ZVE32F-NEXT: vmv.v.x v8, a5
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3
+; RV64ZVE32F-NEXT: vslide1down.vx v9, v8, a4
+; RV64ZVE32F-NEXT: vmv.v.x v8, a1
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a7
; RV64ZVE32F-NEXT: vmv.v.i v0, 15
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0
@@ -13154,21 +13154,21 @@ define <8 x i16> @mgather_gather_2xSEW_unaligned(ptr %base) {
;
; RV64ZVE32F-LABEL: mgather_gather_2xSEW_unaligned:
; RV64ZVE32F: # %bb.0:
-; RV64ZVE32F-NEXT: lh a1, 0(a0)
-; RV64ZVE32F-NEXT: lh a2, 2(a0)
+; RV64ZVE32F-NEXT: lh a1, 8(a0)
+; RV64ZVE32F-NEXT: lh a2, 10(a0)
; RV64ZVE32F-NEXT: lh a3, 18(a0)
; RV64ZVE32F-NEXT: lh a4, 20(a0)
-; RV64ZVE32F-NEXT: lh a5, 8(a0)
-; RV64ZVE32F-NEXT: lh a6, 10(a0)
+; RV64ZVE32F-NEXT: lh a5, 0(a0)
+; RV64ZVE32F-NEXT: lh a6, 2(a0)
; RV64ZVE32F-NEXT: lh a7, 4(a0)
; RV64ZVE32F-NEXT: lh a0, 6(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu
-; RV64ZVE32F-NEXT: vmv.v.x v8, a1
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3
-; RV64ZVE32F-NEXT: vslide1down.vx v9, v8, a4
; RV64ZVE32F-NEXT: vmv.v.x v8, a5
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3
+; RV64ZVE32F-NEXT: vslide1down.vx v9, v8, a4
+; RV64ZVE32F-NEXT: vmv.v.x v8, a1
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a7
; RV64ZVE32F-NEXT: vmv.v.i v0, 15
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0
@@ -13202,23 +13202,23 @@ define <8 x i16> @mgather_gather_2xSEW_unaligned2(ptr %base) {
;
; RV64ZVE32F-LABEL: mgather_gather_2xSEW_unaligned2:
; RV64ZVE32F: # %bb.0:
-; RV64ZVE32F-NEXT: lh a1, 2(a0)
-; RV64ZVE32F-NEXT: lh a2, 4(a0)
-; RV64ZVE32F-NEXT: lh a3, 18(a0)
-; RV64ZVE32F-NEXT: lh a4, 20(a0)
-; RV64ZVE32F-NEXT: lh a5, 8(a0)
-; RV64ZVE32F-NEXT: lh a6, 10(a0)
-; RV64ZVE32F-NEXT: lh a0, 6(a0)
+; RV64ZVE32F-NEXT: lh a1, 10(a0)
+; RV64ZVE32F-NEXT: lh a2, 18(a0)
+; RV64ZVE32F-NEXT: lh a3, 20(a0)
+; RV64ZVE32F-NEXT: lh a4, 2(a0)
+; RV64ZVE32F-NEXT: lh a5, 4(a0)
+; RV64ZVE32F-NEXT: lh a6, 6(a0)
+; RV64ZVE32F-NEXT: lh a0, 8(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu
-; RV64ZVE32F-NEXT: vmv.v.x v8, a1
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3
-; RV64ZVE32F-NEXT: vslide1down.vx v9, v8, a4
-; RV64ZVE32F-NEXT: vmv.v.x v8, a5
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
+; RV64ZVE32F-NEXT: vmv.v.x v8, a4
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2
+; RV64ZVE32F-NEXT: vslide1down.vx v9, v8, a3
+; RV64ZVE32F-NEXT: vmv.v.x v8, a0
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5
; RV64ZVE32F-NEXT: vmv.v.i v0, 15
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 1, i32 2, i32 9, i32 10, i32 4, i32 5, i32 2, i32 3>
@@ -13406,18 +13406,18 @@ define <8 x i16> @mgather_shuffle_vrgather(ptr %base) {
; RV64ZVE32F-LABEL: mgather_shuffle_vrgather:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: lh a1, 0(a0)
-; RV64ZVE32F-NEXT: lh a2, 4(a0)
-; RV64ZVE32F-NEXT: lh a3, 6(a0)
-; RV64ZVE32F-NEXT: lh a4, 2(a0)
+; RV64ZVE32F-NEXT: lh a2, 2(a0)
+; RV64ZVE32F-NEXT: lh a3, 4(a0)
+; RV64ZVE32F-NEXT: lh a4, 6(a0)
; RV64ZVE32F-NEXT: lh a5, 8(a0)
; RV64ZVE32F-NEXT: lh a6, 10(a0)
; RV64ZVE32F-NEXT: lh a7, 12(a0)
; RV64ZVE32F-NEXT: lh a0, 14(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; RV64ZVE32F-NEXT: vmv.v.x v8, a1
-; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3
-; RV64ZVE32F-NEXT: vslide1down.vx v9, v8, a4
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a4
+; RV64ZVE32F-NEXT: vslide1down.vx v9, v8, a2
; RV64ZVE32F-NEXT: vmv.v.x v8, a5
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a7
@@ -13541,109 +13541,109 @@ define <32 x i64> @mgather_strided_split(ptr %base) {
; RV32ZVE32F-NEXT: vse32.v v8, (a1)
; RV32ZVE32F-NEXT: lw a1, 288(sp)
; RV32ZVE32F-NEXT: lw a2, 292(sp)
-; RV32ZVE32F-NEXT: lw a3, 0(a1)
-; RV32ZVE32F-NEXT: sw a3, 188(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT: lw a3, 296(sp)
+; RV32ZVE32F-NEXT: lw a4, 300(sp)
+; RV32ZVE32F-NEXT: lw a5, 0(a1)
+; RV32ZVE32F-NEXT: sw a5, 188(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: lw a1, 4(a1)
; RV32ZVE32F-NEXT: sw a1, 184(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT: lw a1, 296(sp)
-; RV32ZVE32F-NEXT: lw a3, 0(a2)
-; RV32ZVE32F-NEXT: sw a3, 180(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT: lw a2, 4(a2)
-; RV32ZVE32F-NEXT: sw a2, 176(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT: lw a2, 300(sp)
-; RV32ZVE32F-NEXT: lw a3, 0(a1)
-; RV32ZVE32F-NEXT: sw a3, 172(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT: lw a1, 4(a1)
+; RV32ZVE32F-NEXT: lw a1, 0(a2)
+; RV32ZVE32F-NEXT: sw a1, 180(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT: lw a1, 4(a2)
+; RV32ZVE32F-NEXT: sw a1, 176(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT: lw a1, 0(a3)
+; RV32ZVE32F-NEXT: sw a1, 172(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT: lw a1, 4(a3)
; RV32ZVE32F-NEXT: sw a1, 168(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT: lw a1, 0(a4)
+; RV32ZVE32F-NEXT: sw a1, 164(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT: lw a1, 4(a4)
+; RV32ZVE32F-NEXT: sw a1, 160(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: lw a1, 304(sp)
-; RV32ZVE32F-NEXT: lw a3, 0(a2)
-; RV32ZVE32F-NEXT: sw a3, 164(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT: lw a2, 4(a2)
-; RV32ZVE32F-NEXT: sw a2, 160(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: lw a2, 308(sp)
-; RV32ZVE32F-NEXT: lw a3, 0(a1)
-; RV32ZVE32F-NEXT: sw a3, 156(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT: lw a3, 312(sp)
+; RV32ZVE32F-NEXT: lw a4, 316(sp)
+; RV32ZVE32F-NEXT: lw a5, 0(a1)
+; RV32ZVE32F-NEXT: sw a5, 156(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: lw a1, 4(a1)
; RV32ZVE32F-NEXT: sw a1, 152(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT: lw a1, 312(sp)
-; RV32ZVE32F-NEXT: lw a3, 0(a2)
-; RV32ZVE32F-NEXT: sw a3, 148(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT: lw a2, 4(a2)
-; RV32ZVE32F-NEXT: sw a2, 144(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT: lw a2, 316(sp)
-; RV32ZVE32F-NEXT: lw a3, 0(a1)
-; RV32ZVE32F-NEXT: sw a3, 140(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT: lw a1, 4(a1)
+; RV32ZVE32F-NEXT: lw a1, 0(a2)
+; RV32ZVE32F-NEXT: sw a1, 148(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT: lw a1, 4(a2)
+; RV32ZVE32F-NEXT: sw a1, 144(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT: lw a1, 0(a3)
+; RV32ZVE32F-NEXT: sw a1, 140(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT: lw a1, 4(a3)
; RV32ZVE32F-NEXT: sw a1, 136(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT: lw a1, 0(a4)
+; RV32ZVE32F-NEXT: sw a1, 132(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT: lw a1, 4(a4)
+; RV32ZVE32F-NEXT: sw a1, 128(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: lw a1, 320(sp)
-; RV32ZVE32F-NEXT: lw a3, 0(a2)
-; RV32ZVE32F-NEXT: sw a3, 132(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT: lw a2, 4(a2)
-; RV32ZVE32F-NEXT: sw a2, 128(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: lw a2, 324(sp)
-; RV32ZVE32F-NEXT: lw a3, 0(a1)
-; RV32ZVE32F-NEXT: sw a3, 124(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT: lw a3, 328(sp)
+; RV32ZVE32F-NEXT: lw a4, 332(sp)
+; RV32ZVE32F-NEXT: lw a5, 0(a1)
+; RV32ZVE32F-NEXT: sw a5, 124(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: lw a1, 4(a1)
; RV32ZVE32F-NEXT: sw a1, 120(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT: lw a1, 328(sp)
-; RV32ZVE32F-NEXT: lw a3, 0(a2)
-; RV32ZVE32F-NEXT: sw a3, 116(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT: lw a2, 4(a2)
-; RV32ZVE32F-NEXT: sw a2, 112(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT: lw a2, 332(sp)
-; RV32ZVE32F-NEXT: lw a3, 0(a1)
-; RV32ZVE32F-NEXT: sw a3, 104(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT: lw ra, 4(a1)
-; RV32ZVE32F-NEXT: lw a1, 336(sp)
-; RV32ZVE32F-NEXT: lw s10, 0(a2)
-; RV32ZVE32F-NEXT: lw s8, 4(a2)
-; RV32ZVE32F-NEXT: lw a2, 340(sp)
-; RV32ZVE32F-NEXT: lw s6, 0(a1)
-; RV32ZVE32F-NEXT: lw s4, 4(a1)
-; RV32ZVE32F-NEXT: lw a4, 344(sp)
-; RV32ZVE32F-NEXT: lw s2, 0(a2)
-; RV32ZVE32F-NEXT: lw t5, 4(a2)
-; RV32ZVE32F-NEXT: lw a2, 348(sp)
-; RV32ZVE32F-NEXT: lw t3, 0(a4)
-; RV32ZVE32F-NEXT: lw t2, 4(a4)
-; RV32ZVE32F-NEXT: lw a4, 352(sp)
-; RV32ZVE32F-NEXT: lw t0, 0(a2)
-; RV32ZVE32F-NEXT: lw a7, 4(a2)
-; RV32ZVE32F-NEXT: lw a2, 356(sp)
-; RV32ZVE32F-NEXT: lw a6, 0(a4)
-; RV32ZVE32F-NEXT: lw a5, 4(a4)
-; RV32ZVE32F-NEXT: lw a4, 360(sp)
; RV32ZVE32F-NEXT: lw a1, 0(a2)
-; RV32ZVE32F-NEXT: sw a1, 108(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT: sw a1, 116(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: lw a1, 4(a2)
-; RV32ZVE32F-NEXT: sw a1, 100(sp) # 4-byte Folded Spill
-; RV32ZVE32F-NEXT: lw a2, 364(sp)
-; RV32ZVE32F-NEXT: lw s11, 0(a4)
-; RV32ZVE32F-NEXT: lw s9, 4(a4)
-; RV32ZVE32F-NEXT: lw a1, 368(sp)
-; RV32ZVE32F-NEXT: lw s7, 0(a2)
-; RV32ZVE32F-NEXT: lw s5, 4(a2)
+; RV32ZVE32F-NEXT: sw a1, 112(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT: lw a1, 0(a3)
+; RV32ZVE32F-NEXT: sw a1, 104(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT: lw ra, 4(a3)
+; RV32ZVE32F-NEXT: lw s10, 0(a4)
+; RV32ZVE32F-NEXT: lw s8, 4(a4)
+; RV32ZVE32F-NEXT: lw a2, 336(sp)
+; RV32ZVE32F-NEXT: lw a4, 340(sp)
+; RV32ZVE32F-NEXT: lw a6, 344(sp)
+; RV32ZVE32F-NEXT: lw t0, 348(sp)
+; RV32ZVE32F-NEXT: lw s5, 0(a2)
+; RV32ZVE32F-NEXT: lw s4, 4(a2)
+; RV32ZVE32F-NEXT: lw t6, 0(a4)
+; RV32ZVE32F-NEXT: lw t5, 4(a4)
+; RV32ZVE32F-NEXT: lw t3, 0(a6)
+; RV32ZVE32F-NEXT: lw t2, 4(a6)
+; RV32ZVE32F-NEXT: lw t1, 0(t0)
+; RV32ZVE32F-NEXT: lw a7, 4(t0)
+; RV32ZVE32F-NEXT: lw a6, 352(sp)
+; RV32ZVE32F-NEXT: lw t0, 356(sp)
+; RV32ZVE32F-NEXT: lw t4, 360(sp)
+; RV32ZVE32F-NEXT: lw a1, 364(sp)
+; RV32ZVE32F-NEXT: lw a5, 0(a6)
+; RV32ZVE32F-NEXT: lw a6, 4(a6)
+; RV32ZVE32F-NEXT: lw a2, 0(t0)
+; RV32ZVE32F-NEXT: sw a2, 108(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT: lw a2, 4(t0)
+; RV32ZVE32F-NEXT: sw a2, 100(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT: lw s11, 0(t4)
+; RV32ZVE32F-NEXT: lw s9, 4(t4)
+; RV32ZVE32F-NEXT: lw s7, 0(a1)
+; RV32ZVE32F-NEXT: lw s6, 4(a1)
+; RV32ZVE32F-NEXT: lw a4, 368(sp)
; RV32ZVE32F-NEXT: lw a3, 372(sp)
-; RV32ZVE32F-NEXT: lw s3, 0(a1)
-; RV32ZVE32F-NEXT: lw t6, 4(a1)
; RV32ZVE32F-NEXT: lw a2, 376(sp)
-; RV32ZVE32F-NEXT: lw t4, 0(a3)
; RV32ZVE32F-NEXT: lw a1, 380(sp)
-; RV32ZVE32F-NEXT: lw t1, 4(a3)
+; RV32ZVE32F-NEXT: lw s3, 0(a4)
+; RV32ZVE32F-NEXT: lw s2, 4(a4)
+; RV32ZVE32F-NEXT: lw t4, 0(a3)
+; RV32ZVE32F-NEXT: lw t0, 4(a3)
; RV32ZVE32F-NEXT: lw a4, 0(a2)
; RV32ZVE32F-NEXT: lw a3, 4(a2)
; RV32ZVE32F-NEXT: lw a2, 0(a1)
; RV32ZVE32F-NEXT: lw a1, 4(a1)
-; RV32ZVE32F-NEXT: sw a5, 196(a0)
-; RV32ZVE32F-NEXT: sw a6, 192(a0)
+; RV32ZVE32F-NEXT: sw a6, 196(a0)
+; RV32ZVE32F-NEXT: sw a5, 192(a0)
; RV32ZVE32F-NEXT: sw a7, 188(a0)
-; RV32ZVE32F-NEXT: sw t0, 184(a0)
+; RV32ZVE32F-NEXT: sw t1, 184(a0)
; RV32ZVE32F-NEXT: sw t2, 180(a0)
; RV32ZVE32F-NEXT: sw t3, 176(a0)
; RV32ZVE32F-NEXT: sw t5, 172(a0)
-; RV32ZVE32F-NEXT: sw s2, 168(a0)
+; RV32ZVE32F-NEXT: sw t6, 168(a0)
; RV32ZVE32F-NEXT: sw s4, 164(a0)
-; RV32ZVE32F-NEXT: sw s6, 160(a0)
+; RV32ZVE32F-NEXT: sw s5, 160(a0)
; RV32ZVE32F-NEXT: sw s8, 156(a0)
; RV32ZVE32F-NEXT: sw s10, 152(a0)
; RV32ZVE32F-NEXT: sw ra, 148(a0)
@@ -13697,11 +13697,11 @@ define <32 x i64> @mgather_strided_split(ptr %base) {
; RV32ZVE32F-NEXT: sw a2, 248(a0)
; RV32ZVE32F-NEXT: sw a3, 244(a0)
; RV32ZVE32F-NEXT: sw a4, 240(a0)
-; RV32ZVE32F-NEXT: sw t1, 236(a0)
+; RV32ZVE32F-NEXT: sw t0, 236(a0)
; RV32ZVE32F-NEXT: sw t4, 232(a0)
-; RV32ZVE32F-NEXT: sw t6, 228(a0)
+; RV32ZVE32F-NEXT: sw s2, 228(a0)
; RV32ZVE32F-NEXT: sw s3, 224(a0)
-; RV32ZVE32F-NEXT: sw s5, 220(a0)
+; RV32ZVE32F-NEXT: sw s6, 220(a0)
; RV32ZVE32F-NEXT: sw s7, 216(a0)
; RV32ZVE32F-NEXT: sw s9, 212(a0)
; RV32ZVE32F-NEXT: sw s11, 208(a0)
@@ -13812,22 +13812,22 @@ define <32 x i64> @mgather_strided_split(ptr %base) {
; RV64ZVE32F-NEXT: ld s8, 336(a1)
; RV64ZVE32F-NEXT: ld s9, 352(a1)
; RV64ZVE32F-NEXT: ld s10, 368(a1)
-; RV64ZVE32F-NEXT: ld s11, 384(a1)
-; RV64ZVE32F-NEXT: ld ra, 400(a1)
-; RV64ZVE32F-NEXT: ld a6, 416(a1)
-; RV64ZVE32F-NEXT: ld a5, 432(a1)
-; RV64ZVE32F-NEXT: ld a2, 496(a1)
+; RV64ZVE32F-NEXT: ld s11, 448(a1)
+; RV64ZVE32F-NEXT: ld ra, 464(a1)
; RV64ZVE32F-NEXT: ld a3, 480(a1)
-; RV64ZVE32F-NEXT: ld a4, 464(a1)
-; RV64ZVE32F-NEXT: ld a1, 448(a1)
+; RV64ZVE32F-NEXT: ld a2, 496(a1)
+; RV64ZVE32F-NEXT: ld a6, 384(a1)
+; RV64ZVE32F-NEXT: ld a5, 400(a1)
+; RV64ZVE32F-NEXT: ld a4, 416(a1)
+; RV64ZVE32F-NEXT: ld a1, 432(a1)
; RV64ZVE32F-NEXT: sd a2, 248(a0)
; RV64ZVE32F-NEXT: sd a3, 240(a0)
-; RV64ZVE32F-NEXT: sd a4, 232(a0)
-; RV64ZVE32F-NEXT: sd a1, 224(a0)
-; RV64ZVE32F-NEXT: sd a5, 216(a0)
-; RV64ZVE32F-NEXT: sd a6, 208(a0)
-; RV64ZVE32F-NEXT: sd ra, 200(a0)
-; RV64ZVE32F-NEXT: sd s11, 192(a0)
+; RV64ZVE32F-NEXT: sd ra, 232(a0)
+; RV64ZVE32F-NEXT: sd s11, 224(a0)
+; RV64ZVE32F-NEXT: sd a1, 216(a0)
+; RV64ZVE32F-NEXT: sd a4, 208(a0)
+; RV64ZVE32F-NEXT: sd a5, 200(a0)
+; RV64ZVE32F-NEXT: sd a6, 192(a0)
; RV64ZVE32F-NEXT: sd s10, 184(a0)
; RV64ZVE32F-NEXT: sd s9, 176(a0)
; RV64ZVE32F-NEXT: sd s8, 168(a0)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
index fe037a5af57c06..bc7758717c1c15 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
@@ -275,9 +275,9 @@ define void @mscatter_v4i8(<4 x i8> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
;
; RV64ZVE32F-LABEL: mscatter_v4i8:
; RV64ZVE32F: # %bb.0:
-; RV64ZVE32F-NEXT: ld a1, 24(a0)
-; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: ld a4, 8(a0)
+; RV64ZVE32F-NEXT: ld a2, 16(a0)
+; RV64ZVE32F-NEXT: ld a1, 24(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v0
; RV64ZVE32F-NEXT: andi a5, a3, 1
@@ -336,17 +336,17 @@ define void @mscatter_truemask_v4i8(<4 x i8> %val, <4 x ptr> %ptrs) {
; RV64ZVE32F-LABEL: mscatter_truemask_v4i8:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ld a1, 0(a0)
-; RV64ZVE32F-NEXT: ld a2, 24(a0)
-; RV64ZVE32F-NEXT: ld a3, 8(a0)
-; RV64ZVE32F-NEXT: ld a0, 16(a0)
+; RV64ZVE32F-NEXT: ld a2, 8(a0)
+; RV64ZVE32F-NEXT: ld a3, 16(a0)
+; RV64ZVE32F-NEXT: ld a0, 24(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vse8.v v8, (a1)
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
-; RV64ZVE32F-NEXT: vse8.v v9, (a3)
+; RV64ZVE32F-NEXT: vse8.v v9, (a2)
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
-; RV64ZVE32F-NEXT: vse8.v v9, (a0)
+; RV64ZVE32F-NEXT: vse8.v v9, (a3)
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3
-; RV64ZVE32F-NEXT: vse8.v v8, (a2)
+; RV64ZVE32F-NEXT: vse8.v v8, (a0)
; RV64ZVE32F-NEXT: ret
call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 1))
ret void
@@ -377,37 +377,37 @@ define void @mscatter_v8i8(<8 x i8> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
;
; RV64ZVE32F-LABEL: mscatter_v8i8:
; RV64ZVE32F: # %bb.0:
-; RV64ZVE32F-NEXT: ld a1, 56(a0)
+; RV64ZVE32F-NEXT: ld a3, 40(a0)
; RV64ZVE32F-NEXT: ld a2, 48(a0)
-; RV64ZVE32F-NEXT: ld a4, 40(a0)
-; RV64ZVE32F-NEXT: ld a5, 32(a0)
-; RV64ZVE32F-NEXT: ld a6, 24(a0)
-; RV64ZVE32F-NEXT: ld a7, 16(a0)
+; RV64ZVE32F-NEXT: ld a1, 56(a0)
; RV64ZVE32F-NEXT: ld t0, 8(a0)
+; RV64ZVE32F-NEXT: ld a7, 16(a0)
+; RV64ZVE32F-NEXT: ld a6, 24(a0)
+; RV64ZVE32F-NEXT: ld a5, 32(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a3, v0
-; RV64ZVE32F-NEXT: andi t1, a3, 1
+; RV64ZVE32F-NEXT: vmv.x.s a4, v0
+; RV64ZVE32F-NEXT: andi t1, a4, 1
; RV64ZVE32F-NEXT: bnez t1, .LBB8_9
; RV64ZVE32F-NEXT: # %bb.1: # %else
-; RV64ZVE32F-NEXT: andi a0, a3, 2
+; RV64ZVE32F-NEXT: andi a0, a4, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB8_10
; RV64ZVE32F-NEXT: .LBB8_2: # %else2
-; RV64ZVE32F-NEXT: andi a0, a3, 4
+; RV64ZVE32F-NEXT: andi a0, a4, 4
; RV64ZVE32F-NEXT: bnez a0, .LBB8_11
; RV64ZVE32F-NEXT: .LBB8_3: # %else4
-; RV64ZVE32F-NEXT: andi a0, a3, 8
+; RV64ZVE32F-NEXT: andi a0, a4, 8
; RV64ZVE32F-NEXT: bnez a0, .LBB8_12
; RV64ZVE32F-NEXT: .LBB8_4: # %else6
-; RV64ZVE32F-NEXT: andi a0, a3, 16
+; RV64ZVE32F-NEXT: andi a0, a4, 16
; RV64ZVE32F-NEXT: bnez a0, .LBB8_13
; RV64ZVE32F-NEXT: .LBB8_5: # %else8
-; RV64ZVE32F-NEXT: andi a0, a3, 32
+; RV64ZVE32F-NEXT: andi a0, a4, 32
; RV64ZVE32F-NEXT: bnez a0, .LBB8_14
; RV64ZVE32F-NEXT: .LBB8_6: # %else10
-; RV64ZVE32F-NEXT: andi a0, a3, 64
+; RV64ZVE32F-NEXT: andi a0, a4, 64
; RV64ZVE32F-NEXT: bnez a0, .LBB8_15
; RV64ZVE32F-NEXT: .LBB8_7: # %else12
-; RV64ZVE32F-NEXT: andi a0, a3, -128
+; RV64ZVE32F-NEXT: andi a0, a4, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB8_16
; RV64ZVE32F-NEXT: .LBB8_8: # %else14
; RV64ZVE32F-NEXT: ret
@@ -415,43 +415,43 @@ define void @mscatter_v8i8(<8 x i8> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vse8.v v8, (a0)
-; RV64ZVE32F-NEXT: andi a0, a3, 2
+; RV64ZVE32F-NEXT: andi a0, a4, 2
; RV64ZVE32F-NEXT: beqz a0, .LBB8_2
; RV64ZVE32F-NEXT: .LBB8_10: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vse8.v v9, (t0)
-; RV64ZVE32F-NEXT: andi a0, a3, 4
+; RV64ZVE32F-NEXT: andi a0, a4, 4
; RV64ZVE32F-NEXT: beqz a0, .LBB8_3
; RV64ZVE32F-NEXT: .LBB8_11: # %cond.store3
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV64ZVE32F-NEXT: vse8.v v9, (a7)
-; RV64ZVE32F-NEXT: andi a0, a3, 8
+; RV64ZVE32F-NEXT: andi a0, a4, 8
; RV64ZVE32F-NEXT: beqz a0, .LBB8_4
; RV64ZVE32F-NEXT: .LBB8_12: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
; RV64ZVE32F-NEXT: vse8.v v9, (a6)
-; RV64ZVE32F-NEXT: andi a0, a3, 16
+; RV64ZVE32F-NEXT: andi a0, a4, 16
; RV64ZVE32F-NEXT: beqz a0, .LBB8_5
; RV64ZVE32F-NEXT: .LBB8_13: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vse8.v v9, (a5)
-; RV64ZVE32F-NEXT: andi a0, a3, 32
+; RV64ZVE32F-NEXT: andi a0, a4, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB8_6
; RV64ZVE32F-NEXT: .LBB8_14: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
-; RV64ZVE32F-NEXT: vse8.v v9, (a4)
-; RV64ZVE32F-NEXT: andi a0, a3, 64
+; RV64ZVE32F-NEXT: vse8.v v9, (a3)
+; RV64ZVE32F-NEXT: andi a0, a4, 64
; RV64ZVE32F-NEXT: beqz a0, .LBB8_7
; RV64ZVE32F-NEXT: .LBB8_15: # %cond.store11
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 6
; RV64ZVE32F-NEXT: vse8.v v9, (a2)
-; RV64ZVE32F-NEXT: andi a0, a3, -128
+; RV64ZVE32F-NEXT: andi a0, a4, -128
; RV64ZVE32F-NEXT: beqz a0, .LBB8_8
; RV64ZVE32F-NEXT: .LBB8_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
@@ -793,9 +793,9 @@ define void @mscatter_v4i16(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
;
; RV64ZVE32F-LABEL: mscatter_v4i16:
; RV64ZVE32F: # %bb.0:
-; RV64ZVE32F-NEXT: ld a1, 24(a0)
-; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: ld a4, 8(a0)
+; RV64ZVE32F-NEXT: ld a2, 16(a0)
+; RV64ZVE32F-NEXT: ld a1, 24(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v0
; RV64ZVE32F-NEXT: andi a5, a3, 1
@@ -854,17 +854,17 @@ define void @mscatter_truemask_v4i16(<4 x i16> %val, <4 x ptr> %ptrs) {
; RV64ZVE32F-LABEL: mscatter_truemask_v4i16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ld a1, 0(a0)
-; RV64ZVE32F-NEXT: ld a2, 24(a0)
-; RV64ZVE32F-NEXT: ld a3, 8(a0)
-; RV64ZVE32F-NEXT: ld a0, 16(a0)
+; RV64ZVE32F-NEXT: ld a2, 8(a0)
+; RV64ZVE32F-NEXT: ld a3, 16(a0)
+; RV64ZVE32F-NEXT: ld a0, 24(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vse16.v v8, (a1)
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
-; RV64ZVE32F-NEXT: vse16.v v9, (a3)
+; RV64ZVE32F-NEXT: vse16.v v9, (a2)
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
-; RV64ZVE32F-NEXT: vse16.v v9, (a0)
+; RV64ZVE32F-NEXT: vse16.v v9, (a3)
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3
-; RV64ZVE32F-NEXT: vse16.v v8, (a2)
+; RV64ZVE32F-NEXT: vse16.v v8, (a0)
; RV64ZVE32F-NEXT: ret
call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> splat (i1 1))
ret void
@@ -895,37 +895,37 @@ define void @mscatter_v8i16(<8 x i16> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
;
; RV64ZVE32F-LABEL: mscatter_v8i16:
; RV64ZVE32F: # %bb.0:
-; RV64ZVE32F-NEXT: ld a1, 56(a0)
+; RV64ZVE32F-NEXT: ld a3, 40(a0)
; RV64ZVE32F-NEXT: ld a2, 48(a0)
-; RV64ZVE32F-NEXT: ld a4, 40(a0)
-; RV64ZVE32F-NEXT: ld a5, 32(a0)
-; RV64ZVE32F-NEXT: ld a6, 24(a0)
-; RV64ZVE32F-NEXT: ld a7, 16(a0)
+; RV64ZVE32F-NEXT: ld a1, 56(a0)
; RV64ZVE32F-NEXT: ld t0, 8(a0)
+; RV64ZVE32F-NEXT: ld a7, 16(a0)
+; RV64ZVE32F-NEXT: ld a6, 24(a0)
+; RV64ZVE32F-NEXT: ld a5, 32(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a3, v0
-; RV64ZVE32F-NEXT: andi t1, a3, 1
+; RV64ZVE32F-NEXT: vmv.x.s a4, v0
+; RV64ZVE32F-NEXT: andi t1, a4, 1
; RV64ZVE32F-NEXT: bnez t1, .LBB17_9
; RV64ZVE32F-NEXT: # %bb.1: # %else
-; RV64ZVE32F-NEXT: andi a0, a3, 2
+; RV64ZVE32F-NEXT: andi a0, a4, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB17_10
; RV64ZVE32F-NEXT: .LBB17_2: # %else2
-; RV64ZVE32F-NEXT: andi a0, a3, 4
+; RV64ZVE32F-NEXT: andi a0, a4, 4
; RV64ZVE32F-NEXT: bnez a0, .LBB17_11
; RV64ZVE32F-NEXT: .LBB17_3: # %else4
-; RV64ZVE32F-NEXT: andi a0, a3, 8
+; RV64ZVE32F-NEXT: andi a0, a4, 8
; RV64ZVE32F-NEXT: bnez a0, .LBB17_12
; RV64ZVE32F-NEXT: .LBB17_4: # %else6
-; RV64ZVE32F-NEXT: andi a0, a3, 16
+; RV64ZVE32F-NEXT: andi a0, a4, 16
; RV64ZVE32F-NEXT: bnez a0, .LBB17_13
; RV64ZVE32F-NEXT: .LBB17_5: # %else8
-; RV64ZVE32F-NEXT: andi a0, a3, 32
+; RV64ZVE32F-NEXT: andi a0, a4, 32
; RV64ZVE32F-NEXT: bnez a0, .LBB17_14
; RV64ZVE32F-NEXT: .LBB17_6: # %else10
-; RV64ZVE32F-NEXT: andi a0, a3, 64
+; RV64ZVE32F-NEXT: andi a0, a4, 64
; RV64ZVE32F-NEXT: bnez a0, .LBB17_15
; RV64ZVE32F-NEXT: .LBB17_7: # %else12
-; RV64ZVE32F-NEXT: andi a0, a3, -128
+; RV64ZVE32F-NEXT: andi a0, a4, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB17_16
; RV64ZVE32F-NEXT: .LBB17_8: # %else14
; RV64ZVE32F-NEXT: ret
@@ -933,43 +933,43 @@ define void @mscatter_v8i16(<8 x i16> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vse16.v v8, (a0)
-; RV64ZVE32F-NEXT: andi a0, a3, 2
+; RV64ZVE32F-NEXT: andi a0, a4, 2
; RV64ZVE32F-NEXT: beqz a0, .LBB17_2
; RV64ZVE32F-NEXT: .LBB17_10: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vse16.v v9, (t0)
-; RV64ZVE32F-NEXT: andi a0, a3, 4
+; RV64ZVE32F-NEXT: andi a0, a4, 4
; RV64ZVE32F-NEXT: beqz a0, .LBB17_3
; RV64ZVE32F-NEXT: .LBB17_11: # %cond.store3
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV64ZVE32F-NEXT: vse16.v v9, (a7)
-; RV64ZVE32F-NEXT: andi a0, a3, 8
+; RV64ZVE32F-NEXT: andi a0, a4, 8
; RV64ZVE32F-NEXT: beqz a0, .LBB17_4
; RV64ZVE32F-NEXT: .LBB17_12: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
; RV64ZVE32F-NEXT: vse16.v v9, (a6)
-; RV64ZVE32F-NEXT: andi a0, a3, 16
+; RV64ZVE32F-NEXT: andi a0, a4, 16
; RV64ZVE32F-NEXT: beqz a0, .LBB17_5
; RV64ZVE32F-NEXT: .LBB17_13: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vse16.v v9, (a5)
-; RV64ZVE32F-NEXT: andi a0, a3, 32
+; RV64ZVE32F-NEXT: andi a0, a4, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB17_6
; RV64ZVE32F-NEXT: .LBB17_14: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
-; RV64ZVE32F-NEXT: vse16.v v9, (a4)
-; RV64ZVE32F-NEXT: andi a0, a3, 64
+; RV64ZVE32F-NEXT: vse16.v v9, (a3)
+; RV64ZVE32F-NEXT: andi a0, a4, 64
; RV64ZVE32F-NEXT: beqz a0, .LBB17_7
; RV64ZVE32F-NEXT: .LBB17_15: # %cond.store11
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 6
; RV64ZVE32F-NEXT: vse16.v v9, (a2)
-; RV64ZVE32F-NEXT: andi a0, a3, -128
+; RV64ZVE32F-NEXT: andi a0, a4, -128
; RV64ZVE32F-NEXT: beqz a0, .LBB17_8
; RV64ZVE32F-NEXT: .LBB17_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
@@ -1670,9 +1670,9 @@ define void @mscatter_v4i32(<4 x i32> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
;
; RV64ZVE32F-LABEL: mscatter_v4i32:
; RV64ZVE32F: # %bb.0:
-; RV64ZVE32F-NEXT: ld a1, 24(a0)
-; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: ld a4, 8(a0)
+; RV64ZVE32F-NEXT: ld a2, 16(a0)
+; RV64ZVE32F-NEXT: ld a1, 24(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v0
; RV64ZVE32F-NEXT: andi a5, a3, 1
@@ -1731,17 +1731,17 @@ define void @mscatter_truemask_v4i32(<4 x i32> %val, <4 x ptr> %ptrs) {
; RV64ZVE32F-LABEL: mscatter_truemask_v4i32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ld a1, 0(a0)
-; RV64ZVE32F-NEXT: ld a2, 24(a0)
-; RV64ZVE32F-NEXT: ld a3, 8(a0)
-; RV64ZVE32F-NEXT: ld a0, 16(a0)
+; RV64ZVE32F-NEXT: ld a2, 8(a0)
+; RV64ZVE32F-NEXT: ld a3, 16(a0)
+; RV64ZVE32F-NEXT: ld a0, 24(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a1)
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
-; RV64ZVE32F-NEXT: vse32.v v9, (a3)
+; RV64ZVE32F-NEXT: vse32.v v9, (a2)
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
-; RV64ZVE32F-NEXT: vse32.v v9, (a0)
+; RV64ZVE32F-NEXT: vse32.v v9, (a3)
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3
-; RV64ZVE32F-NEXT: vse32.v v8, (a2)
+; RV64ZVE32F-NEXT: vse32.v v8, (a0)
; RV64ZVE32F-NEXT: ret
call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 1))
ret void
@@ -1772,37 +1772,37 @@ define void @mscatter_v8i32(<8 x i32> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
;
; RV64ZVE32F-LABEL: mscatter_v8i32:
; RV64ZVE32F: # %bb.0:
-; RV64ZVE32F-NEXT: ld a1, 56(a0)
+; RV64ZVE32F-NEXT: ld a3, 40(a0)
; RV64ZVE32F-NEXT: ld a2, 48(a0)
-; RV64ZVE32F-NEXT: ld a4, 40(a0)
-; RV64ZVE32F-NEXT: ld a5, 32(a0)
-; RV64ZVE32F-NEXT: ld a6, 24(a0)
-; RV64ZVE32F-NEXT: ld a7, 16(a0)
+; RV64ZVE32F-NEXT: ld a1, 56(a0)
; RV64ZVE32F-NEXT: ld t0, 8(a0)
+; RV64ZVE32F-NEXT: ld a7, 16(a0)
+; RV64ZVE32F-NEXT: ld a6, 24(a0)
+; RV64ZVE32F-NEXT: ld a5, 32(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a3, v0
-; RV64ZVE32F-NEXT: andi t1, a3, 1
+; RV64ZVE32F-NEXT: vmv.x.s a4, v0
+; RV64ZVE32F-NEXT: andi t1, a4, 1
; RV64ZVE32F-NEXT: bnez t1, .LBB28_9
; RV64ZVE32F-NEXT: # %bb.1: # %else
-; RV64ZVE32F-NEXT: andi a0, a3, 2
+; RV64ZVE32F-NEXT: andi a0, a4, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB28_10
; RV64ZVE32F-NEXT: .LBB28_2: # %else2
-; RV64ZVE32F-NEXT: andi a0, a3, 4
+; RV64ZVE32F-NEXT: andi a0, a4, 4
; RV64ZVE32F-NEXT: bnez a0, .LBB28_11
; RV64ZVE32F-NEXT: .LBB28_3: # %else4
-; RV64ZVE32F-NEXT: andi a0, a3, 8
+; RV64ZVE32F-NEXT: andi a0, a4, 8
; RV64ZVE32F-NEXT: bnez a0, .LBB28_12
; RV64ZVE32F-NEXT: .LBB28_4: # %else6
-; RV64ZVE32F-NEXT: andi a0, a3, 16
+; RV64ZVE32F-NEXT: andi a0, a4, 16
; RV64ZVE32F-NEXT: bnez a0, .LBB28_13
; RV64ZVE32F-NEXT: .LBB28_5: # %else8
-; RV64ZVE32F-NEXT: andi a0, a3, 32
+; RV64ZVE32F-NEXT: andi a0, a4, 32
; RV64ZVE32F-NEXT: bnez a0, .LBB28_14
; RV64ZVE32F-NEXT: .LBB28_6: # %else10
-; RV64ZVE32F-NEXT: andi a0, a3, 64
+; RV64ZVE32F-NEXT: andi a0, a4, 64
; RV64ZVE32F-NEXT: bnez a0, .LBB28_15
; RV64ZVE32F-NEXT: .LBB28_7: # %else12
-; RV64ZVE32F-NEXT: andi a0, a3, -128
+; RV64ZVE32F-NEXT: andi a0, a4, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB28_16
; RV64ZVE32F-NEXT: .LBB28_8: # %else14
; RV64ZVE32F-NEXT: ret
@@ -1810,46 +1810,46 @@ define void @mscatter_v8i32(<8 x i32> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a0)
-; RV64ZVE32F-NEXT: andi a0, a3, 2
+; RV64ZVE32F-NEXT: andi a0, a4, 2
; RV64ZVE32F-NEXT: beqz a0, .LBB28_2
; RV64ZVE32F-NEXT: .LBB28_10: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vse32.v v10, (t0)
-; RV64ZVE32F-NEXT: andi a0, a3, 4
+; RV64ZVE32F-NEXT: andi a0, a4, 4
; RV64ZVE32F-NEXT: beqz a0, .LBB28_3
; RV64ZVE32F-NEXT: .LBB28_11: # %cond.store3
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV64ZVE32F-NEXT: vse32.v v10, (a7)
-; RV64ZVE32F-NEXT: andi a0, a3, 8
+; RV64ZVE32F-NEXT: andi a0, a4, 8
; RV64ZVE32F-NEXT: beqz a0, .LBB28_4
; RV64ZVE32F-NEXT: .LBB28_12: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV64ZVE32F-NEXT: vse32.v v10, (a6)
-; RV64ZVE32F-NEXT: andi a0, a3, 16
+; RV64ZVE32F-NEXT: andi a0, a4, 16
; RV64ZVE32F-NEXT: beqz a0, .LBB28_5
; RV64ZVE32F-NEXT: .LBB28_13: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v10, (a5)
-; RV64ZVE32F-NEXT: andi a0, a3, 32
+; RV64ZVE32F-NEXT: andi a0, a4, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB28_6
; RV64ZVE32F-NEXT: .LBB28_14: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vse32.v v10, (a4)
-; RV64ZVE32F-NEXT: andi a0, a3, 64
+; RV64ZVE32F-NEXT: vse32.v v10, (a3)
+; RV64ZVE32F-NEXT: andi a0, a4, 64
; RV64ZVE32F-NEXT: beqz a0, .LBB28_7
; RV64ZVE32F-NEXT: .LBB28_15: # %cond.store11
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v10, (a2)
-; RV64ZVE32F-NEXT: andi a0, a3, -128
+; RV64ZVE32F-NEXT: andi a0, a4, -128
; RV64ZVE32F-NEXT: beqz a0, .LBB28_8
; RV64ZVE32F-NEXT: .LBB28_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
@@ -2898,8 +2898,8 @@ define void @mscatter_v2i64(<2 x i64> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
;
; RV32ZVE32F-LABEL: mscatter_v2i64:
; RV32ZVE32F: # %bb.0:
-; RV32ZVE32F-NEXT: lw a2, 12(a0)
; RV32ZVE32F-NEXT: lw a1, 8(a0)
+; RV32ZVE32F-NEXT: lw a2, 12(a0)
; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a3, v0
; RV32ZVE32F-NEXT: andi a4, a3, 1
@@ -2965,12 +2965,12 @@ define void @mscatter_v4i64(<4 x i64> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
;
; RV32ZVE32F-LABEL: mscatter_v4i64:
; RV32ZVE32F: # %bb.0:
-; RV32ZVE32F-NEXT: lw a1, 28(a0)
-; RV32ZVE32F-NEXT: lw a2, 24(a0)
-; RV32ZVE32F-NEXT: lw a3, 20(a0)
-; RV32ZVE32F-NEXT: lw a4, 16(a0)
-; RV32ZVE32F-NEXT: lw a7, 12(a0)
+; RV32ZVE32F-NEXT: lw a1, 24(a0)
+; RV32ZVE32F-NEXT: lw a2, 28(a0)
; RV32ZVE32F-NEXT: lw a6, 8(a0)
+; RV32ZVE32F-NEXT: lw a7, 12(a0)
+; RV32ZVE32F-NEXT: lw a3, 16(a0)
+; RV32ZVE32F-NEXT: lw a4, 20(a0)
; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a5, v0
; RV32ZVE32F-NEXT: andi t0, a5, 1
@@ -3007,38 +3007,38 @@ define void @mscatter_v4i64(<4 x i64> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v9
-; RV32ZVE32F-NEXT: sw a4, 0(a0)
-; RV32ZVE32F-NEXT: sw a3, 4(a0)
+; RV32ZVE32F-NEXT: sw a3, 0(a0)
+; RV32ZVE32F-NEXT: sw a4, 4(a0)
; RV32ZVE32F-NEXT: andi a5, a5, 8
; RV32ZVE32F-NEXT: beqz a5, .LBB38_4
; RV32ZVE32F-NEXT: .LBB38_8: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
-; RV32ZVE32F-NEXT: sw a2, 0(a0)
-; RV32ZVE32F-NEXT: sw a1, 4(a0)
+; RV32ZVE32F-NEXT: sw a1, 0(a0)
+; RV32ZVE32F-NEXT: sw a2, 4(a0)
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_v4i64:
; RV64ZVE32F: # %bb.0:
-; RV64ZVE32F-NEXT: ld a2, 24(a1)
+; RV64ZVE32F-NEXT: ld a6, 8(a1)
; RV64ZVE32F-NEXT: ld a4, 16(a1)
-; RV64ZVE32F-NEXT: ld a7, 8(a1)
-; RV64ZVE32F-NEXT: ld a3, 24(a0)
-; RV64ZVE32F-NEXT: ld a5, 16(a0)
+; RV64ZVE32F-NEXT: ld a2, 24(a1)
; RV64ZVE32F-NEXT: ld t0, 8(a0)
+; RV64ZVE32F-NEXT: ld a5, 16(a0)
+; RV64ZVE32F-NEXT: ld a3, 24(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a6, v0
-; RV64ZVE32F-NEXT: andi t1, a6, 1
+; RV64ZVE32F-NEXT: vmv.x.s a7, v0
+; RV64ZVE32F-NEXT: andi t1, a7, 1
; RV64ZVE32F-NEXT: bnez t1, .LBB38_5
; RV64ZVE32F-NEXT: # %bb.1: # %else
-; RV64ZVE32F-NEXT: andi a0, a6, 2
+; RV64ZVE32F-NEXT: andi a0, a7, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB38_6
; RV64ZVE32F-NEXT: .LBB38_2: # %else2
-; RV64ZVE32F-NEXT: andi a0, a6, 4
+; RV64ZVE32F-NEXT: andi a0, a7, 4
; RV64ZVE32F-NEXT: bnez a0, .LBB38_7
; RV64ZVE32F-NEXT: .LBB38_3: # %else4
-; RV64ZVE32F-NEXT: andi a0, a6, 8
+; RV64ZVE32F-NEXT: andi a0, a7, 8
; RV64ZVE32F-NEXT: bnez a0, .LBB38_8
; RV64ZVE32F-NEXT: .LBB38_4: # %else6
; RV64ZVE32F-NEXT: ret
@@ -3046,15 +3046,15 @@ define void @mscatter_v4i64(<4 x i64> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV64ZVE32F-NEXT: ld a1, 0(a1)
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: sd a0, 0(a1)
-; RV64ZVE32F-NEXT: andi a0, a6, 2
+; RV64ZVE32F-NEXT: andi a0, a7, 2
; RV64ZVE32F-NEXT: beqz a0, .LBB38_2
; RV64ZVE32F-NEXT: .LBB38_6: # %cond.store1
-; RV64ZVE32F-NEXT: sd t0, 0(a7)
-; RV64ZVE32F-NEXT: andi a0, a6, 4
+; RV64ZVE32F-NEXT: sd t0, 0(a6)
+; RV64ZVE32F-NEXT: andi a0, a7, 4
; RV64ZVE32F-NEXT: beqz a0, .LBB38_3
; RV64ZVE32F-NEXT: .LBB38_7: # %cond.store3
; RV64ZVE32F-NEXT: sd a5, 0(a4)
-; RV64ZVE32F-NEXT: andi a0, a6, 8
+; RV64ZVE32F-NEXT: andi a0, a7, 8
; RV64ZVE32F-NEXT: beqz a0, .LBB38_4
; RV64ZVE32F-NEXT: .LBB38_8: # %cond.store5
; RV64ZVE32F-NEXT: sd a3, 0(a2)
@@ -3078,46 +3078,46 @@ define void @mscatter_truemask_v4i64(<4 x i64> %val, <4 x ptr> %ptrs) {
;
; RV32ZVE32F-LABEL: mscatter_truemask_v4i64:
; RV32ZVE32F: # %bb.0:
-; RV32ZVE32F-NEXT: lw a1, 28(a0)
-; RV32ZVE32F-NEXT: lw a2, 24(a0)
-; RV32ZVE32F-NEXT: lw a3, 20(a0)
-; RV32ZVE32F-NEXT: lw a4, 16(a0)
-; RV32ZVE32F-NEXT: lw a5, 12(a0)
-; RV32ZVE32F-NEXT: lw a6, 0(a0)
-; RV32ZVE32F-NEXT: lw a7, 4(a0)
-; RV32ZVE32F-NEXT: lw a0, 8(a0)
+; RV32ZVE32F-NEXT: lw a1, 16(a0)
+; RV32ZVE32F-NEXT: lw a2, 20(a0)
+; RV32ZVE32F-NEXT: lw a3, 24(a0)
+; RV32ZVE32F-NEXT: lw a4, 28(a0)
+; RV32ZVE32F-NEXT: lw a5, 0(a0)
+; RV32ZVE32F-NEXT: lw a6, 4(a0)
+; RV32ZVE32F-NEXT: lw a7, 8(a0)
+; RV32ZVE32F-NEXT: lw a0, 12(a0)
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s t0, v8
-; RV32ZVE32F-NEXT: sw a6, 0(t0)
-; RV32ZVE32F-NEXT: sw a7, 4(t0)
+; RV32ZVE32F-NEXT: sw a5, 0(t0)
+; RV32ZVE32F-NEXT: sw a6, 4(t0)
; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1
-; RV32ZVE32F-NEXT: vmv.x.s a6, v9
-; RV32ZVE32F-NEXT: sw a0, 0(a6)
-; RV32ZVE32F-NEXT: sw a5, 4(a6)
+; RV32ZVE32F-NEXT: vmv.x.s a5, v9
+; RV32ZVE32F-NEXT: sw a7, 0(a5)
+; RV32ZVE32F-NEXT: sw a0, 4(a5)
; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v9
-; RV32ZVE32F-NEXT: sw a4, 0(a0)
-; RV32ZVE32F-NEXT: sw a3, 4(a0)
+; RV32ZVE32F-NEXT: sw a1, 0(a0)
+; RV32ZVE32F-NEXT: sw a2, 4(a0)
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
-; RV32ZVE32F-NEXT: sw a2, 0(a0)
-; RV32ZVE32F-NEXT: sw a1, 4(a0)
+; RV32ZVE32F-NEXT: sw a3, 0(a0)
+; RV32ZVE32F-NEXT: sw a4, 4(a0)
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_truemask_v4i64:
; RV64ZVE32F: # %bb.0:
-; RV64ZVE32F-NEXT: ld a2, 24(a1)
-; RV64ZVE32F-NEXT: ld a3, 16(a1)
-; RV64ZVE32F-NEXT: ld a4, 8(a1)
-; RV64ZVE32F-NEXT: ld a1, 0(a1)
+; RV64ZVE32F-NEXT: ld a2, 0(a1)
+; RV64ZVE32F-NEXT: ld a3, 8(a1)
+; RV64ZVE32F-NEXT: ld a4, 16(a1)
+; RV64ZVE32F-NEXT: ld a1, 24(a1)
; RV64ZVE32F-NEXT: ld a5, 0(a0)
; RV64ZVE32F-NEXT: ld a6, 8(a0)
; RV64ZVE32F-NEXT: ld a7, 16(a0)
; RV64ZVE32F-NEXT: ld a0, 24(a0)
-; RV64ZVE32F-NEXT: sd a5, 0(a1)
-; RV64ZVE32F-NEXT: sd a6, 0(a4)
-; RV64ZVE32F-NEXT: sd a7, 0(a3)
-; RV64ZVE32F-NEXT: sd a0, 0(a2)
+; RV64ZVE32F-NEXT: sd a5, 0(a2)
+; RV64ZVE32F-NEXT: sd a6, 0(a3)
+; RV64ZVE32F-NEXT: sd a7, 0(a4)
+; RV64ZVE32F-NEXT: sd a0, 0(a1)
; RV64ZVE32F-NEXT: ret
call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> %val, <4 x ptr> %ptrs, i32 8, <4 x i1> splat (i1 1))
ret void
@@ -3156,51 +3156,51 @@ define void @mscatter_v8i64(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: .cfi_offset s2, -12
-; RV32ZVE32F-NEXT: lw a1, 60(a0)
-; RV32ZVE32F-NEXT: lw a2, 56(a0)
-; RV32ZVE32F-NEXT: lw a3, 52(a0)
-; RV32ZVE32F-NEXT: lw a4, 48(a0)
-; RV32ZVE32F-NEXT: lw a5, 44(a0)
-; RV32ZVE32F-NEXT: lw a7, 40(a0)
-; RV32ZVE32F-NEXT: lw t0, 36(a0)
-; RV32ZVE32F-NEXT: lw t1, 32(a0)
-; RV32ZVE32F-NEXT: lw t2, 28(a0)
-; RV32ZVE32F-NEXT: lw t3, 24(a0)
-; RV32ZVE32F-NEXT: lw t4, 20(a0)
-; RV32ZVE32F-NEXT: lw t5, 16(a0)
-; RV32ZVE32F-NEXT: lw s0, 12(a0)
+; RV32ZVE32F-NEXT: lw a1, 56(a0)
+; RV32ZVE32F-NEXT: lw a2, 60(a0)
+; RV32ZVE32F-NEXT: lw a5, 40(a0)
+; RV32ZVE32F-NEXT: lw a6, 44(a0)
+; RV32ZVE32F-NEXT: lw a3, 48(a0)
+; RV32ZVE32F-NEXT: lw a4, 52(a0)
+; RV32ZVE32F-NEXT: lw t2, 24(a0)
+; RV32ZVE32F-NEXT: lw t3, 28(a0)
+; RV32ZVE32F-NEXT: lw t0, 32(a0)
+; RV32ZVE32F-NEXT: lw t1, 36(a0)
; RV32ZVE32F-NEXT: lw t6, 8(a0)
+; RV32ZVE32F-NEXT: lw s0, 12(a0)
+; RV32ZVE32F-NEXT: lw t4, 16(a0)
+; RV32ZVE32F-NEXT: lw t5, 20(a0)
; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV32ZVE32F-NEXT: vmv.x.s a6, v0
-; RV32ZVE32F-NEXT: andi s1, a6, 1
+; RV32ZVE32F-NEXT: vmv.x.s a7, v0
+; RV32ZVE32F-NEXT: andi s1, a7, 1
; RV32ZVE32F-NEXT: bnez s1, .LBB41_10
; RV32ZVE32F-NEXT: # %bb.1: # %else
-; RV32ZVE32F-NEXT: andi a0, a6, 2
+; RV32ZVE32F-NEXT: andi a0, a7, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB41_11
; RV32ZVE32F-NEXT: .LBB41_2: # %else2
-; RV32ZVE32F-NEXT: andi a0, a6, 4
+; RV32ZVE32F-NEXT: andi a0, a7, 4
; RV32ZVE32F-NEXT: bnez a0, .LBB41_12
; RV32ZVE32F-NEXT: .LBB41_3: # %else4
-; RV32ZVE32F-NEXT: andi a0, a6, 8
+; RV32ZVE32F-NEXT: andi a0, a7, 8
; RV32ZVE32F-NEXT: bnez a0, .LBB41_13
; RV32ZVE32F-NEXT: .LBB41_4: # %else6
-; RV32ZVE32F-NEXT: andi a0, a6, 16
+; RV32ZVE32F-NEXT: andi a0, a7, 16
; RV32ZVE32F-NEXT: bnez a0, .LBB41_14
; RV32ZVE32F-NEXT: .LBB41_5: # %else8
-; RV32ZVE32F-NEXT: andi a0, a6, 32
+; RV32ZVE32F-NEXT: andi a0, a7, 32
; RV32ZVE32F-NEXT: bnez a0, .LBB41_15
; RV32ZVE32F-NEXT: .LBB41_6: # %else10
-; RV32ZVE32F-NEXT: andi a0, a6, 64
+; RV32ZVE32F-NEXT: andi a0, a7, 64
; RV32ZVE32F-NEXT: bnez a0, .LBB41_16
; RV32ZVE32F-NEXT: .LBB41_7: # %else12
-; RV32ZVE32F-NEXT: andi a0, a6, -128
+; RV32ZVE32F-NEXT: andi a0, a7, -128
; RV32ZVE32F-NEXT: beqz a0, .LBB41_9
; RV32ZVE32F-NEXT: .LBB41_8: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
-; RV32ZVE32F-NEXT: sw a2, 0(a0)
-; RV32ZVE32F-NEXT: sw a1, 4(a0)
+; RV32ZVE32F-NEXT: sw a1, 0(a0)
+; RV32ZVE32F-NEXT: sw a2, 4(a0)
; RV32ZVE32F-NEXT: .LBB41_9: # %else14
; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
@@ -3214,7 +3214,7 @@ define void @mscatter_v8i64(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV32ZVE32F-NEXT: vmv.x.s s2, v8
; RV32ZVE32F-NEXT: sw s1, 4(s2)
; RV32ZVE32F-NEXT: sw a0, 0(s2)
-; RV32ZVE32F-NEXT: andi a0, a6, 2
+; RV32ZVE32F-NEXT: andi a0, a7, 2
; RV32ZVE32F-NEXT: beqz a0, .LBB41_2
; RV32ZVE32F-NEXT: .LBB41_11: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
@@ -3222,47 +3222,47 @@ define void @mscatter_v8i64(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw s0, 4(a0)
; RV32ZVE32F-NEXT: sw t6, 0(a0)
-; RV32ZVE32F-NEXT: andi a0, a6, 4
+; RV32ZVE32F-NEXT: andi a0, a7, 4
; RV32ZVE32F-NEXT: beqz a0, .LBB41_3
; RV32ZVE32F-NEXT: .LBB41_12: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw t5, 0(a0)
-; RV32ZVE32F-NEXT: sw t4, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, a6, 8
+; RV32ZVE32F-NEXT: sw t4, 0(a0)
+; RV32ZVE32F-NEXT: sw t5, 4(a0)
+; RV32ZVE32F-NEXT: andi a0, a7, 8
; RV32ZVE32F-NEXT: beqz a0, .LBB41_4
; RV32ZVE32F-NEXT: .LBB41_13: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw t3, 0(a0)
-; RV32ZVE32F-NEXT: sw t2, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, a6, 16
+; RV32ZVE32F-NEXT: sw t2, 0(a0)
+; RV32ZVE32F-NEXT: sw t3, 4(a0)
+; RV32ZVE32F-NEXT: andi a0, a7, 16
; RV32ZVE32F-NEXT: beqz a0, .LBB41_5
; RV32ZVE32F-NEXT: .LBB41_14: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw t1, 0(a0)
-; RV32ZVE32F-NEXT: sw t0, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, a6, 32
+; RV32ZVE32F-NEXT: sw t0, 0(a0)
+; RV32ZVE32F-NEXT: sw t1, 4(a0)
+; RV32ZVE32F-NEXT: andi a0, a7, 32
; RV32ZVE32F-NEXT: beqz a0, .LBB41_6
; RV32ZVE32F-NEXT: .LBB41_15: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw a7, 0(a0)
-; RV32ZVE32F-NEXT: sw a5, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, a6, 64
+; RV32ZVE32F-NEXT: sw a5, 0(a0)
+; RV32ZVE32F-NEXT: sw a6, 4(a0)
+; RV32ZVE32F-NEXT: andi a0, a7, 64
; RV32ZVE32F-NEXT: beqz a0, .LBB41_7
; RV32ZVE32F-NEXT: .LBB41_16: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw a4, 0(a0)
-; RV32ZVE32F-NEXT: sw a3, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, a6, -128
+; RV32ZVE32F-NEXT: sw a3, 0(a0)
+; RV32ZVE32F-NEXT: sw a4, 4(a0)
+; RV32ZVE32F-NEXT: andi a0, a7, -128
; RV32ZVE32F-NEXT: bnez a0, .LBB41_8
; RV32ZVE32F-NEXT: j .LBB41_9
;
@@ -3276,47 +3276,47 @@ define void @mscatter_v8i64(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV64ZVE32F-NEXT: .cfi_offset s0, -8
; RV64ZVE32F-NEXT: .cfi_offset s1, -16
; RV64ZVE32F-NEXT: .cfi_offset s2, -24
+; RV64ZVE32F-NEXT: ld a4, 40(a1)
+; RV64ZVE32F-NEXT: ld a3, 48(a1)
; RV64ZVE32F-NEXT: ld a2, 56(a1)
-; RV64ZVE32F-NEXT: ld a4, 48(a1)
-; RV64ZVE32F-NEXT: ld a6, 40(a1)
-; RV64ZVE32F-NEXT: ld t1, 32(a1)
-; RV64ZVE32F-NEXT: ld t3, 24(a1)
-; RV64ZVE32F-NEXT: ld t5, 16(a1)
-; RV64ZVE32F-NEXT: ld s0, 8(a1)
-; RV64ZVE32F-NEXT: ld a3, 56(a0)
-; RV64ZVE32F-NEXT: ld a5, 48(a0)
-; RV64ZVE32F-NEXT: ld t0, 40(a0)
-; RV64ZVE32F-NEXT: ld t2, 32(a0)
-; RV64ZVE32F-NEXT: ld t4, 24(a0)
-; RV64ZVE32F-NEXT: ld t6, 16(a0)
+; RV64ZVE32F-NEXT: ld t5, 8(a1)
+; RV64ZVE32F-NEXT: ld t3, 16(a1)
+; RV64ZVE32F-NEXT: ld t2, 24(a1)
+; RV64ZVE32F-NEXT: ld t0, 32(a1)
+; RV64ZVE32F-NEXT: ld a7, 40(a0)
+; RV64ZVE32F-NEXT: ld a6, 48(a0)
+; RV64ZVE32F-NEXT: ld a5, 56(a0)
; RV64ZVE32F-NEXT: ld s1, 8(a0)
+; RV64ZVE32F-NEXT: ld s0, 16(a0)
+; RV64ZVE32F-NEXT: ld t6, 24(a0)
+; RV64ZVE32F-NEXT: ld t4, 32(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a7, v0
-; RV64ZVE32F-NEXT: andi s2, a7, 1
+; RV64ZVE32F-NEXT: vmv.x.s t1, v0
+; RV64ZVE32F-NEXT: andi s2, t1, 1
; RV64ZVE32F-NEXT: bnez s2, .LBB41_10
; RV64ZVE32F-NEXT: # %bb.1: # %else
-; RV64ZVE32F-NEXT: andi a0, a7, 2
+; RV64ZVE32F-NEXT: andi a0, t1, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB41_11
; RV64ZVE32F-NEXT: .LBB41_2: # %else2
-; RV64ZVE32F-NEXT: andi a0, a7, 4
+; RV64ZVE32F-NEXT: andi a0, t1, 4
; RV64ZVE32F-NEXT: bnez a0, .LBB41_12
; RV64ZVE32F-NEXT: .LBB41_3: # %else4
-; RV64ZVE32F-NEXT: andi a0, a7, 8
+; RV64ZVE32F-NEXT: andi a0, t1, 8
; RV64ZVE32F-NEXT: bnez a0, .LBB41_13
; RV64ZVE32F-NEXT: .LBB41_4: # %else6
-; RV64ZVE32F-NEXT: andi a0, a7, 16
+; RV64ZVE32F-NEXT: andi a0, t1, 16
; RV64ZVE32F-NEXT: bnez a0, .LBB41_14
; RV64ZVE32F-NEXT: .LBB41_5: # %else8
-; RV64ZVE32F-NEXT: andi a0, a7, 32
+; RV64ZVE32F-NEXT: andi a0, t1, 32
; RV64ZVE32F-NEXT: bnez a0, .LBB41_15
; RV64ZVE32F-NEXT: .LBB41_6: # %else10
-; RV64ZVE32F-NEXT: andi a0, a7, 64
+; RV64ZVE32F-NEXT: andi a0, t1, 64
; RV64ZVE32F-NEXT: bnez a0, .LBB41_16
; RV64ZVE32F-NEXT: .LBB41_7: # %else12
-; RV64ZVE32F-NEXT: andi a0, a7, -128
+; RV64ZVE32F-NEXT: andi a0, t1, -128
; RV64ZVE32F-NEXT: beqz a0, .LBB41_9
; RV64ZVE32F-NEXT: .LBB41_8: # %cond.store13
-; RV64ZVE32F-NEXT: sd a3, 0(a2)
+; RV64ZVE32F-NEXT: sd a5, 0(a2)
; RV64ZVE32F-NEXT: .LBB41_9: # %else14
; RV64ZVE32F-NEXT: ld s0, 24(sp) # 8-byte Folded Reload
; RV64ZVE32F-NEXT: ld s1, 16(sp) # 8-byte Folded Reload
@@ -3327,31 +3327,31 @@ define void @mscatter_v8i64(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV64ZVE32F-NEXT: ld a1, 0(a1)
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: sd a0, 0(a1)
-; RV64ZVE32F-NEXT: andi a0, a7, 2
+; RV64ZVE32F-NEXT: andi a0, t1, 2
; RV64ZVE32F-NEXT: beqz a0, .LBB41_2
; RV64ZVE32F-NEXT: .LBB41_11: # %cond.store1
-; RV64ZVE32F-NEXT: sd s1, 0(s0)
-; RV64ZVE32F-NEXT: andi a0, a7, 4
+; RV64ZVE32F-NEXT: sd s1, 0(t5)
+; RV64ZVE32F-NEXT: andi a0, t1, 4
; RV64ZVE32F-NEXT: beqz a0, .LBB41_3
; RV64ZVE32F-NEXT: .LBB41_12: # %cond.store3
-; RV64ZVE32F-NEXT: sd t6, 0(t5)
-; RV64ZVE32F-NEXT: andi a0, a7, 8
+; RV64ZVE32F-NEXT: sd s0, 0(t3)
+; RV64ZVE32F-NEXT: andi a0, t1, 8
; RV64ZVE32F-NEXT: beqz a0, .LBB41_4
; RV64ZVE32F-NEXT: .LBB41_13: # %cond.store5
-; RV64ZVE32F-NEXT: sd t4, 0(t3)
-; RV64ZVE32F-NEXT: andi a0, a7, 16
+; RV64ZVE32F-NEXT: sd t6, 0(t2)
+; RV64ZVE32F-NEXT: andi a0, t1, 16
; RV64ZVE32F-NEXT: beqz a0, .LBB41_5
; RV64ZVE32F-NEXT: .LBB41_14: # %cond.store7
-; RV64ZVE32F-NEXT: sd t2, 0(t1)
-; RV64ZVE32F-NEXT: andi a0, a7, 32
+; RV64ZVE32F-NEXT: sd t4, 0(t0)
+; RV64ZVE32F-NEXT: andi a0, t1, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB41_6
; RV64ZVE32F-NEXT: .LBB41_15: # %cond.store9
-; RV64ZVE32F-NEXT: sd t0, 0(a6)
-; RV64ZVE32F-NEXT: andi a0, a7, 64
+; RV64ZVE32F-NEXT: sd a7, 0(a4)
+; RV64ZVE32F-NEXT: andi a0, t1, 64
; RV64ZVE32F-NEXT: beqz a0, .LBB41_7
; RV64ZVE32F-NEXT: .LBB41_16: # %cond.store11
-; RV64ZVE32F-NEXT: sd a5, 0(a4)
-; RV64ZVE32F-NEXT: andi a0, a7, -128
+; RV64ZVE32F-NEXT: sd a6, 0(a3)
+; RV64ZVE32F-NEXT: andi a0, t1, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB41_8
; RV64ZVE32F-NEXT: j .LBB41_9
call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
@@ -3386,20 +3386,20 @@ define void @mscatter_baseidx_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %id
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: .cfi_offset s2, -12
-; RV32ZVE32F-NEXT: lw a2, 60(a0)
-; RV32ZVE32F-NEXT: lw a3, 56(a0)
-; RV32ZVE32F-NEXT: lw a4, 52(a0)
-; RV32ZVE32F-NEXT: lw a5, 48(a0)
-; RV32ZVE32F-NEXT: lw a6, 44(a0)
-; RV32ZVE32F-NEXT: lw a7, 40(a0)
-; RV32ZVE32F-NEXT: lw t1, 36(a0)
-; RV32ZVE32F-NEXT: lw t2, 32(a0)
-; RV32ZVE32F-NEXT: lw t3, 28(a0)
-; RV32ZVE32F-NEXT: lw t4, 24(a0)
-; RV32ZVE32F-NEXT: lw t5, 20(a0)
-; RV32ZVE32F-NEXT: lw t6, 16(a0)
-; RV32ZVE32F-NEXT: lw s1, 12(a0)
+; RV32ZVE32F-NEXT: lw a2, 56(a0)
+; RV32ZVE32F-NEXT: lw a3, 60(a0)
+; RV32ZVE32F-NEXT: lw a6, 40(a0)
+; RV32ZVE32F-NEXT: lw a7, 44(a0)
+; RV32ZVE32F-NEXT: lw a4, 48(a0)
+; RV32ZVE32F-NEXT: lw a5, 52(a0)
+; RV32ZVE32F-NEXT: lw t3, 24(a0)
+; RV32ZVE32F-NEXT: lw t4, 28(a0)
+; RV32ZVE32F-NEXT: lw t1, 32(a0)
+; RV32ZVE32F-NEXT: lw t2, 36(a0)
; RV32ZVE32F-NEXT: lw s0, 8(a0)
+; RV32ZVE32F-NEXT: lw s1, 12(a0)
+; RV32ZVE32F-NEXT: lw t5, 16(a0)
+; RV32ZVE32F-NEXT: lw t6, 20(a0)
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vsext.vf4 v10, v8
; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
@@ -3434,8 +3434,8 @@ define void @mscatter_baseidx_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %id
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
-; RV32ZVE32F-NEXT: sw a3, 0(a0)
-; RV32ZVE32F-NEXT: sw a2, 4(a0)
+; RV32ZVE32F-NEXT: sw a2, 0(a0)
+; RV32ZVE32F-NEXT: sw a3, 4(a0)
; RV32ZVE32F-NEXT: .LBB42_9: # %else14
; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
@@ -3462,56 +3462,56 @@ define void @mscatter_baseidx_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %id
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw t6, 0(a0)
-; RV32ZVE32F-NEXT: sw t5, 4(a0)
+; RV32ZVE32F-NEXT: sw t5, 0(a0)
+; RV32ZVE32F-NEXT: sw t6, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 8
; RV32ZVE32F-NEXT: beqz a0, .LBB42_4
; RV32ZVE32F-NEXT: .LBB42_13: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw t4, 0(a0)
-; RV32ZVE32F-NEXT: sw t3, 4(a0)
+; RV32ZVE32F-NEXT: sw t3, 0(a0)
+; RV32ZVE32F-NEXT: sw t4, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 16
; RV32ZVE32F-NEXT: beqz a0, .LBB42_5
; RV32ZVE32F-NEXT: .LBB42_14: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw t2, 0(a0)
-; RV32ZVE32F-NEXT: sw t1, 4(a0)
+; RV32ZVE32F-NEXT: sw t1, 0(a0)
+; RV32ZVE32F-NEXT: sw t2, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 32
; RV32ZVE32F-NEXT: beqz a0, .LBB42_6
; RV32ZVE32F-NEXT: .LBB42_15: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw a7, 0(a0)
-; RV32ZVE32F-NEXT: sw a6, 4(a0)
+; RV32ZVE32F-NEXT: sw a6, 0(a0)
+; RV32ZVE32F-NEXT: sw a7, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 64
; RV32ZVE32F-NEXT: beqz a0, .LBB42_7
; RV32ZVE32F-NEXT: .LBB42_16: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw a5, 0(a0)
-; RV32ZVE32F-NEXT: sw a4, 4(a0)
+; RV32ZVE32F-NEXT: sw a4, 0(a0)
+; RV32ZVE32F-NEXT: sw a5, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, -128
; RV32ZVE32F-NEXT: bnez a0, .LBB42_8
; RV32ZVE32F-NEXT: j .LBB42_9
;
; RV64ZVE32F-LABEL: mscatter_baseidx_v8i8_v8i64:
; RV64ZVE32F: # %bb.0:
-; RV64ZVE32F-NEXT: ld a2, 56(a0)
+; RV64ZVE32F-NEXT: ld a4, 40(a0)
; RV64ZVE32F-NEXT: ld a3, 48(a0)
-; RV64ZVE32F-NEXT: ld a5, 40(a0)
-; RV64ZVE32F-NEXT: ld a6, 32(a0)
-; RV64ZVE32F-NEXT: ld a7, 24(a0)
-; RV64ZVE32F-NEXT: ld t0, 16(a0)
+; RV64ZVE32F-NEXT: ld a2, 56(a0)
; RV64ZVE32F-NEXT: ld t1, 8(a0)
+; RV64ZVE32F-NEXT: ld t0, 16(a0)
+; RV64ZVE32F-NEXT: ld a7, 24(a0)
+; RV64ZVE32F-NEXT: ld a6, 32(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a4, v0
-; RV64ZVE32F-NEXT: andi t2, a4, 1
+; RV64ZVE32F-NEXT: vmv.x.s a5, v0
+; RV64ZVE32F-NEXT: andi t2, a5, 1
; RV64ZVE32F-NEXT: beqz t2, .LBB42_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
@@ -3520,7 +3520,7 @@ define void @mscatter_baseidx_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %id
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: sd a0, 0(t2)
; RV64ZVE32F-NEXT: .LBB42_2: # %else
-; RV64ZVE32F-NEXT: andi a0, a4, 2
+; RV64ZVE32F-NEXT: andi a0, a5, 2
; RV64ZVE32F-NEXT: beqz a0, .LBB42_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
@@ -3532,31 +3532,31 @@ define void @mscatter_baseidx_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %id
; RV64ZVE32F-NEXT: .LBB42_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
-; RV64ZVE32F-NEXT: andi a0, a4, 4
+; RV64ZVE32F-NEXT: andi a0, a5, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB42_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
-; RV64ZVE32F-NEXT: andi a0, a4, 8
+; RV64ZVE32F-NEXT: andi a0, a5, 8
; RV64ZVE32F-NEXT: bnez a0, .LBB42_13
; RV64ZVE32F-NEXT: .LBB42_6: # %else6
-; RV64ZVE32F-NEXT: andi a0, a4, 16
+; RV64ZVE32F-NEXT: andi a0, a5, 16
; RV64ZVE32F-NEXT: bnez a0, .LBB42_14
; RV64ZVE32F-NEXT: .LBB42_7: # %else8
-; RV64ZVE32F-NEXT: andi a0, a4, 32
+; RV64ZVE32F-NEXT: andi a0, a5, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB42_9
; RV64ZVE32F-NEXT: .LBB42_8: # %cond.store9
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
-; RV64ZVE32F-NEXT: sd a5, 0(a0)
+; RV64ZVE32F-NEXT: sd a4, 0(a0)
; RV64ZVE32F-NEXT: .LBB42_9: # %else10
-; RV64ZVE32F-NEXT: andi a0, a4, 64
+; RV64ZVE32F-NEXT: andi a0, a5, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB42_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
-; RV64ZVE32F-NEXT: andi a0, a4, -128
+; RV64ZVE32F-NEXT: andi a0, a5, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB42_16
; RV64ZVE32F-NEXT: .LBB42_11: # %else14
; RV64ZVE32F-NEXT: ret
@@ -3565,7 +3565,7 @@ define void @mscatter_baseidx_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %id
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd t0, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a4, 8
+; RV64ZVE32F-NEXT: andi a0, a5, 8
; RV64ZVE32F-NEXT: beqz a0, .LBB42_6
; RV64ZVE32F-NEXT: .LBB42_13: # %cond.store5
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -3573,14 +3573,14 @@ define void @mscatter_baseidx_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %id
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a7, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a4, 16
+; RV64ZVE32F-NEXT: andi a0, a5, 16
; RV64ZVE32F-NEXT: beqz a0, .LBB42_7
; RV64ZVE32F-NEXT: .LBB42_14: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a6, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a4, 32
+; RV64ZVE32F-NEXT: andi a0, a5, 32
; RV64ZVE32F-NEXT: bnez a0, .LBB42_8
; RV64ZVE32F-NEXT: j .LBB42_9
; RV64ZVE32F-NEXT: .LBB42_15: # %cond.store11
@@ -3588,7 +3588,7 @@ define void @mscatter_baseidx_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %id
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a3, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a4, -128
+; RV64ZVE32F-NEXT: andi a0, a5, -128
; RV64ZVE32F-NEXT: beqz a0, .LBB42_11
; RV64ZVE32F-NEXT: .LBB42_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -3630,20 +3630,20 @@ define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: .cfi_offset s2, -12
-; RV32ZVE32F-NEXT: lw a2, 60(a0)
-; RV32ZVE32F-NEXT: lw a3, 56(a0)
-; RV32ZVE32F-NEXT: lw a4, 52(a0)
-; RV32ZVE32F-NEXT: lw a5, 48(a0)
-; RV32ZVE32F-NEXT: lw a6, 44(a0)
-; RV32ZVE32F-NEXT: lw a7, 40(a0)
-; RV32ZVE32F-NEXT: lw t1, 36(a0)
-; RV32ZVE32F-NEXT: lw t2, 32(a0)
-; RV32ZVE32F-NEXT: lw t3, 28(a0)
-; RV32ZVE32F-NEXT: lw t4, 24(a0)
-; RV32ZVE32F-NEXT: lw t5, 20(a0)
-; RV32ZVE32F-NEXT: lw t6, 16(a0)
-; RV32ZVE32F-NEXT: lw s1, 12(a0)
+; RV32ZVE32F-NEXT: lw a2, 56(a0)
+; RV32ZVE32F-NEXT: lw a3, 60(a0)
+; RV32ZVE32F-NEXT: lw a6, 40(a0)
+; RV32ZVE32F-NEXT: lw a7, 44(a0)
+; RV32ZVE32F-NEXT: lw a4, 48(a0)
+; RV32ZVE32F-NEXT: lw a5, 52(a0)
+; RV32ZVE32F-NEXT: lw t3, 24(a0)
+; RV32ZVE32F-NEXT: lw t4, 28(a0)
+; RV32ZVE32F-NEXT: lw t1, 32(a0)
+; RV32ZVE32F-NEXT: lw t2, 36(a0)
; RV32ZVE32F-NEXT: lw s0, 8(a0)
+; RV32ZVE32F-NEXT: lw s1, 12(a0)
+; RV32ZVE32F-NEXT: lw t5, 16(a0)
+; RV32ZVE32F-NEXT: lw t6, 20(a0)
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vsext.vf4 v10, v8
; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
@@ -3678,8 +3678,8 @@ define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
-; RV32ZVE32F-NEXT: sw a3, 0(a0)
-; RV32ZVE32F-NEXT: sw a2, 4(a0)
+; RV32ZVE32F-NEXT: sw a2, 0(a0)
+; RV32ZVE32F-NEXT: sw a3, 4(a0)
; RV32ZVE32F-NEXT: .LBB43_9: # %else14
; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
@@ -3706,56 +3706,56 @@ define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw t6, 0(a0)
-; RV32ZVE32F-NEXT: sw t5, 4(a0)
+; RV32ZVE32F-NEXT: sw t5, 0(a0)
+; RV32ZVE32F-NEXT: sw t6, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 8
; RV32ZVE32F-NEXT: beqz a0, .LBB43_4
; RV32ZVE32F-NEXT: .LBB43_13: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw t4, 0(a0)
-; RV32ZVE32F-NEXT: sw t3, 4(a0)
+; RV32ZVE32F-NEXT: sw t3, 0(a0)
+; RV32ZVE32F-NEXT: sw t4, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 16
; RV32ZVE32F-NEXT: beqz a0, .LBB43_5
; RV32ZVE32F-NEXT: .LBB43_14: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw t2, 0(a0)
-; RV32ZVE32F-NEXT: sw t1, 4(a0)
+; RV32ZVE32F-NEXT: sw t1, 0(a0)
+; RV32ZVE32F-NEXT: sw t2, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 32
; RV32ZVE32F-NEXT: beqz a0, .LBB43_6
; RV32ZVE32F-NEXT: .LBB43_15: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw a7, 0(a0)
-; RV32ZVE32F-NEXT: sw a6, 4(a0)
+; RV32ZVE32F-NEXT: sw a6, 0(a0)
+; RV32ZVE32F-NEXT: sw a7, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 64
; RV32ZVE32F-NEXT: beqz a0, .LBB43_7
; RV32ZVE32F-NEXT: .LBB43_16: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw a5, 0(a0)
-; RV32ZVE32F-NEXT: sw a4, 4(a0)
+; RV32ZVE32F-NEXT: sw a4, 0(a0)
+; RV32ZVE32F-NEXT: sw a5, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, -128
; RV32ZVE32F-NEXT: bnez a0, .LBB43_8
; RV32ZVE32F-NEXT: j .LBB43_9
;
; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8i64:
; RV64ZVE32F: # %bb.0:
-; RV64ZVE32F-NEXT: ld a2, 56(a0)
+; RV64ZVE32F-NEXT: ld a4, 40(a0)
; RV64ZVE32F-NEXT: ld a3, 48(a0)
-; RV64ZVE32F-NEXT: ld a5, 40(a0)
-; RV64ZVE32F-NEXT: ld a6, 32(a0)
-; RV64ZVE32F-NEXT: ld a7, 24(a0)
-; RV64ZVE32F-NEXT: ld t0, 16(a0)
+; RV64ZVE32F-NEXT: ld a2, 56(a0)
; RV64ZVE32F-NEXT: ld t1, 8(a0)
+; RV64ZVE32F-NEXT: ld t0, 16(a0)
+; RV64ZVE32F-NEXT: ld a7, 24(a0)
+; RV64ZVE32F-NEXT: ld a6, 32(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a4, v0
-; RV64ZVE32F-NEXT: andi t2, a4, 1
+; RV64ZVE32F-NEXT: vmv.x.s a5, v0
+; RV64ZVE32F-NEXT: andi t2, a5, 1
; RV64ZVE32F-NEXT: beqz t2, .LBB43_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
@@ -3764,7 +3764,7 @@ define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: sd a0, 0(t2)
; RV64ZVE32F-NEXT: .LBB43_2: # %else
-; RV64ZVE32F-NEXT: andi a0, a4, 2
+; RV64ZVE32F-NEXT: andi a0, a5, 2
; RV64ZVE32F-NEXT: beqz a0, .LBB43_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
@@ -3776,31 +3776,31 @@ define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: .LBB43_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
-; RV64ZVE32F-NEXT: andi a0, a4, 4
+; RV64ZVE32F-NEXT: andi a0, a5, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB43_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
-; RV64ZVE32F-NEXT: andi a0, a4, 8
+; RV64ZVE32F-NEXT: andi a0, a5, 8
; RV64ZVE32F-NEXT: bnez a0, .LBB43_13
; RV64ZVE32F-NEXT: .LBB43_6: # %else6
-; RV64ZVE32F-NEXT: andi a0, a4, 16
+; RV64ZVE32F-NEXT: andi a0, a5, 16
; RV64ZVE32F-NEXT: bnez a0, .LBB43_14
; RV64ZVE32F-NEXT: .LBB43_7: # %else8
-; RV64ZVE32F-NEXT: andi a0, a4, 32
+; RV64ZVE32F-NEXT: andi a0, a5, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB43_9
; RV64ZVE32F-NEXT: .LBB43_8: # %cond.store9
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
-; RV64ZVE32F-NEXT: sd a5, 0(a0)
+; RV64ZVE32F-NEXT: sd a4, 0(a0)
; RV64ZVE32F-NEXT: .LBB43_9: # %else10
-; RV64ZVE32F-NEXT: andi a0, a4, 64
+; RV64ZVE32F-NEXT: andi a0, a5, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB43_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
-; RV64ZVE32F-NEXT: andi a0, a4, -128
+; RV64ZVE32F-NEXT: andi a0, a5, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB43_16
; RV64ZVE32F-NEXT: .LBB43_11: # %else14
; RV64ZVE32F-NEXT: ret
@@ -3809,7 +3809,7 @@ define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd t0, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a4, 8
+; RV64ZVE32F-NEXT: andi a0, a5, 8
; RV64ZVE32F-NEXT: beqz a0, .LBB43_6
; RV64ZVE32F-NEXT: .LBB43_13: # %cond.store5
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -3817,14 +3817,14 @@ define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a7, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a4, 16
+; RV64ZVE32F-NEXT: andi a0, a5, 16
; RV64ZVE32F-NEXT: beqz a0, .LBB43_7
; RV64ZVE32F-NEXT: .LBB43_14: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a6, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a4, 32
+; RV64ZVE32F-NEXT: andi a0, a5, 32
; RV64ZVE32F-NEXT: bnez a0, .LBB43_8
; RV64ZVE32F-NEXT: j .LBB43_9
; RV64ZVE32F-NEXT: .LBB43_15: # %cond.store11
@@ -3832,7 +3832,7 @@ define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a3, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a4, -128
+; RV64ZVE32F-NEXT: andi a0, a5, -128
; RV64ZVE32F-NEXT: beqz a0, .LBB43_11
; RV64ZVE32F-NEXT: .LBB43_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -3876,20 +3876,20 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: .cfi_offset s2, -12
-; RV32ZVE32F-NEXT: lw a2, 60(a0)
-; RV32ZVE32F-NEXT: lw a3, 56(a0)
-; RV32ZVE32F-NEXT: lw a4, 52(a0)
-; RV32ZVE32F-NEXT: lw a5, 48(a0)
-; RV32ZVE32F-NEXT: lw a6, 44(a0)
-; RV32ZVE32F-NEXT: lw a7, 40(a0)
-; RV32ZVE32F-NEXT: lw t1, 36(a0)
-; RV32ZVE32F-NEXT: lw t2, 32(a0)
-; RV32ZVE32F-NEXT: lw t3, 28(a0)
-; RV32ZVE32F-NEXT: lw t4, 24(a0)
-; RV32ZVE32F-NEXT: lw t5, 20(a0)
-; RV32ZVE32F-NEXT: lw t6, 16(a0)
-; RV32ZVE32F-NEXT: lw s1, 12(a0)
+; RV32ZVE32F-NEXT: lw a2, 56(a0)
+; RV32ZVE32F-NEXT: lw a3, 60(a0)
+; RV32ZVE32F-NEXT: lw a6, 40(a0)
+; RV32ZVE32F-NEXT: lw a7, 44(a0)
+; RV32ZVE32F-NEXT: lw a4, 48(a0)
+; RV32ZVE32F-NEXT: lw a5, 52(a0)
+; RV32ZVE32F-NEXT: lw t3, 24(a0)
+; RV32ZVE32F-NEXT: lw t4, 28(a0)
+; RV32ZVE32F-NEXT: lw t1, 32(a0)
+; RV32ZVE32F-NEXT: lw t2, 36(a0)
; RV32ZVE32F-NEXT: lw s0, 8(a0)
+; RV32ZVE32F-NEXT: lw s1, 12(a0)
+; RV32ZVE32F-NEXT: lw t5, 16(a0)
+; RV32ZVE32F-NEXT: lw t6, 20(a0)
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vzext.vf4 v10, v8
; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
@@ -3924,8 +3924,8 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
-; RV32ZVE32F-NEXT: sw a3, 0(a0)
-; RV32ZVE32F-NEXT: sw a2, 4(a0)
+; RV32ZVE32F-NEXT: sw a2, 0(a0)
+; RV32ZVE32F-NEXT: sw a3, 4(a0)
; RV32ZVE32F-NEXT: .LBB44_9: # %else14
; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
@@ -3952,56 +3952,56 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw t6, 0(a0)
-; RV32ZVE32F-NEXT: sw t5, 4(a0)
+; RV32ZVE32F-NEXT: sw t5, 0(a0)
+; RV32ZVE32F-NEXT: sw t6, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 8
; RV32ZVE32F-NEXT: beqz a0, .LBB44_4
; RV32ZVE32F-NEXT: .LBB44_13: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw t4, 0(a0)
-; RV32ZVE32F-NEXT: sw t3, 4(a0)
+; RV32ZVE32F-NEXT: sw t3, 0(a0)
+; RV32ZVE32F-NEXT: sw t4, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 16
; RV32ZVE32F-NEXT: beqz a0, .LBB44_5
; RV32ZVE32F-NEXT: .LBB44_14: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw t2, 0(a0)
-; RV32ZVE32F-NEXT: sw t1, 4(a0)
+; RV32ZVE32F-NEXT: sw t1, 0(a0)
+; RV32ZVE32F-NEXT: sw t2, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 32
; RV32ZVE32F-NEXT: beqz a0, .LBB44_6
; RV32ZVE32F-NEXT: .LBB44_15: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw a7, 0(a0)
-; RV32ZVE32F-NEXT: sw a6, 4(a0)
+; RV32ZVE32F-NEXT: sw a6, 0(a0)
+; RV32ZVE32F-NEXT: sw a7, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 64
; RV32ZVE32F-NEXT: beqz a0, .LBB44_7
; RV32ZVE32F-NEXT: .LBB44_16: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw a5, 0(a0)
-; RV32ZVE32F-NEXT: sw a4, 4(a0)
+; RV32ZVE32F-NEXT: sw a4, 0(a0)
+; RV32ZVE32F-NEXT: sw a5, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, -128
; RV32ZVE32F-NEXT: bnez a0, .LBB44_8
; RV32ZVE32F-NEXT: j .LBB44_9
;
; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8i64:
; RV64ZVE32F: # %bb.0:
-; RV64ZVE32F-NEXT: ld a2, 56(a0)
+; RV64ZVE32F-NEXT: ld a4, 40(a0)
; RV64ZVE32F-NEXT: ld a3, 48(a0)
-; RV64ZVE32F-NEXT: ld a5, 40(a0)
-; RV64ZVE32F-NEXT: ld a6, 32(a0)
-; RV64ZVE32F-NEXT: ld a7, 24(a0)
-; RV64ZVE32F-NEXT: ld t0, 16(a0)
+; RV64ZVE32F-NEXT: ld a2, 56(a0)
; RV64ZVE32F-NEXT: ld t1, 8(a0)
+; RV64ZVE32F-NEXT: ld t0, 16(a0)
+; RV64ZVE32F-NEXT: ld a7, 24(a0)
+; RV64ZVE32F-NEXT: ld a6, 32(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a4, v0
-; RV64ZVE32F-NEXT: andi t2, a4, 1
+; RV64ZVE32F-NEXT: vmv.x.s a5, v0
+; RV64ZVE32F-NEXT: andi t2, a5, 1
; RV64ZVE32F-NEXT: beqz t2, .LBB44_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
@@ -4011,7 +4011,7 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: sd a0, 0(t2)
; RV64ZVE32F-NEXT: .LBB44_2: # %else
-; RV64ZVE32F-NEXT: andi a0, a4, 2
+; RV64ZVE32F-NEXT: andi a0, a5, 2
; RV64ZVE32F-NEXT: beqz a0, .LBB44_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
@@ -4024,18 +4024,18 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: .LBB44_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
-; RV64ZVE32F-NEXT: andi a0, a4, 4
+; RV64ZVE32F-NEXT: andi a0, a5, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB44_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
-; RV64ZVE32F-NEXT: andi a0, a4, 8
+; RV64ZVE32F-NEXT: andi a0, a5, 8
; RV64ZVE32F-NEXT: bnez a0, .LBB44_13
; RV64ZVE32F-NEXT: .LBB44_6: # %else6
-; RV64ZVE32F-NEXT: andi a0, a4, 16
+; RV64ZVE32F-NEXT: andi a0, a5, 16
; RV64ZVE32F-NEXT: bnez a0, .LBB44_14
; RV64ZVE32F-NEXT: .LBB44_7: # %else8
-; RV64ZVE32F-NEXT: andi a0, a4, 32
+; RV64ZVE32F-NEXT: andi a0, a5, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB44_9
; RV64ZVE32F-NEXT: .LBB44_8: # %cond.store9
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
@@ -4043,13 +4043,13 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: andi a0, a0, 255
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
-; RV64ZVE32F-NEXT: sd a5, 0(a0)
+; RV64ZVE32F-NEXT: sd a4, 0(a0)
; RV64ZVE32F-NEXT: .LBB44_9: # %else10
-; RV64ZVE32F-NEXT: andi a0, a4, 64
+; RV64ZVE32F-NEXT: andi a0, a5, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB44_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
-; RV64ZVE32F-NEXT: andi a0, a4, -128
+; RV64ZVE32F-NEXT: andi a0, a5, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB44_16
; RV64ZVE32F-NEXT: .LBB44_11: # %else14
; RV64ZVE32F-NEXT: ret
@@ -4059,7 +4059,7 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd t0, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a4, 8
+; RV64ZVE32F-NEXT: andi a0, a5, 8
; RV64ZVE32F-NEXT: beqz a0, .LBB44_6
; RV64ZVE32F-NEXT: .LBB44_13: # %cond.store5
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -4068,7 +4068,7 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a7, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a4, 16
+; RV64ZVE32F-NEXT: andi a0, a5, 16
; RV64ZVE32F-NEXT: beqz a0, .LBB44_7
; RV64ZVE32F-NEXT: .LBB44_14: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
@@ -4076,7 +4076,7 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a6, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a4, 32
+; RV64ZVE32F-NEXT: andi a0, a5, 32
; RV64ZVE32F-NEXT: bnez a0, .LBB44_8
; RV64ZVE32F-NEXT: j .LBB44_9
; RV64ZVE32F-NEXT: .LBB44_15: # %cond.store11
@@ -4085,7 +4085,7 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a3, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a4, -128
+; RV64ZVE32F-NEXT: andi a0, a5, -128
; RV64ZVE32F-NEXT: beqz a0, .LBB44_11
; RV64ZVE32F-NEXT: .LBB44_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -4129,20 +4129,20 @@ define void @mscatter_baseidx_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> %
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: .cfi_offset s2, -12
-; RV32ZVE32F-NEXT: lw a2, 60(a0)
-; RV32ZVE32F-NEXT: lw a3, 56(a0)
-; RV32ZVE32F-NEXT: lw a4, 52(a0)
-; RV32ZVE32F-NEXT: lw a5, 48(a0)
-; RV32ZVE32F-NEXT: lw a6, 44(a0)
-; RV32ZVE32F-NEXT: lw a7, 40(a0)
-; RV32ZVE32F-NEXT: lw t1, 36(a0)
-; RV32ZVE32F-NEXT: lw t2, 32(a0)
-; RV32ZVE32F-NEXT: lw t3, 28(a0)
-; RV32ZVE32F-NEXT: lw t4, 24(a0)
-; RV32ZVE32F-NEXT: lw t5, 20(a0)
-; RV32ZVE32F-NEXT: lw t6, 16(a0)
-; RV32ZVE32F-NEXT: lw s1, 12(a0)
+; RV32ZVE32F-NEXT: lw a2, 56(a0)
+; RV32ZVE32F-NEXT: lw a3, 60(a0)
+; RV32ZVE32F-NEXT: lw a6, 40(a0)
+; RV32ZVE32F-NEXT: lw a7, 44(a0)
+; RV32ZVE32F-NEXT: lw a4, 48(a0)
+; RV32ZVE32F-NEXT: lw a5, 52(a0)
+; RV32ZVE32F-NEXT: lw t3, 24(a0)
+; RV32ZVE32F-NEXT: lw t4, 28(a0)
+; RV32ZVE32F-NEXT: lw t1, 32(a0)
+; RV32ZVE32F-NEXT: lw t2, 36(a0)
; RV32ZVE32F-NEXT: lw s0, 8(a0)
+; RV32ZVE32F-NEXT: lw s1, 12(a0)
+; RV32ZVE32F-NEXT: lw t5, 16(a0)
+; RV32ZVE32F-NEXT: lw t6, 20(a0)
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vsext.vf2 v10, v8
; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
@@ -4177,8 +4177,8 @@ define void @mscatter_baseidx_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> %
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
-; RV32ZVE32F-NEXT: sw a3, 0(a0)
-; RV32ZVE32F-NEXT: sw a2, 4(a0)
+; RV32ZVE32F-NEXT: sw a2, 0(a0)
+; RV32ZVE32F-NEXT: sw a3, 4(a0)
; RV32ZVE32F-NEXT: .LBB45_9: # %else14
; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
@@ -4205,56 +4205,56 @@ define void @mscatter_baseidx_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> %
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw t6, 0(a0)
-; RV32ZVE32F-NEXT: sw t5, 4(a0)
+; RV32ZVE32F-NEXT: sw t5, 0(a0)
+; RV32ZVE32F-NEXT: sw t6, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 8
; RV32ZVE32F-NEXT: beqz a0, .LBB45_4
; RV32ZVE32F-NEXT: .LBB45_13: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw t4, 0(a0)
-; RV32ZVE32F-NEXT: sw t3, 4(a0)
+; RV32ZVE32F-NEXT: sw t3, 0(a0)
+; RV32ZVE32F-NEXT: sw t4, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 16
; RV32ZVE32F-NEXT: beqz a0, .LBB45_5
; RV32ZVE32F-NEXT: .LBB45_14: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw t2, 0(a0)
-; RV32ZVE32F-NEXT: sw t1, 4(a0)
+; RV32ZVE32F-NEXT: sw t1, 0(a0)
+; RV32ZVE32F-NEXT: sw t2, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 32
; RV32ZVE32F-NEXT: beqz a0, .LBB45_6
; RV32ZVE32F-NEXT: .LBB45_15: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw a7, 0(a0)
-; RV32ZVE32F-NEXT: sw a6, 4(a0)
+; RV32ZVE32F-NEXT: sw a6, 0(a0)
+; RV32ZVE32F-NEXT: sw a7, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 64
; RV32ZVE32F-NEXT: beqz a0, .LBB45_7
; RV32ZVE32F-NEXT: .LBB45_16: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw a5, 0(a0)
-; RV32ZVE32F-NEXT: sw a4, 4(a0)
+; RV32ZVE32F-NEXT: sw a4, 0(a0)
+; RV32ZVE32F-NEXT: sw a5, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, -128
; RV32ZVE32F-NEXT: bnez a0, .LBB45_8
; RV32ZVE32F-NEXT: j .LBB45_9
;
; RV64ZVE32F-LABEL: mscatter_baseidx_v8i16_v8i64:
; RV64ZVE32F: # %bb.0:
-; RV64ZVE32F-NEXT: ld a2, 56(a0)
+; RV64ZVE32F-NEXT: ld a4, 40(a0)
; RV64ZVE32F-NEXT: ld a3, 48(a0)
-; RV64ZVE32F-NEXT: ld a5, 40(a0)
-; RV64ZVE32F-NEXT: ld a6, 32(a0)
-; RV64ZVE32F-NEXT: ld a7, 24(a0)
-; RV64ZVE32F-NEXT: ld t0, 16(a0)
+; RV64ZVE32F-NEXT: ld a2, 56(a0)
; RV64ZVE32F-NEXT: ld t1, 8(a0)
+; RV64ZVE32F-NEXT: ld t0, 16(a0)
+; RV64ZVE32F-NEXT: ld a7, 24(a0)
+; RV64ZVE32F-NEXT: ld a6, 32(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a4, v0
-; RV64ZVE32F-NEXT: andi t2, a4, 1
+; RV64ZVE32F-NEXT: vmv.x.s a5, v0
+; RV64ZVE32F-NEXT: andi t2, a5, 1
; RV64ZVE32F-NEXT: beqz t2, .LBB45_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
@@ -4264,7 +4264,7 @@ define void @mscatter_baseidx_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> %
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: sd a0, 0(t2)
; RV64ZVE32F-NEXT: .LBB45_2: # %else
-; RV64ZVE32F-NEXT: andi a0, a4, 2
+; RV64ZVE32F-NEXT: andi a0, a5, 2
; RV64ZVE32F-NEXT: beqz a0, .LBB45_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
@@ -4276,31 +4276,31 @@ define void @mscatter_baseidx_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> %
; RV64ZVE32F-NEXT: .LBB45_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
-; RV64ZVE32F-NEXT: andi a0, a4, 4
+; RV64ZVE32F-NEXT: andi a0, a5, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB45_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
-; RV64ZVE32F-NEXT: andi a0, a4, 8
+; RV64ZVE32F-NEXT: andi a0, a5, 8
; RV64ZVE32F-NEXT: bnez a0, .LBB45_13
; RV64ZVE32F-NEXT: .LBB45_6: # %else6
-; RV64ZVE32F-NEXT: andi a0, a4, 16
+; RV64ZVE32F-NEXT: andi a0, a5, 16
; RV64ZVE32F-NEXT: bnez a0, .LBB45_14
; RV64ZVE32F-NEXT: .LBB45_7: # %else8
-; RV64ZVE32F-NEXT: andi a0, a4, 32
+; RV64ZVE32F-NEXT: andi a0, a5, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB45_9
; RV64ZVE32F-NEXT: .LBB45_8: # %cond.store9
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
-; RV64ZVE32F-NEXT: sd a5, 0(a0)
+; RV64ZVE32F-NEXT: sd a4, 0(a0)
; RV64ZVE32F-NEXT: .LBB45_9: # %else10
-; RV64ZVE32F-NEXT: andi a0, a4, 64
+; RV64ZVE32F-NEXT: andi a0, a5, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB45_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
-; RV64ZVE32F-NEXT: andi a0, a4, -128
+; RV64ZVE32F-NEXT: andi a0, a5, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB45_16
; RV64ZVE32F-NEXT: .LBB45_11: # %else14
; RV64ZVE32F-NEXT: ret
@@ -4309,7 +4309,7 @@ define void @mscatter_baseidx_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> %
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd t0, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a4, 8
+; RV64ZVE32F-NEXT: andi a0, a5, 8
; RV64ZVE32F-NEXT: beqz a0, .LBB45_6
; RV64ZVE32F-NEXT: .LBB45_13: # %cond.store5
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -4317,14 +4317,14 @@ define void @mscatter_baseidx_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> %
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a7, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a4, 16
+; RV64ZVE32F-NEXT: andi a0, a5, 16
; RV64ZVE32F-NEXT: beqz a0, .LBB45_7
; RV64ZVE32F-NEXT: .LBB45_14: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a6, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a4, 32
+; RV64ZVE32F-NEXT: andi a0, a5, 32
; RV64ZVE32F-NEXT: bnez a0, .LBB45_8
; RV64ZVE32F-NEXT: j .LBB45_9
; RV64ZVE32F-NEXT: .LBB45_15: # %cond.store11
@@ -4332,7 +4332,7 @@ define void @mscatter_baseidx_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> %
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a3, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a4, -128
+; RV64ZVE32F-NEXT: andi a0, a5, -128
; RV64ZVE32F-NEXT: beqz a0, .LBB45_11
; RV64ZVE32F-NEXT: .LBB45_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -4374,20 +4374,20 @@ define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: .cfi_offset s2, -12
-; RV32ZVE32F-NEXT: lw a2, 60(a0)
-; RV32ZVE32F-NEXT: lw a3, 56(a0)
-; RV32ZVE32F-NEXT: lw a4, 52(a0)
-; RV32ZVE32F-NEXT: lw a5, 48(a0)
-; RV32ZVE32F-NEXT: lw a6, 44(a0)
-; RV32ZVE32F-NEXT: lw a7, 40(a0)
-; RV32ZVE32F-NEXT: lw t1, 36(a0)
-; RV32ZVE32F-NEXT: lw t2, 32(a0)
-; RV32ZVE32F-NEXT: lw t3, 28(a0)
-; RV32ZVE32F-NEXT: lw t4, 24(a0)
-; RV32ZVE32F-NEXT: lw t5, 20(a0)
-; RV32ZVE32F-NEXT: lw t6, 16(a0)
-; RV32ZVE32F-NEXT: lw s1, 12(a0)
+; RV32ZVE32F-NEXT: lw a2, 56(a0)
+; RV32ZVE32F-NEXT: lw a3, 60(a0)
+; RV32ZVE32F-NEXT: lw a6, 40(a0)
+; RV32ZVE32F-NEXT: lw a7, 44(a0)
+; RV32ZVE32F-NEXT: lw a4, 48(a0)
+; RV32ZVE32F-NEXT: lw a5, 52(a0)
+; RV32ZVE32F-NEXT: lw t3, 24(a0)
+; RV32ZVE32F-NEXT: lw t4, 28(a0)
+; RV32ZVE32F-NEXT: lw t1, 32(a0)
+; RV32ZVE32F-NEXT: lw t2, 36(a0)
; RV32ZVE32F-NEXT: lw s0, 8(a0)
+; RV32ZVE32F-NEXT: lw s1, 12(a0)
+; RV32ZVE32F-NEXT: lw t5, 16(a0)
+; RV32ZVE32F-NEXT: lw t6, 20(a0)
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vsext.vf2 v10, v8
; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
@@ -4422,8 +4422,8 @@ define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
-; RV32ZVE32F-NEXT: sw a3, 0(a0)
-; RV32ZVE32F-NEXT: sw a2, 4(a0)
+; RV32ZVE32F-NEXT: sw a2, 0(a0)
+; RV32ZVE32F-NEXT: sw a3, 4(a0)
; RV32ZVE32F-NEXT: .LBB46_9: # %else14
; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
@@ -4450,56 +4450,56 @@ define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw t6, 0(a0)
-; RV32ZVE32F-NEXT: sw t5, 4(a0)
+; RV32ZVE32F-NEXT: sw t5, 0(a0)
+; RV32ZVE32F-NEXT: sw t6, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 8
; RV32ZVE32F-NEXT: beqz a0, .LBB46_4
; RV32ZVE32F-NEXT: .LBB46_13: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw t4, 0(a0)
-; RV32ZVE32F-NEXT: sw t3, 4(a0)
+; RV32ZVE32F-NEXT: sw t3, 0(a0)
+; RV32ZVE32F-NEXT: sw t4, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 16
; RV32ZVE32F-NEXT: beqz a0, .LBB46_5
; RV32ZVE32F-NEXT: .LBB46_14: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw t2, 0(a0)
-; RV32ZVE32F-NEXT: sw t1, 4(a0)
+; RV32ZVE32F-NEXT: sw t1, 0(a0)
+; RV32ZVE32F-NEXT: sw t2, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 32
; RV32ZVE32F-NEXT: beqz a0, .LBB46_6
; RV32ZVE32F-NEXT: .LBB46_15: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw a7, 0(a0)
-; RV32ZVE32F-NEXT: sw a6, 4(a0)
+; RV32ZVE32F-NEXT: sw a6, 0(a0)
+; RV32ZVE32F-NEXT: sw a7, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 64
; RV32ZVE32F-NEXT: beqz a0, .LBB46_7
; RV32ZVE32F-NEXT: .LBB46_16: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw a5, 0(a0)
-; RV32ZVE32F-NEXT: sw a4, 4(a0)
+; RV32ZVE32F-NEXT: sw a4, 0(a0)
+; RV32ZVE32F-NEXT: sw a5, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, -128
; RV32ZVE32F-NEXT: bnez a0, .LBB46_8
; RV32ZVE32F-NEXT: j .LBB46_9
;
; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i16_v8i64:
; RV64ZVE32F: # %bb.0:
-; RV64ZVE32F-NEXT: ld a2, 56(a0)
+; RV64ZVE32F-NEXT: ld a4, 40(a0)
; RV64ZVE32F-NEXT: ld a3, 48(a0)
-; RV64ZVE32F-NEXT: ld a5, 40(a0)
-; RV64ZVE32F-NEXT: ld a6, 32(a0)
-; RV64ZVE32F-NEXT: ld a7, 24(a0)
-; RV64ZVE32F-NEXT: ld t0, 16(a0)
+; RV64ZVE32F-NEXT: ld a2, 56(a0)
; RV64ZVE32F-NEXT: ld t1, 8(a0)
+; RV64ZVE32F-NEXT: ld t0, 16(a0)
+; RV64ZVE32F-NEXT: ld a7, 24(a0)
+; RV64ZVE32F-NEXT: ld a6, 32(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a4, v0
-; RV64ZVE32F-NEXT: andi t2, a4, 1
+; RV64ZVE32F-NEXT: vmv.x.s a5, v0
+; RV64ZVE32F-NEXT: andi t2, a5, 1
; RV64ZVE32F-NEXT: beqz t2, .LBB46_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
@@ -4509,7 +4509,7 @@ define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: sd a0, 0(t2)
; RV64ZVE32F-NEXT: .LBB46_2: # %else
-; RV64ZVE32F-NEXT: andi a0, a4, 2
+; RV64ZVE32F-NEXT: andi a0, a5, 2
; RV64ZVE32F-NEXT: beqz a0, .LBB46_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
@@ -4521,31 +4521,31 @@ define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: .LBB46_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
-; RV64ZVE32F-NEXT: andi a0, a4, 4
+; RV64ZVE32F-NEXT: andi a0, a5, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB46_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
-; RV64ZVE32F-NEXT: andi a0, a4, 8
+; RV64ZVE32F-NEXT: andi a0, a5, 8
; RV64ZVE32F-NEXT: bnez a0, .LBB46_13
; RV64ZVE32F-NEXT: .LBB46_6: # %else6
-; RV64ZVE32F-NEXT: andi a0, a4, 16
+; RV64ZVE32F-NEXT: andi a0, a5, 16
; RV64ZVE32F-NEXT: bnez a0, .LBB46_14
; RV64ZVE32F-NEXT: .LBB46_7: # %else8
-; RV64ZVE32F-NEXT: andi a0, a4, 32
+; RV64ZVE32F-NEXT: andi a0, a5, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB46_9
; RV64ZVE32F-NEXT: .LBB46_8: # %cond.store9
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
-; RV64ZVE32F-NEXT: sd a5, 0(a0)
+; RV64ZVE32F-NEXT: sd a4, 0(a0)
; RV64ZVE32F-NEXT: .LBB46_9: # %else10
-; RV64ZVE32F-NEXT: andi a0, a4, 64
+; RV64ZVE32F-NEXT: andi a0, a5, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB46_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
-; RV64ZVE32F-NEXT: andi a0, a4, -128
+; RV64ZVE32F-NEXT: andi a0, a5, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB46_16
; RV64ZVE32F-NEXT: .LBB46_11: # %else14
; RV64ZVE32F-NEXT: ret
@@ -4554,7 +4554,7 @@ define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd t0, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a4, 8
+; RV64ZVE32F-NEXT: andi a0, a5, 8
; RV64ZVE32F-NEXT: beqz a0, .LBB46_6
; RV64ZVE32F-NEXT: .LBB46_13: # %cond.store5
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -4562,14 +4562,14 @@ define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a7, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a4, 16
+; RV64ZVE32F-NEXT: andi a0, a5, 16
; RV64ZVE32F-NEXT: beqz a0, .LBB46_7
; RV64ZVE32F-NEXT: .LBB46_14: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a6, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a4, 32
+; RV64ZVE32F-NEXT: andi a0, a5, 32
; RV64ZVE32F-NEXT: bnez a0, .LBB46_8
; RV64ZVE32F-NEXT: j .LBB46_9
; RV64ZVE32F-NEXT: .LBB46_15: # %cond.store11
@@ -4577,7 +4577,7 @@ define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a3, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a4, -128
+; RV64ZVE32F-NEXT: andi a0, a5, -128
; RV64ZVE32F-NEXT: beqz a0, .LBB46_11
; RV64ZVE32F-NEXT: .LBB46_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -4621,20 +4621,20 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: .cfi_offset s2, -12
-; RV32ZVE32F-NEXT: lw a2, 60(a0)
-; RV32ZVE32F-NEXT: lw a3, 56(a0)
-; RV32ZVE32F-NEXT: lw a4, 52(a0)
-; RV32ZVE32F-NEXT: lw a5, 48(a0)
-; RV32ZVE32F-NEXT: lw a6, 44(a0)
-; RV32ZVE32F-NEXT: lw a7, 40(a0)
-; RV32ZVE32F-NEXT: lw t1, 36(a0)
-; RV32ZVE32F-NEXT: lw t2, 32(a0)
-; RV32ZVE32F-NEXT: lw t3, 28(a0)
-; RV32ZVE32F-NEXT: lw t4, 24(a0)
-; RV32ZVE32F-NEXT: lw t5, 20(a0)
-; RV32ZVE32F-NEXT: lw t6, 16(a0)
-; RV32ZVE32F-NEXT: lw s1, 12(a0)
+; RV32ZVE32F-NEXT: lw a2, 56(a0)
+; RV32ZVE32F-NEXT: lw a3, 60(a0)
+; RV32ZVE32F-NEXT: lw a6, 40(a0)
+; RV32ZVE32F-NEXT: lw a7, 44(a0)
+; RV32ZVE32F-NEXT: lw a4, 48(a0)
+; RV32ZVE32F-NEXT: lw a5, 52(a0)
+; RV32ZVE32F-NEXT: lw t3, 24(a0)
+; RV32ZVE32F-NEXT: lw t4, 28(a0)
+; RV32ZVE32F-NEXT: lw t1, 32(a0)
+; RV32ZVE32F-NEXT: lw t2, 36(a0)
; RV32ZVE32F-NEXT: lw s0, 8(a0)
+; RV32ZVE32F-NEXT: lw s1, 12(a0)
+; RV32ZVE32F-NEXT: lw t5, 16(a0)
+; RV32ZVE32F-NEXT: lw t6, 20(a0)
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vzext.vf2 v10, v8
; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
@@ -4669,8 +4669,8 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
-; RV32ZVE32F-NEXT: sw a3, 0(a0)
-; RV32ZVE32F-NEXT: sw a2, 4(a0)
+; RV32ZVE32F-NEXT: sw a2, 0(a0)
+; RV32ZVE32F-NEXT: sw a3, 4(a0)
; RV32ZVE32F-NEXT: .LBB47_9: # %else14
; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
@@ -4697,57 +4697,57 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw t6, 0(a0)
-; RV32ZVE32F-NEXT: sw t5, 4(a0)
+; RV32ZVE32F-NEXT: sw t5, 0(a0)
+; RV32ZVE32F-NEXT: sw t6, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 8
; RV32ZVE32F-NEXT: beqz a0, .LBB47_4
; RV32ZVE32F-NEXT: .LBB47_13: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw t4, 0(a0)
-; RV32ZVE32F-NEXT: sw t3, 4(a0)
+; RV32ZVE32F-NEXT: sw t3, 0(a0)
+; RV32ZVE32F-NEXT: sw t4, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 16
; RV32ZVE32F-NEXT: beqz a0, .LBB47_5
; RV32ZVE32F-NEXT: .LBB47_14: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw t2, 0(a0)
-; RV32ZVE32F-NEXT: sw t1, 4(a0)
+; RV32ZVE32F-NEXT: sw t1, 0(a0)
+; RV32ZVE32F-NEXT: sw t2, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 32
; RV32ZVE32F-NEXT: beqz a0, .LBB47_6
; RV32ZVE32F-NEXT: .LBB47_15: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw a7, 0(a0)
-; RV32ZVE32F-NEXT: sw a6, 4(a0)
+; RV32ZVE32F-NEXT: sw a6, 0(a0)
+; RV32ZVE32F-NEXT: sw a7, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 64
; RV32ZVE32F-NEXT: beqz a0, .LBB47_7
; RV32ZVE32F-NEXT: .LBB47_16: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw a5, 0(a0)
-; RV32ZVE32F-NEXT: sw a4, 4(a0)
+; RV32ZVE32F-NEXT: sw a4, 0(a0)
+; RV32ZVE32F-NEXT: sw a5, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, -128
; RV32ZVE32F-NEXT: bnez a0, .LBB47_8
; RV32ZVE32F-NEXT: j .LBB47_9
;
; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8i64:
; RV64ZVE32F: # %bb.0:
-; RV64ZVE32F-NEXT: ld a2, 56(a0)
+; RV64ZVE32F-NEXT: ld a5, 40(a0)
; RV64ZVE32F-NEXT: ld a3, 48(a0)
-; RV64ZVE32F-NEXT: ld a6, 40(a0)
-; RV64ZVE32F-NEXT: ld a7, 32(a0)
-; RV64ZVE32F-NEXT: ld t0, 24(a0)
-; RV64ZVE32F-NEXT: ld t1, 16(a0)
+; RV64ZVE32F-NEXT: ld a2, 56(a0)
; RV64ZVE32F-NEXT: ld t2, 8(a0)
+; RV64ZVE32F-NEXT: ld t1, 16(a0)
+; RV64ZVE32F-NEXT: ld t0, 24(a0)
+; RV64ZVE32F-NEXT: ld a7, 32(a0)
; RV64ZVE32F-NEXT: lui a4, 16
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a5, v0
-; RV64ZVE32F-NEXT: andi t3, a5, 1
+; RV64ZVE32F-NEXT: vmv.x.s a6, v0
+; RV64ZVE32F-NEXT: andi t3, a6, 1
; RV64ZVE32F-NEXT: addiw a4, a4, -1
; RV64ZVE32F-NEXT: beqz t3, .LBB47_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
@@ -4759,7 +4759,7 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: add t3, a1, t3
; RV64ZVE32F-NEXT: sd a0, 0(t3)
; RV64ZVE32F-NEXT: .LBB47_2: # %else
-; RV64ZVE32F-NEXT: andi a0, a5, 2
+; RV64ZVE32F-NEXT: andi a0, a6, 2
; RV64ZVE32F-NEXT: beqz a0, .LBB47_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
@@ -4772,18 +4772,18 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: .LBB47_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
-; RV64ZVE32F-NEXT: andi a0, a5, 4
+; RV64ZVE32F-NEXT: andi a0, a6, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB47_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
-; RV64ZVE32F-NEXT: andi a0, a5, 8
+; RV64ZVE32F-NEXT: andi a0, a6, 8
; RV64ZVE32F-NEXT: bnez a0, .LBB47_13
; RV64ZVE32F-NEXT: .LBB47_6: # %else6
-; RV64ZVE32F-NEXT: andi a0, a5, 16
+; RV64ZVE32F-NEXT: andi a0, a6, 16
; RV64ZVE32F-NEXT: bnez a0, .LBB47_14
; RV64ZVE32F-NEXT: .LBB47_7: # %else8
-; RV64ZVE32F-NEXT: andi a0, a5, 32
+; RV64ZVE32F-NEXT: andi a0, a6, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB47_9
; RV64ZVE32F-NEXT: .LBB47_8: # %cond.store9
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
@@ -4791,13 +4791,13 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: and a0, a0, a4
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
-; RV64ZVE32F-NEXT: sd a6, 0(a0)
+; RV64ZVE32F-NEXT: sd a5, 0(a0)
; RV64ZVE32F-NEXT: .LBB47_9: # %else10
-; RV64ZVE32F-NEXT: andi a0, a5, 64
+; RV64ZVE32F-NEXT: andi a0, a6, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB47_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
-; RV64ZVE32F-NEXT: andi a0, a5, -128
+; RV64ZVE32F-NEXT: andi a0, a6, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB47_16
; RV64ZVE32F-NEXT: .LBB47_11: # %else14
; RV64ZVE32F-NEXT: ret
@@ -4807,7 +4807,7 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd t1, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a5, 8
+; RV64ZVE32F-NEXT: andi a0, a6, 8
; RV64ZVE32F-NEXT: beqz a0, .LBB47_6
; RV64ZVE32F-NEXT: .LBB47_13: # %cond.store5
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -4816,7 +4816,7 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd t0, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a5, 16
+; RV64ZVE32F-NEXT: andi a0, a6, 16
; RV64ZVE32F-NEXT: beqz a0, .LBB47_7
; RV64ZVE32F-NEXT: .LBB47_14: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
@@ -4824,7 +4824,7 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a7, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a5, 32
+; RV64ZVE32F-NEXT: andi a0, a6, 32
; RV64ZVE32F-NEXT: bnez a0, .LBB47_8
; RV64ZVE32F-NEXT: j .LBB47_9
; RV64ZVE32F-NEXT: .LBB47_15: # %cond.store11
@@ -4833,7 +4833,7 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a3, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a5, -128
+; RV64ZVE32F-NEXT: andi a0, a6, -128
; RV64ZVE32F-NEXT: beqz a0, .LBB47_11
; RV64ZVE32F-NEXT: .LBB47_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -4876,55 +4876,55 @@ define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> %
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: .cfi_offset s2, -12
-; RV32ZVE32F-NEXT: lw a2, 60(a0)
-; RV32ZVE32F-NEXT: lw a3, 56(a0)
-; RV32ZVE32F-NEXT: lw a4, 52(a0)
-; RV32ZVE32F-NEXT: lw a5, 48(a0)
-; RV32ZVE32F-NEXT: lw a6, 44(a0)
-; RV32ZVE32F-NEXT: lw t0, 40(a0)
-; RV32ZVE32F-NEXT: lw t1, 36(a0)
-; RV32ZVE32F-NEXT: lw t2, 32(a0)
-; RV32ZVE32F-NEXT: lw t3, 28(a0)
-; RV32ZVE32F-NEXT: lw t4, 24(a0)
-; RV32ZVE32F-NEXT: lw t5, 20(a0)
-; RV32ZVE32F-NEXT: lw t6, 16(a0)
-; RV32ZVE32F-NEXT: lw s1, 12(a0)
+; RV32ZVE32F-NEXT: lw a2, 56(a0)
+; RV32ZVE32F-NEXT: lw a3, 60(a0)
+; RV32ZVE32F-NEXT: lw a6, 40(a0)
+; RV32ZVE32F-NEXT: lw a7, 44(a0)
+; RV32ZVE32F-NEXT: lw a4, 48(a0)
+; RV32ZVE32F-NEXT: lw a5, 52(a0)
+; RV32ZVE32F-NEXT: lw t3, 24(a0)
+; RV32ZVE32F-NEXT: lw t4, 28(a0)
+; RV32ZVE32F-NEXT: lw t1, 32(a0)
+; RV32ZVE32F-NEXT: lw t2, 36(a0)
; RV32ZVE32F-NEXT: lw s0, 8(a0)
+; RV32ZVE32F-NEXT: lw s1, 12(a0)
+; RV32ZVE32F-NEXT: lw t5, 16(a0)
+; RV32ZVE32F-NEXT: lw t6, 20(a0)
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV32ZVE32F-NEXT: vmv.x.s a7, v0
-; RV32ZVE32F-NEXT: andi s2, a7, 1
+; RV32ZVE32F-NEXT: vmv.x.s t0, v0
+; RV32ZVE32F-NEXT: andi s2, t0, 1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: bnez s2, .LBB48_10
; RV32ZVE32F-NEXT: # %bb.1: # %else
-; RV32ZVE32F-NEXT: andi a0, a7, 2
+; RV32ZVE32F-NEXT: andi a0, t0, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB48_11
; RV32ZVE32F-NEXT: .LBB48_2: # %else2
-; RV32ZVE32F-NEXT: andi a0, a7, 4
+; RV32ZVE32F-NEXT: andi a0, t0, 4
; RV32ZVE32F-NEXT: bnez a0, .LBB48_12
; RV32ZVE32F-NEXT: .LBB48_3: # %else4
-; RV32ZVE32F-NEXT: andi a0, a7, 8
+; RV32ZVE32F-NEXT: andi a0, t0, 8
; RV32ZVE32F-NEXT: bnez a0, .LBB48_13
; RV32ZVE32F-NEXT: .LBB48_4: # %else6
-; RV32ZVE32F-NEXT: andi a0, a7, 16
+; RV32ZVE32F-NEXT: andi a0, t0, 16
; RV32ZVE32F-NEXT: bnez a0, .LBB48_14
; RV32ZVE32F-NEXT: .LBB48_5: # %else8
-; RV32ZVE32F-NEXT: andi a0, a7, 32
+; RV32ZVE32F-NEXT: andi a0, t0, 32
; RV32ZVE32F-NEXT: bnez a0, .LBB48_15
; RV32ZVE32F-NEXT: .LBB48_6: # %else10
-; RV32ZVE32F-NEXT: andi a0, a7, 64
+; RV32ZVE32F-NEXT: andi a0, t0, 64
; RV32ZVE32F-NEXT: bnez a0, .LBB48_16
; RV32ZVE32F-NEXT: .LBB48_7: # %else12
-; RV32ZVE32F-NEXT: andi a0, a7, -128
+; RV32ZVE32F-NEXT: andi a0, t0, -128
; RV32ZVE32F-NEXT: beqz a0, .LBB48_9
; RV32ZVE32F-NEXT: .LBB48_8: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
-; RV32ZVE32F-NEXT: sw a3, 0(a0)
-; RV32ZVE32F-NEXT: sw a2, 4(a0)
+; RV32ZVE32F-NEXT: sw a2, 0(a0)
+; RV32ZVE32F-NEXT: sw a3, 4(a0)
; RV32ZVE32F-NEXT: .LBB48_9: # %else14
; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
@@ -4937,7 +4937,7 @@ define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> %
; RV32ZVE32F-NEXT: vmv.x.s s2, v8
; RV32ZVE32F-NEXT: sw a1, 4(s2)
; RV32ZVE32F-NEXT: sw a0, 0(s2)
-; RV32ZVE32F-NEXT: andi a0, a7, 2
+; RV32ZVE32F-NEXT: andi a0, t0, 2
; RV32ZVE32F-NEXT: beqz a0, .LBB48_2
; RV32ZVE32F-NEXT: .LBB48_11: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
@@ -4945,62 +4945,62 @@ define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> %
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw s1, 4(a0)
; RV32ZVE32F-NEXT: sw s0, 0(a0)
-; RV32ZVE32F-NEXT: andi a0, a7, 4
+; RV32ZVE32F-NEXT: andi a0, t0, 4
; RV32ZVE32F-NEXT: beqz a0, .LBB48_3
; RV32ZVE32F-NEXT: .LBB48_12: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw t6, 0(a0)
-; RV32ZVE32F-NEXT: sw t5, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, a7, 8
+; RV32ZVE32F-NEXT: sw t5, 0(a0)
+; RV32ZVE32F-NEXT: sw t6, 4(a0)
+; RV32ZVE32F-NEXT: andi a0, t0, 8
; RV32ZVE32F-NEXT: beqz a0, .LBB48_4
; RV32ZVE32F-NEXT: .LBB48_13: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw t4, 0(a0)
-; RV32ZVE32F-NEXT: sw t3, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, a7, 16
+; RV32ZVE32F-NEXT: sw t3, 0(a0)
+; RV32ZVE32F-NEXT: sw t4, 4(a0)
+; RV32ZVE32F-NEXT: andi a0, t0, 16
; RV32ZVE32F-NEXT: beqz a0, .LBB48_5
; RV32ZVE32F-NEXT: .LBB48_14: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw t2, 0(a0)
-; RV32ZVE32F-NEXT: sw t1, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, a7, 32
+; RV32ZVE32F-NEXT: sw t1, 0(a0)
+; RV32ZVE32F-NEXT: sw t2, 4(a0)
+; RV32ZVE32F-NEXT: andi a0, t0, 32
; RV32ZVE32F-NEXT: beqz a0, .LBB48_6
; RV32ZVE32F-NEXT: .LBB48_15: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw t0, 0(a0)
-; RV32ZVE32F-NEXT: sw a6, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, a7, 64
+; RV32ZVE32F-NEXT: sw a6, 0(a0)
+; RV32ZVE32F-NEXT: sw a7, 4(a0)
+; RV32ZVE32F-NEXT: andi a0, t0, 64
; RV32ZVE32F-NEXT: beqz a0, .LBB48_7
; RV32ZVE32F-NEXT: .LBB48_16: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw a5, 0(a0)
-; RV32ZVE32F-NEXT: sw a4, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, a7, -128
+; RV32ZVE32F-NEXT: sw a4, 0(a0)
+; RV32ZVE32F-NEXT: sw a5, 4(a0)
+; RV32ZVE32F-NEXT: andi a0, t0, -128
; RV32ZVE32F-NEXT: bnez a0, .LBB48_8
; RV32ZVE32F-NEXT: j .LBB48_9
;
; RV64ZVE32F-LABEL: mscatter_baseidx_v8i32_v8i64:
; RV64ZVE32F: # %bb.0:
-; RV64ZVE32F-NEXT: ld a2, 56(a0)
+; RV64ZVE32F-NEXT: ld a4, 40(a0)
; RV64ZVE32F-NEXT: ld a3, 48(a0)
-; RV64ZVE32F-NEXT: ld a5, 40(a0)
-; RV64ZVE32F-NEXT: ld a6, 32(a0)
-; RV64ZVE32F-NEXT: ld a7, 24(a0)
-; RV64ZVE32F-NEXT: ld t0, 16(a0)
+; RV64ZVE32F-NEXT: ld a2, 56(a0)
; RV64ZVE32F-NEXT: ld t1, 8(a0)
+; RV64ZVE32F-NEXT: ld t0, 16(a0)
+; RV64ZVE32F-NEXT: ld a7, 24(a0)
+; RV64ZVE32F-NEXT: ld a6, 32(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a4, v0
-; RV64ZVE32F-NEXT: andi t2, a4, 1
+; RV64ZVE32F-NEXT: vmv.x.s a5, v0
+; RV64ZVE32F-NEXT: andi t2, a5, 1
; RV64ZVE32F-NEXT: beqz t2, .LBB48_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
@@ -5010,7 +5010,7 @@ define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> %
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: sd a0, 0(t2)
; RV64ZVE32F-NEXT: .LBB48_2: # %else
-; RV64ZVE32F-NEXT: andi a0, a4, 2
+; RV64ZVE32F-NEXT: andi a0, a5, 2
; RV64ZVE32F-NEXT: beqz a0, .LBB48_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
@@ -5022,31 +5022,31 @@ define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> %
; RV64ZVE32F-NEXT: .LBB48_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
-; RV64ZVE32F-NEXT: andi a0, a4, 4
+; RV64ZVE32F-NEXT: andi a0, a5, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB48_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
-; RV64ZVE32F-NEXT: andi a0, a4, 8
+; RV64ZVE32F-NEXT: andi a0, a5, 8
; RV64ZVE32F-NEXT: bnez a0, .LBB48_13
; RV64ZVE32F-NEXT: .LBB48_6: # %else6
-; RV64ZVE32F-NEXT: andi a0, a4, 16
+; RV64ZVE32F-NEXT: andi a0, a5, 16
; RV64ZVE32F-NEXT: bnez a0, .LBB48_14
; RV64ZVE32F-NEXT: .LBB48_7: # %else8
-; RV64ZVE32F-NEXT: andi a0, a4, 32
+; RV64ZVE32F-NEXT: andi a0, a5, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB48_9
; RV64ZVE32F-NEXT: .LBB48_8: # %cond.store9
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
-; RV64ZVE32F-NEXT: sd a5, 0(a0)
+; RV64ZVE32F-NEXT: sd a4, 0(a0)
; RV64ZVE32F-NEXT: .LBB48_9: # %else10
-; RV64ZVE32F-NEXT: andi a0, a4, 64
+; RV64ZVE32F-NEXT: andi a0, a5, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB48_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
-; RV64ZVE32F-NEXT: andi a0, a4, -128
+; RV64ZVE32F-NEXT: andi a0, a5, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB48_16
; RV64ZVE32F-NEXT: .LBB48_11: # %else14
; RV64ZVE32F-NEXT: ret
@@ -5055,7 +5055,7 @@ define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> %
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd t0, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a4, 8
+; RV64ZVE32F-NEXT: andi a0, a5, 8
; RV64ZVE32F-NEXT: beqz a0, .LBB48_6
; RV64ZVE32F-NEXT: .LBB48_13: # %cond.store5
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -5063,14 +5063,14 @@ define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> %
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a7, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a4, 16
+; RV64ZVE32F-NEXT: andi a0, a5, 16
; RV64ZVE32F-NEXT: beqz a0, .LBB48_7
; RV64ZVE32F-NEXT: .LBB48_14: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a0, v10
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a6, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a4, 32
+; RV64ZVE32F-NEXT: andi a0, a5, 32
; RV64ZVE32F-NEXT: bnez a0, .LBB48_8
; RV64ZVE32F-NEXT: j .LBB48_9
; RV64ZVE32F-NEXT: .LBB48_15: # %cond.store11
@@ -5078,7 +5078,7 @@ define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> %
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a3, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a4, -128
+; RV64ZVE32F-NEXT: andi a0, a5, -128
; RV64ZVE32F-NEXT: beqz a0, .LBB48_11
; RV64ZVE32F-NEXT: .LBB48_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -5119,55 +5119,55 @@ define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: .cfi_offset s2, -12
-; RV32ZVE32F-NEXT: lw a2, 60(a0)
-; RV32ZVE32F-NEXT: lw a3, 56(a0)
-; RV32ZVE32F-NEXT: lw a4, 52(a0)
-; RV32ZVE32F-NEXT: lw a5, 48(a0)
-; RV32ZVE32F-NEXT: lw a6, 44(a0)
-; RV32ZVE32F-NEXT: lw t0, 40(a0)
-; RV32ZVE32F-NEXT: lw t1, 36(a0)
-; RV32ZVE32F-NEXT: lw t2, 32(a0)
-; RV32ZVE32F-NEXT: lw t3, 28(a0)
-; RV32ZVE32F-NEXT: lw t4, 24(a0)
-; RV32ZVE32F-NEXT: lw t5, 20(a0)
-; RV32ZVE32F-NEXT: lw t6, 16(a0)
-; RV32ZVE32F-NEXT: lw s1, 12(a0)
+; RV32ZVE32F-NEXT: lw a2, 56(a0)
+; RV32ZVE32F-NEXT: lw a3, 60(a0)
+; RV32ZVE32F-NEXT: lw a6, 40(a0)
+; RV32ZVE32F-NEXT: lw a7, 44(a0)
+; RV32ZVE32F-NEXT: lw a4, 48(a0)
+; RV32ZVE32F-NEXT: lw a5, 52(a0)
+; RV32ZVE32F-NEXT: lw t3, 24(a0)
+; RV32ZVE32F-NEXT: lw t4, 28(a0)
+; RV32ZVE32F-NEXT: lw t1, 32(a0)
+; RV32ZVE32F-NEXT: lw t2, 36(a0)
; RV32ZVE32F-NEXT: lw s0, 8(a0)
+; RV32ZVE32F-NEXT: lw s1, 12(a0)
+; RV32ZVE32F-NEXT: lw t5, 16(a0)
+; RV32ZVE32F-NEXT: lw t6, 20(a0)
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV32ZVE32F-NEXT: vmv.x.s a7, v0
-; RV32ZVE32F-NEXT: andi s2, a7, 1
+; RV32ZVE32F-NEXT: vmv.x.s t0, v0
+; RV32ZVE32F-NEXT: andi s2, t0, 1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: bnez s2, .LBB49_10
; RV32ZVE32F-NEXT: # %bb.1: # %else
-; RV32ZVE32F-NEXT: andi a0, a7, 2
+; RV32ZVE32F-NEXT: andi a0, t0, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB49_11
; RV32ZVE32F-NEXT: .LBB49_2: # %else2
-; RV32ZVE32F-NEXT: andi a0, a7, 4
+; RV32ZVE32F-NEXT: andi a0, t0, 4
; RV32ZVE32F-NEXT: bnez a0, .LBB49_12
; RV32ZVE32F-NEXT: .LBB49_3: # %else4
-; RV32ZVE32F-NEXT: andi a0, a7, 8
+; RV32ZVE32F-NEXT: andi a0, t0, 8
; RV32ZVE32F-NEXT: bnez a0, .LBB49_13
; RV32ZVE32F-NEXT: .LBB49_4: # %else6
-; RV32ZVE32F-NEXT: andi a0, a7, 16
+; RV32ZVE32F-NEXT: andi a0, t0, 16
; RV32ZVE32F-NEXT: bnez a0, .LBB49_14
; RV32ZVE32F-NEXT: .LBB49_5: # %else8
-; RV32ZVE32F-NEXT: andi a0, a7, 32
+; RV32ZVE32F-NEXT: andi a0, t0, 32
; RV32ZVE32F-NEXT: bnez a0, .LBB49_15
; RV32ZVE32F-NEXT: .LBB49_6: # %else10
-; RV32ZVE32F-NEXT: andi a0, a7, 64
+; RV32ZVE32F-NEXT: andi a0, t0, 64
; RV32ZVE32F-NEXT: bnez a0, .LBB49_16
; RV32ZVE32F-NEXT: .LBB49_7: # %else12
-; RV32ZVE32F-NEXT: andi a0, a7, -128
+; RV32ZVE32F-NEXT: andi a0, t0, -128
; RV32ZVE32F-NEXT: beqz a0, .LBB49_9
; RV32ZVE32F-NEXT: .LBB49_8: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
-; RV32ZVE32F-NEXT: sw a3, 0(a0)
-; RV32ZVE32F-NEXT: sw a2, 4(a0)
+; RV32ZVE32F-NEXT: sw a2, 0(a0)
+; RV32ZVE32F-NEXT: sw a3, 4(a0)
; RV32ZVE32F-NEXT: .LBB49_9: # %else14
; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
@@ -5180,7 +5180,7 @@ define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV32ZVE32F-NEXT: vmv.x.s s2, v8
; RV32ZVE32F-NEXT: sw a1, 4(s2)
; RV32ZVE32F-NEXT: sw a0, 0(s2)
-; RV32ZVE32F-NEXT: andi a0, a7, 2
+; RV32ZVE32F-NEXT: andi a0, t0, 2
; RV32ZVE32F-NEXT: beqz a0, .LBB49_2
; RV32ZVE32F-NEXT: .LBB49_11: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
@@ -5188,62 +5188,62 @@ define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw s1, 4(a0)
; RV32ZVE32F-NEXT: sw s0, 0(a0)
-; RV32ZVE32F-NEXT: andi a0, a7, 4
+; RV32ZVE32F-NEXT: andi a0, t0, 4
; RV32ZVE32F-NEXT: beqz a0, .LBB49_3
; RV32ZVE32F-NEXT: .LBB49_12: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw t6, 0(a0)
-; RV32ZVE32F-NEXT: sw t5, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, a7, 8
+; RV32ZVE32F-NEXT: sw t5, 0(a0)
+; RV32ZVE32F-NEXT: sw t6, 4(a0)
+; RV32ZVE32F-NEXT: andi a0, t0, 8
; RV32ZVE32F-NEXT: beqz a0, .LBB49_4
; RV32ZVE32F-NEXT: .LBB49_13: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw t4, 0(a0)
-; RV32ZVE32F-NEXT: sw t3, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, a7, 16
+; RV32ZVE32F-NEXT: sw t3, 0(a0)
+; RV32ZVE32F-NEXT: sw t4, 4(a0)
+; RV32ZVE32F-NEXT: andi a0, t0, 16
; RV32ZVE32F-NEXT: beqz a0, .LBB49_5
; RV32ZVE32F-NEXT: .LBB49_14: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw t2, 0(a0)
-; RV32ZVE32F-NEXT: sw t1, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, a7, 32
+; RV32ZVE32F-NEXT: sw t1, 0(a0)
+; RV32ZVE32F-NEXT: sw t2, 4(a0)
+; RV32ZVE32F-NEXT: andi a0, t0, 32
; RV32ZVE32F-NEXT: beqz a0, .LBB49_6
; RV32ZVE32F-NEXT: .LBB49_15: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw t0, 0(a0)
-; RV32ZVE32F-NEXT: sw a6, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, a7, 64
+; RV32ZVE32F-NEXT: sw a6, 0(a0)
+; RV32ZVE32F-NEXT: sw a7, 4(a0)
+; RV32ZVE32F-NEXT: andi a0, t0, 64
; RV32ZVE32F-NEXT: beqz a0, .LBB49_7
; RV32ZVE32F-NEXT: .LBB49_16: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw a5, 0(a0)
-; RV32ZVE32F-NEXT: sw a4, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, a7, -128
+; RV32ZVE32F-NEXT: sw a4, 0(a0)
+; RV32ZVE32F-NEXT: sw a5, 4(a0)
+; RV32ZVE32F-NEXT: andi a0, t0, -128
; RV32ZVE32F-NEXT: bnez a0, .LBB49_8
; RV32ZVE32F-NEXT: j .LBB49_9
;
; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i32_v8i64:
; RV64ZVE32F: # %bb.0:
-; RV64ZVE32F-NEXT: ld a2, 56(a0)
+; RV64ZVE32F-NEXT: ld a4, 40(a0)
; RV64ZVE32F-NEXT: ld a3, 48(a0)
-; RV64ZVE32F-NEXT: ld a5, 40(a0)
-; RV64ZVE32F-NEXT: ld a6, 32(a0)
-; RV64ZVE32F-NEXT: ld a7, 24(a0)
-; RV64ZVE32F-NEXT: ld t0, 16(a0)
+; RV64ZVE32F-NEXT: ld a2, 56(a0)
; RV64ZVE32F-NEXT: ld t1, 8(a0)
+; RV64ZVE32F-NEXT: ld t0, 16(a0)
+; RV64ZVE32F-NEXT: ld a7, 24(a0)
+; RV64ZVE32F-NEXT: ld a6, 32(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a4, v0
-; RV64ZVE32F-NEXT: andi t2, a4, 1
+; RV64ZVE32F-NEXT: vmv.x.s a5, v0
+; RV64ZVE32F-NEXT: andi t2, a5, 1
; RV64ZVE32F-NEXT: beqz t2, .LBB49_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
@@ -5253,7 +5253,7 @@ define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: sd a0, 0(t2)
; RV64ZVE32F-NEXT: .LBB49_2: # %else
-; RV64ZVE32F-NEXT: andi a0, a4, 2
+; RV64ZVE32F-NEXT: andi a0, a5, 2
; RV64ZVE32F-NEXT: beqz a0, .LBB49_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
@@ -5265,31 +5265,31 @@ define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: .LBB49_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
-; RV64ZVE32F-NEXT: andi a0, a4, 4
+; RV64ZVE32F-NEXT: andi a0, a5, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB49_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
-; RV64ZVE32F-NEXT: andi a0, a4, 8
+; RV64ZVE32F-NEXT: andi a0, a5, 8
; RV64ZVE32F-NEXT: bnez a0, .LBB49_13
; RV64ZVE32F-NEXT: .LBB49_6: # %else6
-; RV64ZVE32F-NEXT: andi a0, a4, 16
+; RV64ZVE32F-NEXT: andi a0, a5, 16
; RV64ZVE32F-NEXT: bnez a0, .LBB49_14
; RV64ZVE32F-NEXT: .LBB49_7: # %else8
-; RV64ZVE32F-NEXT: andi a0, a4, 32
+; RV64ZVE32F-NEXT: andi a0, a5, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB49_9
; RV64ZVE32F-NEXT: .LBB49_8: # %cond.store9
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
-; RV64ZVE32F-NEXT: sd a5, 0(a0)
+; RV64ZVE32F-NEXT: sd a4, 0(a0)
; RV64ZVE32F-NEXT: .LBB49_9: # %else10
-; RV64ZVE32F-NEXT: andi a0, a4, 64
+; RV64ZVE32F-NEXT: andi a0, a5, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB49_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
-; RV64ZVE32F-NEXT: andi a0, a4, -128
+; RV64ZVE32F-NEXT: andi a0, a5, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB49_16
; RV64ZVE32F-NEXT: .LBB49_11: # %else14
; RV64ZVE32F-NEXT: ret
@@ -5298,7 +5298,7 @@ define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd t0, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a4, 8
+; RV64ZVE32F-NEXT: andi a0, a5, 8
; RV64ZVE32F-NEXT: beqz a0, .LBB49_6
; RV64ZVE32F-NEXT: .LBB49_13: # %cond.store5
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -5306,14 +5306,14 @@ define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a7, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a4, 16
+; RV64ZVE32F-NEXT: andi a0, a5, 16
; RV64ZVE32F-NEXT: beqz a0, .LBB49_7
; RV64ZVE32F-NEXT: .LBB49_14: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a0, v10
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a6, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a4, 32
+; RV64ZVE32F-NEXT: andi a0, a5, 32
; RV64ZVE32F-NEXT: bnez a0, .LBB49_8
; RV64ZVE32F-NEXT: j .LBB49_9
; RV64ZVE32F-NEXT: .LBB49_15: # %cond.store11
@@ -5321,7 +5321,7 @@ define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a3, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a4, -128
+; RV64ZVE32F-NEXT: andi a0, a5, -128
; RV64ZVE32F-NEXT: beqz a0, .LBB49_11
; RV64ZVE32F-NEXT: .LBB49_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -5363,55 +5363,55 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: .cfi_offset s2, -12
-; RV32ZVE32F-NEXT: lw a2, 60(a0)
-; RV32ZVE32F-NEXT: lw a3, 56(a0)
-; RV32ZVE32F-NEXT: lw a4, 52(a0)
-; RV32ZVE32F-NEXT: lw a5, 48(a0)
-; RV32ZVE32F-NEXT: lw a6, 44(a0)
-; RV32ZVE32F-NEXT: lw t0, 40(a0)
-; RV32ZVE32F-NEXT: lw t1, 36(a0)
-; RV32ZVE32F-NEXT: lw t2, 32(a0)
-; RV32ZVE32F-NEXT: lw t3, 28(a0)
-; RV32ZVE32F-NEXT: lw t4, 24(a0)
-; RV32ZVE32F-NEXT: lw t5, 20(a0)
-; RV32ZVE32F-NEXT: lw t6, 16(a0)
-; RV32ZVE32F-NEXT: lw s1, 12(a0)
+; RV32ZVE32F-NEXT: lw a2, 56(a0)
+; RV32ZVE32F-NEXT: lw a3, 60(a0)
+; RV32ZVE32F-NEXT: lw a6, 40(a0)
+; RV32ZVE32F-NEXT: lw a7, 44(a0)
+; RV32ZVE32F-NEXT: lw a4, 48(a0)
+; RV32ZVE32F-NEXT: lw a5, 52(a0)
+; RV32ZVE32F-NEXT: lw t3, 24(a0)
+; RV32ZVE32F-NEXT: lw t4, 28(a0)
+; RV32ZVE32F-NEXT: lw t1, 32(a0)
+; RV32ZVE32F-NEXT: lw t2, 36(a0)
; RV32ZVE32F-NEXT: lw s0, 8(a0)
+; RV32ZVE32F-NEXT: lw s1, 12(a0)
+; RV32ZVE32F-NEXT: lw t5, 16(a0)
+; RV32ZVE32F-NEXT: lw t6, 20(a0)
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV32ZVE32F-NEXT: vmv.x.s a7, v0
-; RV32ZVE32F-NEXT: andi s2, a7, 1
+; RV32ZVE32F-NEXT: vmv.x.s t0, v0
+; RV32ZVE32F-NEXT: andi s2, t0, 1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: bnez s2, .LBB50_10
; RV32ZVE32F-NEXT: # %bb.1: # %else
-; RV32ZVE32F-NEXT: andi a0, a7, 2
+; RV32ZVE32F-NEXT: andi a0, t0, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB50_11
; RV32ZVE32F-NEXT: .LBB50_2: # %else2
-; RV32ZVE32F-NEXT: andi a0, a7, 4
+; RV32ZVE32F-NEXT: andi a0, t0, 4
; RV32ZVE32F-NEXT: bnez a0, .LBB50_12
; RV32ZVE32F-NEXT: .LBB50_3: # %else4
-; RV32ZVE32F-NEXT: andi a0, a7, 8
+; RV32ZVE32F-NEXT: andi a0, t0, 8
; RV32ZVE32F-NEXT: bnez a0, .LBB50_13
; RV32ZVE32F-NEXT: .LBB50_4: # %else6
-; RV32ZVE32F-NEXT: andi a0, a7, 16
+; RV32ZVE32F-NEXT: andi a0, t0, 16
; RV32ZVE32F-NEXT: bnez a0, .LBB50_14
; RV32ZVE32F-NEXT: .LBB50_5: # %else8
-; RV32ZVE32F-NEXT: andi a0, a7, 32
+; RV32ZVE32F-NEXT: andi a0, t0, 32
; RV32ZVE32F-NEXT: bnez a0, .LBB50_15
; RV32ZVE32F-NEXT: .LBB50_6: # %else10
-; RV32ZVE32F-NEXT: andi a0, a7, 64
+; RV32ZVE32F-NEXT: andi a0, t0, 64
; RV32ZVE32F-NEXT: bnez a0, .LBB50_16
; RV32ZVE32F-NEXT: .LBB50_7: # %else12
-; RV32ZVE32F-NEXT: andi a0, a7, -128
+; RV32ZVE32F-NEXT: andi a0, t0, -128
; RV32ZVE32F-NEXT: beqz a0, .LBB50_9
; RV32ZVE32F-NEXT: .LBB50_8: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
-; RV32ZVE32F-NEXT: sw a3, 0(a0)
-; RV32ZVE32F-NEXT: sw a2, 4(a0)
+; RV32ZVE32F-NEXT: sw a2, 0(a0)
+; RV32ZVE32F-NEXT: sw a3, 4(a0)
; RV32ZVE32F-NEXT: .LBB50_9: # %else14
; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
@@ -5424,7 +5424,7 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV32ZVE32F-NEXT: vmv.x.s s2, v8
; RV32ZVE32F-NEXT: sw a1, 4(s2)
; RV32ZVE32F-NEXT: sw a0, 0(s2)
-; RV32ZVE32F-NEXT: andi a0, a7, 2
+; RV32ZVE32F-NEXT: andi a0, t0, 2
; RV32ZVE32F-NEXT: beqz a0, .LBB50_2
; RV32ZVE32F-NEXT: .LBB50_11: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
@@ -5432,62 +5432,62 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw s1, 4(a0)
; RV32ZVE32F-NEXT: sw s0, 0(a0)
-; RV32ZVE32F-NEXT: andi a0, a7, 4
+; RV32ZVE32F-NEXT: andi a0, t0, 4
; RV32ZVE32F-NEXT: beqz a0, .LBB50_3
; RV32ZVE32F-NEXT: .LBB50_12: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw t6, 0(a0)
-; RV32ZVE32F-NEXT: sw t5, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, a7, 8
+; RV32ZVE32F-NEXT: sw t5, 0(a0)
+; RV32ZVE32F-NEXT: sw t6, 4(a0)
+; RV32ZVE32F-NEXT: andi a0, t0, 8
; RV32ZVE32F-NEXT: beqz a0, .LBB50_4
; RV32ZVE32F-NEXT: .LBB50_13: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw t4, 0(a0)
-; RV32ZVE32F-NEXT: sw t3, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, a7, 16
+; RV32ZVE32F-NEXT: sw t3, 0(a0)
+; RV32ZVE32F-NEXT: sw t4, 4(a0)
+; RV32ZVE32F-NEXT: andi a0, t0, 16
; RV32ZVE32F-NEXT: beqz a0, .LBB50_5
; RV32ZVE32F-NEXT: .LBB50_14: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw t2, 0(a0)
-; RV32ZVE32F-NEXT: sw t1, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, a7, 32
+; RV32ZVE32F-NEXT: sw t1, 0(a0)
+; RV32ZVE32F-NEXT: sw t2, 4(a0)
+; RV32ZVE32F-NEXT: andi a0, t0, 32
; RV32ZVE32F-NEXT: beqz a0, .LBB50_6
; RV32ZVE32F-NEXT: .LBB50_15: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw t0, 0(a0)
-; RV32ZVE32F-NEXT: sw a6, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, a7, 64
+; RV32ZVE32F-NEXT: sw a6, 0(a0)
+; RV32ZVE32F-NEXT: sw a7, 4(a0)
+; RV32ZVE32F-NEXT: andi a0, t0, 64
; RV32ZVE32F-NEXT: beqz a0, .LBB50_7
; RV32ZVE32F-NEXT: .LBB50_16: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw a5, 0(a0)
-; RV32ZVE32F-NEXT: sw a4, 4(a0)
-; RV32ZVE32F-NEXT: andi a0, a7, -128
+; RV32ZVE32F-NEXT: sw a4, 0(a0)
+; RV32ZVE32F-NEXT: sw a5, 4(a0)
+; RV32ZVE32F-NEXT: andi a0, t0, -128
; RV32ZVE32F-NEXT: bnez a0, .LBB50_8
; RV32ZVE32F-NEXT: j .LBB50_9
;
; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i32_v8i64:
; RV64ZVE32F: # %bb.0:
-; RV64ZVE32F-NEXT: ld a2, 56(a0)
+; RV64ZVE32F-NEXT: ld a4, 40(a0)
; RV64ZVE32F-NEXT: ld a3, 48(a0)
-; RV64ZVE32F-NEXT: ld a5, 40(a0)
-; RV64ZVE32F-NEXT: ld a6, 32(a0)
-; RV64ZVE32F-NEXT: ld a7, 24(a0)
-; RV64ZVE32F-NEXT: ld t0, 16(a0)
+; RV64ZVE32F-NEXT: ld a2, 56(a0)
; RV64ZVE32F-NEXT: ld t1, 8(a0)
+; RV64ZVE32F-NEXT: ld t0, 16(a0)
+; RV64ZVE32F-NEXT: ld a7, 24(a0)
+; RV64ZVE32F-NEXT: ld a6, 32(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a4, v0
-; RV64ZVE32F-NEXT: andi t2, a4, 1
+; RV64ZVE32F-NEXT: vmv.x.s a5, v0
+; RV64ZVE32F-NEXT: andi t2, a5, 1
; RV64ZVE32F-NEXT: beqz t2, .LBB50_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
@@ -5498,7 +5498,7 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: sd a0, 0(t2)
; RV64ZVE32F-NEXT: .LBB50_2: # %else
-; RV64ZVE32F-NEXT: andi a0, a4, 2
+; RV64ZVE32F-NEXT: andi a0, a5, 2
; RV64ZVE32F-NEXT: beqz a0, .LBB50_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
@@ -5511,18 +5511,18 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: .LBB50_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
-; RV64ZVE32F-NEXT: andi a0, a4, 4
+; RV64ZVE32F-NEXT: andi a0, a5, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB50_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
-; RV64ZVE32F-NEXT: andi a0, a4, 8
+; RV64ZVE32F-NEXT: andi a0, a5, 8
; RV64ZVE32F-NEXT: bnez a0, .LBB50_13
; RV64ZVE32F-NEXT: .LBB50_6: # %else6
-; RV64ZVE32F-NEXT: andi a0, a4, 16
+; RV64ZVE32F-NEXT: andi a0, a5, 16
; RV64ZVE32F-NEXT: bnez a0, .LBB50_14
; RV64ZVE32F-NEXT: .LBB50_7: # %else8
-; RV64ZVE32F-NEXT: andi a0, a4, 32
+; RV64ZVE32F-NEXT: andi a0, a5, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB50_9
; RV64ZVE32F-NEXT: .LBB50_8: # %cond.store9
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
@@ -5530,13 +5530,13 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: slli a0, a0, 32
; RV64ZVE32F-NEXT: srli a0, a0, 29
; RV64ZVE32F-NEXT: add a0, a1, a0
-; RV64ZVE32F-NEXT: sd a5, 0(a0)
+; RV64ZVE32F-NEXT: sd a4, 0(a0)
; RV64ZVE32F-NEXT: .LBB50_9: # %else10
-; RV64ZVE32F-NEXT: andi a0, a4, 64
+; RV64ZVE32F-NEXT: andi a0, a5, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB50_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
-; RV64ZVE32F-NEXT: andi a0, a4, -128
+; RV64ZVE32F-NEXT: andi a0, a5, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB50_16
; RV64ZVE32F-NEXT: .LBB50_11: # %else14
; RV64ZVE32F-NEXT: ret
@@ -5546,7 +5546,7 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: srli a0, a0, 29
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd t0, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a4, 8
+; RV64ZVE32F-NEXT: andi a0, a5, 8
; RV64ZVE32F-NEXT: beqz a0, .LBB50_6
; RV64ZVE32F-NEXT: .LBB50_13: # %cond.store5
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -5555,7 +5555,7 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: srli a0, a0, 29
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a7, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a4, 16
+; RV64ZVE32F-NEXT: andi a0, a5, 16
; RV64ZVE32F-NEXT: beqz a0, .LBB50_7
; RV64ZVE32F-NEXT: .LBB50_14: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a0, v10
@@ -5563,7 +5563,7 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: srli a0, a0, 29
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a6, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a4, 32
+; RV64ZVE32F-NEXT: andi a0, a5, 32
; RV64ZVE32F-NEXT: bnez a0, .LBB50_8
; RV64ZVE32F-NEXT: j .LBB50_9
; RV64ZVE32F-NEXT: .LBB50_15: # %cond.store11
@@ -5572,7 +5572,7 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: srli a0, a0, 29
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a3, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a4, -128
+; RV64ZVE32F-NEXT: andi a0, a5, -128
; RV64ZVE32F-NEXT: beqz a0, .LBB50_11
; RV64ZVE32F-NEXT: .LBB50_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -5627,24 +5627,24 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs,
; RV32ZVE32F-NEXT: .cfi_offset s6, -28
; RV32ZVE32F-NEXT: .cfi_offset s7, -32
; RV32ZVE32F-NEXT: .cfi_offset s8, -36
-; RV32ZVE32F-NEXT: lw a3, 60(a0)
-; RV32ZVE32F-NEXT: lw a4, 56(a0)
-; RV32ZVE32F-NEXT: lw a5, 52(a0)
-; RV32ZVE32F-NEXT: lw a6, 48(a0)
-; RV32ZVE32F-NEXT: lw a7, 44(a0)
-; RV32ZVE32F-NEXT: lw t0, 40(a0)
-; RV32ZVE32F-NEXT: lw t1, 36(a0)
-; RV32ZVE32F-NEXT: lw t2, 32(a0)
-; RV32ZVE32F-NEXT: lw t3, 28(a0)
-; RV32ZVE32F-NEXT: lw t4, 24(a0)
-; RV32ZVE32F-NEXT: lw t5, 20(a0)
-; RV32ZVE32F-NEXT: lw t6, 16(a0)
-; RV32ZVE32F-NEXT: lw s1, 12(a0)
+; RV32ZVE32F-NEXT: lw a3, 56(a0)
+; RV32ZVE32F-NEXT: lw a4, 60(a0)
+; RV32ZVE32F-NEXT: lw a7, 40(a0)
+; RV32ZVE32F-NEXT: lw t0, 44(a0)
+; RV32ZVE32F-NEXT: lw a5, 48(a0)
+; RV32ZVE32F-NEXT: lw a6, 52(a0)
+; RV32ZVE32F-NEXT: lw t3, 24(a0)
+; RV32ZVE32F-NEXT: lw t4, 28(a0)
+; RV32ZVE32F-NEXT: lw t1, 32(a0)
+; RV32ZVE32F-NEXT: lw t2, 36(a0)
; RV32ZVE32F-NEXT: lw s0, 8(a0)
-; RV32ZVE32F-NEXT: lw s2, 56(a2)
-; RV32ZVE32F-NEXT: lw s3, 48(a2)
-; RV32ZVE32F-NEXT: lw s4, 40(a2)
-; RV32ZVE32F-NEXT: lw s5, 32(a2)
+; RV32ZVE32F-NEXT: lw s1, 12(a0)
+; RV32ZVE32F-NEXT: lw t5, 16(a0)
+; RV32ZVE32F-NEXT: lw t6, 20(a0)
+; RV32ZVE32F-NEXT: lw s2, 32(a2)
+; RV32ZVE32F-NEXT: lw s3, 40(a2)
+; RV32ZVE32F-NEXT: lw s4, 48(a2)
+; RV32ZVE32F-NEXT: lw s5, 56(a2)
; RV32ZVE32F-NEXT: lw s6, 0(a2)
; RV32ZVE32F-NEXT: lw s7, 8(a2)
; RV32ZVE32F-NEXT: lw s8, 16(a2)
@@ -5654,10 +5654,10 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs,
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s7
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s8
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a2
-; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s5
-; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s4
-; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s3
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s2
+; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s3
+; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s4
+; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s5
; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a2, v0
@@ -5690,8 +5690,8 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs,
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
-; RV32ZVE32F-NEXT: sw a4, 0(a0)
-; RV32ZVE32F-NEXT: sw a3, 4(a0)
+; RV32ZVE32F-NEXT: sw a3, 0(a0)
+; RV32ZVE32F-NEXT: sw a4, 4(a0)
; RV32ZVE32F-NEXT: .LBB51_9: # %else14
; RV32ZVE32F-NEXT: lw s0, 44(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s1, 40(sp) # 4-byte Folded Reload
@@ -5724,40 +5724,40 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs,
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw t6, 0(a0)
-; RV32ZVE32F-NEXT: sw t5, 4(a0)
+; RV32ZVE32F-NEXT: sw t5, 0(a0)
+; RV32ZVE32F-NEXT: sw t6, 4(a0)
; RV32ZVE32F-NEXT: andi a0, a2, 8
; RV32ZVE32F-NEXT: beqz a0, .LBB51_4
; RV32ZVE32F-NEXT: .LBB51_13: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw t4, 0(a0)
-; RV32ZVE32F-NEXT: sw t3, 4(a0)
+; RV32ZVE32F-NEXT: sw t3, 0(a0)
+; RV32ZVE32F-NEXT: sw t4, 4(a0)
; RV32ZVE32F-NEXT: andi a0, a2, 16
; RV32ZVE32F-NEXT: beqz a0, .LBB51_5
; RV32ZVE32F-NEXT: .LBB51_14: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw t2, 0(a0)
-; RV32ZVE32F-NEXT: sw t1, 4(a0)
+; RV32ZVE32F-NEXT: sw t1, 0(a0)
+; RV32ZVE32F-NEXT: sw t2, 4(a0)
; RV32ZVE32F-NEXT: andi a0, a2, 32
; RV32ZVE32F-NEXT: beqz a0, .LBB51_6
; RV32ZVE32F-NEXT: .LBB51_15: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw t0, 0(a0)
-; RV32ZVE32F-NEXT: sw a7, 4(a0)
+; RV32ZVE32F-NEXT: sw a7, 0(a0)
+; RV32ZVE32F-NEXT: sw t0, 4(a0)
; RV32ZVE32F-NEXT: andi a0, a2, 64
; RV32ZVE32F-NEXT: beqz a0, .LBB51_7
; RV32ZVE32F-NEXT: .LBB51_16: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-NEXT: sw a6, 0(a0)
-; RV32ZVE32F-NEXT: sw a5, 4(a0)
+; RV32ZVE32F-NEXT: sw a5, 0(a0)
+; RV32ZVE32F-NEXT: sw a6, 4(a0)
; RV32ZVE32F-NEXT: andi a0, a2, -128
; RV32ZVE32F-NEXT: bnez a0, .LBB51_8
; RV32ZVE32F-NEXT: j .LBB51_9
@@ -5774,20 +5774,20 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs,
; RV64ZVE32F-NEXT: .cfi_offset s1, -16
; RV64ZVE32F-NEXT: .cfi_offset s2, -24
; RV64ZVE32F-NEXT: .cfi_offset s3, -32
-; RV64ZVE32F-NEXT: ld a3, 56(a0)
+; RV64ZVE32F-NEXT: ld a5, 40(a0)
; RV64ZVE32F-NEXT: ld a4, 48(a0)
-; RV64ZVE32F-NEXT: ld a6, 40(a0)
-; RV64ZVE32F-NEXT: ld t1, 32(a0)
+; RV64ZVE32F-NEXT: ld a3, 56(a0)
+; RV64ZVE32F-NEXT: ld s0, 8(a0)
+; RV64ZVE32F-NEXT: ld t5, 16(a0)
; RV64ZVE32F-NEXT: ld t3, 24(a0)
-; RV64ZVE32F-NEXT: ld t6, 16(a0)
-; RV64ZVE32F-NEXT: ld s1, 8(a0)
+; RV64ZVE32F-NEXT: ld t1, 32(a0)
; RV64ZVE32F-NEXT: ld s2, 8(a2)
-; RV64ZVE32F-NEXT: ld s0, 16(a2)
-; RV64ZVE32F-NEXT: ld t5, 24(a2)
+; RV64ZVE32F-NEXT: ld s1, 16(a2)
+; RV64ZVE32F-NEXT: ld t6, 24(a2)
; RV64ZVE32F-NEXT: ld t4, 32(a2)
; RV64ZVE32F-NEXT: ld t2, 40(a2)
; RV64ZVE32F-NEXT: ld t0, 48(a2)
-; RV64ZVE32F-NEXT: ld a5, 56(a2)
+; RV64ZVE32F-NEXT: ld a6, 56(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a7, v0
; RV64ZVE32F-NEXT: andi s3, a7, 1
@@ -5814,8 +5814,8 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs,
; RV64ZVE32F-NEXT: andi a0, a7, -128
; RV64ZVE32F-NEXT: beqz a0, .LBB51_9
; RV64ZVE32F-NEXT: .LBB51_8: # %cond.store13
-; RV64ZVE32F-NEXT: slli a5, a5, 3
-; RV64ZVE32F-NEXT: add a1, a1, a5
+; RV64ZVE32F-NEXT: slli a6, a6, 3
+; RV64ZVE32F-NEXT: add a1, a1, a6
; RV64ZVE32F-NEXT: sd a3, 0(a1)
; RV64ZVE32F-NEXT: .LBB51_9: # %else14
; RV64ZVE32F-NEXT: ld s0, 24(sp) # 8-byte Folded Reload
@@ -5835,19 +5835,19 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs,
; RV64ZVE32F-NEXT: .LBB51_11: # %cond.store1
; RV64ZVE32F-NEXT: slli s2, s2, 3
; RV64ZVE32F-NEXT: add s2, a1, s2
-; RV64ZVE32F-NEXT: sd s1, 0(s2)
+; RV64ZVE32F-NEXT: sd s0, 0(s2)
; RV64ZVE32F-NEXT: andi a0, a7, 4
; RV64ZVE32F-NEXT: beqz a0, .LBB51_3
; RV64ZVE32F-NEXT: .LBB51_12: # %cond.store3
-; RV64ZVE32F-NEXT: slli s0, s0, 3
-; RV64ZVE32F-NEXT: add s0, a1, s0
-; RV64ZVE32F-NEXT: sd t6, 0(s0)
+; RV64ZVE32F-NEXT: slli s1, s1, 3
+; RV64ZVE32F-NEXT: add s1, a1, s1
+; RV64ZVE32F-NEXT: sd t5, 0(s1)
; RV64ZVE32F-NEXT: andi a0, a7, 8
; RV64ZVE32F-NEXT: beqz a0, .LBB51_4
; RV64ZVE32F-NEXT: .LBB51_13: # %cond.store5
-; RV64ZVE32F-NEXT: slli t5, t5, 3
-; RV64ZVE32F-NEXT: add t5, a1, t5
-; RV64ZVE32F-NEXT: sd t3, 0(t5)
+; RV64ZVE32F-NEXT: slli t6, t6, 3
+; RV64ZVE32F-NEXT: add t6, a1, t6
+; RV64ZVE32F-NEXT: sd t3, 0(t6)
; RV64ZVE32F-NEXT: andi a0, a7, 16
; RV64ZVE32F-NEXT: beqz a0, .LBB51_5
; RV64ZVE32F-NEXT: .LBB51_14: # %cond.store7
@@ -5859,7 +5859,7 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs,
; RV64ZVE32F-NEXT: .LBB51_15: # %cond.store9
; RV64ZVE32F-NEXT: slli t2, t2, 3
; RV64ZVE32F-NEXT: add t2, a1, t2
-; RV64ZVE32F-NEXT: sd a6, 0(t2)
+; RV64ZVE32F-NEXT: sd a5, 0(t2)
; RV64ZVE32F-NEXT: andi a0, a7, 64
; RV64ZVE32F-NEXT: beqz a0, .LBB51_7
; RV64ZVE32F-NEXT: .LBB51_16: # %cond.store11
@@ -5972,9 +5972,9 @@ define void @mscatter_v4f16(<4 x half> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
;
; RV64ZVE32F-LABEL: mscatter_v4f16:
; RV64ZVE32F: # %bb.0:
-; RV64ZVE32F-NEXT: ld a1, 24(a0)
-; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: ld a4, 8(a0)
+; RV64ZVE32F-NEXT: ld a2, 16(a0)
+; RV64ZVE32F-NEXT: ld a1, 24(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v0
; RV64ZVE32F-NEXT: andi a5, a3, 1
@@ -6033,17 +6033,17 @@ define void @mscatter_truemask_v4f16(<4 x half> %val, <4 x ptr> %ptrs) {
; RV64ZVE32F-LABEL: mscatter_truemask_v4f16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ld a1, 0(a0)
-; RV64ZVE32F-NEXT: ld a2, 24(a0)
-; RV64ZVE32F-NEXT: ld a3, 8(a0)
-; RV64ZVE32F-NEXT: ld a0, 16(a0)
+; RV64ZVE32F-NEXT: ld a2, 8(a0)
+; RV64ZVE32F-NEXT: ld a3, 16(a0)
+; RV64ZVE32F-NEXT: ld a0, 24(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vse16.v v8, (a1)
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
-; RV64ZVE32F-NEXT: vse16.v v9, (a3)
+; RV64ZVE32F-NEXT: vse16.v v9, (a2)
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
-; RV64ZVE32F-NEXT: vse16.v v9, (a0)
+; RV64ZVE32F-NEXT: vse16.v v9, (a3)
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3
-; RV64ZVE32F-NEXT: vse16.v v8, (a2)
+; RV64ZVE32F-NEXT: vse16.v v8, (a0)
; RV64ZVE32F-NEXT: ret
call void @llvm.masked.scatter.v4f16.v4p0(<4 x half> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> splat (i1 1))
ret void
@@ -6074,37 +6074,37 @@ define void @mscatter_v8f16(<8 x half> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
;
; RV64ZVE32F-LABEL: mscatter_v8f16:
; RV64ZVE32F: # %bb.0:
-; RV64ZVE32F-NEXT: ld a1, 56(a0)
+; RV64ZVE32F-NEXT: ld a3, 40(a0)
; RV64ZVE32F-NEXT: ld a2, 48(a0)
-; RV64ZVE32F-NEXT: ld a4, 40(a0)
-; RV64ZVE32F-NEXT: ld a5, 32(a0)
-; RV64ZVE32F-NEXT: ld a6, 24(a0)
-; RV64ZVE32F-NEXT: ld a7, 16(a0)
+; RV64ZVE32F-NEXT: ld a1, 56(a0)
; RV64ZVE32F-NEXT: ld t0, 8(a0)
+; RV64ZVE32F-NEXT: ld a7, 16(a0)
+; RV64ZVE32F-NEXT: ld a6, 24(a0)
+; RV64ZVE32F-NEXT: ld a5, 32(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a3, v0
-; RV64ZVE32F-NEXT: andi t1, a3, 1
+; RV64ZVE32F-NEXT: vmv.x.s a4, v0
+; RV64ZVE32F-NEXT: andi t1, a4, 1
; RV64ZVE32F-NEXT: bnez t1, .LBB57_9
; RV64ZVE32F-NEXT: # %bb.1: # %else
-; RV64ZVE32F-NEXT: andi a0, a3, 2
+; RV64ZVE32F-NEXT: andi a0, a4, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB57_10
; RV64ZVE32F-NEXT: .LBB57_2: # %else2
-; RV64ZVE32F-NEXT: andi a0, a3, 4
+; RV64ZVE32F-NEXT: andi a0, a4, 4
; RV64ZVE32F-NEXT: bnez a0, .LBB57_11
; RV64ZVE32F-NEXT: .LBB57_3: # %else4
-; RV64ZVE32F-NEXT: andi a0, a3, 8
+; RV64ZVE32F-NEXT: andi a0, a4, 8
; RV64ZVE32F-NEXT: bnez a0, .LBB57_12
; RV64ZVE32F-NEXT: .LBB57_4: # %else6
-; RV64ZVE32F-NEXT: andi a0, a3, 16
+; RV64ZVE32F-NEXT: andi a0, a4, 16
; RV64ZVE32F-NEXT: bnez a0, .LBB57_13
; RV64ZVE32F-NEXT: .LBB57_5: # %else8
-; RV64ZVE32F-NEXT: andi a0, a3, 32
+; RV64ZVE32F-NEXT: andi a0, a4, 32
; RV64ZVE32F-NEXT: bnez a0, .LBB57_14
; RV64ZVE32F-NEXT: .LBB57_6: # %else10
-; RV64ZVE32F-NEXT: andi a0, a3, 64
+; RV64ZVE32F-NEXT: andi a0, a4, 64
; RV64ZVE32F-NEXT: bnez a0, .LBB57_15
; RV64ZVE32F-NEXT: .LBB57_7: # %else12
-; RV64ZVE32F-NEXT: andi a0, a3, -128
+; RV64ZVE32F-NEXT: andi a0, a4, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB57_16
; RV64ZVE32F-NEXT: .LBB57_8: # %else14
; RV64ZVE32F-NEXT: ret
@@ -6112,43 +6112,43 @@ define void @mscatter_v8f16(<8 x half> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vse16.v v8, (a0)
-; RV64ZVE32F-NEXT: andi a0, a3, 2
+; RV64ZVE32F-NEXT: andi a0, a4, 2
; RV64ZVE32F-NEXT: beqz a0, .LBB57_2
; RV64ZVE32F-NEXT: .LBB57_10: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vse16.v v9, (t0)
-; RV64ZVE32F-NEXT: andi a0, a3, 4
+; RV64ZVE32F-NEXT: andi a0, a4, 4
; RV64ZVE32F-NEXT: beqz a0, .LBB57_3
; RV64ZVE32F-NEXT: .LBB57_11: # %cond.store3
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV64ZVE32F-NEXT: vse16.v v9, (a7)
-; RV64ZVE32F-NEXT: andi a0, a3, 8
+; RV64ZVE32F-NEXT: andi a0, a4, 8
; RV64ZVE32F-NEXT: beqz a0, .LBB57_4
; RV64ZVE32F-NEXT: .LBB57_12: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
; RV64ZVE32F-NEXT: vse16.v v9, (a6)
-; RV64ZVE32F-NEXT: andi a0, a3, 16
+; RV64ZVE32F-NEXT: andi a0, a4, 16
; RV64ZVE32F-NEXT: beqz a0, .LBB57_5
; RV64ZVE32F-NEXT: .LBB57_13: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vse16.v v9, (a5)
-; RV64ZVE32F-NEXT: andi a0, a3, 32
+; RV64ZVE32F-NEXT: andi a0, a4, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB57_6
; RV64ZVE32F-NEXT: .LBB57_14: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
-; RV64ZVE32F-NEXT: vse16.v v9, (a4)
-; RV64ZVE32F-NEXT: andi a0, a3, 64
+; RV64ZVE32F-NEXT: vse16.v v9, (a3)
+; RV64ZVE32F-NEXT: andi a0, a4, 64
; RV64ZVE32F-NEXT: beqz a0, .LBB57_7
; RV64ZVE32F-NEXT: .LBB57_15: # %cond.store11
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 6
; RV64ZVE32F-NEXT: vse16.v v9, (a2)
-; RV64ZVE32F-NEXT: andi a0, a3, -128
+; RV64ZVE32F-NEXT: andi a0, a4, -128
; RV64ZVE32F-NEXT: beqz a0, .LBB57_8
; RV64ZVE32F-NEXT: .LBB57_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
@@ -6794,9 +6794,9 @@ define void @mscatter_v4f32(<4 x float> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
;
; RV64ZVE32F-LABEL: mscatter_v4f32:
; RV64ZVE32F: # %bb.0:
-; RV64ZVE32F-NEXT: ld a1, 24(a0)
-; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: ld a4, 8(a0)
+; RV64ZVE32F-NEXT: ld a2, 16(a0)
+; RV64ZVE32F-NEXT: ld a1, 24(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v0
; RV64ZVE32F-NEXT: andi a5, a3, 1
@@ -6855,17 +6855,17 @@ define void @mscatter_truemask_v4f32(<4 x float> %val, <4 x ptr> %ptrs) {
; RV64ZVE32F-LABEL: mscatter_truemask_v4f32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ld a1, 0(a0)
-; RV64ZVE32F-NEXT: ld a2, 24(a0)
-; RV64ZVE32F-NEXT: ld a3, 8(a0)
-; RV64ZVE32F-NEXT: ld a0, 16(a0)
+; RV64ZVE32F-NEXT: ld a2, 8(a0)
+; RV64ZVE32F-NEXT: ld a3, 16(a0)
+; RV64ZVE32F-NEXT: ld a0, 24(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a1)
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
-; RV64ZVE32F-NEXT: vse32.v v9, (a3)
+; RV64ZVE32F-NEXT: vse32.v v9, (a2)
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
-; RV64ZVE32F-NEXT: vse32.v v9, (a0)
+; RV64ZVE32F-NEXT: vse32.v v9, (a3)
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3
-; RV64ZVE32F-NEXT: vse32.v v8, (a2)
+; RV64ZVE32F-NEXT: vse32.v v8, (a0)
; RV64ZVE32F-NEXT: ret
call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %val, <4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 1))
ret void
@@ -6896,37 +6896,37 @@ define void @mscatter_v8f32(<8 x float> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
;
; RV64ZVE32F-LABEL: mscatter_v8f32:
; RV64ZVE32F: # %bb.0:
-; RV64ZVE32F-NEXT: ld a1, 56(a0)
+; RV64ZVE32F-NEXT: ld a3, 40(a0)
; RV64ZVE32F-NEXT: ld a2, 48(a0)
-; RV64ZVE32F-NEXT: ld a4, 40(a0)
-; RV64ZVE32F-NEXT: ld a5, 32(a0)
-; RV64ZVE32F-NEXT: ld a6, 24(a0)
-; RV64ZVE32F-NEXT: ld a7, 16(a0)
+; RV64ZVE32F-NEXT: ld a1, 56(a0)
; RV64ZVE32F-NEXT: ld t0, 8(a0)
+; RV64ZVE32F-NEXT: ld a7, 16(a0)
+; RV64ZVE32F-NEXT: ld a6, 24(a0)
+; RV64ZVE32F-NEXT: ld a5, 32(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a3, v0
-; RV64ZVE32F-NEXT: andi t1, a3, 1
+; RV64ZVE32F-NEXT: vmv.x.s a4, v0
+; RV64ZVE32F-NEXT: andi t1, a4, 1
; RV64ZVE32F-NEXT: bnez t1, .LBB67_9
; RV64ZVE32F-NEXT: # %bb.1: # %else
-; RV64ZVE32F-NEXT: andi a0, a3, 2
+; RV64ZVE32F-NEXT: andi a0, a4, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB67_10
; RV64ZVE32F-NEXT: .LBB67_2: # %else2
-; RV64ZVE32F-NEXT: andi a0, a3, 4
+; RV64ZVE32F-NEXT: andi a0, a4, 4
; RV64ZVE32F-NEXT: bnez a0, .LBB67_11
; RV64ZVE32F-NEXT: .LBB67_3: # %else4
-; RV64ZVE32F-NEXT: andi a0, a3, 8
+; RV64ZVE32F-NEXT: andi a0, a4, 8
; RV64ZVE32F-NEXT: bnez a0, .LBB67_12
; RV64ZVE32F-NEXT: .LBB67_4: # %else6
-; RV64ZVE32F-NEXT: andi a0, a3, 16
+; RV64ZVE32F-NEXT: andi a0, a4, 16
; RV64ZVE32F-NEXT: bnez a0, .LBB67_13
; RV64ZVE32F-NEXT: .LBB67_5: # %else8
-; RV64ZVE32F-NEXT: andi a0, a3, 32
+; RV64ZVE32F-NEXT: andi a0, a4, 32
; RV64ZVE32F-NEXT: bnez a0, .LBB67_14
; RV64ZVE32F-NEXT: .LBB67_6: # %else10
-; RV64ZVE32F-NEXT: andi a0, a3, 64
+; RV64ZVE32F-NEXT: andi a0, a4, 64
; RV64ZVE32F-NEXT: bnez a0, .LBB67_15
; RV64ZVE32F-NEXT: .LBB67_7: # %else12
-; RV64ZVE32F-NEXT: andi a0, a3, -128
+; RV64ZVE32F-NEXT: andi a0, a4, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB67_16
; RV64ZVE32F-NEXT: .LBB67_8: # %else14
; RV64ZVE32F-NEXT: ret
@@ -6934,46 +6934,46 @@ define void @mscatter_v8f32(<8 x float> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a0)
-; RV64ZVE32F-NEXT: andi a0, a3, 2
+; RV64ZVE32F-NEXT: andi a0, a4, 2
; RV64ZVE32F-NEXT: beqz a0, .LBB67_2
; RV64ZVE32F-NEXT: .LBB67_10: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vse32.v v10, (t0)
-; RV64ZVE32F-NEXT: andi a0, a3, 4
+; RV64ZVE32F-NEXT: andi a0, a4, 4
; RV64ZVE32F-NEXT: beqz a0, .LBB67_3
; RV64ZVE32F-NEXT: .LBB67_11: # %cond.store3
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV64ZVE32F-NEXT: vse32.v v10, (a7)
-; RV64ZVE32F-NEXT: andi a0, a3, 8
+; RV64ZVE32F-NEXT: andi a0, a4, 8
; RV64ZVE32F-NEXT: beqz a0, .LBB67_4
; RV64ZVE32F-NEXT: .LBB67_12: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV64ZVE32F-NEXT: vse32.v v10, (a6)
-; RV64ZVE32F-NEXT: andi a0, a3, 16
+; RV64ZVE32F-NEXT: andi a0, a4, 16
; RV64ZVE32F-NEXT: beqz a0, .LBB67_5
; RV64ZVE32F-NEXT: .LBB67_13: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v10, (a5)
-; RV64ZVE32F-NEXT: andi a0, a3, 32
+; RV64ZVE32F-NEXT: andi a0, a4, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB67_6
; RV64ZVE32F-NEXT: .LBB67_14: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vse32.v v10, (a4)
-; RV64ZVE32F-NEXT: andi a0, a3, 64
+; RV64ZVE32F-NEXT: vse32.v v10, (a3)
+; RV64ZVE32F-NEXT: andi a0, a4, 64
; RV64ZVE32F-NEXT: beqz a0, .LBB67_7
; RV64ZVE32F-NEXT: .LBB67_15: # %cond.store11
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v10, (a2)
-; RV64ZVE32F-NEXT: andi a0, a3, -128
+; RV64ZVE32F-NEXT: andi a0, a4, -128
; RV64ZVE32F-NEXT: beqz a0, .LBB67_8
; RV64ZVE32F-NEXT: .LBB67_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
@@ -8126,9 +8126,9 @@ define void @mscatter_v4f64(<4 x double> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
;
; RV64ZVE32F-LABEL: mscatter_v4f64:
; RV64ZVE32F: # %bb.0:
-; RV64ZVE32F-NEXT: ld a1, 24(a0)
-; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: ld a4, 8(a0)
+; RV64ZVE32F-NEXT: ld a2, 16(a0)
+; RV64ZVE32F-NEXT: ld a1, 24(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v0
; RV64ZVE32F-NEXT: andi a5, a3, 1
@@ -8317,68 +8317,68 @@ define void @mscatter_v8f64(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
;
; RV64ZVE32F-LABEL: mscatter_v8f64:
; RV64ZVE32F: # %bb.0:
-; RV64ZVE32F-NEXT: ld a1, 56(a0)
+; RV64ZVE32F-NEXT: ld a3, 40(a0)
; RV64ZVE32F-NEXT: ld a2, 48(a0)
-; RV64ZVE32F-NEXT: ld a4, 40(a0)
-; RV64ZVE32F-NEXT: ld a5, 32(a0)
-; RV64ZVE32F-NEXT: ld a6, 24(a0)
-; RV64ZVE32F-NEXT: ld a7, 16(a0)
+; RV64ZVE32F-NEXT: ld a1, 56(a0)
; RV64ZVE32F-NEXT: ld t0, 8(a0)
+; RV64ZVE32F-NEXT: ld a7, 16(a0)
+; RV64ZVE32F-NEXT: ld a6, 24(a0)
+; RV64ZVE32F-NEXT: ld a5, 32(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a3, v0
-; RV64ZVE32F-NEXT: andi t1, a3, 1
+; RV64ZVE32F-NEXT: vmv.x.s a4, v0
+; RV64ZVE32F-NEXT: andi t1, a4, 1
; RV64ZVE32F-NEXT: bnez t1, .LBB80_9
; RV64ZVE32F-NEXT: # %bb.1: # %else
-; RV64ZVE32F-NEXT: andi a0, a3, 2
+; RV64ZVE32F-NEXT: andi a0, a4, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB80_10
; RV64ZVE32F-NEXT: .LBB80_2: # %else2
-; RV64ZVE32F-NEXT: andi a0, a3, 4
+; RV64ZVE32F-NEXT: andi a0, a4, 4
; RV64ZVE32F-NEXT: bnez a0, .LBB80_11
; RV64ZVE32F-NEXT: .LBB80_3: # %else4
-; RV64ZVE32F-NEXT: andi a0, a3, 8
+; RV64ZVE32F-NEXT: andi a0, a4, 8
; RV64ZVE32F-NEXT: bnez a0, .LBB80_12
; RV64ZVE32F-NEXT: .LBB80_4: # %else6
-; RV64ZVE32F-NEXT: andi a0, a3, 16
+; RV64ZVE32F-NEXT: andi a0, a4, 16
; RV64ZVE32F-NEXT: bnez a0, .LBB80_13
; RV64ZVE32F-NEXT: .LBB80_5: # %else8
-; RV64ZVE32F-NEXT: andi a0, a3, 32
+; RV64ZVE32F-NEXT: andi a0, a4, 32
; RV64ZVE32F-NEXT: bnez a0, .LBB80_14
; RV64ZVE32F-NEXT: .LBB80_6: # %else10
-; RV64ZVE32F-NEXT: andi a0, a3, 64
+; RV64ZVE32F-NEXT: andi a0, a4, 64
; RV64ZVE32F-NEXT: bnez a0, .LBB80_15
; RV64ZVE32F-NEXT: .LBB80_7: # %else12
-; RV64ZVE32F-NEXT: andi a0, a3, -128
+; RV64ZVE32F-NEXT: andi a0, a4, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB80_16
; RV64ZVE32F-NEXT: .LBB80_8: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB80_9: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a3, 2
+; RV64ZVE32F-NEXT: andi a0, a4, 2
; RV64ZVE32F-NEXT: beqz a0, .LBB80_2
; RV64ZVE32F-NEXT: .LBB80_10: # %cond.store1
; RV64ZVE32F-NEXT: fsd fa1, 0(t0)
-; RV64ZVE32F-NEXT: andi a0, a3, 4
+; RV64ZVE32F-NEXT: andi a0, a4, 4
; RV64ZVE32F-NEXT: beqz a0, .LBB80_3
; RV64ZVE32F-NEXT: .LBB80_11: # %cond.store3
; RV64ZVE32F-NEXT: fsd fa2, 0(a7)
-; RV64ZVE32F-NEXT: andi a0, a3, 8
+; RV64ZVE32F-NEXT: andi a0, a4, 8
; RV64ZVE32F-NEXT: beqz a0, .LBB80_4
; RV64ZVE32F-NEXT: .LBB80_12: # %cond.store5
; RV64ZVE32F-NEXT: fsd fa3, 0(a6)
-; RV64ZVE32F-NEXT: andi a0, a3, 16
+; RV64ZVE32F-NEXT: andi a0, a4, 16
; RV64ZVE32F-NEXT: beqz a0, .LBB80_5
; RV64ZVE32F-NEXT: .LBB80_13: # %cond.store7
; RV64ZVE32F-NEXT: fsd fa4, 0(a5)
-; RV64ZVE32F-NEXT: andi a0, a3, 32
+; RV64ZVE32F-NEXT: andi a0, a4, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB80_6
; RV64ZVE32F-NEXT: .LBB80_14: # %cond.store9
-; RV64ZVE32F-NEXT: fsd fa5, 0(a4)
-; RV64ZVE32F-NEXT: andi a0, a3, 64
+; RV64ZVE32F-NEXT: fsd fa5, 0(a3)
+; RV64ZVE32F-NEXT: andi a0, a4, 64
; RV64ZVE32F-NEXT: beqz a0, .LBB80_7
; RV64ZVE32F-NEXT: .LBB80_15: # %cond.store11
; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
-; RV64ZVE32F-NEXT: andi a0, a3, -128
+; RV64ZVE32F-NEXT: andi a0, a4, -128
; RV64ZVE32F-NEXT: beqz a0, .LBB80_8
; RV64ZVE32F-NEXT: .LBB80_16: # %cond.store13
; RV64ZVE32F-NEXT: fsd fa7, 0(a1)
@@ -10240,10 +10240,10 @@ define void @mscatter_baseidx_v8f64(<8 x double> %val, ptr %base, <8 x i64> %idx
;
; RV32ZVE32F-LABEL: mscatter_baseidx_v8f64:
; RV32ZVE32F: # %bb.0:
-; RV32ZVE32F-NEXT: lw a2, 56(a1)
-; RV32ZVE32F-NEXT: lw a3, 48(a1)
-; RV32ZVE32F-NEXT: lw a4, 40(a1)
-; RV32ZVE32F-NEXT: lw a5, 32(a1)
+; RV32ZVE32F-NEXT: lw a2, 32(a1)
+; RV32ZVE32F-NEXT: lw a3, 40(a1)
+; RV32ZVE32F-NEXT: lw a4, 48(a1)
+; RV32ZVE32F-NEXT: lw a5, 56(a1)
; RV32ZVE32F-NEXT: lw a6, 0(a1)
; RV32ZVE32F-NEXT: lw a7, 8(a1)
; RV32ZVE32F-NEXT: lw t0, 16(a1)
@@ -10253,10 +10253,10 @@ define void @mscatter_baseidx_v8f64(<8 x double> %val, ptr %base, <8 x i64> %idx
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a7
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t0
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a1
-; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a5
-; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a4
-; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a3
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a2
+; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a3
+; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a4
+; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a5
; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a1, v0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll
index e57b6a22dd6eab..9385fa69b2f049 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll
@@ -736,18 +736,18 @@ define void @scatter_of_pointers(ptr noalias nocapture %arg, ptr noalias nocaptu
; ZVE32F-NEXT: li a5, 40
; ZVE32F-NEXT: .LBB13_1: # %bb2
; ZVE32F-NEXT: # =>This Inner Loop Header: Depth=1
-; ZVE32F-NEXT: ld a6, 8(a1)
-; ZVE32F-NEXT: ld a7, 0(a1)
-; ZVE32F-NEXT: ld t0, 24(a1)
-; ZVE32F-NEXT: ld t1, 16(a1)
+; ZVE32F-NEXT: ld a6, 0(a1)
+; ZVE32F-NEXT: ld a7, 8(a1)
+; ZVE32F-NEXT: ld t0, 16(a1)
+; ZVE32F-NEXT: ld t1, 24(a1)
; ZVE32F-NEXT: mul t2, a4, a5
; ZVE32F-NEXT: add t2, a0, t2
; ZVE32F-NEXT: mul t3, a2, a5
; ZVE32F-NEXT: add t3, a0, t3
-; ZVE32F-NEXT: sd a7, 0(t3)
-; ZVE32F-NEXT: sd a6, 0(t2)
-; ZVE32F-NEXT: sd t1, 80(t3)
-; ZVE32F-NEXT: sd t0, 80(t2)
+; ZVE32F-NEXT: sd a6, 0(t3)
+; ZVE32F-NEXT: sd a7, 0(t2)
+; ZVE32F-NEXT: sd t0, 80(t3)
+; ZVE32F-NEXT: sd t1, 80(t2)
; ZVE32F-NEXT: addi a2, a2, 4
; ZVE32F-NEXT: addi a1, a1, 32
; ZVE32F-NEXT: addi a4, a4, 4
diff --git a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
index ac830b34b5957d..5a880105f68379 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
@@ -340,21 +340,21 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) {
; CHECK-NOV-NEXT: .cfi_offset fs0, -48
; CHECK-NOV-NEXT: .cfi_offset fs1, -56
; CHECK-NOV-NEXT: .cfi_offset fs2, -64
-; CHECK-NOV-NEXT: lhu s1, 24(a1)
-; CHECK-NOV-NEXT: lhu s2, 0(a1)
-; CHECK-NOV-NEXT: lhu s3, 8(a1)
-; CHECK-NOV-NEXT: lhu a1, 16(a1)
+; CHECK-NOV-NEXT: lhu s1, 0(a1)
+; CHECK-NOV-NEXT: lhu s2, 8(a1)
+; CHECK-NOV-NEXT: lhu a2, 16(a1)
+; CHECK-NOV-NEXT: lhu s3, 24(a1)
; CHECK-NOV-NEXT: mv s0, a0
-; CHECK-NOV-NEXT: fmv.w.x fa0, a1
+; CHECK-NOV-NEXT: fmv.w.x fa0, a2
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fmv.s fs2, fa0
-; CHECK-NOV-NEXT: fmv.w.x fa0, s3
+; CHECK-NOV-NEXT: fmv.w.x fa0, s2
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fmv.s fs1, fa0
-; CHECK-NOV-NEXT: fmv.w.x fa0, s2
+; CHECK-NOV-NEXT: fmv.w.x fa0, s1
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fmv.s fs0, fa0
-; CHECK-NOV-NEXT: fmv.w.x fa0, s1
+; CHECK-NOV-NEXT: fmv.w.x fa0, s3
; CHECK-NOV-NEXT: fcvt.l.s s1, fs2, rtz
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fcvt.l.s a0, fa0, rtz
@@ -524,17 +524,17 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) {
; CHECK-NOV-NEXT: .cfi_offset fs1, -56
; CHECK-NOV-NEXT: .cfi_offset fs2, -64
; CHECK-NOV-NEXT: lhu s1, 0(a1)
-; CHECK-NOV-NEXT: lhu s2, 24(a1)
-; CHECK-NOV-NEXT: lhu s3, 16(a1)
-; CHECK-NOV-NEXT: lhu a1, 8(a1)
+; CHECK-NOV-NEXT: lhu a2, 8(a1)
+; CHECK-NOV-NEXT: lhu s2, 16(a1)
+; CHECK-NOV-NEXT: lhu s3, 24(a1)
; CHECK-NOV-NEXT: mv s0, a0
-; CHECK-NOV-NEXT: fmv.w.x fa0, a1
+; CHECK-NOV-NEXT: fmv.w.x fa0, a2
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fmv.s fs2, fa0
-; CHECK-NOV-NEXT: fmv.w.x fa0, s3
+; CHECK-NOV-NEXT: fmv.w.x fa0, s2
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fmv.s fs1, fa0
-; CHECK-NOV-NEXT: fmv.w.x fa0, s2
+; CHECK-NOV-NEXT: fmv.w.x fa0, s3
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fmv.s fs0, fa0
; CHECK-NOV-NEXT: fmv.w.x fa0, s1
@@ -684,21 +684,21 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) {
; CHECK-NOV-NEXT: .cfi_offset fs0, -48
; CHECK-NOV-NEXT: .cfi_offset fs1, -56
; CHECK-NOV-NEXT: .cfi_offset fs2, -64
-; CHECK-NOV-NEXT: lhu s1, 24(a1)
-; CHECK-NOV-NEXT: lhu s2, 0(a1)
-; CHECK-NOV-NEXT: lhu s3, 8(a1)
-; CHECK-NOV-NEXT: lhu a1, 16(a1)
+; CHECK-NOV-NEXT: lhu s1, 0(a1)
+; CHECK-NOV-NEXT: lhu s2, 8(a1)
+; CHECK-NOV-NEXT: lhu a2, 16(a1)
+; CHECK-NOV-NEXT: lhu s3, 24(a1)
; CHECK-NOV-NEXT: mv s0, a0
-; CHECK-NOV-NEXT: fmv.w.x fa0, a1
+; CHECK-NOV-NEXT: fmv.w.x fa0, a2
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fmv.s fs2, fa0
-; CHECK-NOV-NEXT: fmv.w.x fa0, s3
+; CHECK-NOV-NEXT: fmv.w.x fa0, s2
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fmv.s fs1, fa0
-; CHECK-NOV-NEXT: fmv.w.x fa0, s2
+; CHECK-NOV-NEXT: fmv.w.x fa0, s1
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fmv.s fs0, fa0
-; CHECK-NOV-NEXT: fmv.w.x fa0, s1
+; CHECK-NOV-NEXT: fmv.w.x fa0, s3
; CHECK-NOV-NEXT: fcvt.l.s s1, fs2, rtz
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fcvt.l.s a0, fa0, rtz
@@ -1196,37 +1196,37 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) {
; CHECK-NOV-NEXT: .cfi_offset fs4, -112
; CHECK-NOV-NEXT: .cfi_offset fs5, -120
; CHECK-NOV-NEXT: .cfi_offset fs6, -128
-; CHECK-NOV-NEXT: lhu s1, 56(a1)
-; CHECK-NOV-NEXT: lhu s2, 0(a1)
-; CHECK-NOV-NEXT: lhu s3, 8(a1)
-; CHECK-NOV-NEXT: lhu s4, 16(a1)
-; CHECK-NOV-NEXT: lhu s5, 24(a1)
-; CHECK-NOV-NEXT: lhu s6, 32(a1)
-; CHECK-NOV-NEXT: lhu s7, 40(a1)
-; CHECK-NOV-NEXT: lhu a1, 48(a1)
+; CHECK-NOV-NEXT: lhu s1, 32(a1)
+; CHECK-NOV-NEXT: lhu s2, 40(a1)
+; CHECK-NOV-NEXT: lhu a2, 48(a1)
+; CHECK-NOV-NEXT: lhu s3, 56(a1)
+; CHECK-NOV-NEXT: lhu s4, 0(a1)
+; CHECK-NOV-NEXT: lhu s5, 8(a1)
+; CHECK-NOV-NEXT: lhu s6, 16(a1)
+; CHECK-NOV-NEXT: lhu s7, 24(a1)
; CHECK-NOV-NEXT: mv s0, a0
-; CHECK-NOV-NEXT: fmv.w.x fa0, a1
+; CHECK-NOV-NEXT: fmv.w.x fa0, a2
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fmv.s fs6, fa0
-; CHECK-NOV-NEXT: fmv.w.x fa0, s7
+; CHECK-NOV-NEXT: fmv.w.x fa0, s2
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fmv.s fs5, fa0
-; CHECK-NOV-NEXT: fmv.w.x fa0, s6
+; CHECK-NOV-NEXT: fmv.w.x fa0, s1
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fmv.s fs4, fa0
-; CHECK-NOV-NEXT: fmv.w.x fa0, s5
+; CHECK-NOV-NEXT: fmv.w.x fa0, s7
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fmv.s fs3, fa0
-; CHECK-NOV-NEXT: fmv.w.x fa0, s4
+; CHECK-NOV-NEXT: fmv.w.x fa0, s6
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fmv.s fs2, fa0
-; CHECK-NOV-NEXT: fmv.w.x fa0, s3
+; CHECK-NOV-NEXT: fmv.w.x fa0, s5
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fmv.s fs1, fa0
-; CHECK-NOV-NEXT: fmv.w.x fa0, s2
+; CHECK-NOV-NEXT: fmv.w.x fa0, s4
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fmv.s fs0, fa0
-; CHECK-NOV-NEXT: fmv.w.x fa0, s1
+; CHECK-NOV-NEXT: fmv.w.x fa0, s3
; CHECK-NOV-NEXT: fcvt.l.s s1, fs6, rtz
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fcvt.l.s a0, fa0, rtz
@@ -1551,37 +1551,37 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) {
; CHECK-NOV-NEXT: .cfi_offset fs4, -112
; CHECK-NOV-NEXT: .cfi_offset fs5, -120
; CHECK-NOV-NEXT: .cfi_offset fs6, -128
-; CHECK-NOV-NEXT: lhu s1, 0(a1)
-; CHECK-NOV-NEXT: lhu s2, 56(a1)
+; CHECK-NOV-NEXT: lhu s1, 32(a1)
+; CHECK-NOV-NEXT: lhu s2, 40(a1)
; CHECK-NOV-NEXT: lhu s3, 48(a1)
-; CHECK-NOV-NEXT: lhu s4, 40(a1)
-; CHECK-NOV-NEXT: lhu s5, 32(a1)
-; CHECK-NOV-NEXT: lhu s6, 24(a1)
-; CHECK-NOV-NEXT: lhu s7, 16(a1)
-; CHECK-NOV-NEXT: lhu a1, 8(a1)
+; CHECK-NOV-NEXT: lhu s4, 56(a1)
+; CHECK-NOV-NEXT: lhu s5, 0(a1)
+; CHECK-NOV-NEXT: lhu a2, 8(a1)
+; CHECK-NOV-NEXT: lhu s6, 16(a1)
+; CHECK-NOV-NEXT: lhu s7, 24(a1)
; CHECK-NOV-NEXT: mv s0, a0
-; CHECK-NOV-NEXT: fmv.w.x fa0, a1
+; CHECK-NOV-NEXT: fmv.w.x fa0, a2
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fmv.s fs6, fa0
-; CHECK-NOV-NEXT: fmv.w.x fa0, s7
+; CHECK-NOV-NEXT: fmv.w.x fa0, s6
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fmv.s fs5, fa0
-; CHECK-NOV-NEXT: fmv.w.x fa0, s6
+; CHECK-NOV-NEXT: fmv.w.x fa0, s7
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fmv.s fs4, fa0
-; CHECK-NOV-NEXT: fmv.w.x fa0, s5
+; CHECK-NOV-NEXT: fmv.w.x fa0, s1
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fmv.s fs3, fa0
-; CHECK-NOV-NEXT: fmv.w.x fa0, s4
+; CHECK-NOV-NEXT: fmv.w.x fa0, s2
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fmv.s fs2, fa0
; CHECK-NOV-NEXT: fmv.w.x fa0, s3
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fmv.s fs1, fa0
-; CHECK-NOV-NEXT: fmv.w.x fa0, s2
+; CHECK-NOV-NEXT: fmv.w.x fa0, s4
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fmv.s fs0, fa0
-; CHECK-NOV-NEXT: fmv.w.x fa0, s1
+; CHECK-NOV-NEXT: fmv.w.x fa0, s5
; CHECK-NOV-NEXT: fcvt.lu.s s1, fs6, rtz
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fcvt.lu.s a0, fa0, rtz
@@ -1862,37 +1862,37 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) {
; CHECK-NOV-NEXT: .cfi_offset fs4, -112
; CHECK-NOV-NEXT: .cfi_offset fs5, -120
; CHECK-NOV-NEXT: .cfi_offset fs6, -128
-; CHECK-NOV-NEXT: lhu s1, 56(a1)
-; CHECK-NOV-NEXT: lhu s2, 0(a1)
-; CHECK-NOV-NEXT: lhu s3, 8(a1)
-; CHECK-NOV-NEXT: lhu s4, 16(a1)
-; CHECK-NOV-NEXT: lhu s5, 24(a1)
-; CHECK-NOV-NEXT: lhu s6, 32(a1)
-; CHECK-NOV-NEXT: lhu s7, 40(a1)
-; CHECK-NOV-NEXT: lhu a1, 48(a1)
+; CHECK-NOV-NEXT: lhu s1, 32(a1)
+; CHECK-NOV-NEXT: lhu s2, 40(a1)
+; CHECK-NOV-NEXT: lhu a2, 48(a1)
+; CHECK-NOV-NEXT: lhu s3, 56(a1)
+; CHECK-NOV-NEXT: lhu s4, 0(a1)
+; CHECK-NOV-NEXT: lhu s5, 8(a1)
+; CHECK-NOV-NEXT: lhu s6, 16(a1)
+; CHECK-NOV-NEXT: lhu s7, 24(a1)
; CHECK-NOV-NEXT: mv s0, a0
-; CHECK-NOV-NEXT: fmv.w.x fa0, a1
+; CHECK-NOV-NEXT: fmv.w.x fa0, a2
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fmv.s fs6, fa0
-; CHECK-NOV-NEXT: fmv.w.x fa0, s7
+; CHECK-NOV-NEXT: fmv.w.x fa0, s2
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fmv.s fs5, fa0
-; CHECK-NOV-NEXT: fmv.w.x fa0, s6
+; CHECK-NOV-NEXT: fmv.w.x fa0, s1
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fmv.s fs4, fa0
-; CHECK-NOV-NEXT: fmv.w.x fa0, s5
+; CHECK-NOV-NEXT: fmv.w.x fa0, s7
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fmv.s fs3, fa0
-; CHECK-NOV-NEXT: fmv.w.x fa0, s4
+; CHECK-NOV-NEXT: fmv.w.x fa0, s6
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fmv.s fs2, fa0
-; CHECK-NOV-NEXT: fmv.w.x fa0, s3
+; CHECK-NOV-NEXT: fmv.w.x fa0, s5
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fmv.s fs1, fa0
-; CHECK-NOV-NEXT: fmv.w.x fa0, s2
+; CHECK-NOV-NEXT: fmv.w.x fa0, s4
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fmv.s fs0, fa0
-; CHECK-NOV-NEXT: fmv.w.x fa0, s1
+; CHECK-NOV-NEXT: fmv.w.x fa0, s3
; CHECK-NOV-NEXT: fcvt.l.s s1, fs6, rtz
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fcvt.l.s a0, fa0, rtz
@@ -3669,21 +3669,21 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) {
; CHECK-NOV-NEXT: .cfi_offset fs0, -48
; CHECK-NOV-NEXT: .cfi_offset fs1, -56
; CHECK-NOV-NEXT: .cfi_offset fs2, -64
-; CHECK-NOV-NEXT: lhu s1, 24(a1)
-; CHECK-NOV-NEXT: lhu s2, 0(a1)
-; CHECK-NOV-NEXT: lhu s3, 8(a1)
-; CHECK-NOV-NEXT: lhu a1, 16(a1)
+; CHECK-NOV-NEXT: lhu s1, 0(a1)
+; CHECK-NOV-NEXT: lhu s2, 8(a1)
+; CHECK-NOV-NEXT: lhu a2, 16(a1)
+; CHECK-NOV-NEXT: lhu s3, 24(a1)
; CHECK-NOV-NEXT: mv s0, a0
-; CHECK-NOV-NEXT: fmv.w.x fa0, a1
+; CHECK-NOV-NEXT: fmv.w.x fa0, a2
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fmv.s fs2, fa0
-; CHECK-NOV-NEXT: fmv.w.x fa0, s3
+; CHECK-NOV-NEXT: fmv.w.x fa0, s2
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fmv.s fs1, fa0
-; CHECK-NOV-NEXT: fmv.w.x fa0, s2
+; CHECK-NOV-NEXT: fmv.w.x fa0, s1
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fmv.s fs0, fa0
-; CHECK-NOV-NEXT: fmv.w.x fa0, s1
+; CHECK-NOV-NEXT: fmv.w.x fa0, s3
; CHECK-NOV-NEXT: fcvt.l.s s1, fs2, rtz
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fcvt.l.s a0, fa0, rtz
@@ -3851,17 +3851,17 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
; CHECK-NOV-NEXT: .cfi_offset fs1, -56
; CHECK-NOV-NEXT: .cfi_offset fs2, -64
; CHECK-NOV-NEXT: lhu s1, 0(a1)
-; CHECK-NOV-NEXT: lhu s2, 24(a1)
-; CHECK-NOV-NEXT: lhu s3, 16(a1)
-; CHECK-NOV-NEXT: lhu a1, 8(a1)
+; CHECK-NOV-NEXT: lhu a2, 8(a1)
+; CHECK-NOV-NEXT: lhu s2, 16(a1)
+; CHECK-NOV-NEXT: lhu s3, 24(a1)
; CHECK-NOV-NEXT: mv s0, a0
-; CHECK-NOV-NEXT: fmv.w.x fa0, a1
+; CHECK-NOV-NEXT: fmv.w.x fa0, a2
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fmv.s fs2, fa0
-; CHECK-NOV-NEXT: fmv.w.x fa0, s3
+; CHECK-NOV-NEXT: fmv.w.x fa0, s2
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fmv.s fs1, fa0
-; CHECK-NOV-NEXT: fmv.w.x fa0, s2
+; CHECK-NOV-NEXT: fmv.w.x fa0, s3
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fmv.s fs0, fa0
; CHECK-NOV-NEXT: fmv.w.x fa0, s1
@@ -4010,21 +4010,21 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) {
; CHECK-NOV-NEXT: .cfi_offset fs0, -48
; CHECK-NOV-NEXT: .cfi_offset fs1, -56
; CHECK-NOV-NEXT: .cfi_offset fs2, -64
-; CHECK-NOV-NEXT: lhu s1, 24(a1)
-; CHECK-NOV-NEXT: lhu s2, 0(a1)
-; CHECK-NOV-NEXT: lhu s3, 8(a1)
-; CHECK-NOV-NEXT: lhu a1, 16(a1)
+; CHECK-NOV-NEXT: lhu s1, 0(a1)
+; CHECK-NOV-NEXT: lhu s2, 8(a1)
+; CHECK-NOV-NEXT: lhu a2, 16(a1)
+; CHECK-NOV-NEXT: lhu s3, 24(a1)
; CHECK-NOV-NEXT: mv s0, a0
-; CHECK-NOV-NEXT: fmv.w.x fa0, a1
+; CHECK-NOV-NEXT: fmv.w.x fa0, a2
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fmv.s fs2, fa0
-; CHECK-NOV-NEXT: fmv.w.x fa0, s3
+; CHECK-NOV-NEXT: fmv.w.x fa0, s2
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fmv.s fs1, fa0
-; CHECK-NOV-NEXT: fmv.w.x fa0, s2
+; CHECK-NOV-NEXT: fmv.w.x fa0, s1
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fmv.s fs0, fa0
-; CHECK-NOV-NEXT: fmv.w.x fa0, s1
+; CHECK-NOV-NEXT: fmv.w.x fa0, s3
; CHECK-NOV-NEXT: fcvt.l.s s1, fs2, rtz
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fcvt.l.s a0, fa0, rtz
@@ -4510,37 +4510,37 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
; CHECK-NOV-NEXT: .cfi_offset fs4, -112
; CHECK-NOV-NEXT: .cfi_offset fs5, -120
; CHECK-NOV-NEXT: .cfi_offset fs6, -128
-; CHECK-NOV-NEXT: lhu s1, 56(a1)
-; CHECK-NOV-NEXT: lhu s2, 0(a1)
-; CHECK-NOV-NEXT: lhu s3, 8(a1)
-; CHECK-NOV-NEXT: lhu s4, 16(a1)
-; CHECK-NOV-NEXT: lhu s5, 24(a1)
-; CHECK-NOV-NEXT: lhu s6, 32(a1)
-; CHECK-NOV-NEXT: lhu s7, 40(a1)
-; CHECK-NOV-NEXT: lhu a1, 48(a1)
+; CHECK-NOV-NEXT: lhu s1, 32(a1)
+; CHECK-NOV-NEXT: lhu s2, 40(a1)
+; CHECK-NOV-NEXT: lhu a2, 48(a1)
+; CHECK-NOV-NEXT: lhu s3, 56(a1)
+; CHECK-NOV-NEXT: lhu s4, 0(a1)
+; CHECK-NOV-NEXT: lhu s5, 8(a1)
+; CHECK-NOV-NEXT: lhu s6, 16(a1)
+; CHECK-NOV-NEXT: lhu s7, 24(a1)
; CHECK-NOV-NEXT: mv s0, a0
-; CHECK-NOV-NEXT: fmv.w.x fa0, a1
+; CHECK-NOV-NEXT: fmv.w.x fa0, a2
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fmv.s fs6, fa0
-; CHECK-NOV-NEXT: fmv.w.x fa0, s7
+; CHECK-NOV-NEXT: fmv.w.x fa0, s2
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fmv.s fs5, fa0
-; CHECK-NOV-NEXT: fmv.w.x fa0, s6
+; CHECK-NOV-NEXT: fmv.w.x fa0, s1
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fmv.s fs4, fa0
-; CHECK-NOV-NEXT: fmv.w.x fa0, s5
+; CHECK-NOV-NEXT: fmv.w.x fa0, s7
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fmv.s fs3, fa0
-; CHECK-NOV-NEXT: fmv.w.x fa0, s4
+; CHECK-NOV-NEXT: fmv.w.x fa0, s6
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fmv.s fs2, fa0
-; CHECK-NOV-NEXT: fmv.w.x fa0, s3
+; CHECK-NOV-NEXT: fmv.w.x fa0, s5
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fmv.s fs1, fa0
-; CHECK-NOV-NEXT: fmv.w.x fa0, s2
+; CHECK-NOV-NEXT: fmv.w.x fa0, s4
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fmv.s fs0, fa0
-; CHECK-NOV-NEXT: fmv.w.x fa0, s1
+; CHECK-NOV-NEXT: fmv.w.x fa0, s3
; CHECK-NOV-NEXT: fcvt.l.s s1, fs6, rtz
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fcvt.l.s a0, fa0, rtz
@@ -4863,37 +4863,37 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
; CHECK-NOV-NEXT: .cfi_offset fs4, -112
; CHECK-NOV-NEXT: .cfi_offset fs5, -120
; CHECK-NOV-NEXT: .cfi_offset fs6, -128
-; CHECK-NOV-NEXT: lhu s1, 0(a1)
-; CHECK-NOV-NEXT: lhu s2, 56(a1)
+; CHECK-NOV-NEXT: lhu s1, 32(a1)
+; CHECK-NOV-NEXT: lhu s2, 40(a1)
; CHECK-NOV-NEXT: lhu s3, 48(a1)
-; CHECK-NOV-NEXT: lhu s4, 40(a1)
-; CHECK-NOV-NEXT: lhu s5, 32(a1)
-; CHECK-NOV-NEXT: lhu s6, 24(a1)
-; CHECK-NOV-NEXT: lhu s7, 16(a1)
-; CHECK-NOV-NEXT: lhu a1, 8(a1)
+; CHECK-NOV-NEXT: lhu s4, 56(a1)
+; CHECK-NOV-NEXT: lhu s5, 0(a1)
+; CHECK-NOV-NEXT: lhu a2, 8(a1)
+; CHECK-NOV-NEXT: lhu s6, 16(a1)
+; CHECK-NOV-NEXT: lhu s7, 24(a1)
; CHECK-NOV-NEXT: mv s0, a0
-; CHECK-NOV-NEXT: fmv.w.x fa0, a1
+; CHECK-NOV-NEXT: fmv.w.x fa0, a2
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fmv.s fs6, fa0
-; CHECK-NOV-NEXT: fmv.w.x fa0, s7
+; CHECK-NOV-NEXT: fmv.w.x fa0, s6
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fmv.s fs5, fa0
-; CHECK-NOV-NEXT: fmv.w.x fa0, s6
+; CHECK-NOV-NEXT: fmv.w.x fa0, s7
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fmv.s fs4, fa0
-; CHECK-NOV-NEXT: fmv.w.x fa0, s5
+; CHECK-NOV-NEXT: fmv.w.x fa0, s1
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fmv.s fs3, fa0
-; CHECK-NOV-NEXT: fmv.w.x fa0, s4
+; CHECK-NOV-NEXT: fmv.w.x fa0, s2
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fmv.s fs2, fa0
; CHECK-NOV-NEXT: fmv.w.x fa0, s3
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fmv.s fs1, fa0
-; CHECK-NOV-NEXT: fmv.w.x fa0, s2
+; CHECK-NOV-NEXT: fmv.w.x fa0, s4
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fmv.s fs0, fa0
-; CHECK-NOV-NEXT: fmv.w.x fa0, s1
+; CHECK-NOV-NEXT: fmv.w.x fa0, s5
; CHECK-NOV-NEXT: fcvt.lu.s s1, fs6, rtz
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fcvt.lu.s a0, fa0, rtz
@@ -5173,37 +5173,37 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) {
; CHECK-NOV-NEXT: .cfi_offset fs4, -112
; CHECK-NOV-NEXT: .cfi_offset fs5, -120
; CHECK-NOV-NEXT: .cfi_offset fs6, -128
-; CHECK-NOV-NEXT: lhu s1, 56(a1)
-; CHECK-NOV-NEXT: lhu s2, 0(a1)
-; CHECK-NOV-NEXT: lhu s3, 8(a1)
-; CHECK-NOV-NEXT: lhu s4, 16(a1)
-; CHECK-NOV-NEXT: lhu s5, 24(a1)
-; CHECK-NOV-NEXT: lhu s6, 32(a1)
-; CHECK-NOV-NEXT: lhu s7, 40(a1)
-; CHECK-NOV-NEXT: lhu a1, 48(a1)
+; CHECK-NOV-NEXT: lhu s1, 32(a1)
+; CHECK-NOV-NEXT: lhu s2, 40(a1)
+; CHECK-NOV-NEXT: lhu a2, 48(a1)
+; CHECK-NOV-NEXT: lhu s3, 56(a1)
+; CHECK-NOV-NEXT: lhu s4, 0(a1)
+; CHECK-NOV-NEXT: lhu s5, 8(a1)
+; CHECK-NOV-NEXT: lhu s6, 16(a1)
+; CHECK-NOV-NEXT: lhu s7, 24(a1)
; CHECK-NOV-NEXT: mv s0, a0
-; CHECK-NOV-NEXT: fmv.w.x fa0, a1
+; CHECK-NOV-NEXT: fmv.w.x fa0, a2
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fmv.s fs6, fa0
-; CHECK-NOV-NEXT: fmv.w.x fa0, s7
+; CHECK-NOV-NEXT: fmv.w.x fa0, s2
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fmv.s fs5, fa0
-; CHECK-NOV-NEXT: fmv.w.x fa0, s6
+; CHECK-NOV-NEXT: fmv.w.x fa0, s1
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fmv.s fs4, fa0
-; CHECK-NOV-NEXT: fmv.w.x fa0, s5
+; CHECK-NOV-NEXT: fmv.w.x fa0, s7
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fmv.s fs3, fa0
-; CHECK-NOV-NEXT: fmv.w.x fa0, s4
+; CHECK-NOV-NEXT: fmv.w.x fa0, s6
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fmv.s fs2, fa0
-; CHECK-NOV-NEXT: fmv.w.x fa0, s3
+; CHECK-NOV-NEXT: fmv.w.x fa0, s5
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fmv.s fs1, fa0
-; CHECK-NOV-NEXT: fmv.w.x fa0, s2
+; CHECK-NOV-NEXT: fmv.w.x fa0, s4
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fmv.s fs0, fa0
-; CHECK-NOV-NEXT: fmv.w.x fa0, s1
+; CHECK-NOV-NEXT: fmv.w.x fa0, s3
; CHECK-NOV-NEXT: fcvt.l.s s1, fs6, rtz
; CHECK-NOV-NEXT: call __extendhfsf2
; CHECK-NOV-NEXT: fcvt.l.s a0, fa0, rtz
diff --git a/llvm/test/CodeGen/RISCV/scmp.ll b/llvm/test/CodeGen/RISCV/scmp.ll
index e79b6989410a6c..a212714db53e09 100644
--- a/llvm/test/CodeGen/RISCV/scmp.ll
+++ b/llvm/test/CodeGen/RISCV/scmp.ll
@@ -87,10 +87,10 @@ define i8 @scmp.8.128(i128 %x, i128 %y) nounwind {
; RV32I-LABEL: scmp.8.128:
; RV32I: # %bb.0:
; RV32I-NEXT: lw a2, 4(a1)
-; RV32I-NEXT: lw a3, 4(a0)
; RV32I-NEXT: lw a4, 8(a1)
; RV32I-NEXT: lw a5, 12(a1)
; RV32I-NEXT: lw a6, 12(a0)
+; RV32I-NEXT: lw a3, 4(a0)
; RV32I-NEXT: lw a7, 8(a0)
; RV32I-NEXT: beq a6, a5, .LBB4_2
; RV32I-NEXT: # %bb.1:
diff --git a/llvm/test/CodeGen/RISCV/shifts.ll b/llvm/test/CodeGen/RISCV/shifts.ll
index 5ba8755201ddf5..dcc99ebaa5514b 100644
--- a/llvm/test/CodeGen/RISCV/shifts.ll
+++ b/llvm/test/CodeGen/RISCV/shifts.ll
@@ -171,21 +171,21 @@ define i128 @lshr128(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: add a1, a3, a1
; RV32I-NEXT: lw a3, 0(a1)
; RV32I-NEXT: lw a4, 4(a1)
+; RV32I-NEXT: lw a5, 8(a1)
+; RV32I-NEXT: lw a1, 12(a1)
; RV32I-NEXT: srl a3, a3, a2
-; RV32I-NEXT: slli a5, a4, 1
-; RV32I-NEXT: andi a6, a2, 31
-; RV32I-NEXT: xori a6, a6, 31
-; RV32I-NEXT: lw a7, 8(a1)
-; RV32I-NEXT: sll a5, a5, a6
-; RV32I-NEXT: or a3, a3, a5
+; RV32I-NEXT: slli a6, a4, 1
+; RV32I-NEXT: andi a7, a2, 31
+; RV32I-NEXT: xori a7, a7, 31
+; RV32I-NEXT: sll a6, a6, a7
+; RV32I-NEXT: or a3, a3, a6
; RV32I-NEXT: srl a4, a4, a2
-; RV32I-NEXT: slli a5, a7, 1
-; RV32I-NEXT: lw a1, 12(a1)
-; RV32I-NEXT: sll a5, a5, a6
-; RV32I-NEXT: or a4, a4, a5
-; RV32I-NEXT: srl a5, a7, a2
-; RV32I-NEXT: slli a7, a1, 1
-; RV32I-NEXT: sll a6, a7, a6
+; RV32I-NEXT: slli a6, a5, 1
+; RV32I-NEXT: sll a6, a6, a7
+; RV32I-NEXT: or a4, a4, a6
+; RV32I-NEXT: srl a5, a5, a2
+; RV32I-NEXT: slli a6, a1, 1
+; RV32I-NEXT: sll a6, a6, a7
; RV32I-NEXT: or a5, a5, a6
; RV32I-NEXT: srl a1, a1, a2
; RV32I-NEXT: sw a1, 12(a0)
@@ -221,41 +221,41 @@ define i128 @ashr128(i128 %a, i128 %b) nounwind {
; RV32I-LABEL: ashr128:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: lw a3, 8(a1)
+; RV32I-NEXT: lw a4, 12(a1)
+; RV32I-NEXT: lw a5, 0(a1)
+; RV32I-NEXT: lw a1, 4(a1)
; RV32I-NEXT: lw a2, 0(a2)
-; RV32I-NEXT: lw a3, 12(a1)
-; RV32I-NEXT: lw a4, 8(a1)
-; RV32I-NEXT: lw a5, 4(a1)
-; RV32I-NEXT: lw a1, 0(a1)
-; RV32I-NEXT: sw a3, 12(sp)
-; RV32I-NEXT: sw a4, 8(sp)
-; RV32I-NEXT: sw a5, 4(sp)
-; RV32I-NEXT: sw a1, 0(sp)
-; RV32I-NEXT: srai a3, a3, 31
-; RV32I-NEXT: sw a3, 28(sp)
-; RV32I-NEXT: sw a3, 24(sp)
-; RV32I-NEXT: sw a3, 20(sp)
-; RV32I-NEXT: sw a3, 16(sp)
+; RV32I-NEXT: sw a4, 12(sp)
+; RV32I-NEXT: sw a3, 8(sp)
+; RV32I-NEXT: sw a1, 4(sp)
+; RV32I-NEXT: sw a5, 0(sp)
+; RV32I-NEXT: srai a4, a4, 31
+; RV32I-NEXT: sw a4, 28(sp)
+; RV32I-NEXT: sw a4, 24(sp)
+; RV32I-NEXT: sw a4, 20(sp)
+; RV32I-NEXT: sw a4, 16(sp)
; RV32I-NEXT: srli a1, a2, 3
; RV32I-NEXT: andi a1, a1, 12
; RV32I-NEXT: mv a3, sp
; RV32I-NEXT: add a1, a3, a1
; RV32I-NEXT: lw a3, 0(a1)
; RV32I-NEXT: lw a4, 4(a1)
+; RV32I-NEXT: lw a5, 8(a1)
+; RV32I-NEXT: lw a1, 12(a1)
; RV32I-NEXT: srl a3, a3, a2
-; RV32I-NEXT: slli a5, a4, 1
-; RV32I-NEXT: andi a6, a2, 31
-; RV32I-NEXT: xori a6, a6, 31
-; RV32I-NEXT: lw a7, 8(a1)
-; RV32I-NEXT: sll a5, a5, a6
-; RV32I-NEXT: or a3, a3, a5
+; RV32I-NEXT: slli a6, a4, 1
+; RV32I-NEXT: andi a7, a2, 31
+; RV32I-NEXT: xori a7, a7, 31
+; RV32I-NEXT: sll a6, a6, a7
+; RV32I-NEXT: or a3, a3, a6
; RV32I-NEXT: srl a4, a4, a2
-; RV32I-NEXT: slli a5, a7, 1
-; RV32I-NEXT: lw a1, 12(a1)
-; RV32I-NEXT: sll a5, a5, a6
-; RV32I-NEXT: or a4, a4, a5
-; RV32I-NEXT: srl a5, a7, a2
-; RV32I-NEXT: slli a7, a1, 1
-; RV32I-NEXT: sll a6, a7, a6
+; RV32I-NEXT: slli a6, a5, 1
+; RV32I-NEXT: sll a6, a6, a7
+; RV32I-NEXT: or a4, a4, a6
+; RV32I-NEXT: srl a5, a5, a2
+; RV32I-NEXT: slli a6, a1, 1
+; RV32I-NEXT: sll a6, a6, a7
; RV32I-NEXT: or a5, a5, a6
; RV32I-NEXT: sra a1, a1, a2
; RV32I-NEXT: sw a1, 12(a0)
@@ -310,27 +310,27 @@ define i128 @shl128(i128 %a, i128 %b) nounwind {
; RV32I-NEXT: sub a3, a3, a1
; RV32I-NEXT: lw a1, 4(a3)
; RV32I-NEXT: lw a4, 0(a3)
-; RV32I-NEXT: sll a5, a1, a2
-; RV32I-NEXT: srli a6, a4, 1
-; RV32I-NEXT: andi a7, a2, 31
-; RV32I-NEXT: lw t0, 8(a3)
-; RV32I-NEXT: xori a7, a7, 31
-; RV32I-NEXT: srl a6, a6, a7
-; RV32I-NEXT: or a5, a5, a6
-; RV32I-NEXT: sll a6, t0, a2
+; RV32I-NEXT: lw a5, 8(a3)
; RV32I-NEXT: lw a3, 12(a3)
+; RV32I-NEXT: sll a6, a1, a2
+; RV32I-NEXT: srli a7, a4, 1
+; RV32I-NEXT: andi t0, a2, 31
+; RV32I-NEXT: xori t0, t0, 31
+; RV32I-NEXT: srl a7, a7, t0
+; RV32I-NEXT: or a6, a6, a7
+; RV32I-NEXT: sll a7, a5, a2
; RV32I-NEXT: srli a1, a1, 1
-; RV32I-NEXT: srl a1, a1, a7
-; RV32I-NEXT: or a1, a6, a1
+; RV32I-NEXT: srl a1, a1, t0
+; RV32I-NEXT: or a1, a7, a1
; RV32I-NEXT: sll a3, a3, a2
-; RV32I-NEXT: srli a6, t0, 1
-; RV32I-NEXT: srl a6, a6, a7
-; RV32I-NEXT: or a3, a3, a6
+; RV32I-NEXT: srli a5, a5, 1
+; RV32I-NEXT: srl a5, a5, t0
+; RV32I-NEXT: or a3, a3, a5
; RV32I-NEXT: sll a2, a4, a2
; RV32I-NEXT: sw a2, 0(a0)
; RV32I-NEXT: sw a3, 12(a0)
; RV32I-NEXT: sw a1, 8(a0)
-; RV32I-NEXT: sw a5, 4(a0)
+; RV32I-NEXT: sw a6, 4(a0)
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
@@ -394,10 +394,10 @@ define i64 @fshr64_minsize(i64 %a, i64 %b) minsize nounwind {
define i128 @fshr128_minsize(i128 %a, i128 %b) minsize nounwind {
; RV32I-LABEL: fshr128_minsize:
; RV32I: # %bb.0:
-; RV32I-NEXT: lw a3, 8(a1)
-; RV32I-NEXT: lw t2, 0(a1)
; RV32I-NEXT: lw a2, 0(a2)
+; RV32I-NEXT: lw t2, 0(a1)
; RV32I-NEXT: lw a7, 4(a1)
+; RV32I-NEXT: lw a3, 8(a1)
; RV32I-NEXT: lw a1, 12(a1)
; RV32I-NEXT: andi t1, a2, 64
; RV32I-NEXT: mv t0, a7
diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
index 162f7e34536a7c..5d00e90366c3be 100644
--- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
@@ -308,22 +308,22 @@ define void @test_srem_vec(ptr %X) nounwind {
; RV32-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s6, 0(sp) # 4-byte Folded Spill
; RV32-NEXT: mv s0, a0
-; RV32-NEXT: lbu a0, 12(a0)
-; RV32-NEXT: lw a1, 8(s0)
-; RV32-NEXT: slli a2, a0, 30
-; RV32-NEXT: lw a3, 4(s0)
-; RV32-NEXT: srli s1, a1, 2
-; RV32-NEXT: or s1, s1, a2
-; RV32-NEXT: slli a2, a1, 31
-; RV32-NEXT: srli a4, a3, 1
-; RV32-NEXT: or s2, a4, a2
-; RV32-NEXT: srli a0, a0, 2
-; RV32-NEXT: slli a0, a0, 31
-; RV32-NEXT: srai s3, a0, 31
-; RV32-NEXT: srli a1, a1, 1
+; RV32-NEXT: lbu a1, 12(a0)
+; RV32-NEXT: lw a2, 8(a0)
+; RV32-NEXT: lw a3, 4(a0)
+; RV32-NEXT: lw a0, 0(a0)
+; RV32-NEXT: slli a4, a1, 30
+; RV32-NEXT: srli s1, a2, 2
+; RV32-NEXT: or s1, s1, a4
+; RV32-NEXT: slli a4, a2, 31
+; RV32-NEXT: srli a5, a3, 1
+; RV32-NEXT: or s2, a5, a4
+; RV32-NEXT: srli a1, a1, 2
; RV32-NEXT: slli a1, a1, 31
-; RV32-NEXT: lw a0, 0(s0)
-; RV32-NEXT: srai s4, a1, 31
+; RV32-NEXT: srai s3, a1, 31
+; RV32-NEXT: srli a2, a2, 1
+; RV32-NEXT: slli a2, a2, 31
+; RV32-NEXT: srai s4, a2, 31
; RV32-NEXT: slli a1, a3, 31
; RV32-NEXT: srai a1, a1, 31
; RV32-NEXT: li a2, 6
@@ -389,8 +389,8 @@ define void @test_srem_vec(ptr %X) nounwind {
; RV64-NEXT: mv s0, a0
; RV64-NEXT: lbu a0, 12(a0)
; RV64-NEXT: lwu a1, 8(s0)
-; RV64-NEXT: slli a0, a0, 32
; RV64-NEXT: ld a2, 0(s0)
+; RV64-NEXT: slli a0, a0, 32
; RV64-NEXT: or a0, a1, a0
; RV64-NEXT: slli a0, a0, 29
; RV64-NEXT: srai s1, a0, 31
@@ -460,22 +460,22 @@ define void @test_srem_vec(ptr %X) nounwind {
; RV32M-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
; RV32M-NEXT: sw s6, 0(sp) # 4-byte Folded Spill
; RV32M-NEXT: mv s0, a0
-; RV32M-NEXT: lbu a0, 12(a0)
-; RV32M-NEXT: lw a1, 8(s0)
-; RV32M-NEXT: slli a2, a0, 30
-; RV32M-NEXT: lw a3, 4(s0)
-; RV32M-NEXT: srli s1, a1, 2
-; RV32M-NEXT: or s1, s1, a2
-; RV32M-NEXT: slli a2, a1, 31
-; RV32M-NEXT: srli a4, a3, 1
-; RV32M-NEXT: or s2, a4, a2
-; RV32M-NEXT: srli a0, a0, 2
-; RV32M-NEXT: slli a0, a0, 31
-; RV32M-NEXT: srai s3, a0, 31
-; RV32M-NEXT: srli a1, a1, 1
+; RV32M-NEXT: lbu a1, 12(a0)
+; RV32M-NEXT: lw a2, 8(a0)
+; RV32M-NEXT: lw a3, 4(a0)
+; RV32M-NEXT: lw a0, 0(a0)
+; RV32M-NEXT: slli a4, a1, 30
+; RV32M-NEXT: srli s1, a2, 2
+; RV32M-NEXT: or s1, s1, a4
+; RV32M-NEXT: slli a4, a2, 31
+; RV32M-NEXT: srli a5, a3, 1
+; RV32M-NEXT: or s2, a5, a4
+; RV32M-NEXT: srli a1, a1, 2
; RV32M-NEXT: slli a1, a1, 31
-; RV32M-NEXT: lw a0, 0(s0)
-; RV32M-NEXT: srai s4, a1, 31
+; RV32M-NEXT: srai s3, a1, 31
+; RV32M-NEXT: srli a2, a2, 1
+; RV32M-NEXT: slli a2, a2, 31
+; RV32M-NEXT: srai s4, a2, 31
; RV32M-NEXT: slli a1, a3, 31
; RV32M-NEXT: srai a1, a1, 31
; RV32M-NEXT: li a2, 6
@@ -534,34 +534,34 @@ define void @test_srem_vec(ptr %X) nounwind {
; RV64M: # %bb.0:
; RV64M-NEXT: ld a1, 0(a0)
; RV64M-NEXT: lwu a2, 8(a0)
-; RV64M-NEXT: srli a3, a1, 2
-; RV64M-NEXT: lbu a4, 12(a0)
+; RV64M-NEXT: lbu a3, 12(a0)
+; RV64M-NEXT: srli a4, a1, 2
; RV64M-NEXT: slli a5, a2, 62
-; RV64M-NEXT: or a3, a5, a3
-; RV64M-NEXT: srai a3, a3, 31
-; RV64M-NEXT: slli a4, a4, 32
-; RV64M-NEXT: or a2, a2, a4
+; RV64M-NEXT: or a4, a5, a4
+; RV64M-NEXT: srai a4, a4, 31
+; RV64M-NEXT: slli a3, a3, 32
+; RV64M-NEXT: or a2, a2, a3
; RV64M-NEXT: slli a2, a2, 29
-; RV64M-NEXT: lui a4, %hi(.LCPI3_0)
-; RV64M-NEXT: ld a4, %lo(.LCPI3_0)(a4)
+; RV64M-NEXT: lui a3, %hi(.LCPI3_0)
+; RV64M-NEXT: ld a3, %lo(.LCPI3_0)(a3)
; RV64M-NEXT: srai a2, a2, 31
; RV64M-NEXT: slli a1, a1, 31
; RV64M-NEXT: srai a1, a1, 31
-; RV64M-NEXT: mulh a4, a2, a4
-; RV64M-NEXT: srli a5, a4, 63
-; RV64M-NEXT: srai a4, a4, 1
-; RV64M-NEXT: add a4, a4, a5
+; RV64M-NEXT: mulh a3, a2, a3
+; RV64M-NEXT: srli a5, a3, 63
+; RV64M-NEXT: srai a3, a3, 1
+; RV64M-NEXT: add a3, a3, a5
; RV64M-NEXT: lui a5, %hi(.LCPI3_1)
; RV64M-NEXT: ld a5, %lo(.LCPI3_1)(a5)
-; RV64M-NEXT: add a2, a2, a4
-; RV64M-NEXT: slli a4, a4, 2
-; RV64M-NEXT: add a2, a2, a4
-; RV64M-NEXT: mulh a4, a3, a5
-; RV64M-NEXT: srli a5, a4, 63
-; RV64M-NEXT: srai a4, a4, 1
-; RV64M-NEXT: add a4, a4, a5
-; RV64M-NEXT: slli a5, a4, 3
-; RV64M-NEXT: add a3, a3, a4
+; RV64M-NEXT: add a2, a2, a3
+; RV64M-NEXT: slli a3, a3, 2
+; RV64M-NEXT: add a2, a2, a3
+; RV64M-NEXT: mulh a3, a4, a5
+; RV64M-NEXT: srli a5, a3, 63
+; RV64M-NEXT: srai a3, a3, 1
+; RV64M-NEXT: add a3, a3, a5
+; RV64M-NEXT: slli a5, a3, 3
+; RV64M-NEXT: add a3, a4, a3
; RV64M-NEXT: sub a3, a3, a5
; RV64M-NEXT: addi a3, a3, -1
; RV64M-NEXT: seqz a3, a3
@@ -610,22 +610,22 @@ define void @test_srem_vec(ptr %X) nounwind {
; RV32MV-NEXT: slli a1, a1, 1
; RV32MV-NEXT: sub sp, sp, a1
; RV32MV-NEXT: mv s0, a0
-; RV32MV-NEXT: lbu a0, 12(a0)
-; RV32MV-NEXT: lw a1, 8(s0)
-; RV32MV-NEXT: slli a2, a0, 30
-; RV32MV-NEXT: lw a3, 4(s0)
-; RV32MV-NEXT: srli s1, a1, 2
-; RV32MV-NEXT: or s1, s1, a2
-; RV32MV-NEXT: slli a2, a1, 31
-; RV32MV-NEXT: srli a4, a3, 1
-; RV32MV-NEXT: or s2, a4, a2
-; RV32MV-NEXT: srli a0, a0, 2
-; RV32MV-NEXT: slli a0, a0, 31
-; RV32MV-NEXT: srai s3, a0, 31
-; RV32MV-NEXT: srli a1, a1, 1
+; RV32MV-NEXT: lbu a1, 12(a0)
+; RV32MV-NEXT: lw a2, 8(a0)
+; RV32MV-NEXT: lw a3, 4(a0)
+; RV32MV-NEXT: lw a0, 0(a0)
+; RV32MV-NEXT: slli a4, a1, 30
+; RV32MV-NEXT: srli s1, a2, 2
+; RV32MV-NEXT: or s1, s1, a4
+; RV32MV-NEXT: slli a4, a2, 31
+; RV32MV-NEXT: srli a5, a3, 1
+; RV32MV-NEXT: or s2, a5, a4
+; RV32MV-NEXT: srli a1, a1, 2
; RV32MV-NEXT: slli a1, a1, 31
-; RV32MV-NEXT: srai s4, a1, 31
-; RV32MV-NEXT: lw a0, 0(s0)
+; RV32MV-NEXT: srai s3, a1, 31
+; RV32MV-NEXT: srli a2, a2, 1
+; RV32MV-NEXT: slli a2, a2, 31
+; RV32MV-NEXT: srai s4, a2, 31
; RV32MV-NEXT: slli a1, a3, 31
; RV32MV-NEXT: srai a1, a1, 31
; RV32MV-NEXT: li a2, 1
@@ -728,8 +728,8 @@ define void @test_srem_vec(ptr %X) nounwind {
; RV64MV: # %bb.0:
; RV64MV-NEXT: lbu a1, 12(a0)
; RV64MV-NEXT: lwu a2, 8(a0)
-; RV64MV-NEXT: slli a1, a1, 32
; RV64MV-NEXT: ld a3, 0(a0)
+; RV64MV-NEXT: slli a1, a1, 32
; RV64MV-NEXT: or a1, a2, a1
; RV64MV-NEXT: slli a1, a1, 29
; RV64MV-NEXT: srai a1, a1, 31
diff --git a/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll b/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll
index 7fc4713ac2d6e1..90443051d4b574 100644
--- a/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll
+++ b/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll
@@ -18,29 +18,29 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) nounwind {
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: lh s0, 12(a1)
-; RV32I-NEXT: lh s1, 8(a1)
-; RV32I-NEXT: lh s2, 4(a1)
; RV32I-NEXT: lh a2, 0(a1)
+; RV32I-NEXT: lh s0, 4(a1)
+; RV32I-NEXT: lh s1, 8(a1)
+; RV32I-NEXT: lh s2, 12(a1)
; RV32I-NEXT: mv s3, a0
; RV32I-NEXT: li a1, 95
; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: call __modsi3
; RV32I-NEXT: mv s4, a0
; RV32I-NEXT: li a1, -124
-; RV32I-NEXT: mv a0, s2
+; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __modsi3
-; RV32I-NEXT: mv s2, a0
+; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: li a1, 98
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: call __modsi3
; RV32I-NEXT: mv s1, a0
; RV32I-NEXT: li a1, -1003
-; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a0, s2
; RV32I-NEXT: call __modsi3
; RV32I-NEXT: sh a0, 6(s3)
; RV32I-NEXT: sh s1, 4(s3)
-; RV32I-NEXT: sh s2, 2(s3)
+; RV32I-NEXT: sh s0, 2(s3)
; RV32I-NEXT: sh s4, 0(s3)
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
@@ -53,52 +53,52 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) nounwind {
;
; RV32IM-LABEL: fold_srem_vec_1:
; RV32IM: # %bb.0:
-; RV32IM-NEXT: lh a2, 12(a1)
-; RV32IM-NEXT: lh a3, 8(a1)
-; RV32IM-NEXT: lh a4, 0(a1)
-; RV32IM-NEXT: lh a1, 4(a1)
+; RV32IM-NEXT: lh a2, 0(a1)
+; RV32IM-NEXT: lh a3, 4(a1)
+; RV32IM-NEXT: lh a4, 8(a1)
+; RV32IM-NEXT: lh a1, 12(a1)
; RV32IM-NEXT: lui a5, 706409
; RV32IM-NEXT: addi a5, a5, 389
-; RV32IM-NEXT: mulh a5, a4, a5
-; RV32IM-NEXT: add a5, a5, a4
+; RV32IM-NEXT: mulh a5, a2, a5
+; RV32IM-NEXT: add a5, a5, a2
; RV32IM-NEXT: srli a6, a5, 31
; RV32IM-NEXT: srli a5, a5, 6
; RV32IM-NEXT: add a5, a5, a6
; RV32IM-NEXT: li a6, 95
; RV32IM-NEXT: mul a5, a5, a6
-; RV32IM-NEXT: sub a4, a4, a5
+; RV32IM-NEXT: sub a2, a2, a5
; RV32IM-NEXT: lui a5, 507375
; RV32IM-NEXT: addi a5, a5, 1981
-; RV32IM-NEXT: mulh a5, a1, a5
-; RV32IM-NEXT: sub a5, a5, a1
+; RV32IM-NEXT: mulh a5, a3, a5
+; RV32IM-NEXT: sub a5, a5, a3
; RV32IM-NEXT: srli a6, a5, 31
; RV32IM-NEXT: srli a5, a5, 6
; RV32IM-NEXT: add a5, a5, a6
; RV32IM-NEXT: li a6, -124
; RV32IM-NEXT: mul a5, a5, a6
-; RV32IM-NEXT: sub a1, a1, a5
+; RV32IM-NEXT: sub a3, a3, a5
; RV32IM-NEXT: lui a5, 342392
; RV32IM-NEXT: addi a5, a5, 669
-; RV32IM-NEXT: mulh a5, a3, a5
+; RV32IM-NEXT: mulh a5, a4, a5
; RV32IM-NEXT: srli a6, a5, 31
; RV32IM-NEXT: srli a5, a5, 5
; RV32IM-NEXT: add a5, a5, a6
; RV32IM-NEXT: li a6, 98
; RV32IM-NEXT: mul a5, a5, a6
-; RV32IM-NEXT: sub a3, a3, a5
+; RV32IM-NEXT: sub a4, a4, a5
; RV32IM-NEXT: lui a5, 780943
; RV32IM-NEXT: addi a5, a5, 1809
-; RV32IM-NEXT: mulh a5, a2, a5
+; RV32IM-NEXT: mulh a5, a1, a5
; RV32IM-NEXT: srli a6, a5, 31
; RV32IM-NEXT: srli a5, a5, 8
; RV32IM-NEXT: add a5, a5, a6
; RV32IM-NEXT: li a6, -1003
; RV32IM-NEXT: mul a5, a5, a6
-; RV32IM-NEXT: sub a2, a2, a5
-; RV32IM-NEXT: sh a2, 6(a0)
-; RV32IM-NEXT: sh a3, 4(a0)
-; RV32IM-NEXT: sh a1, 2(a0)
-; RV32IM-NEXT: sh a4, 0(a0)
+; RV32IM-NEXT: sub a1, a1, a5
+; RV32IM-NEXT: sh a1, 6(a0)
+; RV32IM-NEXT: sh a4, 4(a0)
+; RV32IM-NEXT: sh a3, 2(a0)
+; RV32IM-NEXT: sh a2, 0(a0)
; RV32IM-NEXT: ret
;
; RV64I-LABEL: fold_srem_vec_1:
@@ -110,29 +110,29 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) nounwind {
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s4, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT: lh s0, 24(a1)
-; RV64I-NEXT: lh s1, 16(a1)
-; RV64I-NEXT: lh s2, 8(a1)
; RV64I-NEXT: lh a2, 0(a1)
+; RV64I-NEXT: lh s0, 8(a1)
+; RV64I-NEXT: lh s1, 16(a1)
+; RV64I-NEXT: lh s2, 24(a1)
; RV64I-NEXT: mv s3, a0
; RV64I-NEXT: li a1, 95
; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: call __moddi3
; RV64I-NEXT: mv s4, a0
; RV64I-NEXT: li a1, -124
-; RV64I-NEXT: mv a0, s2
+; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __moddi3
-; RV64I-NEXT: mv s2, a0
+; RV64I-NEXT: mv s0, a0
; RV64I-NEXT: li a1, 98
; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: call __moddi3
; RV64I-NEXT: mv s1, a0
; RV64I-NEXT: li a1, -1003
-; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv a0, s2
; RV64I-NEXT: call __moddi3
; RV64I-NEXT: sh a0, 6(s3)
; RV64I-NEXT: sh s1, 4(s3)
-; RV64I-NEXT: sh s2, 2(s3)
+; RV64I-NEXT: sh s0, 2(s3)
; RV64I-NEXT: sh s4, 0(s3)
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
@@ -145,52 +145,52 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) nounwind {
;
; RV64IM-LABEL: fold_srem_vec_1:
; RV64IM: # %bb.0:
-; RV64IM-NEXT: lh a2, 0(a1)
-; RV64IM-NEXT: lui a3, %hi(.LCPI0_0)
-; RV64IM-NEXT: ld a3, %lo(.LCPI0_0)(a3)
-; RV64IM-NEXT: lh a4, 24(a1)
+; RV64IM-NEXT: lui a2, %hi(.LCPI0_0)
+; RV64IM-NEXT: ld a2, %lo(.LCPI0_0)(a2)
+; RV64IM-NEXT: lh a3, 0(a1)
+; RV64IM-NEXT: lh a4, 8(a1)
; RV64IM-NEXT: lh a5, 16(a1)
-; RV64IM-NEXT: lh a1, 8(a1)
-; RV64IM-NEXT: mulh a3, a2, a3
-; RV64IM-NEXT: add a3, a3, a2
-; RV64IM-NEXT: srli a6, a3, 63
-; RV64IM-NEXT: srli a3, a3, 6
-; RV64IM-NEXT: add a3, a3, a6
+; RV64IM-NEXT: lh a1, 24(a1)
+; RV64IM-NEXT: mulh a2, a3, a2
+; RV64IM-NEXT: add a2, a2, a3
+; RV64IM-NEXT: srli a6, a2, 63
+; RV64IM-NEXT: srli a2, a2, 6
+; RV64IM-NEXT: add a2, a2, a6
; RV64IM-NEXT: lui a6, %hi(.LCPI0_1)
; RV64IM-NEXT: ld a6, %lo(.LCPI0_1)(a6)
; RV64IM-NEXT: li a7, 95
-; RV64IM-NEXT: mul a3, a3, a7
-; RV64IM-NEXT: subw a2, a2, a3
-; RV64IM-NEXT: mulh a3, a1, a6
-; RV64IM-NEXT: sub a3, a3, a1
-; RV64IM-NEXT: srli a6, a3, 63
-; RV64IM-NEXT: srli a3, a3, 6
-; RV64IM-NEXT: add a3, a3, a6
+; RV64IM-NEXT: mul a2, a2, a7
+; RV64IM-NEXT: subw a3, a3, a2
+; RV64IM-NEXT: mulh a2, a4, a6
+; RV64IM-NEXT: sub a2, a2, a4
+; RV64IM-NEXT: srli a6, a2, 63
+; RV64IM-NEXT: srli a2, a2, 6
+; RV64IM-NEXT: add a2, a2, a6
; RV64IM-NEXT: lui a6, %hi(.LCPI0_2)
; RV64IM-NEXT: ld a6, %lo(.LCPI0_2)(a6)
; RV64IM-NEXT: li a7, -124
-; RV64IM-NEXT: mul a3, a3, a7
-; RV64IM-NEXT: subw a1, a1, a3
-; RV64IM-NEXT: mulh a3, a5, a6
-; RV64IM-NEXT: srli a6, a3, 63
-; RV64IM-NEXT: srli a3, a3, 5
-; RV64IM-NEXT: add a3, a3, a6
+; RV64IM-NEXT: mul a2, a2, a7
+; RV64IM-NEXT: subw a4, a4, a2
+; RV64IM-NEXT: mulh a2, a5, a6
+; RV64IM-NEXT: srli a6, a2, 63
+; RV64IM-NEXT: srli a2, a2, 5
+; RV64IM-NEXT: add a2, a2, a6
; RV64IM-NEXT: lui a6, %hi(.LCPI0_3)
; RV64IM-NEXT: ld a6, %lo(.LCPI0_3)(a6)
; RV64IM-NEXT: li a7, 98
-; RV64IM-NEXT: mul a3, a3, a7
-; RV64IM-NEXT: subw a5, a5, a3
-; RV64IM-NEXT: mulh a3, a4, a6
-; RV64IM-NEXT: srli a6, a3, 63
-; RV64IM-NEXT: srli a3, a3, 7
-; RV64IM-NEXT: add a3, a3, a6
+; RV64IM-NEXT: mul a2, a2, a7
+; RV64IM-NEXT: subw a5, a5, a2
+; RV64IM-NEXT: mulh a2, a1, a6
+; RV64IM-NEXT: srli a6, a2, 63
+; RV64IM-NEXT: srli a2, a2, 7
+; RV64IM-NEXT: add a2, a2, a6
; RV64IM-NEXT: li a6, -1003
-; RV64IM-NEXT: mul a3, a3, a6
-; RV64IM-NEXT: subw a4, a4, a3
-; RV64IM-NEXT: sh a4, 6(a0)
+; RV64IM-NEXT: mul a2, a2, a6
+; RV64IM-NEXT: subw a1, a1, a2
+; RV64IM-NEXT: sh a1, 6(a0)
; RV64IM-NEXT: sh a5, 4(a0)
-; RV64IM-NEXT: sh a1, 2(a0)
-; RV64IM-NEXT: sh a2, 0(a0)
+; RV64IM-NEXT: sh a4, 2(a0)
+; RV64IM-NEXT: sh a3, 0(a0)
; RV64IM-NEXT: ret
%1 = srem <4 x i16> %x, <i16 95, i16 -124, i16 98, i16 -1003>
ret <4 x i16> %1
@@ -206,29 +206,29 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) nounwind {
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: lh s0, 12(a1)
-; RV32I-NEXT: lh s1, 8(a1)
-; RV32I-NEXT: lh s2, 4(a1)
; RV32I-NEXT: lh a2, 0(a1)
+; RV32I-NEXT: lh s0, 4(a1)
+; RV32I-NEXT: lh s1, 8(a1)
+; RV32I-NEXT: lh s2, 12(a1)
; RV32I-NEXT: mv s3, a0
; RV32I-NEXT: li a1, 95
; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: call __modsi3
; RV32I-NEXT: mv s4, a0
; RV32I-NEXT: li a1, 95
-; RV32I-NEXT: mv a0, s2
+; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __modsi3
-; RV32I-NEXT: mv s2, a0
+; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: li a1, 95
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: call __modsi3
; RV32I-NEXT: mv s1, a0
; RV32I-NEXT: li a1, 95
-; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a0, s2
; RV32I-NEXT: call __modsi3
; RV32I-NEXT: sh a0, 6(s3)
; RV32I-NEXT: sh s1, 4(s3)
-; RV32I-NEXT: sh s2, 2(s3)
+; RV32I-NEXT: sh s0, 2(s3)
; RV32I-NEXT: sh s4, 0(s3)
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
@@ -241,45 +241,45 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) nounwind {
;
; RV32IM-LABEL: fold_srem_vec_2:
; RV32IM: # %bb.0:
-; RV32IM-NEXT: lh a2, 12(a1)
-; RV32IM-NEXT: lh a3, 8(a1)
-; RV32IM-NEXT: lh a4, 0(a1)
-; RV32IM-NEXT: lh a1, 4(a1)
+; RV32IM-NEXT: lh a2, 0(a1)
+; RV32IM-NEXT: lh a3, 4(a1)
+; RV32IM-NEXT: lh a4, 8(a1)
+; RV32IM-NEXT: lh a1, 12(a1)
; RV32IM-NEXT: lui a5, 706409
; RV32IM-NEXT: addi a5, a5, 389
-; RV32IM-NEXT: mulh a6, a4, a5
-; RV32IM-NEXT: add a6, a6, a4
+; RV32IM-NEXT: mulh a6, a2, a5
+; RV32IM-NEXT: add a6, a6, a2
; RV32IM-NEXT: srli a7, a6, 31
; RV32IM-NEXT: srli a6, a6, 6
; RV32IM-NEXT: add a6, a6, a7
; RV32IM-NEXT: li a7, 95
; RV32IM-NEXT: mul a6, a6, a7
-; RV32IM-NEXT: sub a4, a4, a6
-; RV32IM-NEXT: mulh a6, a1, a5
-; RV32IM-NEXT: add a6, a6, a1
+; RV32IM-NEXT: sub a2, a2, a6
+; RV32IM-NEXT: mulh a6, a3, a5
+; RV32IM-NEXT: add a6, a6, a3
; RV32IM-NEXT: srli t0, a6, 31
; RV32IM-NEXT: srli a6, a6, 6
; RV32IM-NEXT: add a6, a6, t0
; RV32IM-NEXT: mul a6, a6, a7
-; RV32IM-NEXT: sub a1, a1, a6
-; RV32IM-NEXT: mulh a6, a3, a5
-; RV32IM-NEXT: add a6, a6, a3
+; RV32IM-NEXT: sub a3, a3, a6
+; RV32IM-NEXT: mulh a6, a4, a5
+; RV32IM-NEXT: add a6, a6, a4
; RV32IM-NEXT: srli t0, a6, 31
; RV32IM-NEXT: srli a6, a6, 6
; RV32IM-NEXT: add a6, a6, t0
; RV32IM-NEXT: mul a6, a6, a7
-; RV32IM-NEXT: sub a3, a3, a6
-; RV32IM-NEXT: mulh a5, a2, a5
-; RV32IM-NEXT: add a5, a5, a2
+; RV32IM-NEXT: sub a4, a4, a6
+; RV32IM-NEXT: mulh a5, a1, a5
+; RV32IM-NEXT: add a5, a5, a1
; RV32IM-NEXT: srli a6, a5, 31
; RV32IM-NEXT: srli a5, a5, 6
; RV32IM-NEXT: add a5, a5, a6
; RV32IM-NEXT: mul a5, a5, a7
-; RV32IM-NEXT: sub a2, a2, a5
-; RV32IM-NEXT: sh a2, 6(a0)
-; RV32IM-NEXT: sh a3, 4(a0)
-; RV32IM-NEXT: sh a1, 2(a0)
-; RV32IM-NEXT: sh a4, 0(a0)
+; RV32IM-NEXT: sub a1, a1, a5
+; RV32IM-NEXT: sh a1, 6(a0)
+; RV32IM-NEXT: sh a4, 4(a0)
+; RV32IM-NEXT: sh a3, 2(a0)
+; RV32IM-NEXT: sh a2, 0(a0)
; RV32IM-NEXT: ret
;
; RV64I-LABEL: fold_srem_vec_2:
@@ -291,29 +291,29 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) nounwind {
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s4, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT: lh s0, 24(a1)
-; RV64I-NEXT: lh s1, 16(a1)
-; RV64I-NEXT: lh s2, 8(a1)
; RV64I-NEXT: lh a2, 0(a1)
+; RV64I-NEXT: lh s0, 8(a1)
+; RV64I-NEXT: lh s1, 16(a1)
+; RV64I-NEXT: lh s2, 24(a1)
; RV64I-NEXT: mv s3, a0
; RV64I-NEXT: li a1, 95
; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: call __moddi3
; RV64I-NEXT: mv s4, a0
; RV64I-NEXT: li a1, 95
-; RV64I-NEXT: mv a0, s2
+; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __moddi3
-; RV64I-NEXT: mv s2, a0
+; RV64I-NEXT: mv s0, a0
; RV64I-NEXT: li a1, 95
; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: call __moddi3
; RV64I-NEXT: mv s1, a0
; RV64I-NEXT: li a1, 95
-; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv a0, s2
; RV64I-NEXT: call __moddi3
; RV64I-NEXT: sh a0, 6(s3)
; RV64I-NEXT: sh s1, 4(s3)
-; RV64I-NEXT: sh s2, 2(s3)
+; RV64I-NEXT: sh s0, 2(s3)
; RV64I-NEXT: sh s4, 0(s3)
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
@@ -326,45 +326,45 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) nounwind {
;
; RV64IM-LABEL: fold_srem_vec_2:
; RV64IM: # %bb.0:
-; RV64IM-NEXT: lh a2, 0(a1)
-; RV64IM-NEXT: lui a3, %hi(.LCPI1_0)
-; RV64IM-NEXT: ld a3, %lo(.LCPI1_0)(a3)
-; RV64IM-NEXT: lh a4, 24(a1)
+; RV64IM-NEXT: lui a2, %hi(.LCPI1_0)
+; RV64IM-NEXT: ld a2, %lo(.LCPI1_0)(a2)
+; RV64IM-NEXT: lh a3, 0(a1)
+; RV64IM-NEXT: lh a4, 8(a1)
; RV64IM-NEXT: lh a5, 16(a1)
-; RV64IM-NEXT: lh a1, 8(a1)
-; RV64IM-NEXT: mulh a6, a2, a3
-; RV64IM-NEXT: add a6, a6, a2
+; RV64IM-NEXT: lh a1, 24(a1)
+; RV64IM-NEXT: mulh a6, a3, a2
+; RV64IM-NEXT: add a6, a6, a3
; RV64IM-NEXT: srli a7, a6, 63
; RV64IM-NEXT: srli a6, a6, 6
; RV64IM-NEXT: add a6, a6, a7
; RV64IM-NEXT: li a7, 95
; RV64IM-NEXT: mul a6, a6, a7
-; RV64IM-NEXT: subw a2, a2, a6
-; RV64IM-NEXT: mulh a6, a1, a3
-; RV64IM-NEXT: add a6, a6, a1
+; RV64IM-NEXT: subw a3, a3, a6
+; RV64IM-NEXT: mulh a6, a4, a2
+; RV64IM-NEXT: add a6, a6, a4
; RV64IM-NEXT: srli t0, a6, 63
; RV64IM-NEXT: srli a6, a6, 6
; RV64IM-NEXT: add a6, a6, t0
; RV64IM-NEXT: mul a6, a6, a7
-; RV64IM-NEXT: subw a1, a1, a6
-; RV64IM-NEXT: mulh a6, a5, a3
+; RV64IM-NEXT: subw a4, a4, a6
+; RV64IM-NEXT: mulh a6, a5, a2
; RV64IM-NEXT: add a6, a6, a5
; RV64IM-NEXT: srli t0, a6, 63
; RV64IM-NEXT: srli a6, a6, 6
; RV64IM-NEXT: add a6, a6, t0
; RV64IM-NEXT: mul a6, a6, a7
; RV64IM-NEXT: subw a5, a5, a6
-; RV64IM-NEXT: mulh a3, a4, a3
-; RV64IM-NEXT: add a3, a3, a4
-; RV64IM-NEXT: srli a6, a3, 63
-; RV64IM-NEXT: srli a3, a3, 6
-; RV64IM-NEXT: add a3, a3, a6
-; RV64IM-NEXT: mul a3, a3, a7
-; RV64IM-NEXT: subw a4, a4, a3
-; RV64IM-NEXT: sh a4, 6(a0)
+; RV64IM-NEXT: mulh a2, a1, a2
+; RV64IM-NEXT: add a2, a2, a1
+; RV64IM-NEXT: srli a6, a2, 63
+; RV64IM-NEXT: srli a2, a2, 6
+; RV64IM-NEXT: add a2, a2, a6
+; RV64IM-NEXT: mul a2, a2, a7
+; RV64IM-NEXT: subw a1, a1, a2
+; RV64IM-NEXT: sh a1, 6(a0)
; RV64IM-NEXT: sh a5, 4(a0)
-; RV64IM-NEXT: sh a1, 2(a0)
-; RV64IM-NEXT: sh a2, 0(a0)
+; RV64IM-NEXT: sh a4, 2(a0)
+; RV64IM-NEXT: sh a3, 0(a0)
; RV64IM-NEXT: ret
%1 = srem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
ret <4 x i16> %1
@@ -445,14 +445,14 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) nounwind {
;
; RV32IM-LABEL: combine_srem_sdiv:
; RV32IM: # %bb.0:
-; RV32IM-NEXT: lh a2, 0(a1)
-; RV32IM-NEXT: lh a3, 4(a1)
-; RV32IM-NEXT: lh a4, 12(a1)
+; RV32IM-NEXT: lh a2, 12(a1)
+; RV32IM-NEXT: lh a3, 0(a1)
+; RV32IM-NEXT: lh a4, 4(a1)
; RV32IM-NEXT: lh a1, 8(a1)
; RV32IM-NEXT: lui a5, 706409
; RV32IM-NEXT: addi a5, a5, 389
-; RV32IM-NEXT: mulh a6, a4, a5
-; RV32IM-NEXT: add a6, a6, a4
+; RV32IM-NEXT: mulh a6, a2, a5
+; RV32IM-NEXT: add a6, a6, a2
; RV32IM-NEXT: srli a7, a6, 31
; RV32IM-NEXT: srai a6, a6, 6
; RV32IM-NEXT: add a6, a6, a7
@@ -464,30 +464,30 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) nounwind {
; RV32IM-NEXT: srai t1, t1, 6
; RV32IM-NEXT: add t1, t1, t2
; RV32IM-NEXT: mul t2, t1, a7
-; RV32IM-NEXT: mulh t3, a3, a5
-; RV32IM-NEXT: add t3, t3, a3
+; RV32IM-NEXT: mulh t3, a4, a5
+; RV32IM-NEXT: add t3, t3, a4
; RV32IM-NEXT: srli t4, t3, 31
; RV32IM-NEXT: srai t3, t3, 6
; RV32IM-NEXT: add t3, t3, t4
; RV32IM-NEXT: mul t4, t3, a7
-; RV32IM-NEXT: mulh a5, a2, a5
-; RV32IM-NEXT: add a5, a5, a2
+; RV32IM-NEXT: mulh a5, a3, a5
+; RV32IM-NEXT: add a5, a5, a3
; RV32IM-NEXT: srli t5, a5, 31
; RV32IM-NEXT: srai a5, a5, 6
; RV32IM-NEXT: add a5, a5, t5
; RV32IM-NEXT: mul a7, a5, a7
-; RV32IM-NEXT: add a2, a2, a5
-; RV32IM-NEXT: sub a2, a2, a7
-; RV32IM-NEXT: add a3, a3, t3
-; RV32IM-NEXT: sub a3, a3, t4
+; RV32IM-NEXT: add a3, a3, a5
+; RV32IM-NEXT: sub a3, a3, a7
+; RV32IM-NEXT: add a4, a4, t3
+; RV32IM-NEXT: sub a4, a4, t4
; RV32IM-NEXT: add a1, a1, t1
; RV32IM-NEXT: sub a1, a1, t2
-; RV32IM-NEXT: add a4, a4, a6
-; RV32IM-NEXT: sub a4, a4, t0
-; RV32IM-NEXT: sh a4, 6(a0)
+; RV32IM-NEXT: add a2, a2, a6
+; RV32IM-NEXT: sub a2, a2, t0
+; RV32IM-NEXT: sh a2, 6(a0)
; RV32IM-NEXT: sh a1, 4(a0)
-; RV32IM-NEXT: sh a3, 2(a0)
-; RV32IM-NEXT: sh a2, 0(a0)
+; RV32IM-NEXT: sh a4, 2(a0)
+; RV32IM-NEXT: sh a3, 0(a0)
; RV32IM-NEXT: ret
;
; RV64I-LABEL: combine_srem_sdiv:
@@ -624,21 +624,21 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) nounwind {
; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lh a2, 0(a1)
+; RV32I-NEXT: lh a3, 4(a1)
+; RV32I-NEXT: lh a4, 8(a1)
; RV32I-NEXT: lh a0, 12(a1)
-; RV32I-NEXT: lh a3, 8(a1)
-; RV32I-NEXT: lh a1, 4(a1)
-; RV32I-NEXT: srli a4, a2, 26
-; RV32I-NEXT: add a4, a2, a4
-; RV32I-NEXT: andi a4, a4, -64
-; RV32I-NEXT: sub s1, a2, a4
-; RV32I-NEXT: srli a2, a1, 27
-; RV32I-NEXT: add a2, a1, a2
-; RV32I-NEXT: andi a2, a2, -32
-; RV32I-NEXT: sub s2, a1, a2
-; RV32I-NEXT: srli a1, a3, 29
+; RV32I-NEXT: srli a1, a2, 26
+; RV32I-NEXT: add a1, a2, a1
+; RV32I-NEXT: andi a1, a1, -64
+; RV32I-NEXT: sub s1, a2, a1
+; RV32I-NEXT: srli a1, a3, 27
; RV32I-NEXT: add a1, a3, a1
+; RV32I-NEXT: andi a1, a1, -32
+; RV32I-NEXT: sub s2, a3, a1
+; RV32I-NEXT: srli a1, a4, 29
+; RV32I-NEXT: add a1, a4, a1
; RV32I-NEXT: andi a1, a1, -8
-; RV32I-NEXT: sub s3, a3, a1
+; RV32I-NEXT: sub s3, a4, a1
; RV32I-NEXT: li a1, 95
; RV32I-NEXT: call __modsi3
; RV32I-NEXT: sh a0, 6(s0)
@@ -655,8 +655,8 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) nounwind {
;
; RV32IM-LABEL: dont_fold_srem_power_of_two:
; RV32IM: # %bb.0:
-; RV32IM-NEXT: lh a2, 8(a1)
-; RV32IM-NEXT: lh a3, 4(a1)
+; RV32IM-NEXT: lh a2, 4(a1)
+; RV32IM-NEXT: lh a3, 8(a1)
; RV32IM-NEXT: lh a4, 12(a1)
; RV32IM-NEXT: lh a1, 0(a1)
; RV32IM-NEXT: lui a5, 706409
@@ -673,16 +673,16 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) nounwind {
; RV32IM-NEXT: add a5, a1, a5
; RV32IM-NEXT: andi a5, a5, -64
; RV32IM-NEXT: sub a1, a1, a5
-; RV32IM-NEXT: srli a5, a3, 27
-; RV32IM-NEXT: add a5, a3, a5
-; RV32IM-NEXT: andi a5, a5, -32
-; RV32IM-NEXT: sub a3, a3, a5
-; RV32IM-NEXT: srli a5, a2, 29
+; RV32IM-NEXT: srli a5, a2, 27
; RV32IM-NEXT: add a5, a2, a5
-; RV32IM-NEXT: andi a5, a5, -8
+; RV32IM-NEXT: andi a5, a5, -32
; RV32IM-NEXT: sub a2, a2, a5
-; RV32IM-NEXT: sh a2, 4(a0)
-; RV32IM-NEXT: sh a3, 2(a0)
+; RV32IM-NEXT: srli a5, a3, 29
+; RV32IM-NEXT: add a5, a3, a5
+; RV32IM-NEXT: andi a5, a5, -8
+; RV32IM-NEXT: sub a3, a3, a5
+; RV32IM-NEXT: sh a3, 4(a0)
+; RV32IM-NEXT: sh a2, 2(a0)
; RV32IM-NEXT: sh a1, 0(a0)
; RV32IM-NEXT: sh a4, 6(a0)
; RV32IM-NEXT: ret
@@ -697,21 +697,21 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) nounwind {
; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
; RV64I-NEXT: lh a2, 0(a1)
+; RV64I-NEXT: lh a3, 8(a1)
+; RV64I-NEXT: lh a4, 16(a1)
; RV64I-NEXT: lh a0, 24(a1)
-; RV64I-NEXT: lh a3, 16(a1)
-; RV64I-NEXT: lh a1, 8(a1)
-; RV64I-NEXT: srli a4, a2, 58
-; RV64I-NEXT: add a4, a2, a4
-; RV64I-NEXT: andi a4, a4, -64
-; RV64I-NEXT: subw s1, a2, a4
-; RV64I-NEXT: srli a2, a1, 59
-; RV64I-NEXT: add a2, a1, a2
-; RV64I-NEXT: andi a2, a2, -32
-; RV64I-NEXT: subw s2, a1, a2
-; RV64I-NEXT: srli a1, a3, 61
+; RV64I-NEXT: srli a1, a2, 58
+; RV64I-NEXT: add a1, a2, a1
+; RV64I-NEXT: andi a1, a1, -64
+; RV64I-NEXT: subw s1, a2, a1
+; RV64I-NEXT: srli a1, a3, 59
; RV64I-NEXT: add a1, a3, a1
+; RV64I-NEXT: andi a1, a1, -32
+; RV64I-NEXT: subw s2, a3, a1
+; RV64I-NEXT: srli a1, a4, 61
+; RV64I-NEXT: add a1, a4, a1
; RV64I-NEXT: andi a1, a1, -8
-; RV64I-NEXT: subw s3, a3, a1
+; RV64I-NEXT: subw s3, a4, a1
; RV64I-NEXT: li a1, 95
; RV64I-NEXT: call __moddi3
; RV64I-NEXT: sh a0, 6(s0)
@@ -773,24 +773,24 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) nounwind {
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: lh s0, 12(a1)
-; RV32I-NEXT: lh s1, 8(a1)
; RV32I-NEXT: lh a2, 4(a1)
+; RV32I-NEXT: lh s0, 8(a1)
+; RV32I-NEXT: lh s1, 12(a1)
; RV32I-NEXT: mv s2, a0
; RV32I-NEXT: li a1, 654
; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: call __modsi3
; RV32I-NEXT: mv s3, a0
; RV32I-NEXT: li a1, 23
-; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __modsi3
-; RV32I-NEXT: mv s1, a0
+; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lui a0, 1
; RV32I-NEXT: addi a1, a0, 1327
-; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: call __modsi3
; RV32I-NEXT: sh a0, 6(s2)
-; RV32I-NEXT: sh s1, 4(s2)
+; RV32I-NEXT: sh s0, 4(s2)
; RV32I-NEXT: sh s3, 2(s2)
; RV32I-NEXT: sh zero, 0(s2)
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
@@ -803,43 +803,43 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) nounwind {
;
; RV32IM-LABEL: dont_fold_srem_one:
; RV32IM: # %bb.0:
-; RV32IM-NEXT: lh a2, 12(a1)
-; RV32IM-NEXT: lh a3, 4(a1)
-; RV32IM-NEXT: lh a1, 8(a1)
+; RV32IM-NEXT: lh a2, 4(a1)
+; RV32IM-NEXT: lh a3, 8(a1)
+; RV32IM-NEXT: lh a1, 12(a1)
; RV32IM-NEXT: lui a4, 820904
; RV32IM-NEXT: addi a4, a4, -1903
-; RV32IM-NEXT: mulh a4, a3, a4
-; RV32IM-NEXT: add a4, a4, a3
+; RV32IM-NEXT: mulh a4, a2, a4
+; RV32IM-NEXT: add a4, a4, a2
; RV32IM-NEXT: srli a5, a4, 31
; RV32IM-NEXT: srli a4, a4, 9
; RV32IM-NEXT: add a4, a4, a5
; RV32IM-NEXT: li a5, 654
; RV32IM-NEXT: mul a4, a4, a5
-; RV32IM-NEXT: sub a3, a3, a4
+; RV32IM-NEXT: sub a2, a2, a4
; RV32IM-NEXT: lui a4, 729444
; RV32IM-NEXT: addi a4, a4, 713
-; RV32IM-NEXT: mulh a4, a1, a4
-; RV32IM-NEXT: add a4, a4, a1
+; RV32IM-NEXT: mulh a4, a3, a4
+; RV32IM-NEXT: add a4, a4, a3
; RV32IM-NEXT: srli a5, a4, 31
; RV32IM-NEXT: srli a4, a4, 4
; RV32IM-NEXT: add a4, a4, a5
; RV32IM-NEXT: li a5, 23
; RV32IM-NEXT: mul a4, a4, a5
-; RV32IM-NEXT: sub a1, a1, a4
+; RV32IM-NEXT: sub a3, a3, a4
; RV32IM-NEXT: lui a4, 395996
; RV32IM-NEXT: addi a4, a4, -2009
-; RV32IM-NEXT: mulh a4, a2, a4
+; RV32IM-NEXT: mulh a4, a1, a4
; RV32IM-NEXT: srli a5, a4, 31
; RV32IM-NEXT: srli a4, a4, 11
; RV32IM-NEXT: add a4, a4, a5
; RV32IM-NEXT: lui a5, 1
; RV32IM-NEXT: addi a5, a5, 1327
; RV32IM-NEXT: mul a4, a4, a5
-; RV32IM-NEXT: sub a2, a2, a4
+; RV32IM-NEXT: sub a1, a1, a4
; RV32IM-NEXT: sh zero, 0(a0)
-; RV32IM-NEXT: sh a2, 6(a0)
-; RV32IM-NEXT: sh a1, 4(a0)
-; RV32IM-NEXT: sh a3, 2(a0)
+; RV32IM-NEXT: sh a1, 6(a0)
+; RV32IM-NEXT: sh a3, 4(a0)
+; RV32IM-NEXT: sh a2, 2(a0)
; RV32IM-NEXT: ret
;
; RV64I-LABEL: dont_fold_srem_one:
@@ -850,24 +850,24 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: lh s0, 24(a1)
-; RV64I-NEXT: lh s1, 16(a1)
; RV64I-NEXT: lh a2, 8(a1)
+; RV64I-NEXT: lh s0, 16(a1)
+; RV64I-NEXT: lh s1, 24(a1)
; RV64I-NEXT: mv s2, a0
; RV64I-NEXT: li a1, 654
; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: call __moddi3
; RV64I-NEXT: mv s3, a0
; RV64I-NEXT: li a1, 23
-; RV64I-NEXT: mv a0, s1
+; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __moddi3
-; RV64I-NEXT: mv s1, a0
+; RV64I-NEXT: mv s0, a0
; RV64I-NEXT: lui a0, 1
; RV64I-NEXT: addiw a1, a0, 1327
-; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: call __moddi3
; RV64I-NEXT: sh a0, 6(s2)
-; RV64I-NEXT: sh s1, 4(s2)
+; RV64I-NEXT: sh s0, 4(s2)
; RV64I-NEXT: sh s3, 2(s2)
; RV64I-NEXT: sh zero, 0(s2)
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
@@ -880,42 +880,42 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) nounwind {
;
; RV64IM-LABEL: dont_fold_srem_one:
; RV64IM: # %bb.0:
-; RV64IM-NEXT: lh a2, 16(a1)
-; RV64IM-NEXT: lui a3, %hi(.LCPI4_0)
-; RV64IM-NEXT: ld a3, %lo(.LCPI4_0)(a3)
-; RV64IM-NEXT: lh a4, 24(a1)
-; RV64IM-NEXT: lh a1, 8(a1)
-; RV64IM-NEXT: mulh a3, a2, a3
-; RV64IM-NEXT: add a3, a3, a2
-; RV64IM-NEXT: srli a5, a3, 63
-; RV64IM-NEXT: srli a3, a3, 4
-; RV64IM-NEXT: add a3, a3, a5
+; RV64IM-NEXT: lui a2, %hi(.LCPI4_0)
+; RV64IM-NEXT: ld a2, %lo(.LCPI4_0)(a2)
+; RV64IM-NEXT: lh a3, 16(a1)
+; RV64IM-NEXT: lh a4, 8(a1)
+; RV64IM-NEXT: lh a1, 24(a1)
+; RV64IM-NEXT: mulh a2, a3, a2
+; RV64IM-NEXT: add a2, a2, a3
+; RV64IM-NEXT: srli a5, a2, 63
+; RV64IM-NEXT: srli a2, a2, 4
+; RV64IM-NEXT: add a2, a2, a5
; RV64IM-NEXT: lui a5, %hi(.LCPI4_1)
; RV64IM-NEXT: ld a5, %lo(.LCPI4_1)(a5)
; RV64IM-NEXT: li a6, 23
-; RV64IM-NEXT: mul a3, a3, a6
-; RV64IM-NEXT: subw a2, a2, a3
-; RV64IM-NEXT: mulh a3, a1, a5
-; RV64IM-NEXT: srli a5, a3, 63
-; RV64IM-NEXT: srli a3, a3, 8
-; RV64IM-NEXT: add a3, a3, a5
+; RV64IM-NEXT: mul a2, a2, a6
+; RV64IM-NEXT: subw a3, a3, a2
+; RV64IM-NEXT: mulh a2, a4, a5
+; RV64IM-NEXT: srli a5, a2, 63
+; RV64IM-NEXT: srli a2, a2, 8
+; RV64IM-NEXT: add a2, a2, a5
; RV64IM-NEXT: lui a5, %hi(.LCPI4_2)
; RV64IM-NEXT: ld a5, %lo(.LCPI4_2)(a5)
; RV64IM-NEXT: li a6, 654
-; RV64IM-NEXT: mul a3, a3, a6
-; RV64IM-NEXT: subw a1, a1, a3
-; RV64IM-NEXT: mulh a3, a4, a5
-; RV64IM-NEXT: srli a5, a3, 63
-; RV64IM-NEXT: srli a3, a3, 11
-; RV64IM-NEXT: add a3, a3, a5
+; RV64IM-NEXT: mul a2, a2, a6
+; RV64IM-NEXT: subw a4, a4, a2
+; RV64IM-NEXT: mulh a2, a1, a5
+; RV64IM-NEXT: srli a5, a2, 63
+; RV64IM-NEXT: srli a2, a2, 11
+; RV64IM-NEXT: add a2, a2, a5
; RV64IM-NEXT: lui a5, 1
; RV64IM-NEXT: addi a5, a5, 1327
-; RV64IM-NEXT: mul a3, a3, a5
-; RV64IM-NEXT: subw a4, a4, a3
+; RV64IM-NEXT: mul a2, a2, a5
+; RV64IM-NEXT: subw a1, a1, a2
; RV64IM-NEXT: sh zero, 0(a0)
-; RV64IM-NEXT: sh a4, 6(a0)
-; RV64IM-NEXT: sh a1, 2(a0)
-; RV64IM-NEXT: sh a2, 4(a0)
+; RV64IM-NEXT: sh a1, 6(a0)
+; RV64IM-NEXT: sh a4, 2(a0)
+; RV64IM-NEXT: sh a3, 4(a0)
; RV64IM-NEXT: ret
%1 = srem <4 x i16> %x, <i16 1, i16 654, i16 23, i16 5423>
ret <4 x i16> %1
@@ -933,8 +933,8 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) nounwind {
; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: lh a2, 4(a1)
; RV32I-NEXT: mv s0, a0
-; RV32I-NEXT: lh s1, 12(a1)
; RV32I-NEXT: lh a0, 8(a1)
+; RV32I-NEXT: lh s1, 12(a1)
; RV32I-NEXT: srli a1, a2, 17
; RV32I-NEXT: add a1, a2, a1
; RV32I-NEXT: lui a3, 8
@@ -1005,8 +1005,8 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) nounwind {
; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: lh a2, 8(a1)
; RV64I-NEXT: mv s0, a0
-; RV64I-NEXT: lh s1, 24(a1)
; RV64I-NEXT: lh a0, 16(a1)
+; RV64I-NEXT: lh s1, 24(a1)
; RV64I-NEXT: srli a1, a2, 49
; RV64I-NEXT: add a1, a2, a1
; RV64I-NEXT: lui a3, 8
@@ -1033,38 +1033,38 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) nounwind {
;
; RV64IM-LABEL: dont_fold_urem_i16_smax:
; RV64IM: # %bb.0:
-; RV64IM-NEXT: lh a2, 16(a1)
-; RV64IM-NEXT: lui a3, %hi(.LCPI5_0)
-; RV64IM-NEXT: ld a3, %lo(.LCPI5_0)(a3)
-; RV64IM-NEXT: lh a4, 24(a1)
-; RV64IM-NEXT: mulh a3, a2, a3
-; RV64IM-NEXT: add a3, a3, a2
-; RV64IM-NEXT: srli a5, a3, 63
-; RV64IM-NEXT: srli a3, a3, 4
-; RV64IM-NEXT: add a3, a3, a5
-; RV64IM-NEXT: li a5, 23
-; RV64IM-NEXT: lui a6, %hi(.LCPI5_1)
-; RV64IM-NEXT: ld a6, %lo(.LCPI5_1)(a6)
-; RV64IM-NEXT: mul a3, a3, a5
-; RV64IM-NEXT: lh a1, 8(a1)
-; RV64IM-NEXT: subw a2, a2, a3
-; RV64IM-NEXT: mulh a3, a4, a6
-; RV64IM-NEXT: srli a5, a3, 63
-; RV64IM-NEXT: srli a3, a3, 11
-; RV64IM-NEXT: add a3, a3, a5
+; RV64IM-NEXT: lui a2, %hi(.LCPI5_0)
+; RV64IM-NEXT: ld a2, %lo(.LCPI5_0)(a2)
+; RV64IM-NEXT: lh a3, 16(a1)
+; RV64IM-NEXT: lh a4, 8(a1)
+; RV64IM-NEXT: lh a1, 24(a1)
+; RV64IM-NEXT: mulh a2, a3, a2
+; RV64IM-NEXT: add a2, a2, a3
+; RV64IM-NEXT: srli a5, a2, 63
+; RV64IM-NEXT: srli a2, a2, 4
+; RV64IM-NEXT: add a2, a2, a5
+; RV64IM-NEXT: lui a5, %hi(.LCPI5_1)
+; RV64IM-NEXT: ld a5, %lo(.LCPI5_1)(a5)
+; RV64IM-NEXT: li a6, 23
+; RV64IM-NEXT: mul a2, a2, a6
+; RV64IM-NEXT: subw a3, a3, a2
+; RV64IM-NEXT: mulh a2, a1, a5
+; RV64IM-NEXT: srli a5, a2, 63
+; RV64IM-NEXT: srli a2, a2, 11
+; RV64IM-NEXT: add a2, a2, a5
; RV64IM-NEXT: lui a5, 1
; RV64IM-NEXT: addi a5, a5, 1327
-; RV64IM-NEXT: mul a3, a3, a5
-; RV64IM-NEXT: subw a4, a4, a3
-; RV64IM-NEXT: srli a3, a1, 49
-; RV64IM-NEXT: add a3, a1, a3
+; RV64IM-NEXT: mul a2, a2, a5
+; RV64IM-NEXT: subw a1, a1, a2
+; RV64IM-NEXT: srli a2, a4, 49
+; RV64IM-NEXT: add a2, a4, a2
; RV64IM-NEXT: lui a5, 8
-; RV64IM-NEXT: and a3, a3, a5
-; RV64IM-NEXT: subw a1, a1, a3
+; RV64IM-NEXT: and a2, a2, a5
+; RV64IM-NEXT: subw a4, a4, a2
; RV64IM-NEXT: sh zero, 0(a0)
-; RV64IM-NEXT: sh a1, 2(a0)
-; RV64IM-NEXT: sh a4, 6(a0)
-; RV64IM-NEXT: sh a2, 4(a0)
+; RV64IM-NEXT: sh a4, 2(a0)
+; RV64IM-NEXT: sh a1, 6(a0)
+; RV64IM-NEXT: sh a3, 4(a0)
; RV64IM-NEXT: ret
%1 = srem <4 x i16> %x, <i16 1, i16 32768, i16 23, i16 5423>
ret <4 x i16> %1
@@ -1085,17 +1085,18 @@ define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) nounwind {
; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s8, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: lw s0, 24(a1)
-; RV32I-NEXT: lw s1, 28(a1)
-; RV32I-NEXT: lw s2, 16(a1)
-; RV32I-NEXT: lw s3, 20(a1)
+; RV32I-NEXT: lw s0, 16(a1)
+; RV32I-NEXT: lw s1, 20(a1)
+; RV32I-NEXT: lw s2, 24(a1)
+; RV32I-NEXT: lw s3, 28(a1)
+; RV32I-NEXT: lw a3, 0(a1)
+; RV32I-NEXT: lw a4, 4(a1)
; RV32I-NEXT: lw s4, 8(a1)
; RV32I-NEXT: lw s5, 12(a1)
-; RV32I-NEXT: lw a3, 0(a1)
-; RV32I-NEXT: lw a1, 4(a1)
; RV32I-NEXT: mv s6, a0
; RV32I-NEXT: li a2, 1
; RV32I-NEXT: mv a0, a3
+; RV32I-NEXT: mv a1, a4
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: call __moddi3
; RV32I-NEXT: mv s7, a0
@@ -1108,22 +1109,22 @@ define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) nounwind {
; RV32I-NEXT: mv s4, a0
; RV32I-NEXT: mv s5, a1
; RV32I-NEXT: li a2, 23
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: mv a1, s3
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a1, s1
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: call __moddi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: mv s3, a1
+; RV32I-NEXT: mv s0, a0
+; RV32I-NEXT: mv s1, a1
; RV32I-NEXT: lui a0, 1
; RV32I-NEXT: addi a2, a0, 1327
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: mv a1, s1
+; RV32I-NEXT: mv a0, s2
+; RV32I-NEXT: mv a1, s3
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: call __moddi3
; RV32I-NEXT: sw a1, 28(s6)
; RV32I-NEXT: sw a0, 24(s6)
-; RV32I-NEXT: sw s3, 20(s6)
-; RV32I-NEXT: sw s2, 16(s6)
+; RV32I-NEXT: sw s1, 20(s6)
+; RV32I-NEXT: sw s0, 16(s6)
; RV32I-NEXT: sw s5, 12(s6)
; RV32I-NEXT: sw s4, 8(s6)
; RV32I-NEXT: sw s8, 4(s6)
@@ -1154,17 +1155,18 @@ define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) nounwind {
; RV32IM-NEXT: sw s6, 16(sp) # 4-byte Folded Spill
; RV32IM-NEXT: sw s7, 12(sp) # 4-byte Folded Spill
; RV32IM-NEXT: sw s8, 8(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw s0, 24(a1)
-; RV32IM-NEXT: lw s1, 28(a1)
-; RV32IM-NEXT: lw s2, 16(a1)
-; RV32IM-NEXT: lw s3, 20(a1)
+; RV32IM-NEXT: lw s0, 16(a1)
+; RV32IM-NEXT: lw s1, 20(a1)
+; RV32IM-NEXT: lw s2, 24(a1)
+; RV32IM-NEXT: lw s3, 28(a1)
+; RV32IM-NEXT: lw a3, 0(a1)
+; RV32IM-NEXT: lw a4, 4(a1)
; RV32IM-NEXT: lw s4, 8(a1)
; RV32IM-NEXT: lw s5, 12(a1)
-; RV32IM-NEXT: lw a3, 0(a1)
-; RV32IM-NEXT: lw a1, 4(a1)
; RV32IM-NEXT: mv s6, a0
; RV32IM-NEXT: li a2, 1
; RV32IM-NEXT: mv a0, a3
+; RV32IM-NEXT: mv a1, a4
; RV32IM-NEXT: li a3, 0
; RV32IM-NEXT: call __moddi3
; RV32IM-NEXT: mv s7, a0
@@ -1177,22 +1179,22 @@ define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) nounwind {
; RV32IM-NEXT: mv s4, a0
; RV32IM-NEXT: mv s5, a1
; RV32IM-NEXT: li a2, 23
-; RV32IM-NEXT: mv a0, s2
-; RV32IM-NEXT: mv a1, s3
+; RV32IM-NEXT: mv a0, s0
+; RV32IM-NEXT: mv a1, s1
; RV32IM-NEXT: li a3, 0
; RV32IM-NEXT: call __moddi3
-; RV32IM-NEXT: mv s2, a0
-; RV32IM-NEXT: mv s3, a1
+; RV32IM-NEXT: mv s0, a0
+; RV32IM-NEXT: mv s1, a1
; RV32IM-NEXT: lui a0, 1
; RV32IM-NEXT: addi a2, a0, 1327
-; RV32IM-NEXT: mv a0, s0
-; RV32IM-NEXT: mv a1, s1
+; RV32IM-NEXT: mv a0, s2
+; RV32IM-NEXT: mv a1, s3
; RV32IM-NEXT: li a3, 0
; RV32IM-NEXT: call __moddi3
; RV32IM-NEXT: sw a1, 28(s6)
; RV32IM-NEXT: sw a0, 24(s6)
-; RV32IM-NEXT: sw s3, 20(s6)
-; RV32IM-NEXT: sw s2, 16(s6)
+; RV32IM-NEXT: sw s1, 20(s6)
+; RV32IM-NEXT: sw s0, 16(s6)
; RV32IM-NEXT: sw s5, 12(s6)
; RV32IM-NEXT: sw s4, 8(s6)
; RV32IM-NEXT: sw s8, 4(s6)
@@ -1218,24 +1220,24 @@ define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: ld s0, 24(a1)
-; RV64I-NEXT: ld s1, 16(a1)
; RV64I-NEXT: ld a2, 8(a1)
+; RV64I-NEXT: ld s0, 16(a1)
+; RV64I-NEXT: ld s1, 24(a1)
; RV64I-NEXT: mv s2, a0
; RV64I-NEXT: li a1, 654
; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: call __moddi3
; RV64I-NEXT: mv s3, a0
; RV64I-NEXT: li a1, 23
-; RV64I-NEXT: mv a0, s1
+; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __moddi3
-; RV64I-NEXT: mv s1, a0
+; RV64I-NEXT: mv s0, a0
; RV64I-NEXT: lui a0, 1
; RV64I-NEXT: addiw a1, a0, 1327
-; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: call __moddi3
; RV64I-NEXT: sd a0, 24(s2)
-; RV64I-NEXT: sd s1, 16(s2)
+; RV64I-NEXT: sd s0, 16(s2)
; RV64I-NEXT: sd s3, 8(s2)
; RV64I-NEXT: sd zero, 0(s2)
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
@@ -1248,42 +1250,42 @@ define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) nounwind {
;
; RV64IM-LABEL: dont_fold_srem_i64:
; RV64IM: # %bb.0:
-; RV64IM-NEXT: ld a2, 16(a1)
-; RV64IM-NEXT: lui a3, %hi(.LCPI6_0)
-; RV64IM-NEXT: ld a3, %lo(.LCPI6_0)(a3)
-; RV64IM-NEXT: ld a4, 24(a1)
-; RV64IM-NEXT: ld a1, 8(a1)
-; RV64IM-NEXT: mulh a3, a2, a3
-; RV64IM-NEXT: add a3, a3, a2
-; RV64IM-NEXT: srli a5, a3, 63
-; RV64IM-NEXT: srai a3, a3, 4
-; RV64IM-NEXT: add a3, a3, a5
+; RV64IM-NEXT: lui a2, %hi(.LCPI6_0)
+; RV64IM-NEXT: ld a2, %lo(.LCPI6_0)(a2)
+; RV64IM-NEXT: ld a3, 16(a1)
+; RV64IM-NEXT: ld a4, 8(a1)
+; RV64IM-NEXT: ld a1, 24(a1)
+; RV64IM-NEXT: mulh a2, a3, a2
+; RV64IM-NEXT: add a2, a2, a3
+; RV64IM-NEXT: srli a5, a2, 63
+; RV64IM-NEXT: srai a2, a2, 4
+; RV64IM-NEXT: add a2, a2, a5
; RV64IM-NEXT: lui a5, %hi(.LCPI6_1)
; RV64IM-NEXT: ld a5, %lo(.LCPI6_1)(a5)
; RV64IM-NEXT: li a6, 23
-; RV64IM-NEXT: mul a3, a3, a6
-; RV64IM-NEXT: sub a2, a2, a3
-; RV64IM-NEXT: mulh a3, a1, a5
-; RV64IM-NEXT: srli a5, a3, 63
-; RV64IM-NEXT: srai a3, a3, 8
-; RV64IM-NEXT: add a3, a3, a5
+; RV64IM-NEXT: mul a2, a2, a6
+; RV64IM-NEXT: sub a3, a3, a2
+; RV64IM-NEXT: mulh a2, a4, a5
+; RV64IM-NEXT: srli a5, a2, 63
+; RV64IM-NEXT: srai a2, a2, 8
+; RV64IM-NEXT: add a2, a2, a5
; RV64IM-NEXT: lui a5, %hi(.LCPI6_2)
; RV64IM-NEXT: ld a5, %lo(.LCPI6_2)(a5)
; RV64IM-NEXT: li a6, 654
-; RV64IM-NEXT: mul a3, a3, a6
-; RV64IM-NEXT: sub a1, a1, a3
-; RV64IM-NEXT: mulh a3, a4, a5
-; RV64IM-NEXT: srli a5, a3, 63
-; RV64IM-NEXT: srai a3, a3, 11
-; RV64IM-NEXT: add a3, a3, a5
+; RV64IM-NEXT: mul a2, a2, a6
+; RV64IM-NEXT: sub a4, a4, a2
+; RV64IM-NEXT: mulh a2, a1, a5
+; RV64IM-NEXT: srli a5, a2, 63
+; RV64IM-NEXT: srai a2, a2, 11
+; RV64IM-NEXT: add a2, a2, a5
; RV64IM-NEXT: lui a5, 1
; RV64IM-NEXT: addiw a5, a5, 1327
-; RV64IM-NEXT: mul a3, a3, a5
-; RV64IM-NEXT: sub a4, a4, a3
+; RV64IM-NEXT: mul a2, a2, a5
+; RV64IM-NEXT: sub a1, a1, a2
; RV64IM-NEXT: sd zero, 0(a0)
-; RV64IM-NEXT: sd a4, 24(a0)
-; RV64IM-NEXT: sd a1, 8(a0)
-; RV64IM-NEXT: sd a2, 16(a0)
+; RV64IM-NEXT: sd a1, 24(a0)
+; RV64IM-NEXT: sd a4, 8(a0)
+; RV64IM-NEXT: sd a3, 16(a0)
; RV64IM-NEXT: ret
%1 = srem <4 x i64> %x, <i64 1, i64 654, i64 23, i64 5423>
ret <4 x i64> %1
diff --git a/llvm/test/CodeGen/RISCV/stack-store-check.ll b/llvm/test/CodeGen/RISCV/stack-store-check.ll
index 91cfb2a4cef706..b51a759a87b859 100644
--- a/llvm/test/CodeGen/RISCV/stack-store-check.ll
+++ b/llvm/test/CodeGen/RISCV/stack-store-check.ll
@@ -143,15 +143,15 @@ define void @main() local_unnamed_addr nounwind {
; CHECK-NEXT: addi a2, sp, 392
; CHECK-NEXT: sw a3, 392(sp)
; CHECK-NEXT: call __subtf3
-; CHECK-NEXT: lw a0, 424(sp)
+; CHECK-NEXT: lw a0, 432(sp)
; CHECK-NEXT: lw a1, 436(sp)
-; CHECK-NEXT: lw a2, 432(sp)
+; CHECK-NEXT: lw a2, 424(sp)
; CHECK-NEXT: lw a3, 428(sp)
; CHECK-NEXT: lui a4, %hi(X)
; CHECK-NEXT: sw a1, %lo(X+12)(a4)
-; CHECK-NEXT: sw a2, %lo(X+8)(a4)
+; CHECK-NEXT: sw a0, %lo(X+8)(a4)
; CHECK-NEXT: sw a3, %lo(X+4)(a4)
-; CHECK-NEXT: sw a0, %lo(X)(a4)
+; CHECK-NEXT: sw a2, %lo(X)(a4)
; CHECK-NEXT: lw s8, 4(sp) # 4-byte Folded Reload
; CHECK-NEXT: sw s8, 212(sp)
; CHECK-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
@@ -190,15 +190,15 @@ define void @main() local_unnamed_addr nounwind {
; CHECK-NEXT: addi a2, sp, 344
; CHECK-NEXT: sw s9, 360(sp)
; CHECK-NEXT: call __multf3
-; CHECK-NEXT: lw a0, 376(sp)
+; CHECK-NEXT: lw a0, 384(sp)
; CHECK-NEXT: lw a1, 388(sp)
-; CHECK-NEXT: lw a2, 384(sp)
+; CHECK-NEXT: lw a2, 376(sp)
; CHECK-NEXT: lw a3, 380(sp)
; CHECK-NEXT: lui a4, %hi(S)
; CHECK-NEXT: sw a1, %lo(S+12)(a4)
-; CHECK-NEXT: sw a2, %lo(S+8)(a4)
+; CHECK-NEXT: sw a0, %lo(S+8)(a4)
; CHECK-NEXT: sw a3, %lo(S+4)(a4)
-; CHECK-NEXT: sw a0, %lo(S)(a4)
+; CHECK-NEXT: sw a2, %lo(S)(a4)
; CHECK-NEXT: lw a0, 48(sp) # 4-byte Folded Reload
; CHECK-NEXT: sw a0, 260(sp)
; CHECK-NEXT: sw s10, 256(sp)
@@ -216,15 +216,15 @@ define void @main() local_unnamed_addr nounwind {
; CHECK-NEXT: lw a3, 44(sp) # 4-byte Folded Reload
; CHECK-NEXT: sw a3, 264(sp)
; CHECK-NEXT: call __subtf3
-; CHECK-NEXT: lw a0, 280(sp)
+; CHECK-NEXT: lw a0, 288(sp)
; CHECK-NEXT: lw a1, 292(sp)
-; CHECK-NEXT: lw a2, 288(sp)
+; CHECK-NEXT: lw a2, 280(sp)
; CHECK-NEXT: lw a3, 284(sp)
; CHECK-NEXT: lui a4, %hi(T)
; CHECK-NEXT: sw a1, %lo(T+12)(a4)
-; CHECK-NEXT: sw a2, %lo(T+8)(a4)
+; CHECK-NEXT: sw a0, %lo(T+8)(a4)
; CHECK-NEXT: sw a3, %lo(T+4)(a4)
-; CHECK-NEXT: sw a0, %lo(T)(a4)
+; CHECK-NEXT: sw a2, %lo(T)(a4)
; CHECK-NEXT: sw zero, 164(sp)
; CHECK-NEXT: sw zero, 160(sp)
; CHECK-NEXT: sw zero, 156(sp)
@@ -238,15 +238,15 @@ define void @main() local_unnamed_addr nounwind {
; CHECK-NEXT: addi a2, sp, 152
; CHECK-NEXT: sw s1, 168(sp)
; CHECK-NEXT: call __addtf3
-; CHECK-NEXT: lw a0, 184(sp)
+; CHECK-NEXT: lw a0, 192(sp)
; CHECK-NEXT: lw a1, 196(sp)
-; CHECK-NEXT: lw a2, 192(sp)
+; CHECK-NEXT: lw a2, 184(sp)
; CHECK-NEXT: lw a3, 188(sp)
; CHECK-NEXT: lui a4, %hi(Y)
; CHECK-NEXT: sw a1, %lo(Y+12)(a4)
-; CHECK-NEXT: sw a2, %lo(Y+8)(a4)
+; CHECK-NEXT: sw a0, %lo(Y+8)(a4)
; CHECK-NEXT: sw a3, %lo(Y+4)(a4)
-; CHECK-NEXT: sw a0, %lo(Y)(a4)
+; CHECK-NEXT: sw a2, %lo(Y)(a4)
; CHECK-NEXT: sw zero, 116(sp)
; CHECK-NEXT: sw zero, 112(sp)
; CHECK-NEXT: sw zero, 108(sp)
diff --git a/llvm/test/CodeGen/RISCV/ucmp.ll b/llvm/test/CodeGen/RISCV/ucmp.ll
index c74bc6838ff7df..50da56fbc59518 100644
--- a/llvm/test/CodeGen/RISCV/ucmp.ll
+++ b/llvm/test/CodeGen/RISCV/ucmp.ll
@@ -87,10 +87,10 @@ define i8 @ucmp.8.128(i128 %x, i128 %y) nounwind {
; RV32I-LABEL: ucmp.8.128:
; RV32I: # %bb.0:
; RV32I-NEXT: lw a2, 4(a1)
-; RV32I-NEXT: lw a3, 4(a0)
; RV32I-NEXT: lw a4, 8(a1)
; RV32I-NEXT: lw a5, 12(a1)
; RV32I-NEXT: lw a6, 12(a0)
+; RV32I-NEXT: lw a3, 4(a0)
; RV32I-NEXT: lw a7, 8(a0)
; RV32I-NEXT: beq a6, a5, .LBB4_2
; RV32I-NEXT: # %bb.1:
diff --git a/llvm/test/CodeGen/RISCV/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/RISCV/umulo-128-legalisation-lowering.ll
index f1ae3200175636..dde69667b8ec30 100644
--- a/llvm/test/CodeGen/RISCV/umulo-128-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/RISCV/umulo-128-legalisation-lowering.ll
@@ -10,47 +10,47 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) #0 {
; RISCV32-NEXT: sw s2, 20(sp) # 4-byte Folded Spill
; RISCV32-NEXT: sw s3, 16(sp) # 4-byte Folded Spill
; RISCV32-NEXT: sw s4, 12(sp) # 4-byte Folded Spill
-; RISCV32-NEXT: lw a3, 12(a1)
-; RISCV32-NEXT: lw a7, 12(a2)
-; RISCV32-NEXT: lw a6, 8(a1)
-; RISCV32-NEXT: lw a4, 0(a2)
-; RISCV32-NEXT: lw a5, 0(a1)
+; RISCV32-NEXT: lw a3, 0(a1)
; RISCV32-NEXT: lw t2, 4(a1)
-; RISCV32-NEXT: lw t0, 8(a2)
-; RISCV32-NEXT: lw a2, 4(a2)
-; RISCV32-NEXT: mulhu a1, a5, a4
-; RISCV32-NEXT: mul t1, t2, a4
-; RISCV32-NEXT: add a1, t1, a1
-; RISCV32-NEXT: sltu t1, a1, t1
-; RISCV32-NEXT: mulhu t3, t2, a4
+; RISCV32-NEXT: lw a4, 8(a1)
+; RISCV32-NEXT: lw a5, 12(a1)
+; RISCV32-NEXT: lw a1, 0(a2)
+; RISCV32-NEXT: lw t0, 4(a2)
+; RISCV32-NEXT: lw a6, 8(a2)
+; RISCV32-NEXT: lw a7, 12(a2)
+; RISCV32-NEXT: mulhu a2, a3, a1
+; RISCV32-NEXT: mul t1, t2, a1
+; RISCV32-NEXT: add a2, t1, a2
+; RISCV32-NEXT: sltu t1, a2, t1
+; RISCV32-NEXT: mulhu t3, t2, a1
; RISCV32-NEXT: add t4, t3, t1
-; RISCV32-NEXT: mul t1, a5, a2
-; RISCV32-NEXT: add a1, t1, a1
-; RISCV32-NEXT: sltu t1, a1, t1
-; RISCV32-NEXT: mulhu t3, a5, a2
+; RISCV32-NEXT: mul t1, a3, t0
+; RISCV32-NEXT: add a2, t1, a2
+; RISCV32-NEXT: sltu t1, a2, t1
+; RISCV32-NEXT: mulhu t3, a3, t0
; RISCV32-NEXT: add t1, t3, t1
; RISCV32-NEXT: add t5, t4, t1
-; RISCV32-NEXT: mul t6, t2, a2
+; RISCV32-NEXT: mul t6, t2, t0
; RISCV32-NEXT: add s0, t6, t5
-; RISCV32-NEXT: mul t1, t0, a5
-; RISCV32-NEXT: mul s3, a6, a4
+; RISCV32-NEXT: mul t1, a6, a3
+; RISCV32-NEXT: mul s3, a4, a1
; RISCV32-NEXT: add s4, s3, t1
; RISCV32-NEXT: add t1, s0, s4
; RISCV32-NEXT: sltu t3, t1, s0
; RISCV32-NEXT: sltu s0, s0, t6
; RISCV32-NEXT: sltu t4, t5, t4
-; RISCV32-NEXT: mulhu t5, t2, a2
+; RISCV32-NEXT: mulhu t5, t2, t0
; RISCV32-NEXT: add t4, t5, t4
; RISCV32-NEXT: add s0, t4, s0
-; RISCV32-NEXT: mul t4, t2, t0
-; RISCV32-NEXT: mul t5, a7, a5
+; RISCV32-NEXT: mul t4, t2, a6
+; RISCV32-NEXT: mul t5, a7, a3
; RISCV32-NEXT: add t4, t5, t4
-; RISCV32-NEXT: mulhu s1, t0, a5
+; RISCV32-NEXT: mulhu s1, a6, a3
; RISCV32-NEXT: add s2, s1, t4
-; RISCV32-NEXT: mul t4, a2, a6
-; RISCV32-NEXT: mul t5, a3, a4
+; RISCV32-NEXT: mul t4, t0, a4
+; RISCV32-NEXT: mul t5, a5, a1
; RISCV32-NEXT: add t4, t5, t4
-; RISCV32-NEXT: mulhu t5, a6, a4
+; RISCV32-NEXT: mulhu t5, a4, a1
; RISCV32-NEXT: add t6, t5, t4
; RISCV32-NEXT: add t4, t6, s2
; RISCV32-NEXT: sltu s3, s4, s3
@@ -65,39 +65,39 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) #0 {
; RISCV32-NEXT: snez s1, t2
; RISCV32-NEXT: snez s2, a7
; RISCV32-NEXT: and s1, s2, s1
-; RISCV32-NEXT: mulhu s2, a7, a5
+; RISCV32-NEXT: mulhu s2, a7, a3
; RISCV32-NEXT: snez s2, s2
; RISCV32-NEXT: or s1, s1, s2
-; RISCV32-NEXT: mulhu t2, t2, t0
+; RISCV32-NEXT: mulhu t2, t2, a6
; RISCV32-NEXT: snez t2, t2
; RISCV32-NEXT: or t2, s1, t2
; RISCV32-NEXT: or t2, t2, s0
; RISCV32-NEXT: sltu t5, t6, t5
-; RISCV32-NEXT: snez t6, a2
-; RISCV32-NEXT: snez s0, a3
+; RISCV32-NEXT: snez t6, t0
+; RISCV32-NEXT: snez s0, a5
; RISCV32-NEXT: and t6, s0, t6
-; RISCV32-NEXT: mulhu s0, a3, a4
+; RISCV32-NEXT: mulhu s0, a5, a1
; RISCV32-NEXT: snez s0, s0
; RISCV32-NEXT: or t6, t6, s0
-; RISCV32-NEXT: mulhu a2, a2, a6
-; RISCV32-NEXT: snez a2, a2
-; RISCV32-NEXT: or a2, t6, a2
-; RISCV32-NEXT: or a2, a2, t5
-; RISCV32-NEXT: or a7, t0, a7
-; RISCV32-NEXT: snez a7, a7
-; RISCV32-NEXT: or a3, a6, a3
-; RISCV32-NEXT: snez a3, a3
-; RISCV32-NEXT: and a3, a3, a7
-; RISCV32-NEXT: or a2, a3, a2
-; RISCV32-NEXT: or a2, a2, t2
-; RISCV32-NEXT: or a2, a2, t3
-; RISCV32-NEXT: mul a3, a5, a4
-; RISCV32-NEXT: andi a2, a2, 1
-; RISCV32-NEXT: sw a3, 0(a0)
-; RISCV32-NEXT: sw a1, 4(a0)
+; RISCV32-NEXT: mulhu t0, t0, a4
+; RISCV32-NEXT: snez t0, t0
+; RISCV32-NEXT: or t0, t6, t0
+; RISCV32-NEXT: or t0, t0, t5
+; RISCV32-NEXT: or a6, a6, a7
+; RISCV32-NEXT: snez a6, a6
+; RISCV32-NEXT: or a4, a4, a5
+; RISCV32-NEXT: snez a4, a4
+; RISCV32-NEXT: and a4, a4, a6
+; RISCV32-NEXT: or a4, a4, t0
+; RISCV32-NEXT: or a4, a4, t2
+; RISCV32-NEXT: or a4, a4, t3
+; RISCV32-NEXT: mul a1, a3, a1
+; RISCV32-NEXT: andi a4, a4, 1
+; RISCV32-NEXT: sw a1, 0(a0)
+; RISCV32-NEXT: sw a2, 4(a0)
; RISCV32-NEXT: sw t1, 8(a0)
; RISCV32-NEXT: sw t4, 12(a0)
-; RISCV32-NEXT: sb a2, 16(a0)
+; RISCV32-NEXT: sb a4, 16(a0)
; RISCV32-NEXT: lw s0, 28(sp) # 4-byte Folded Reload
; RISCV32-NEXT: lw s1, 24(sp) # 4-byte Folded Reload
; RISCV32-NEXT: lw s2, 20(sp) # 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/unaligned-load-store.ll b/llvm/test/CodeGen/RISCV/unaligned-load-store.ll
index 9af18428adf196..74d34b2b64d41f 100644
--- a/llvm/test/CodeGen/RISCV/unaligned-load-store.ll
+++ b/llvm/test/CodeGen/RISCV/unaligned-load-store.ll
@@ -54,19 +54,19 @@ define i24 @load_i24(ptr %p) {
;
; RV32IZBKB-LABEL: load_i24:
; RV32IZBKB: # %bb.0:
-; RV32IZBKB-NEXT: lbu a1, 1(a0)
-; RV32IZBKB-NEXT: lbu a2, 0(a0)
+; RV32IZBKB-NEXT: lbu a1, 0(a0)
+; RV32IZBKB-NEXT: lbu a2, 1(a0)
; RV32IZBKB-NEXT: lbu a0, 2(a0)
-; RV32IZBKB-NEXT: packh a1, a2, a1
+; RV32IZBKB-NEXT: packh a1, a1, a2
; RV32IZBKB-NEXT: pack a0, a1, a0
; RV32IZBKB-NEXT: ret
;
; RV64IZBKB-LABEL: load_i24:
; RV64IZBKB: # %bb.0:
-; RV64IZBKB-NEXT: lbu a1, 1(a0)
-; RV64IZBKB-NEXT: lbu a2, 0(a0)
+; RV64IZBKB-NEXT: lbu a1, 0(a0)
+; RV64IZBKB-NEXT: lbu a2, 1(a0)
; RV64IZBKB-NEXT: lbu a0, 2(a0)
-; RV64IZBKB-NEXT: packh a1, a2, a1
+; RV64IZBKB-NEXT: packh a1, a1, a2
; RV64IZBKB-NEXT: slli a0, a0, 16
; RV64IZBKB-NEXT: or a0, a1, a0
; RV64IZBKB-NEXT: ret
@@ -99,11 +99,11 @@ define i32 @load_i32(ptr %p) {
;
; SLOWZBKB-LABEL: load_i32:
; SLOWZBKB: # %bb.0:
-; SLOWZBKB-NEXT: lbu a1, 1(a0)
-; SLOWZBKB-NEXT: lbu a2, 0(a0)
+; SLOWZBKB-NEXT: lbu a1, 0(a0)
+; SLOWZBKB-NEXT: lbu a2, 1(a0)
; SLOWZBKB-NEXT: lbu a3, 2(a0)
; SLOWZBKB-NEXT: lbu a0, 3(a0)
-; SLOWZBKB-NEXT: packh a1, a2, a1
+; SLOWZBKB-NEXT: packh a1, a1, a2
; SLOWZBKB-NEXT: slli a3, a3, 16
; SLOWZBKB-NEXT: slli a0, a0, 24
; SLOWZBKB-NEXT: or a0, a0, a3
@@ -130,17 +130,17 @@ define i64 @load_i64(ptr %p) {
; RV32I-NEXT: slli a3, a3, 16
; RV32I-NEXT: slli a4, a4, 24
; RV32I-NEXT: or a2, a4, a3
-; RV32I-NEXT: or a2, a2, a1
-; RV32I-NEXT: lbu a1, 5(a0)
; RV32I-NEXT: lbu a3, 4(a0)
-; RV32I-NEXT: lbu a4, 6(a0)
+; RV32I-NEXT: lbu a4, 5(a0)
+; RV32I-NEXT: or a2, a2, a1
+; RV32I-NEXT: lbu a1, 6(a0)
; RV32I-NEXT: lbu a0, 7(a0)
-; RV32I-NEXT: slli a1, a1, 8
-; RV32I-NEXT: or a1, a1, a3
-; RV32I-NEXT: slli a4, a4, 16
+; RV32I-NEXT: slli a4, a4, 8
+; RV32I-NEXT: or a3, a4, a3
+; RV32I-NEXT: slli a1, a1, 16
; RV32I-NEXT: slli a0, a0, 24
-; RV32I-NEXT: or a0, a0, a4
-; RV32I-NEXT: or a1, a0, a1
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: or a1, a0, a3
; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: ret
;
@@ -155,16 +155,16 @@ define i64 @load_i64(ptr %p) {
; RV64I-NEXT: slli a3, a3, 16
; RV64I-NEXT: slli a4, a4, 24
; RV64I-NEXT: or a3, a4, a3
+; RV64I-NEXT: lbu a2, 4(a0)
+; RV64I-NEXT: lbu a4, 5(a0)
; RV64I-NEXT: or a1, a3, a1
-; RV64I-NEXT: lbu a2, 5(a0)
-; RV64I-NEXT: lbu a3, 4(a0)
-; RV64I-NEXT: lbu a4, 6(a0)
+; RV64I-NEXT: lbu a3, 6(a0)
; RV64I-NEXT: lbu a0, 7(a0)
-; RV64I-NEXT: slli a2, a2, 8
-; RV64I-NEXT: or a2, a2, a3
-; RV64I-NEXT: slli a4, a4, 16
+; RV64I-NEXT: slli a4, a4, 8
+; RV64I-NEXT: or a2, a4, a2
+; RV64I-NEXT: slli a3, a3, 16
; RV64I-NEXT: slli a0, a0, 24
-; RV64I-NEXT: or a0, a0, a4
+; RV64I-NEXT: or a0, a0, a3
; RV64I-NEXT: or a0, a0, a2
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: or a0, a0, a1
@@ -172,20 +172,20 @@ define i64 @load_i64(ptr %p) {
;
; RV32IZBKB-LABEL: load_i64:
; RV32IZBKB: # %bb.0:
-; RV32IZBKB-NEXT: lbu a1, 1(a0)
-; RV32IZBKB-NEXT: lbu a2, 0(a0)
+; RV32IZBKB-NEXT: lbu a1, 0(a0)
+; RV32IZBKB-NEXT: lbu a2, 1(a0)
; RV32IZBKB-NEXT: lbu a3, 2(a0)
; RV32IZBKB-NEXT: lbu a4, 3(a0)
-; RV32IZBKB-NEXT: packh a1, a2, a1
+; RV32IZBKB-NEXT: packh a1, a1, a2
; RV32IZBKB-NEXT: slli a3, a3, 16
; RV32IZBKB-NEXT: slli a4, a4, 24
; RV32IZBKB-NEXT: or a3, a4, a3
-; RV32IZBKB-NEXT: lbu a2, 5(a0)
-; RV32IZBKB-NEXT: lbu a4, 4(a0)
+; RV32IZBKB-NEXT: lbu a2, 4(a0)
+; RV32IZBKB-NEXT: lbu a4, 5(a0)
; RV32IZBKB-NEXT: lbu a5, 6(a0)
; RV32IZBKB-NEXT: lbu a6, 7(a0)
; RV32IZBKB-NEXT: or a0, a3, a1
-; RV32IZBKB-NEXT: packh a1, a4, a2
+; RV32IZBKB-NEXT: packh a1, a2, a4
; RV32IZBKB-NEXT: slli a5, a5, 16
; RV32IZBKB-NEXT: slli a6, a6, 24
; RV32IZBKB-NEXT: or a2, a6, a5
@@ -194,20 +194,20 @@ define i64 @load_i64(ptr %p) {
;
; RV64IZBKB-LABEL: load_i64:
; RV64IZBKB: # %bb.0:
-; RV64IZBKB-NEXT: lbu a1, 5(a0)
-; RV64IZBKB-NEXT: lbu a2, 4(a0)
+; RV64IZBKB-NEXT: lbu a1, 4(a0)
+; RV64IZBKB-NEXT: lbu a2, 5(a0)
; RV64IZBKB-NEXT: lbu a3, 6(a0)
; RV64IZBKB-NEXT: lbu a4, 7(a0)
-; RV64IZBKB-NEXT: packh a1, a2, a1
+; RV64IZBKB-NEXT: packh a1, a1, a2
; RV64IZBKB-NEXT: slli a3, a3, 16
; RV64IZBKB-NEXT: slli a4, a4, 24
; RV64IZBKB-NEXT: or a3, a4, a3
-; RV64IZBKB-NEXT: lbu a2, 1(a0)
-; RV64IZBKB-NEXT: lbu a4, 0(a0)
+; RV64IZBKB-NEXT: lbu a2, 0(a0)
+; RV64IZBKB-NEXT: lbu a4, 1(a0)
; RV64IZBKB-NEXT: lbu a5, 2(a0)
; RV64IZBKB-NEXT: lbu a0, 3(a0)
; RV64IZBKB-NEXT: or a1, a3, a1
-; RV64IZBKB-NEXT: packh a2, a4, a2
+; RV64IZBKB-NEXT: packh a2, a2, a4
; RV64IZBKB-NEXT: slli a5, a5, 16
; RV64IZBKB-NEXT: slli a0, a0, 24
; RV64IZBKB-NEXT: or a0, a0, a5
diff --git a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
index c016e8f3163635..5a5ae66b5fa767 100644
--- a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
@@ -522,10 +522,10 @@ define void @test_urem_vec(ptr %X) nounwind {
; RV32MV-LABEL: test_urem_vec:
; RV32MV: # %bb.0:
; RV32MV-NEXT: lw a1, 0(a0)
-; RV32MV-NEXT: andi a2, a1, 2047
-; RV32MV-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV32MV-NEXT: vmv.v.x v8, a2
; RV32MV-NEXT: lbu a2, 4(a0)
+; RV32MV-NEXT: andi a3, a1, 2047
+; RV32MV-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV32MV-NEXT: vmv.v.x v8, a3
; RV32MV-NEXT: slli a3, a1, 10
; RV32MV-NEXT: srli a3, a3, 21
; RV32MV-NEXT: vslide1down.vx v8, v8, a3
diff --git a/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll b/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll
index c057c656e0fb70..b0e790ed606350 100644
--- a/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll
+++ b/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll
@@ -19,29 +19,29 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) nounwind {
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: lhu s0, 12(a1)
-; RV32I-NEXT: lhu s1, 8(a1)
-; RV32I-NEXT: lhu s2, 4(a1)
; RV32I-NEXT: lhu a2, 0(a1)
+; RV32I-NEXT: lhu s0, 4(a1)
+; RV32I-NEXT: lhu s1, 8(a1)
+; RV32I-NEXT: lhu s2, 12(a1)
; RV32I-NEXT: mv s3, a0
; RV32I-NEXT: li a1, 95
; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: call __umodsi3
; RV32I-NEXT: mv s4, a0
; RV32I-NEXT: li a1, 124
-; RV32I-NEXT: mv a0, s2
+; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __umodsi3
-; RV32I-NEXT: mv s2, a0
+; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: li a1, 98
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: call __umodsi3
; RV32I-NEXT: mv s1, a0
; RV32I-NEXT: li a1, 1003
-; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a0, s2
; RV32I-NEXT: call __umodsi3
; RV32I-NEXT: sh a0, 6(s3)
; RV32I-NEXT: sh s1, 4(s3)
-; RV32I-NEXT: sh s2, 2(s3)
+; RV32I-NEXT: sh s0, 2(s3)
; RV32I-NEXT: sh s4, 0(s3)
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
@@ -54,39 +54,39 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) nounwind {
;
; RV32IM-LABEL: fold_urem_vec_1:
; RV32IM: # %bb.0:
-; RV32IM-NEXT: lhu a2, 12(a1)
-; RV32IM-NEXT: lhu a3, 8(a1)
-; RV32IM-NEXT: lhu a4, 4(a1)
-; RV32IM-NEXT: lhu a1, 0(a1)
+; RV32IM-NEXT: lhu a2, 0(a1)
+; RV32IM-NEXT: lhu a3, 4(a1)
+; RV32IM-NEXT: lhu a4, 8(a1)
+; RV32IM-NEXT: lhu a1, 12(a1)
; RV32IM-NEXT: lui a5, 8456
; RV32IM-NEXT: addi a5, a5, 1058
-; RV32IM-NEXT: mulhu a5, a4, a5
+; RV32IM-NEXT: mulhu a5, a3, a5
; RV32IM-NEXT: slli a6, a5, 7
; RV32IM-NEXT: slli a5, a5, 2
; RV32IM-NEXT: sub a5, a5, a6
-; RV32IM-NEXT: add a4, a4, a5
+; RV32IM-NEXT: add a3, a3, a5
; RV32IM-NEXT: lui a5, 11038
; RV32IM-NEXT: addi a5, a5, -1465
-; RV32IM-NEXT: mulhu a5, a1, a5
+; RV32IM-NEXT: mulhu a5, a2, a5
; RV32IM-NEXT: li a6, 95
; RV32IM-NEXT: mul a5, a5, a6
-; RV32IM-NEXT: sub a1, a1, a5
+; RV32IM-NEXT: sub a2, a2, a5
; RV32IM-NEXT: lui a5, 10700
; RV32IM-NEXT: addi a5, a5, -1003
-; RV32IM-NEXT: mulhu a5, a3, a5
+; RV32IM-NEXT: mulhu a5, a4, a5
; RV32IM-NEXT: li a6, 98
; RV32IM-NEXT: mul a5, a5, a6
-; RV32IM-NEXT: sub a3, a3, a5
+; RV32IM-NEXT: sub a4, a4, a5
; RV32IM-NEXT: lui a5, 1045
; RV32IM-NEXT: addi a5, a5, 1801
-; RV32IM-NEXT: mulhu a5, a2, a5
+; RV32IM-NEXT: mulhu a5, a1, a5
; RV32IM-NEXT: li a6, 1003
; RV32IM-NEXT: mul a5, a5, a6
-; RV32IM-NEXT: sub a2, a2, a5
-; RV32IM-NEXT: sh a2, 6(a0)
-; RV32IM-NEXT: sh a3, 4(a0)
-; RV32IM-NEXT: sh a1, 0(a0)
-; RV32IM-NEXT: sh a4, 2(a0)
+; RV32IM-NEXT: sub a1, a1, a5
+; RV32IM-NEXT: sh a1, 6(a0)
+; RV32IM-NEXT: sh a4, 4(a0)
+; RV32IM-NEXT: sh a2, 0(a0)
+; RV32IM-NEXT: sh a3, 2(a0)
; RV32IM-NEXT: ret
;
; RV64I-LABEL: fold_urem_vec_1:
@@ -98,29 +98,29 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) nounwind {
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s4, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT: lhu s0, 24(a1)
-; RV64I-NEXT: lhu s1, 16(a1)
-; RV64I-NEXT: lhu s2, 8(a1)
; RV64I-NEXT: lhu a2, 0(a1)
+; RV64I-NEXT: lhu s0, 8(a1)
+; RV64I-NEXT: lhu s1, 16(a1)
+; RV64I-NEXT: lhu s2, 24(a1)
; RV64I-NEXT: mv s3, a0
; RV64I-NEXT: li a1, 95
; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: call __umoddi3
; RV64I-NEXT: mv s4, a0
; RV64I-NEXT: li a1, 124
-; RV64I-NEXT: mv a0, s2
+; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __umoddi3
-; RV64I-NEXT: mv s2, a0
+; RV64I-NEXT: mv s0, a0
; RV64I-NEXT: li a1, 98
; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: call __umoddi3
; RV64I-NEXT: mv s1, a0
; RV64I-NEXT: li a1, 1003
-; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv a0, s2
; RV64I-NEXT: call __umoddi3
; RV64I-NEXT: sh a0, 6(s3)
; RV64I-NEXT: sh s1, 4(s3)
-; RV64I-NEXT: sh s2, 2(s3)
+; RV64I-NEXT: sh s0, 2(s3)
; RV64I-NEXT: sh s4, 0(s3)
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
@@ -133,38 +133,38 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) nounwind {
;
; RV64IM-LABEL: fold_urem_vec_1:
; RV64IM: # %bb.0:
-; RV64IM-NEXT: lhu a2, 8(a1)
-; RV64IM-NEXT: lui a3, %hi(.LCPI0_0)
-; RV64IM-NEXT: ld a3, %lo(.LCPI0_0)(a3)
-; RV64IM-NEXT: lhu a4, 24(a1)
+; RV64IM-NEXT: lui a2, %hi(.LCPI0_0)
+; RV64IM-NEXT: ld a2, %lo(.LCPI0_0)(a2)
+; RV64IM-NEXT: lhu a3, 8(a1)
+; RV64IM-NEXT: lhu a4, 0(a1)
; RV64IM-NEXT: lhu a5, 16(a1)
-; RV64IM-NEXT: lhu a1, 0(a1)
-; RV64IM-NEXT: mulhu a3, a2, a3
-; RV64IM-NEXT: slli a6, a3, 7
+; RV64IM-NEXT: lhu a1, 24(a1)
+; RV64IM-NEXT: mulhu a2, a3, a2
+; RV64IM-NEXT: slli a6, a2, 7
; RV64IM-NEXT: lui a7, %hi(.LCPI0_1)
; RV64IM-NEXT: ld a7, %lo(.LCPI0_1)(a7)
-; RV64IM-NEXT: slli a3, a3, 2
-; RV64IM-NEXT: subw a3, a3, a6
-; RV64IM-NEXT: add a2, a2, a3
-; RV64IM-NEXT: mulhu a3, a1, a7
+; RV64IM-NEXT: slli a2, a2, 2
+; RV64IM-NEXT: subw a2, a2, a6
+; RV64IM-NEXT: add a2, a3, a2
+; RV64IM-NEXT: mulhu a3, a4, a7
; RV64IM-NEXT: lui a6, %hi(.LCPI0_2)
; RV64IM-NEXT: ld a6, %lo(.LCPI0_2)(a6)
; RV64IM-NEXT: li a7, 95
; RV64IM-NEXT: mul a3, a3, a7
-; RV64IM-NEXT: subw a1, a1, a3
+; RV64IM-NEXT: subw a4, a4, a3
; RV64IM-NEXT: mulhu a3, a5, a6
; RV64IM-NEXT: lui a6, %hi(.LCPI0_3)
; RV64IM-NEXT: ld a6, %lo(.LCPI0_3)(a6)
; RV64IM-NEXT: li a7, 98
; RV64IM-NEXT: mul a3, a3, a7
; RV64IM-NEXT: subw a5, a5, a3
-; RV64IM-NEXT: mulhu a3, a4, a6
+; RV64IM-NEXT: mulhu a3, a1, a6
; RV64IM-NEXT: li a6, 1003
; RV64IM-NEXT: mul a3, a3, a6
-; RV64IM-NEXT: subw a4, a4, a3
-; RV64IM-NEXT: sh a4, 6(a0)
+; RV64IM-NEXT: subw a1, a1, a3
+; RV64IM-NEXT: sh a1, 6(a0)
; RV64IM-NEXT: sh a5, 4(a0)
-; RV64IM-NEXT: sh a1, 0(a0)
+; RV64IM-NEXT: sh a4, 0(a0)
; RV64IM-NEXT: sh a2, 2(a0)
; RV64IM-NEXT: ret
%1 = urem <4 x i16> %x, <i16 95, i16 124, i16 98, i16 1003>
@@ -181,29 +181,29 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) nounwind {
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: lhu s0, 12(a1)
-; RV32I-NEXT: lhu s1, 8(a1)
-; RV32I-NEXT: lhu s2, 4(a1)
; RV32I-NEXT: lhu a2, 0(a1)
+; RV32I-NEXT: lhu s0, 4(a1)
+; RV32I-NEXT: lhu s1, 8(a1)
+; RV32I-NEXT: lhu s2, 12(a1)
; RV32I-NEXT: mv s3, a0
; RV32I-NEXT: li a1, 95
; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: call __umodsi3
; RV32I-NEXT: mv s4, a0
; RV32I-NEXT: li a1, 95
-; RV32I-NEXT: mv a0, s2
+; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __umodsi3
-; RV32I-NEXT: mv s2, a0
+; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: li a1, 95
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: call __umodsi3
; RV32I-NEXT: mv s1, a0
; RV32I-NEXT: li a1, 95
-; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a0, s2
; RV32I-NEXT: call __umodsi3
; RV32I-NEXT: sh a0, 6(s3)
; RV32I-NEXT: sh s1, 4(s3)
-; RV32I-NEXT: sh s2, 2(s3)
+; RV32I-NEXT: sh s0, 2(s3)
; RV32I-NEXT: sh s4, 0(s3)
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
@@ -216,29 +216,29 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) nounwind {
;
; RV32IM-LABEL: fold_urem_vec_2:
; RV32IM: # %bb.0:
-; RV32IM-NEXT: lhu a2, 12(a1)
-; RV32IM-NEXT: lhu a3, 8(a1)
-; RV32IM-NEXT: lhu a4, 0(a1)
-; RV32IM-NEXT: lhu a1, 4(a1)
+; RV32IM-NEXT: lhu a2, 0(a1)
+; RV32IM-NEXT: lhu a3, 4(a1)
+; RV32IM-NEXT: lhu a4, 8(a1)
+; RV32IM-NEXT: lhu a1, 12(a1)
; RV32IM-NEXT: lui a5, 11038
; RV32IM-NEXT: addi a5, a5, -1465
-; RV32IM-NEXT: mulhu a6, a4, a5
+; RV32IM-NEXT: mulhu a6, a2, a5
; RV32IM-NEXT: li a7, 95
; RV32IM-NEXT: mul a6, a6, a7
-; RV32IM-NEXT: sub a4, a4, a6
-; RV32IM-NEXT: mulhu a6, a1, a5
-; RV32IM-NEXT: mul a6, a6, a7
-; RV32IM-NEXT: sub a1, a1, a6
+; RV32IM-NEXT: sub a2, a2, a6
; RV32IM-NEXT: mulhu a6, a3, a5
; RV32IM-NEXT: mul a6, a6, a7
; RV32IM-NEXT: sub a3, a3, a6
-; RV32IM-NEXT: mulhu a5, a2, a5
+; RV32IM-NEXT: mulhu a6, a4, a5
+; RV32IM-NEXT: mul a6, a6, a7
+; RV32IM-NEXT: sub a4, a4, a6
+; RV32IM-NEXT: mulhu a5, a1, a5
; RV32IM-NEXT: mul a5, a5, a7
-; RV32IM-NEXT: sub a2, a2, a5
-; RV32IM-NEXT: sh a2, 6(a0)
-; RV32IM-NEXT: sh a3, 4(a0)
-; RV32IM-NEXT: sh a1, 2(a0)
-; RV32IM-NEXT: sh a4, 0(a0)
+; RV32IM-NEXT: sub a1, a1, a5
+; RV32IM-NEXT: sh a1, 6(a0)
+; RV32IM-NEXT: sh a4, 4(a0)
+; RV32IM-NEXT: sh a3, 2(a0)
+; RV32IM-NEXT: sh a2, 0(a0)
; RV32IM-NEXT: ret
;
; RV64I-LABEL: fold_urem_vec_2:
@@ -250,29 +250,29 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) nounwind {
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s4, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT: lhu s0, 24(a1)
-; RV64I-NEXT: lhu s1, 16(a1)
-; RV64I-NEXT: lhu s2, 8(a1)
; RV64I-NEXT: lhu a2, 0(a1)
+; RV64I-NEXT: lhu s0, 8(a1)
+; RV64I-NEXT: lhu s1, 16(a1)
+; RV64I-NEXT: lhu s2, 24(a1)
; RV64I-NEXT: mv s3, a0
; RV64I-NEXT: li a1, 95
; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: call __umoddi3
; RV64I-NEXT: mv s4, a0
; RV64I-NEXT: li a1, 95
-; RV64I-NEXT: mv a0, s2
+; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __umoddi3
-; RV64I-NEXT: mv s2, a0
+; RV64I-NEXT: mv s0, a0
; RV64I-NEXT: li a1, 95
; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: call __umoddi3
; RV64I-NEXT: mv s1, a0
; RV64I-NEXT: li a1, 95
-; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv a0, s2
; RV64I-NEXT: call __umoddi3
; RV64I-NEXT: sh a0, 6(s3)
; RV64I-NEXT: sh s1, 4(s3)
-; RV64I-NEXT: sh s2, 2(s3)
+; RV64I-NEXT: sh s0, 2(s3)
; RV64I-NEXT: sh s4, 0(s3)
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
@@ -285,29 +285,29 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) nounwind {
;
; RV64IM-LABEL: fold_urem_vec_2:
; RV64IM: # %bb.0:
-; RV64IM-NEXT: lhu a2, 0(a1)
-; RV64IM-NEXT: lui a3, %hi(.LCPI1_0)
-; RV64IM-NEXT: ld a3, %lo(.LCPI1_0)(a3)
-; RV64IM-NEXT: lhu a4, 24(a1)
+; RV64IM-NEXT: lui a2, %hi(.LCPI1_0)
+; RV64IM-NEXT: ld a2, %lo(.LCPI1_0)(a2)
+; RV64IM-NEXT: lhu a3, 0(a1)
+; RV64IM-NEXT: lhu a4, 8(a1)
; RV64IM-NEXT: lhu a5, 16(a1)
-; RV64IM-NEXT: lhu a1, 8(a1)
-; RV64IM-NEXT: mulhu a6, a2, a3
+; RV64IM-NEXT: lhu a1, 24(a1)
+; RV64IM-NEXT: mulhu a6, a3, a2
; RV64IM-NEXT: li a7, 95
; RV64IM-NEXT: mul a6, a6, a7
-; RV64IM-NEXT: subw a2, a2, a6
-; RV64IM-NEXT: mulhu a6, a1, a3
+; RV64IM-NEXT: subw a3, a3, a6
+; RV64IM-NEXT: mulhu a6, a4, a2
; RV64IM-NEXT: mul a6, a6, a7
-; RV64IM-NEXT: subw a1, a1, a6
-; RV64IM-NEXT: mulhu a6, a5, a3
+; RV64IM-NEXT: subw a4, a4, a6
+; RV64IM-NEXT: mulhu a6, a5, a2
; RV64IM-NEXT: mul a6, a6, a7
; RV64IM-NEXT: subw a5, a5, a6
-; RV64IM-NEXT: mulhu a3, a4, a3
-; RV64IM-NEXT: mul a3, a3, a7
-; RV64IM-NEXT: subw a4, a4, a3
-; RV64IM-NEXT: sh a4, 6(a0)
+; RV64IM-NEXT: mulhu a2, a1, a2
+; RV64IM-NEXT: mul a2, a2, a7
+; RV64IM-NEXT: subw a1, a1, a2
+; RV64IM-NEXT: sh a1, 6(a0)
; RV64IM-NEXT: sh a5, 4(a0)
-; RV64IM-NEXT: sh a1, 2(a0)
-; RV64IM-NEXT: sh a2, 0(a0)
+; RV64IM-NEXT: sh a4, 2(a0)
+; RV64IM-NEXT: sh a3, 0(a0)
; RV64IM-NEXT: ret
%1 = urem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
ret <4 x i16> %1
@@ -388,33 +388,33 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) nounwind {
;
; RV32IM-LABEL: combine_urem_udiv:
; RV32IM: # %bb.0:
-; RV32IM-NEXT: lhu a2, 0(a1)
-; RV32IM-NEXT: lhu a3, 4(a1)
-; RV32IM-NEXT: lhu a4, 12(a1)
+; RV32IM-NEXT: lhu a2, 12(a1)
+; RV32IM-NEXT: lhu a3, 0(a1)
+; RV32IM-NEXT: lhu a4, 4(a1)
; RV32IM-NEXT: lhu a1, 8(a1)
; RV32IM-NEXT: lui a5, 11038
; RV32IM-NEXT: addi a5, a5, -1465
-; RV32IM-NEXT: mulhu a6, a4, a5
+; RV32IM-NEXT: mulhu a6, a2, a5
; RV32IM-NEXT: li a7, 95
; RV32IM-NEXT: mul t0, a6, a7
; RV32IM-NEXT: mulhu t1, a1, a5
; RV32IM-NEXT: mul t2, t1, a7
-; RV32IM-NEXT: mulhu t3, a3, a5
+; RV32IM-NEXT: mulhu t3, a4, a5
; RV32IM-NEXT: mul t4, t3, a7
-; RV32IM-NEXT: mulhu a5, a2, a5
+; RV32IM-NEXT: mulhu a5, a3, a5
; RV32IM-NEXT: mul a7, a5, a7
-; RV32IM-NEXT: add a2, a2, a5
-; RV32IM-NEXT: sub a2, a2, a7
-; RV32IM-NEXT: add a3, a3, t3
-; RV32IM-NEXT: sub a3, a3, t4
+; RV32IM-NEXT: add a3, a3, a5
+; RV32IM-NEXT: sub a3, a3, a7
+; RV32IM-NEXT: add a4, a4, t3
+; RV32IM-NEXT: sub a4, a4, t4
; RV32IM-NEXT: add a1, a1, t1
; RV32IM-NEXT: sub a1, a1, t2
-; RV32IM-NEXT: add a4, a4, a6
-; RV32IM-NEXT: sub a4, a4, t0
-; RV32IM-NEXT: sh a4, 6(a0)
+; RV32IM-NEXT: add a2, a2, a6
+; RV32IM-NEXT: sub a2, a2, t0
+; RV32IM-NEXT: sh a2, 6(a0)
; RV32IM-NEXT: sh a1, 4(a0)
-; RV32IM-NEXT: sh a3, 2(a0)
-; RV32IM-NEXT: sh a2, 0(a0)
+; RV32IM-NEXT: sh a4, 2(a0)
+; RV32IM-NEXT: sh a3, 0(a0)
; RV32IM-NEXT: ret
;
; RV64I-LABEL: combine_urem_udiv:
@@ -533,19 +533,19 @@ define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) nounwind {
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: lhu s1, 8(a1)
+; RV32I-NEXT: lhu s1, 0(a1)
; RV32I-NEXT: lhu s2, 4(a1)
-; RV32I-NEXT: lhu s3, 0(a1)
+; RV32I-NEXT: lhu s3, 8(a1)
; RV32I-NEXT: lhu a2, 12(a1)
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: li a1, 95
; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: call __umodsi3
-; RV32I-NEXT: andi a1, s3, 63
+; RV32I-NEXT: andi a1, s1, 63
; RV32I-NEXT: andi a2, s2, 31
-; RV32I-NEXT: andi s1, s1, 7
+; RV32I-NEXT: andi a3, s3, 7
; RV32I-NEXT: sh a0, 6(s0)
-; RV32I-NEXT: sh s1, 4(s0)
+; RV32I-NEXT: sh a3, 4(s0)
; RV32I-NEXT: sh a2, 2(s0)
; RV32I-NEXT: sh a1, 0(s0)
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
@@ -558,8 +558,8 @@ define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) nounwind {
;
; RV32IM-LABEL: dont_fold_urem_power_of_two:
; RV32IM: # %bb.0:
-; RV32IM-NEXT: lhu a2, 8(a1)
-; RV32IM-NEXT: lhu a3, 4(a1)
+; RV32IM-NEXT: lhu a2, 4(a1)
+; RV32IM-NEXT: lhu a3, 8(a1)
; RV32IM-NEXT: lhu a4, 12(a1)
; RV32IM-NEXT: lhu a1, 0(a1)
; RV32IM-NEXT: lui a5, 11038
@@ -569,10 +569,10 @@ define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) nounwind {
; RV32IM-NEXT: mul a5, a5, a6
; RV32IM-NEXT: sub a4, a4, a5
; RV32IM-NEXT: andi a1, a1, 63
-; RV32IM-NEXT: andi a3, a3, 31
-; RV32IM-NEXT: andi a2, a2, 7
-; RV32IM-NEXT: sh a2, 4(a0)
-; RV32IM-NEXT: sh a3, 2(a0)
+; RV32IM-NEXT: andi a2, a2, 31
+; RV32IM-NEXT: andi a3, a3, 7
+; RV32IM-NEXT: sh a3, 4(a0)
+; RV32IM-NEXT: sh a2, 2(a0)
; RV32IM-NEXT: sh a1, 0(a0)
; RV32IM-NEXT: sh a4, 6(a0)
; RV32IM-NEXT: ret
@@ -585,19 +585,19 @@ define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: lhu s1, 16(a1)
+; RV64I-NEXT: lhu s1, 0(a1)
; RV64I-NEXT: lhu s2, 8(a1)
-; RV64I-NEXT: lhu s3, 0(a1)
+; RV64I-NEXT: lhu s3, 16(a1)
; RV64I-NEXT: lhu a2, 24(a1)
; RV64I-NEXT: mv s0, a0
; RV64I-NEXT: li a1, 95
; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: call __umoddi3
-; RV64I-NEXT: andi a1, s3, 63
+; RV64I-NEXT: andi a1, s1, 63
; RV64I-NEXT: andi a2, s2, 31
-; RV64I-NEXT: andi s1, s1, 7
+; RV64I-NEXT: andi a3, s3, 7
; RV64I-NEXT: sh a0, 6(s0)
-; RV64I-NEXT: sh s1, 4(s0)
+; RV64I-NEXT: sh a3, 4(s0)
; RV64I-NEXT: sh a2, 2(s0)
; RV64I-NEXT: sh a1, 0(s0)
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
@@ -642,24 +642,24 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) nounwind {
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: lhu s0, 12(a1)
-; RV32I-NEXT: lhu s1, 8(a1)
; RV32I-NEXT: lhu a2, 4(a1)
+; RV32I-NEXT: lhu s0, 8(a1)
+; RV32I-NEXT: lhu s1, 12(a1)
; RV32I-NEXT: mv s2, a0
; RV32I-NEXT: li a1, 654
; RV32I-NEXT: mv a0, a2
; RV32I-NEXT: call __umodsi3
; RV32I-NEXT: mv s3, a0
; RV32I-NEXT: li a1, 23
-; RV32I-NEXT: mv a0, s1
+; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __umodsi3
-; RV32I-NEXT: mv s1, a0
+; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lui a0, 1
; RV32I-NEXT: addi a1, a0, 1327
-; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: call __umodsi3
; RV32I-NEXT: sh a0, 6(s2)
-; RV32I-NEXT: sh s1, 4(s2)
+; RV32I-NEXT: sh s0, 4(s2)
; RV32I-NEXT: sh s3, 2(s2)
; RV32I-NEXT: sh zero, 0(s2)
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
@@ -672,32 +672,32 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) nounwind {
;
; RV32IM-LABEL: dont_fold_urem_one:
; RV32IM: # %bb.0:
-; RV32IM-NEXT: lhu a2, 12(a1)
-; RV32IM-NEXT: lhu a3, 4(a1)
-; RV32IM-NEXT: lhu a1, 8(a1)
+; RV32IM-NEXT: lhu a2, 4(a1)
+; RV32IM-NEXT: lhu a3, 8(a1)
+; RV32IM-NEXT: lhu a1, 12(a1)
; RV32IM-NEXT: lui a4, 1603
; RV32IM-NEXT: addi a4, a4, 1341
-; RV32IM-NEXT: mulhu a4, a3, a4
+; RV32IM-NEXT: mulhu a4, a2, a4
; RV32IM-NEXT: li a5, 654
; RV32IM-NEXT: mul a4, a4, a5
-; RV32IM-NEXT: sub a3, a3, a4
+; RV32IM-NEXT: sub a2, a2, a4
; RV32IM-NEXT: lui a4, 45590
; RV32IM-NEXT: addi a4, a4, 1069
-; RV32IM-NEXT: mulhu a4, a1, a4
+; RV32IM-NEXT: mulhu a4, a3, a4
; RV32IM-NEXT: li a5, 23
; RV32IM-NEXT: mul a4, a4, a5
-; RV32IM-NEXT: sub a1, a1, a4
+; RV32IM-NEXT: sub a3, a3, a4
; RV32IM-NEXT: lui a4, 193
; RV32IM-NEXT: addi a4, a4, 1464
-; RV32IM-NEXT: mulhu a4, a2, a4
+; RV32IM-NEXT: mulhu a4, a1, a4
; RV32IM-NEXT: lui a5, 1
; RV32IM-NEXT: addi a5, a5, 1327
; RV32IM-NEXT: mul a4, a4, a5
-; RV32IM-NEXT: sub a2, a2, a4
+; RV32IM-NEXT: sub a1, a1, a4
; RV32IM-NEXT: sh zero, 0(a0)
-; RV32IM-NEXT: sh a2, 6(a0)
-; RV32IM-NEXT: sh a1, 4(a0)
-; RV32IM-NEXT: sh a3, 2(a0)
+; RV32IM-NEXT: sh a1, 6(a0)
+; RV32IM-NEXT: sh a3, 4(a0)
+; RV32IM-NEXT: sh a2, 2(a0)
; RV32IM-NEXT: ret
;
; RV64I-LABEL: dont_fold_urem_one:
@@ -708,24 +708,24 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: lhu s0, 24(a1)
-; RV64I-NEXT: lhu s1, 16(a1)
; RV64I-NEXT: lhu a2, 8(a1)
+; RV64I-NEXT: lhu s0, 16(a1)
+; RV64I-NEXT: lhu s1, 24(a1)
; RV64I-NEXT: mv s2, a0
; RV64I-NEXT: li a1, 654
; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: call __umoddi3
; RV64I-NEXT: mv s3, a0
; RV64I-NEXT: li a1, 23
-; RV64I-NEXT: mv a0, s1
+; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __umoddi3
-; RV64I-NEXT: mv s1, a0
+; RV64I-NEXT: mv s0, a0
; RV64I-NEXT: lui a0, 1
; RV64I-NEXT: addiw a1, a0, 1327
-; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: call __umoddi3
; RV64I-NEXT: sh a0, 6(s2)
-; RV64I-NEXT: sh s1, 4(s2)
+; RV64I-NEXT: sh s0, 4(s2)
; RV64I-NEXT: sh s3, 2(s2)
; RV64I-NEXT: sh zero, 0(s2)
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
@@ -738,32 +738,32 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) nounwind {
;
; RV64IM-LABEL: dont_fold_urem_one:
; RV64IM: # %bb.0:
-; RV64IM-NEXT: lhu a2, 8(a1)
-; RV64IM-NEXT: lui a3, %hi(.LCPI4_0)
-; RV64IM-NEXT: ld a3, %lo(.LCPI4_0)(a3)
-; RV64IM-NEXT: lhu a4, 24(a1)
-; RV64IM-NEXT: lhu a1, 16(a1)
-; RV64IM-NEXT: mulhu a3, a2, a3
+; RV64IM-NEXT: lui a2, %hi(.LCPI4_0)
+; RV64IM-NEXT: ld a2, %lo(.LCPI4_0)(a2)
+; RV64IM-NEXT: lhu a3, 8(a1)
+; RV64IM-NEXT: lhu a4, 16(a1)
+; RV64IM-NEXT: lhu a1, 24(a1)
+; RV64IM-NEXT: mulhu a2, a3, a2
; RV64IM-NEXT: lui a5, %hi(.LCPI4_1)
; RV64IM-NEXT: ld a5, %lo(.LCPI4_1)(a5)
; RV64IM-NEXT: li a6, 654
-; RV64IM-NEXT: mul a3, a3, a6
-; RV64IM-NEXT: subw a2, a2, a3
-; RV64IM-NEXT: mulhu a3, a1, a5
+; RV64IM-NEXT: mul a2, a2, a6
+; RV64IM-NEXT: subw a3, a3, a2
+; RV64IM-NEXT: mulhu a2, a4, a5
; RV64IM-NEXT: lui a5, %hi(.LCPI4_2)
; RV64IM-NEXT: ld a5, %lo(.LCPI4_2)(a5)
; RV64IM-NEXT: li a6, 23
-; RV64IM-NEXT: mul a3, a3, a6
-; RV64IM-NEXT: subw a1, a1, a3
-; RV64IM-NEXT: mulhu a3, a4, a5
+; RV64IM-NEXT: mul a2, a2, a6
+; RV64IM-NEXT: subw a4, a4, a2
+; RV64IM-NEXT: mulhu a2, a1, a5
; RV64IM-NEXT: lui a5, 1
; RV64IM-NEXT: addi a5, a5, 1327
-; RV64IM-NEXT: mul a3, a3, a5
-; RV64IM-NEXT: subw a4, a4, a3
+; RV64IM-NEXT: mul a2, a2, a5
+; RV64IM-NEXT: subw a1, a1, a2
; RV64IM-NEXT: sh zero, 0(a0)
-; RV64IM-NEXT: sh a4, 6(a0)
-; RV64IM-NEXT: sh a1, 4(a0)
-; RV64IM-NEXT: sh a2, 2(a0)
+; RV64IM-NEXT: sh a1, 6(a0)
+; RV64IM-NEXT: sh a4, 4(a0)
+; RV64IM-NEXT: sh a3, 2(a0)
; RV64IM-NEXT: ret
%1 = urem <4 x i16> %x, <i16 1, i16 654, i16 23, i16 5423>
ret <4 x i16> %1
@@ -793,17 +793,18 @@ define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) nounwind {
; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s8, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT: lw s0, 24(a1)
-; RV32I-NEXT: lw s1, 28(a1)
-; RV32I-NEXT: lw s2, 16(a1)
-; RV32I-NEXT: lw s3, 20(a1)
+; RV32I-NEXT: lw s0, 16(a1)
+; RV32I-NEXT: lw s1, 20(a1)
+; RV32I-NEXT: lw s2, 24(a1)
+; RV32I-NEXT: lw s3, 28(a1)
+; RV32I-NEXT: lw a3, 0(a1)
+; RV32I-NEXT: lw a4, 4(a1)
; RV32I-NEXT: lw s4, 8(a1)
; RV32I-NEXT: lw s5, 12(a1)
-; RV32I-NEXT: lw a3, 0(a1)
-; RV32I-NEXT: lw a1, 4(a1)
; RV32I-NEXT: mv s6, a0
; RV32I-NEXT: li a2, 1
; RV32I-NEXT: mv a0, a3
+; RV32I-NEXT: mv a1, a4
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: call __umoddi3
; RV32I-NEXT: mv s7, a0
@@ -816,22 +817,22 @@ define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) nounwind {
; RV32I-NEXT: mv s4, a0
; RV32I-NEXT: mv s5, a1
; RV32I-NEXT: li a2, 23
-; RV32I-NEXT: mv a0, s2
-; RV32I-NEXT: mv a1, s3
+; RV32I-NEXT: mv a0, s0
+; RV32I-NEXT: mv a1, s1
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: call __umoddi3
-; RV32I-NEXT: mv s2, a0
-; RV32I-NEXT: mv s3, a1
+; RV32I-NEXT: mv s0, a0
+; RV32I-NEXT: mv s1, a1
; RV32I-NEXT: lui a0, 1
; RV32I-NEXT: addi a2, a0, 1327
-; RV32I-NEXT: mv a0, s0
-; RV32I-NEXT: mv a1, s1
+; RV32I-NEXT: mv a0, s2
+; RV32I-NEXT: mv a1, s3
; RV32I-NEXT: li a3, 0
; RV32I-NEXT: call __umoddi3
; RV32I-NEXT: sw a1, 28(s6)
; RV32I-NEXT: sw a0, 24(s6)
-; RV32I-NEXT: sw s3, 20(s6)
-; RV32I-NEXT: sw s2, 16(s6)
+; RV32I-NEXT: sw s1, 20(s6)
+; RV32I-NEXT: sw s0, 16(s6)
; RV32I-NEXT: sw s5, 12(s6)
; RV32I-NEXT: sw s4, 8(s6)
; RV32I-NEXT: sw s8, 4(s6)
@@ -862,17 +863,18 @@ define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) nounwind {
; RV32IM-NEXT: sw s6, 16(sp) # 4-byte Folded Spill
; RV32IM-NEXT: sw s7, 12(sp) # 4-byte Folded Spill
; RV32IM-NEXT: sw s8, 8(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw s0, 24(a1)
-; RV32IM-NEXT: lw s1, 28(a1)
-; RV32IM-NEXT: lw s2, 16(a1)
-; RV32IM-NEXT: lw s3, 20(a1)
+; RV32IM-NEXT: lw s0, 16(a1)
+; RV32IM-NEXT: lw s1, 20(a1)
+; RV32IM-NEXT: lw s2, 24(a1)
+; RV32IM-NEXT: lw s3, 28(a1)
+; RV32IM-NEXT: lw a3, 0(a1)
+; RV32IM-NEXT: lw a4, 4(a1)
; RV32IM-NEXT: lw s4, 8(a1)
; RV32IM-NEXT: lw s5, 12(a1)
-; RV32IM-NEXT: lw a3, 0(a1)
-; RV32IM-NEXT: lw a1, 4(a1)
; RV32IM-NEXT: mv s6, a0
; RV32IM-NEXT: li a2, 1
; RV32IM-NEXT: mv a0, a3
+; RV32IM-NEXT: mv a1, a4
; RV32IM-NEXT: li a3, 0
; RV32IM-NEXT: call __umoddi3
; RV32IM-NEXT: mv s7, a0
@@ -885,22 +887,22 @@ define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) nounwind {
; RV32IM-NEXT: mv s4, a0
; RV32IM-NEXT: mv s5, a1
; RV32IM-NEXT: li a2, 23
-; RV32IM-NEXT: mv a0, s2
-; RV32IM-NEXT: mv a1, s3
+; RV32IM-NEXT: mv a0, s0
+; RV32IM-NEXT: mv a1, s1
; RV32IM-NEXT: li a3, 0
; RV32IM-NEXT: call __umoddi3
-; RV32IM-NEXT: mv s2, a0
-; RV32IM-NEXT: mv s3, a1
+; RV32IM-NEXT: mv s0, a0
+; RV32IM-NEXT: mv s1, a1
; RV32IM-NEXT: lui a0, 1
; RV32IM-NEXT: addi a2, a0, 1327
-; RV32IM-NEXT: mv a0, s0
-; RV32IM-NEXT: mv a1, s1
+; RV32IM-NEXT: mv a0, s2
+; RV32IM-NEXT: mv a1, s3
; RV32IM-NEXT: li a3, 0
; RV32IM-NEXT: call __umoddi3
; RV32IM-NEXT: sw a1, 28(s6)
; RV32IM-NEXT: sw a0, 24(s6)
-; RV32IM-NEXT: sw s3, 20(s6)
-; RV32IM-NEXT: sw s2, 16(s6)
+; RV32IM-NEXT: sw s1, 20(s6)
+; RV32IM-NEXT: sw s0, 16(s6)
; RV32IM-NEXT: sw s5, 12(s6)
; RV32IM-NEXT: sw s4, 8(s6)
; RV32IM-NEXT: sw s8, 4(s6)
@@ -926,24 +928,24 @@ define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) nounwind {
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT: ld s0, 24(a1)
-; RV64I-NEXT: ld s1, 16(a1)
; RV64I-NEXT: ld a2, 8(a1)
+; RV64I-NEXT: ld s0, 16(a1)
+; RV64I-NEXT: ld s1, 24(a1)
; RV64I-NEXT: mv s2, a0
; RV64I-NEXT: li a1, 654
; RV64I-NEXT: mv a0, a2
; RV64I-NEXT: call __umoddi3
; RV64I-NEXT: mv s3, a0
; RV64I-NEXT: li a1, 23
-; RV64I-NEXT: mv a0, s1
+; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __umoddi3
-; RV64I-NEXT: mv s1, a0
+; RV64I-NEXT: mv s0, a0
; RV64I-NEXT: lui a0, 1
; RV64I-NEXT: addiw a1, a0, 1327
-; RV64I-NEXT: mv a0, s0
+; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: call __umoddi3
; RV64I-NEXT: sd a0, 24(s2)
-; RV64I-NEXT: sd s1, 16(s2)
+; RV64I-NEXT: sd s0, 16(s2)
; RV64I-NEXT: sd s3, 8(s2)
; RV64I-NEXT: sd zero, 0(s2)
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
@@ -956,39 +958,39 @@ define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) nounwind {
;
; RV64IM-LABEL: dont_fold_urem_i64:
; RV64IM: # %bb.0:
-; RV64IM-NEXT: ld a2, 16(a1)
-; RV64IM-NEXT: lui a3, %hi(.LCPI6_0)
-; RV64IM-NEXT: ld a3, %lo(.LCPI6_0)(a3)
-; RV64IM-NEXT: ld a4, 24(a1)
-; RV64IM-NEXT: ld a1, 8(a1)
-; RV64IM-NEXT: mulhu a3, a2, a3
-; RV64IM-NEXT: sub a5, a2, a3
+; RV64IM-NEXT: lui a2, %hi(.LCPI6_0)
+; RV64IM-NEXT: ld a2, %lo(.LCPI6_0)(a2)
+; RV64IM-NEXT: ld a3, 16(a1)
+; RV64IM-NEXT: ld a4, 8(a1)
+; RV64IM-NEXT: ld a1, 24(a1)
+; RV64IM-NEXT: mulhu a2, a3, a2
+; RV64IM-NEXT: sub a5, a3, a2
; RV64IM-NEXT: srli a5, a5, 1
-; RV64IM-NEXT: add a3, a5, a3
-; RV64IM-NEXT: srli a3, a3, 4
+; RV64IM-NEXT: add a2, a5, a2
+; RV64IM-NEXT: srli a2, a2, 4
; RV64IM-NEXT: li a5, 23
; RV64IM-NEXT: lui a6, %hi(.LCPI6_1)
; RV64IM-NEXT: ld a6, %lo(.LCPI6_1)(a6)
-; RV64IM-NEXT: mul a3, a3, a5
-; RV64IM-NEXT: sub a2, a2, a3
-; RV64IM-NEXT: srli a3, a1, 1
-; RV64IM-NEXT: mulhu a3, a3, a6
-; RV64IM-NEXT: srli a3, a3, 7
+; RV64IM-NEXT: mul a2, a2, a5
+; RV64IM-NEXT: sub a3, a3, a2
+; RV64IM-NEXT: srli a2, a4, 1
+; RV64IM-NEXT: mulhu a2, a2, a6
+; RV64IM-NEXT: srli a2, a2, 7
; RV64IM-NEXT: lui a5, %hi(.LCPI6_2)
; RV64IM-NEXT: ld a5, %lo(.LCPI6_2)(a5)
; RV64IM-NEXT: li a6, 654
-; RV64IM-NEXT: mul a3, a3, a6
-; RV64IM-NEXT: sub a1, a1, a3
-; RV64IM-NEXT: mulhu a3, a4, a5
-; RV64IM-NEXT: srli a3, a3, 12
+; RV64IM-NEXT: mul a2, a2, a6
+; RV64IM-NEXT: sub a4, a4, a2
+; RV64IM-NEXT: mulhu a2, a1, a5
+; RV64IM-NEXT: srli a2, a2, 12
; RV64IM-NEXT: lui a5, 1
; RV64IM-NEXT: addiw a5, a5, 1327
-; RV64IM-NEXT: mul a3, a3, a5
-; RV64IM-NEXT: sub a4, a4, a3
+; RV64IM-NEXT: mul a2, a2, a5
+; RV64IM-NEXT: sub a1, a1, a2
; RV64IM-NEXT: sd zero, 0(a0)
-; RV64IM-NEXT: sd a4, 24(a0)
-; RV64IM-NEXT: sd a1, 8(a0)
-; RV64IM-NEXT: sd a2, 16(a0)
+; RV64IM-NEXT: sd a1, 24(a0)
+; RV64IM-NEXT: sd a4, 8(a0)
+; RV64IM-NEXT: sd a3, 16(a0)
; RV64IM-NEXT: ret
%1 = urem <4 x i64> %x, <i64 1, i64 654, i64 23, i64 5423>
ret <4 x i64> %1
diff --git a/llvm/test/CodeGen/RISCV/vararg.ll b/llvm/test/CodeGen/RISCV/vararg.ll
index 621f54946e4cdf..d2c30c54390702 100644
--- a/llvm/test/CodeGen/RISCV/vararg.ll
+++ b/llvm/test/CodeGen/RISCV/vararg.ll
@@ -822,11 +822,11 @@ define i64 @va2(ptr %fmt, ...) nounwind {
; ILP32-ILP32F-FPELIM-NEXT: addi a0, sp, 20
; ILP32-ILP32F-FPELIM-NEXT: sw a0, 12(sp)
; ILP32-ILP32F-FPELIM-NEXT: addi a0, sp, 27
-; ILP32-ILP32F-FPELIM-NEXT: andi a0, a0, -8
-; ILP32-ILP32F-FPELIM-NEXT: addi a1, sp, 35
-; ILP32-ILP32F-FPELIM-NEXT: sw a1, 12(sp)
-; ILP32-ILP32F-FPELIM-NEXT: lw a1, 4(a0)
-; ILP32-ILP32F-FPELIM-NEXT: lw a0, 0(a0)
+; ILP32-ILP32F-FPELIM-NEXT: andi a1, a0, -8
+; ILP32-ILP32F-FPELIM-NEXT: addi a0, sp, 35
+; ILP32-ILP32F-FPELIM-NEXT: sw a0, 12(sp)
+; ILP32-ILP32F-FPELIM-NEXT: lw a0, 0(a1)
+; ILP32-ILP32F-FPELIM-NEXT: lw a1, 4(a1)
; ILP32-ILP32F-FPELIM-NEXT: addi sp, sp, 48
; ILP32-ILP32F-FPELIM-NEXT: ret
;
@@ -846,11 +846,11 @@ define i64 @va2(ptr %fmt, ...) nounwind {
; ILP32-ILP32F-WITHFP-NEXT: addi a0, s0, 4
; ILP32-ILP32F-WITHFP-NEXT: sw a0, -12(s0)
; ILP32-ILP32F-WITHFP-NEXT: addi a0, s0, 11
-; ILP32-ILP32F-WITHFP-NEXT: andi a0, a0, -8
-; ILP32-ILP32F-WITHFP-NEXT: addi a1, s0, 19
-; ILP32-ILP32F-WITHFP-NEXT: sw a1, -12(s0)
-; ILP32-ILP32F-WITHFP-NEXT: lw a1, 4(a0)
-; ILP32-ILP32F-WITHFP-NEXT: lw a0, 0(a0)
+; ILP32-ILP32F-WITHFP-NEXT: andi a1, a0, -8
+; ILP32-ILP32F-WITHFP-NEXT: addi a0, s0, 19
+; ILP32-ILP32F-WITHFP-NEXT: sw a0, -12(s0)
+; ILP32-ILP32F-WITHFP-NEXT: lw a0, 0(a1)
+; ILP32-ILP32F-WITHFP-NEXT: lw a1, 4(a1)
; ILP32-ILP32F-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; ILP32-ILP32F-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; ILP32-ILP32F-WITHFP-NEXT: addi sp, sp, 48
@@ -869,11 +869,11 @@ define i64 @va2(ptr %fmt, ...) nounwind {
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a0, sp, 20
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a0, 12(sp)
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a0, sp, 27
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: andi a0, a0, -8
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a1, sp, 35
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a1, 12(sp)
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lw a1, 4(a0)
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lw a0, 0(a0)
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: andi a1, a0, -8
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a0, sp, 35
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a0, 12(sp)
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lw a0, 0(a1)
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lw a1, 4(a1)
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 48
; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret
;
@@ -888,11 +888,11 @@ define i64 @va2(ptr %fmt, ...) nounwind {
; ILP32E-FPELIM-NEXT: addi a0, sp, 8
; ILP32E-FPELIM-NEXT: sw a0, 0(sp)
; ILP32E-FPELIM-NEXT: addi a0, sp, 15
-; ILP32E-FPELIM-NEXT: andi a0, a0, -8
-; ILP32E-FPELIM-NEXT: addi a1, sp, 23
-; ILP32E-FPELIM-NEXT: sw a1, 0(sp)
-; ILP32E-FPELIM-NEXT: lw a1, 4(a0)
-; ILP32E-FPELIM-NEXT: lw a0, 0(a0)
+; ILP32E-FPELIM-NEXT: andi a1, a0, -8
+; ILP32E-FPELIM-NEXT: addi a0, sp, 23
+; ILP32E-FPELIM-NEXT: sw a0, 0(sp)
+; ILP32E-FPELIM-NEXT: lw a0, 0(a1)
+; ILP32E-FPELIM-NEXT: lw a1, 4(a1)
; ILP32E-FPELIM-NEXT: addi sp, sp, 28
; ILP32E-FPELIM-NEXT: ret
;
@@ -910,11 +910,11 @@ define i64 @va2(ptr %fmt, ...) nounwind {
; ILP32E-WITHFP-NEXT: addi a0, s0, 4
; ILP32E-WITHFP-NEXT: sw a0, -12(s0)
; ILP32E-WITHFP-NEXT: addi a0, s0, 11
-; ILP32E-WITHFP-NEXT: andi a0, a0, -8
-; ILP32E-WITHFP-NEXT: addi a1, s0, 19
-; ILP32E-WITHFP-NEXT: sw a1, -12(s0)
-; ILP32E-WITHFP-NEXT: lw a1, 4(a0)
-; ILP32E-WITHFP-NEXT: lw a0, 0(a0)
+; ILP32E-WITHFP-NEXT: andi a1, a0, -8
+; ILP32E-WITHFP-NEXT: addi a0, s0, 19
+; ILP32E-WITHFP-NEXT: sw a0, -12(s0)
+; ILP32E-WITHFP-NEXT: lw a0, 0(a1)
+; ILP32E-WITHFP-NEXT: lw a1, 4(a1)
; ILP32E-WITHFP-NEXT: lw ra, 8(sp) # 4-byte Folded Reload
; ILP32E-WITHFP-NEXT: lw s0, 4(sp) # 4-byte Folded Reload
; ILP32E-WITHFP-NEXT: addi sp, sp, 36
diff --git a/llvm/test/CodeGen/RISCV/wide-scalar-shift-by-byte-multiple-legalization.ll b/llvm/test/CodeGen/RISCV/wide-scalar-shift-by-byte-multiple-legalization.ll
index 29fe0a7de6b3d4..3e14317a004745 100644
--- a/llvm/test/CodeGen/RISCV/wide-scalar-shift-by-byte-multiple-legalization.ll
+++ b/llvm/test/CodeGen/RISCV/wide-scalar-shift-by-byte-multiple-legalization.ll
@@ -8,8 +8,8 @@ define void @lshr_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: lbu a3, 1(a0)
; RV64I-NEXT: lbu a4, 0(a0)
; RV64I-NEXT: lbu a5, 2(a0)
-; RV64I-NEXT: slli a3, a3, 8
; RV64I-NEXT: lb a0, 3(a0)
+; RV64I-NEXT: slli a3, a3, 8
; RV64I-NEXT: or a3, a3, a4
; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: lbu a1, 0(a1)
@@ -38,17 +38,17 @@ define void @lshr_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli a0, a0, 24
; RV32I-NEXT: or a0, a0, a5
-; RV32I-NEXT: or a0, a0, a3
-; RV32I-NEXT: lbu a3, 1(a1)
; RV32I-NEXT: lbu a4, 0(a1)
-; RV32I-NEXT: lbu a5, 2(a1)
+; RV32I-NEXT: lbu a5, 1(a1)
+; RV32I-NEXT: or a0, a0, a3
+; RV32I-NEXT: lbu a3, 2(a1)
; RV32I-NEXT: lbu a1, 3(a1)
-; RV32I-NEXT: slli a3, a3, 8
-; RV32I-NEXT: or a3, a3, a4
-; RV32I-NEXT: slli a5, a5, 16
+; RV32I-NEXT: slli a5, a5, 8
+; RV32I-NEXT: or a4, a5, a4
+; RV32I-NEXT: slli a3, a3, 16
; RV32I-NEXT: slli a1, a1, 24
-; RV32I-NEXT: or a1, a1, a5
; RV32I-NEXT: or a1, a1, a3
+; RV32I-NEXT: or a1, a1, a4
; RV32I-NEXT: slli a1, a1, 3
; RV32I-NEXT: srl a0, a0, a1
; RV32I-NEXT: sb a0, 0(a2)
@@ -72,8 +72,8 @@ define void @shl_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: lbu a3, 1(a0)
; RV64I-NEXT: lbu a4, 0(a0)
; RV64I-NEXT: lbu a5, 2(a0)
-; RV64I-NEXT: slli a3, a3, 8
; RV64I-NEXT: lb a0, 3(a0)
+; RV64I-NEXT: slli a3, a3, 8
; RV64I-NEXT: or a3, a3, a4
; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: lbu a1, 0(a1)
@@ -102,17 +102,17 @@ define void @shl_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli a0, a0, 24
; RV32I-NEXT: or a0, a0, a5
-; RV32I-NEXT: or a0, a0, a3
-; RV32I-NEXT: lbu a3, 1(a1)
; RV32I-NEXT: lbu a4, 0(a1)
-; RV32I-NEXT: lbu a5, 2(a1)
+; RV32I-NEXT: lbu a5, 1(a1)
+; RV32I-NEXT: or a0, a0, a3
+; RV32I-NEXT: lbu a3, 2(a1)
; RV32I-NEXT: lbu a1, 3(a1)
-; RV32I-NEXT: slli a3, a3, 8
-; RV32I-NEXT: or a3, a3, a4
-; RV32I-NEXT: slli a5, a5, 16
+; RV32I-NEXT: slli a5, a5, 8
+; RV32I-NEXT: or a4, a5, a4
+; RV32I-NEXT: slli a3, a3, 16
; RV32I-NEXT: slli a1, a1, 24
-; RV32I-NEXT: or a1, a1, a5
; RV32I-NEXT: or a1, a1, a3
+; RV32I-NEXT: or a1, a1, a4
; RV32I-NEXT: slli a1, a1, 3
; RV32I-NEXT: sll a0, a0, a1
; RV32I-NEXT: sb a0, 0(a2)
@@ -136,8 +136,8 @@ define void @ashr_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: lbu a3, 1(a0)
; RV64I-NEXT: lbu a4, 0(a0)
; RV64I-NEXT: lbu a5, 2(a0)
-; RV64I-NEXT: slli a3, a3, 8
; RV64I-NEXT: lb a0, 3(a0)
+; RV64I-NEXT: slli a3, a3, 8
; RV64I-NEXT: or a3, a3, a4
; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: lbu a1, 0(a1)
@@ -166,17 +166,17 @@ define void @ashr_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli a0, a0, 24
; RV32I-NEXT: or a0, a0, a5
-; RV32I-NEXT: or a0, a0, a3
-; RV32I-NEXT: lbu a3, 1(a1)
; RV32I-NEXT: lbu a4, 0(a1)
-; RV32I-NEXT: lbu a5, 2(a1)
+; RV32I-NEXT: lbu a5, 1(a1)
+; RV32I-NEXT: or a0, a0, a3
+; RV32I-NEXT: lbu a3, 2(a1)
; RV32I-NEXT: lbu a1, 3(a1)
-; RV32I-NEXT: slli a3, a3, 8
-; RV32I-NEXT: or a3, a3, a4
-; RV32I-NEXT: slli a5, a5, 16
+; RV32I-NEXT: slli a5, a5, 8
+; RV32I-NEXT: or a4, a5, a4
+; RV32I-NEXT: slli a3, a3, 16
; RV32I-NEXT: slli a1, a1, 24
-; RV32I-NEXT: or a1, a1, a5
; RV32I-NEXT: or a1, a1, a3
+; RV32I-NEXT: or a1, a1, a4
; RV32I-NEXT: slli a1, a1, 3
; RV32I-NEXT: sra a0, a0, a1
; RV32I-NEXT: sb a0, 0(a2)
@@ -207,39 +207,39 @@ define void @lshr_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli a6, a6, 24
; RV64I-NEXT: or a4, a6, a5
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: lbu a4, 5(a0)
; RV64I-NEXT: lbu a5, 4(a0)
-; RV64I-NEXT: lbu a6, 6(a0)
+; RV64I-NEXT: lbu a6, 5(a0)
+; RV64I-NEXT: or a3, a4, a3
+; RV64I-NEXT: lbu a4, 6(a0)
; RV64I-NEXT: lbu a0, 7(a0)
-; RV64I-NEXT: slli a4, a4, 8
-; RV64I-NEXT: or a4, a4, a5
-; RV64I-NEXT: slli a6, a6, 16
+; RV64I-NEXT: slli a6, a6, 8
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: slli a4, a4, 16
; RV64I-NEXT: slli a0, a0, 24
-; RV64I-NEXT: or a0, a0, a6
; RV64I-NEXT: or a0, a0, a4
+; RV64I-NEXT: or a0, a0, a5
; RV64I-NEXT: slli a0, a0, 32
-; RV64I-NEXT: or a0, a0, a3
-; RV64I-NEXT: lbu a3, 5(a1)
; RV64I-NEXT: lbu a4, 4(a1)
-; RV64I-NEXT: lbu a5, 6(a1)
+; RV64I-NEXT: lbu a5, 5(a1)
+; RV64I-NEXT: or a0, a0, a3
+; RV64I-NEXT: lbu a3, 6(a1)
; RV64I-NEXT: lbu a6, 7(a1)
-; RV64I-NEXT: slli a3, a3, 8
-; RV64I-NEXT: or a3, a3, a4
-; RV64I-NEXT: slli a5, a5, 16
+; RV64I-NEXT: slli a5, a5, 8
+; RV64I-NEXT: or a4, a5, a4
+; RV64I-NEXT: slli a3, a3, 16
; RV64I-NEXT: slli a6, a6, 24
-; RV64I-NEXT: or a4, a6, a5
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: lbu a4, 1(a1)
+; RV64I-NEXT: or a3, a6, a3
; RV64I-NEXT: lbu a5, 0(a1)
-; RV64I-NEXT: lbu a6, 2(a1)
+; RV64I-NEXT: lbu a6, 1(a1)
+; RV64I-NEXT: or a3, a3, a4
+; RV64I-NEXT: lbu a4, 2(a1)
; RV64I-NEXT: lbu a1, 3(a1)
-; RV64I-NEXT: slli a4, a4, 8
-; RV64I-NEXT: or a4, a4, a5
-; RV64I-NEXT: slli a6, a6, 16
+; RV64I-NEXT: slli a6, a6, 8
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: slli a4, a4, 16
; RV64I-NEXT: slli a1, a1, 24
-; RV64I-NEXT: or a1, a1, a6
; RV64I-NEXT: or a1, a1, a4
+; RV64I-NEXT: or a1, a1, a5
; RV64I-NEXT: slli a1, a1, 3
; RV64I-NEXT: slli a3, a3, 35
; RV64I-NEXT: or a1, a3, a1
@@ -272,17 +272,17 @@ define void @lshr_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli a6, a6, 24
; RV32I-NEXT: or a4, a6, a5
-; RV32I-NEXT: or a3, a4, a3
-; RV32I-NEXT: lbu a4, 1(a1)
; RV32I-NEXT: lbu a5, 0(a1)
-; RV32I-NEXT: lbu a6, 2(a1)
+; RV32I-NEXT: lbu a6, 1(a1)
+; RV32I-NEXT: or a3, a4, a3
+; RV32I-NEXT: lbu a4, 2(a1)
; RV32I-NEXT: lbu a1, 3(a1)
-; RV32I-NEXT: slli a4, a4, 8
-; RV32I-NEXT: or a4, a4, a5
-; RV32I-NEXT: slli a6, a6, 16
+; RV32I-NEXT: slli a6, a6, 8
+; RV32I-NEXT: or a5, a6, a5
+; RV32I-NEXT: slli a4, a4, 16
; RV32I-NEXT: slli a1, a1, 24
-; RV32I-NEXT: or a1, a1, a6
; RV32I-NEXT: or a1, a1, a4
+; RV32I-NEXT: or a1, a1, a5
; RV32I-NEXT: slli a5, a1, 3
; RV32I-NEXT: addi a4, a5, -32
; RV32I-NEXT: srl a1, a3, a5
@@ -343,39 +343,39 @@ define void @shl_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli a6, a6, 24
; RV64I-NEXT: or a4, a6, a5
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: lbu a4, 5(a0)
; RV64I-NEXT: lbu a5, 4(a0)
-; RV64I-NEXT: lbu a6, 6(a0)
+; RV64I-NEXT: lbu a6, 5(a0)
+; RV64I-NEXT: or a3, a4, a3
+; RV64I-NEXT: lbu a4, 6(a0)
; RV64I-NEXT: lbu a0, 7(a0)
-; RV64I-NEXT: slli a4, a4, 8
-; RV64I-NEXT: or a4, a4, a5
-; RV64I-NEXT: slli a6, a6, 16
+; RV64I-NEXT: slli a6, a6, 8
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: slli a4, a4, 16
; RV64I-NEXT: slli a0, a0, 24
-; RV64I-NEXT: or a0, a0, a6
; RV64I-NEXT: or a0, a0, a4
+; RV64I-NEXT: or a0, a0, a5
; RV64I-NEXT: slli a0, a0, 32
-; RV64I-NEXT: or a0, a0, a3
-; RV64I-NEXT: lbu a3, 5(a1)
; RV64I-NEXT: lbu a4, 4(a1)
-; RV64I-NEXT: lbu a5, 6(a1)
+; RV64I-NEXT: lbu a5, 5(a1)
+; RV64I-NEXT: or a0, a0, a3
+; RV64I-NEXT: lbu a3, 6(a1)
; RV64I-NEXT: lbu a6, 7(a1)
-; RV64I-NEXT: slli a3, a3, 8
-; RV64I-NEXT: or a3, a3, a4
-; RV64I-NEXT: slli a5, a5, 16
+; RV64I-NEXT: slli a5, a5, 8
+; RV64I-NEXT: or a4, a5, a4
+; RV64I-NEXT: slli a3, a3, 16
; RV64I-NEXT: slli a6, a6, 24
-; RV64I-NEXT: or a4, a6, a5
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: lbu a4, 1(a1)
+; RV64I-NEXT: or a3, a6, a3
; RV64I-NEXT: lbu a5, 0(a1)
-; RV64I-NEXT: lbu a6, 2(a1)
+; RV64I-NEXT: lbu a6, 1(a1)
+; RV64I-NEXT: or a3, a3, a4
+; RV64I-NEXT: lbu a4, 2(a1)
; RV64I-NEXT: lbu a1, 3(a1)
-; RV64I-NEXT: slli a4, a4, 8
-; RV64I-NEXT: or a4, a4, a5
-; RV64I-NEXT: slli a6, a6, 16
+; RV64I-NEXT: slli a6, a6, 8
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: slli a4, a4, 16
; RV64I-NEXT: slli a1, a1, 24
-; RV64I-NEXT: or a1, a1, a6
; RV64I-NEXT: or a1, a1, a4
+; RV64I-NEXT: or a1, a1, a5
; RV64I-NEXT: slli a1, a1, 3
; RV64I-NEXT: slli a3, a3, 35
; RV64I-NEXT: or a1, a3, a1
@@ -408,17 +408,17 @@ define void @shl_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli a6, a6, 24
; RV32I-NEXT: or a4, a6, a5
-; RV32I-NEXT: or a3, a4, a3
-; RV32I-NEXT: lbu a4, 1(a1)
; RV32I-NEXT: lbu a5, 0(a1)
-; RV32I-NEXT: lbu a6, 2(a1)
+; RV32I-NEXT: lbu a6, 1(a1)
+; RV32I-NEXT: or a3, a4, a3
+; RV32I-NEXT: lbu a4, 2(a1)
; RV32I-NEXT: lbu a1, 3(a1)
-; RV32I-NEXT: slli a4, a4, 8
-; RV32I-NEXT: or a4, a4, a5
-; RV32I-NEXT: slli a6, a6, 16
+; RV32I-NEXT: slli a6, a6, 8
+; RV32I-NEXT: or a5, a6, a5
+; RV32I-NEXT: slli a4, a4, 16
; RV32I-NEXT: slli a1, a1, 24
-; RV32I-NEXT: or a1, a1, a6
; RV32I-NEXT: or a1, a1, a4
+; RV32I-NEXT: or a1, a1, a5
; RV32I-NEXT: slli a5, a1, 3
; RV32I-NEXT: addi a4, a5, -32
; RV32I-NEXT: sll a1, a3, a5
@@ -479,39 +479,39 @@ define void @ashr_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli a6, a6, 24
; RV64I-NEXT: or a4, a6, a5
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: lbu a4, 5(a0)
; RV64I-NEXT: lbu a5, 4(a0)
-; RV64I-NEXT: lbu a6, 6(a0)
+; RV64I-NEXT: lbu a6, 5(a0)
+; RV64I-NEXT: or a3, a4, a3
+; RV64I-NEXT: lbu a4, 6(a0)
; RV64I-NEXT: lbu a0, 7(a0)
-; RV64I-NEXT: slli a4, a4, 8
-; RV64I-NEXT: or a4, a4, a5
-; RV64I-NEXT: slli a6, a6, 16
+; RV64I-NEXT: slli a6, a6, 8
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: slli a4, a4, 16
; RV64I-NEXT: slli a0, a0, 24
-; RV64I-NEXT: or a0, a0, a6
; RV64I-NEXT: or a0, a0, a4
+; RV64I-NEXT: or a0, a0, a5
; RV64I-NEXT: slli a0, a0, 32
-; RV64I-NEXT: or a0, a0, a3
-; RV64I-NEXT: lbu a3, 5(a1)
; RV64I-NEXT: lbu a4, 4(a1)
-; RV64I-NEXT: lbu a5, 6(a1)
+; RV64I-NEXT: lbu a5, 5(a1)
+; RV64I-NEXT: or a0, a0, a3
+; RV64I-NEXT: lbu a3, 6(a1)
; RV64I-NEXT: lbu a6, 7(a1)
-; RV64I-NEXT: slli a3, a3, 8
-; RV64I-NEXT: or a3, a3, a4
-; RV64I-NEXT: slli a5, a5, 16
+; RV64I-NEXT: slli a5, a5, 8
+; RV64I-NEXT: or a4, a5, a4
+; RV64I-NEXT: slli a3, a3, 16
; RV64I-NEXT: slli a6, a6, 24
-; RV64I-NEXT: or a4, a6, a5
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: lbu a4, 1(a1)
+; RV64I-NEXT: or a3, a6, a3
; RV64I-NEXT: lbu a5, 0(a1)
-; RV64I-NEXT: lbu a6, 2(a1)
+; RV64I-NEXT: lbu a6, 1(a1)
+; RV64I-NEXT: or a3, a3, a4
+; RV64I-NEXT: lbu a4, 2(a1)
; RV64I-NEXT: lbu a1, 3(a1)
-; RV64I-NEXT: slli a4, a4, 8
-; RV64I-NEXT: or a4, a4, a5
-; RV64I-NEXT: slli a6, a6, 16
+; RV64I-NEXT: slli a6, a6, 8
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: slli a4, a4, 16
; RV64I-NEXT: slli a1, a1, 24
-; RV64I-NEXT: or a1, a1, a6
; RV64I-NEXT: or a1, a1, a4
+; RV64I-NEXT: or a1, a1, a5
; RV64I-NEXT: slli a1, a1, 3
; RV64I-NEXT: slli a3, a3, 35
; RV64I-NEXT: or a1, a3, a1
@@ -544,18 +544,18 @@ define void @ashr_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli a4, a6, 24
; RV32I-NEXT: or a5, a4, a5
-; RV32I-NEXT: or a3, a5, a3
-; RV32I-NEXT: lbu a5, 1(a1)
; RV32I-NEXT: lbu a6, 0(a1)
-; RV32I-NEXT: lbu a7, 2(a1)
+; RV32I-NEXT: lbu a7, 1(a1)
+; RV32I-NEXT: or a3, a5, a3
+; RV32I-NEXT: lbu a5, 2(a1)
; RV32I-NEXT: lbu a1, 3(a1)
-; RV32I-NEXT: slli a5, a5, 8
-; RV32I-NEXT: or a5, a5, a6
-; RV32I-NEXT: slli a7, a7, 16
+; RV32I-NEXT: slli a7, a7, 8
+; RV32I-NEXT: or a6, a7, a6
+; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli a1, a1, 24
-; RV32I-NEXT: or a1, a1, a7
; RV32I-NEXT: or a1, a1, a5
-; RV32I-NEXT: slli a5, a1, 3
+; RV32I-NEXT: or a5, a1, a6
+; RV32I-NEXT: slli a5, a5, 3
; RV32I-NEXT: addi a6, a5, -32
; RV32I-NEXT: sra a1, a3, a5
; RV32I-NEXT: bltz a6, .LBB5_2
@@ -616,39 +616,39 @@ define void @lshr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli a6, a6, 24
; RV64I-NEXT: or a4, a6, a5
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: lbu a4, 13(a0)
; RV64I-NEXT: lbu a5, 12(a0)
-; RV64I-NEXT: lbu a6, 14(a0)
+; RV64I-NEXT: lbu a6, 13(a0)
+; RV64I-NEXT: or a3, a4, a3
+; RV64I-NEXT: lbu a4, 14(a0)
; RV64I-NEXT: lbu a7, 15(a0)
-; RV64I-NEXT: slli a4, a4, 8
-; RV64I-NEXT: or a4, a4, a5
-; RV64I-NEXT: slli a6, a6, 16
+; RV64I-NEXT: slli a6, a6, 8
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: slli a4, a4, 16
; RV64I-NEXT: slli a7, a7, 24
-; RV64I-NEXT: or a5, a7, a6
-; RV64I-NEXT: or a4, a5, a4
+; RV64I-NEXT: or a4, a7, a4
+; RV64I-NEXT: or a4, a4, a5
; RV64I-NEXT: slli a4, a4, 32
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: lbu a4, 5(a1)
; RV64I-NEXT: lbu a5, 4(a1)
-; RV64I-NEXT: lbu a6, 6(a1)
+; RV64I-NEXT: lbu a6, 5(a1)
+; RV64I-NEXT: or a3, a4, a3
+; RV64I-NEXT: lbu a4, 6(a1)
; RV64I-NEXT: lbu a7, 7(a1)
-; RV64I-NEXT: slli a4, a4, 8
-; RV64I-NEXT: or a4, a4, a5
-; RV64I-NEXT: slli a6, a6, 16
+; RV64I-NEXT: slli a6, a6, 8
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: slli a4, a4, 16
; RV64I-NEXT: slli a7, a7, 24
-; RV64I-NEXT: or a5, a7, a6
-; RV64I-NEXT: or a4, a5, a4
-; RV64I-NEXT: lbu a5, 1(a1)
+; RV64I-NEXT: or a4, a7, a4
; RV64I-NEXT: lbu a6, 0(a1)
-; RV64I-NEXT: lbu a7, 2(a1)
+; RV64I-NEXT: lbu a7, 1(a1)
+; RV64I-NEXT: or a4, a4, a5
+; RV64I-NEXT: lbu a5, 2(a1)
; RV64I-NEXT: lbu a1, 3(a1)
-; RV64I-NEXT: slli a5, a5, 8
-; RV64I-NEXT: or a5, a5, a6
-; RV64I-NEXT: slli a7, a7, 16
+; RV64I-NEXT: slli a7, a7, 8
+; RV64I-NEXT: or a6, a7, a6
+; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli a1, a1, 24
-; RV64I-NEXT: or a1, a1, a7
; RV64I-NEXT: or a1, a1, a5
+; RV64I-NEXT: or a1, a1, a6
; RV64I-NEXT: slli a1, a1, 3
; RV64I-NEXT: slli a4, a4, 35
; RV64I-NEXT: or a5, a4, a1
@@ -668,17 +668,17 @@ define void @lshr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: slli t0, t0, 16
; RV64I-NEXT: slli t1, t1, 24
; RV64I-NEXT: or a7, t1, t0
-; RV64I-NEXT: or a6, a7, a6
-; RV64I-NEXT: lbu a7, 5(a0)
; RV64I-NEXT: lbu t0, 4(a0)
-; RV64I-NEXT: lbu t1, 6(a0)
+; RV64I-NEXT: lbu t1, 5(a0)
+; RV64I-NEXT: or a6, a7, a6
+; RV64I-NEXT: lbu a7, 6(a0)
; RV64I-NEXT: lbu a0, 7(a0)
-; RV64I-NEXT: slli a7, a7, 8
-; RV64I-NEXT: or a7, a7, t0
-; RV64I-NEXT: slli t1, t1, 16
+; RV64I-NEXT: slli t1, t1, 8
+; RV64I-NEXT: or t0, t1, t0
+; RV64I-NEXT: slli a7, a7, 16
; RV64I-NEXT: slli a0, a0, 24
-; RV64I-NEXT: or a0, a0, t1
; RV64I-NEXT: or a0, a0, a7
+; RV64I-NEXT: or a0, a0, t0
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: or a0, a0, a6
; RV64I-NEXT: srl a0, a0, a5
@@ -733,46 +733,46 @@ define void @lshr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli a6, a6, 24
; RV32I-NEXT: or a4, a6, a5
-; RV32I-NEXT: or a3, a4, a3
-; RV32I-NEXT: lbu a4, 5(a0)
; RV32I-NEXT: lbu a5, 4(a0)
-; RV32I-NEXT: lbu a6, 6(a0)
+; RV32I-NEXT: lbu a6, 5(a0)
+; RV32I-NEXT: or a3, a4, a3
+; RV32I-NEXT: lbu a4, 6(a0)
; RV32I-NEXT: lbu a7, 7(a0)
-; RV32I-NEXT: slli a4, a4, 8
-; RV32I-NEXT: or a4, a4, a5
-; RV32I-NEXT: slli a6, a6, 16
+; RV32I-NEXT: slli a6, a6, 8
+; RV32I-NEXT: or a5, a6, a5
+; RV32I-NEXT: slli a4, a4, 16
; RV32I-NEXT: slli a7, a7, 24
-; RV32I-NEXT: or a5, a7, a6
-; RV32I-NEXT: or a4, a5, a4
-; RV32I-NEXT: lbu a5, 9(a0)
+; RV32I-NEXT: or a4, a7, a4
; RV32I-NEXT: lbu a6, 8(a0)
-; RV32I-NEXT: lbu a7, 10(a0)
+; RV32I-NEXT: lbu a7, 9(a0)
+; RV32I-NEXT: or a4, a4, a5
+; RV32I-NEXT: lbu a5, 10(a0)
; RV32I-NEXT: lbu t0, 11(a0)
-; RV32I-NEXT: slli a5, a5, 8
-; RV32I-NEXT: or a5, a5, a6
-; RV32I-NEXT: slli a7, a7, 16
+; RV32I-NEXT: slli a7, a7, 8
+; RV32I-NEXT: or a6, a7, a6
+; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli t0, t0, 24
-; RV32I-NEXT: or a6, t0, a7
-; RV32I-NEXT: or a5, a6, a5
-; RV32I-NEXT: lbu a6, 13(a0)
+; RV32I-NEXT: or a5, t0, a5
; RV32I-NEXT: lbu a7, 12(a0)
-; RV32I-NEXT: lbu t0, 14(a0)
+; RV32I-NEXT: lbu t0, 13(a0)
+; RV32I-NEXT: or a5, a5, a6
+; RV32I-NEXT: lbu a6, 14(a0)
; RV32I-NEXT: lbu a0, 15(a0)
-; RV32I-NEXT: slli a6, a6, 8
-; RV32I-NEXT: or a6, a6, a7
-; RV32I-NEXT: slli t0, t0, 16
+; RV32I-NEXT: slli t0, t0, 8
+; RV32I-NEXT: or a7, t0, a7
+; RV32I-NEXT: slli a6, a6, 16
; RV32I-NEXT: slli a0, a0, 24
-; RV32I-NEXT: or a0, a0, t0
; RV32I-NEXT: or a0, a0, a6
-; RV32I-NEXT: lbu a6, 1(a1)
-; RV32I-NEXT: lbu a7, 0(a1)
-; RV32I-NEXT: lbu t0, 2(a1)
+; RV32I-NEXT: lbu a6, 0(a1)
+; RV32I-NEXT: lbu t0, 1(a1)
+; RV32I-NEXT: or a0, a0, a7
+; RV32I-NEXT: lbu a7, 2(a1)
; RV32I-NEXT: lbu a1, 3(a1)
-; RV32I-NEXT: slli a6, a6, 8
-; RV32I-NEXT: or a6, a6, a7
-; RV32I-NEXT: slli t0, t0, 16
+; RV32I-NEXT: slli t0, t0, 8
+; RV32I-NEXT: or a6, t0, a6
+; RV32I-NEXT: slli a7, a7, 16
; RV32I-NEXT: slli a1, a1, 24
-; RV32I-NEXT: or a1, a1, t0
+; RV32I-NEXT: or a1, a1, a7
; RV32I-NEXT: or a1, a1, a6
; RV32I-NEXT: sw zero, 28(sp)
; RV32I-NEXT: sw zero, 24(sp)
@@ -785,48 +785,48 @@ define void @lshr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: andi a0, a1, 12
; RV32I-NEXT: mv a3, sp
; RV32I-NEXT: add a0, a3, a0
-; RV32I-NEXT: lw a3, 4(a0)
+; RV32I-NEXT: lw a3, 0(a0)
+; RV32I-NEXT: lw a4, 4(a0)
; RV32I-NEXT: slli a1, a1, 3
-; RV32I-NEXT: srl a4, a3, a1
; RV32I-NEXT: lw a5, 8(a0)
-; RV32I-NEXT: andi a6, a1, 24
-; RV32I-NEXT: xori a6, a6, 31
-; RV32I-NEXT: lw a7, 0(a0)
-; RV32I-NEXT: slli t0, a5, 1
-; RV32I-NEXT: sll t0, t0, a6
-; RV32I-NEXT: or t0, a4, t0
-; RV32I-NEXT: srl a7, a7, a1
-; RV32I-NEXT: slli a3, a3, 1
; RV32I-NEXT: lw a0, 12(a0)
-; RV32I-NEXT: sll a3, a3, a6
-; RV32I-NEXT: or a3, a7, a3
+; RV32I-NEXT: srl a6, a4, a1
+; RV32I-NEXT: andi a7, a1, 24
+; RV32I-NEXT: xori a7, a7, 31
+; RV32I-NEXT: slli t0, a5, 1
+; RV32I-NEXT: sll t0, t0, a7
+; RV32I-NEXT: or t0, a6, t0
+; RV32I-NEXT: srl a3, a3, a1
+; RV32I-NEXT: slli a4, a4, 1
+; RV32I-NEXT: sll a4, a4, a7
+; RV32I-NEXT: or a4, a3, a4
; RV32I-NEXT: srl a5, a5, a1
; RV32I-NEXT: slli t1, a0, 1
-; RV32I-NEXT: sll a6, t1, a6
-; RV32I-NEXT: or a6, a5, a6
+; RV32I-NEXT: sll a7, t1, a7
+; RV32I-NEXT: or a7, a5, a7
; RV32I-NEXT: srl a0, a0, a1
; RV32I-NEXT: sb a5, 8(a2)
; RV32I-NEXT: sb a0, 12(a2)
-; RV32I-NEXT: sb a7, 0(a2)
-; RV32I-NEXT: sb a4, 4(a2)
+; RV32I-NEXT: sb a3, 0(a2)
+; RV32I-NEXT: sb a6, 4(a2)
; RV32I-NEXT: srli a1, a0, 16
; RV32I-NEXT: sb a1, 14(a2)
; RV32I-NEXT: srli a1, a0, 24
; RV32I-NEXT: sb a1, 15(a2)
; RV32I-NEXT: srli a0, a0, 8
; RV32I-NEXT: sb a0, 13(a2)
-; RV32I-NEXT: srli a0, a6, 16
+; RV32I-NEXT: srli a0, a7, 16
; RV32I-NEXT: sb a0, 10(a2)
-; RV32I-NEXT: srli a0, a6, 24
+; RV32I-NEXT: srli a0, a7, 24
; RV32I-NEXT: sb a0, 11(a2)
-; RV32I-NEXT: srli a0, a6, 8
+; RV32I-NEXT: srli a0, a7, 8
; RV32I-NEXT: sb a0, 9(a2)
-; RV32I-NEXT: srli a0, a3, 16
+; RV32I-NEXT: srli a0, a4, 16
; RV32I-NEXT: sb a0, 2(a2)
-; RV32I-NEXT: srli a0, a3, 24
+; RV32I-NEXT: srli a0, a4, 24
; RV32I-NEXT: sb a0, 3(a2)
-; RV32I-NEXT: srli a3, a3, 8
-; RV32I-NEXT: sb a3, 1(a2)
+; RV32I-NEXT: srli a4, a4, 8
+; RV32I-NEXT: sb a4, 1(a2)
; RV32I-NEXT: srli a0, t0, 16
; RV32I-NEXT: sb a0, 6(a2)
; RV32I-NEXT: srli a0, t0, 24
@@ -855,39 +855,39 @@ define void @lshr_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun
; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli a6, a6, 24
; RV64I-NEXT: or a4, a6, a5
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: lbu a4, 13(a0)
; RV64I-NEXT: lbu a5, 12(a0)
-; RV64I-NEXT: lbu a6, 14(a0)
+; RV64I-NEXT: lbu a6, 13(a0)
+; RV64I-NEXT: or a3, a4, a3
+; RV64I-NEXT: lbu a4, 14(a0)
; RV64I-NEXT: lbu a7, 15(a0)
-; RV64I-NEXT: slli a4, a4, 8
-; RV64I-NEXT: or a4, a4, a5
-; RV64I-NEXT: slli a6, a6, 16
+; RV64I-NEXT: slli a6, a6, 8
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: slli a4, a4, 16
; RV64I-NEXT: slli a7, a7, 24
-; RV64I-NEXT: or a5, a7, a6
-; RV64I-NEXT: or a4, a5, a4
+; RV64I-NEXT: or a4, a7, a4
+; RV64I-NEXT: or a4, a4, a5
; RV64I-NEXT: slli a4, a4, 32
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: lbu a4, 5(a1)
; RV64I-NEXT: lbu a5, 4(a1)
-; RV64I-NEXT: lbu a6, 6(a1)
+; RV64I-NEXT: lbu a6, 5(a1)
+; RV64I-NEXT: or a3, a4, a3
+; RV64I-NEXT: lbu a4, 6(a1)
; RV64I-NEXT: lbu a7, 7(a1)
-; RV64I-NEXT: slli a4, a4, 8
-; RV64I-NEXT: or a4, a4, a5
-; RV64I-NEXT: slli a6, a6, 16
+; RV64I-NEXT: slli a6, a6, 8
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: slli a4, a4, 16
; RV64I-NEXT: slli a7, a7, 24
-; RV64I-NEXT: or a5, a7, a6
-; RV64I-NEXT: or a4, a5, a4
-; RV64I-NEXT: lbu a5, 1(a1)
+; RV64I-NEXT: or a4, a7, a4
; RV64I-NEXT: lbu a6, 0(a1)
-; RV64I-NEXT: lbu a7, 2(a1)
+; RV64I-NEXT: lbu a7, 1(a1)
+; RV64I-NEXT: or a4, a4, a5
+; RV64I-NEXT: lbu a5, 2(a1)
; RV64I-NEXT: lbu a1, 3(a1)
-; RV64I-NEXT: slli a5, a5, 8
-; RV64I-NEXT: or a5, a5, a6
-; RV64I-NEXT: slli a7, a7, 16
+; RV64I-NEXT: slli a7, a7, 8
+; RV64I-NEXT: or a6, a7, a6
+; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli a1, a1, 24
-; RV64I-NEXT: or a1, a1, a7
; RV64I-NEXT: or a1, a1, a5
+; RV64I-NEXT: or a1, a1, a6
; RV64I-NEXT: slli a1, a1, 5
; RV64I-NEXT: slli a4, a4, 37
; RV64I-NEXT: or a5, a4, a1
@@ -907,17 +907,17 @@ define void @lshr_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun
; RV64I-NEXT: slli t0, t0, 16
; RV64I-NEXT: slli t1, t1, 24
; RV64I-NEXT: or a7, t1, t0
-; RV64I-NEXT: or a6, a7, a6
-; RV64I-NEXT: lbu a7, 5(a0)
; RV64I-NEXT: lbu t0, 4(a0)
-; RV64I-NEXT: lbu t1, 6(a0)
+; RV64I-NEXT: lbu t1, 5(a0)
+; RV64I-NEXT: or a6, a7, a6
+; RV64I-NEXT: lbu a7, 6(a0)
; RV64I-NEXT: lbu a0, 7(a0)
-; RV64I-NEXT: slli a7, a7, 8
-; RV64I-NEXT: or a7, a7, t0
-; RV64I-NEXT: slli t1, t1, 16
+; RV64I-NEXT: slli t1, t1, 8
+; RV64I-NEXT: or t0, t1, t0
+; RV64I-NEXT: slli a7, a7, 16
; RV64I-NEXT: slli a0, a0, 24
-; RV64I-NEXT: or a0, a0, t1
; RV64I-NEXT: or a0, a0, a7
+; RV64I-NEXT: or a0, a0, t0
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: or a0, a0, a6
; RV64I-NEXT: srl a0, a0, a5
@@ -972,37 +972,37 @@ define void @lshr_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun
; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli a6, a6, 24
; RV32I-NEXT: or a4, a6, a5
-; RV32I-NEXT: or a3, a4, a3
-; RV32I-NEXT: lbu a4, 5(a0)
; RV32I-NEXT: lbu a5, 4(a0)
-; RV32I-NEXT: lbu a6, 6(a0)
+; RV32I-NEXT: lbu a6, 5(a0)
+; RV32I-NEXT: or a3, a4, a3
+; RV32I-NEXT: lbu a4, 6(a0)
; RV32I-NEXT: lbu a7, 7(a0)
-; RV32I-NEXT: slli a4, a4, 8
-; RV32I-NEXT: or a4, a4, a5
-; RV32I-NEXT: slli a6, a6, 16
+; RV32I-NEXT: slli a6, a6, 8
+; RV32I-NEXT: or a5, a6, a5
+; RV32I-NEXT: slli a4, a4, 16
; RV32I-NEXT: slli a7, a7, 24
-; RV32I-NEXT: or a5, a7, a6
-; RV32I-NEXT: or a4, a5, a4
-; RV32I-NEXT: lbu a5, 9(a0)
+; RV32I-NEXT: or a4, a7, a4
; RV32I-NEXT: lbu a6, 8(a0)
-; RV32I-NEXT: lbu a7, 10(a0)
+; RV32I-NEXT: lbu a7, 9(a0)
+; RV32I-NEXT: or a4, a4, a5
+; RV32I-NEXT: lbu a5, 10(a0)
; RV32I-NEXT: lbu t0, 11(a0)
-; RV32I-NEXT: slli a5, a5, 8
-; RV32I-NEXT: or a5, a5, a6
-; RV32I-NEXT: slli a7, a7, 16
+; RV32I-NEXT: slli a7, a7, 8
+; RV32I-NEXT: or a6, a7, a6
+; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli t0, t0, 24
-; RV32I-NEXT: or a6, t0, a7
-; RV32I-NEXT: or a5, a6, a5
-; RV32I-NEXT: lbu a6, 13(a0)
+; RV32I-NEXT: or a5, t0, a5
; RV32I-NEXT: lbu a7, 12(a0)
-; RV32I-NEXT: lbu t0, 14(a0)
+; RV32I-NEXT: lbu t0, 13(a0)
+; RV32I-NEXT: or a5, a5, a6
+; RV32I-NEXT: lbu a6, 14(a0)
; RV32I-NEXT: lbu a0, 15(a0)
-; RV32I-NEXT: slli a6, a6, 8
-; RV32I-NEXT: or a6, a6, a7
-; RV32I-NEXT: slli t0, t0, 16
+; RV32I-NEXT: slli t0, t0, 8
+; RV32I-NEXT: or a7, t0, a7
+; RV32I-NEXT: slli a6, a6, 16
; RV32I-NEXT: slli a0, a0, 24
-; RV32I-NEXT: or a0, a0, t0
; RV32I-NEXT: or a0, a0, a6
+; RV32I-NEXT: or a0, a0, a7
; RV32I-NEXT: lbu a1, 0(a1)
; RV32I-NEXT: sw zero, 28(sp)
; RV32I-NEXT: sw zero, 24(sp)
@@ -1070,39 +1070,39 @@ define void @shl_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli a6, a6, 24
; RV64I-NEXT: or a4, a6, a5
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: lbu a4, 5(a0)
; RV64I-NEXT: lbu a5, 4(a0)
-; RV64I-NEXT: lbu a6, 6(a0)
+; RV64I-NEXT: lbu a6, 5(a0)
+; RV64I-NEXT: or a3, a4, a3
+; RV64I-NEXT: lbu a4, 6(a0)
; RV64I-NEXT: lbu a7, 7(a0)
-; RV64I-NEXT: slli a4, a4, 8
-; RV64I-NEXT: or a4, a4, a5
-; RV64I-NEXT: slli a6, a6, 16
+; RV64I-NEXT: slli a6, a6, 8
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: slli a4, a4, 16
; RV64I-NEXT: slli a7, a7, 24
-; RV64I-NEXT: or a5, a7, a6
-; RV64I-NEXT: or a4, a5, a4
+; RV64I-NEXT: or a4, a7, a4
+; RV64I-NEXT: or a4, a4, a5
; RV64I-NEXT: slli a4, a4, 32
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: lbu a4, 5(a1)
; RV64I-NEXT: lbu a5, 4(a1)
-; RV64I-NEXT: lbu a6, 6(a1)
+; RV64I-NEXT: lbu a6, 5(a1)
+; RV64I-NEXT: or a3, a4, a3
+; RV64I-NEXT: lbu a4, 6(a1)
; RV64I-NEXT: lbu a7, 7(a1)
-; RV64I-NEXT: slli a4, a4, 8
-; RV64I-NEXT: or a4, a4, a5
-; RV64I-NEXT: slli a6, a6, 16
+; RV64I-NEXT: slli a6, a6, 8
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: slli a4, a4, 16
; RV64I-NEXT: slli a7, a7, 24
-; RV64I-NEXT: or a5, a7, a6
-; RV64I-NEXT: or a4, a5, a4
-; RV64I-NEXT: lbu a5, 1(a1)
+; RV64I-NEXT: or a4, a7, a4
; RV64I-NEXT: lbu a6, 0(a1)
-; RV64I-NEXT: lbu a7, 2(a1)
+; RV64I-NEXT: lbu a7, 1(a1)
+; RV64I-NEXT: or a4, a4, a5
+; RV64I-NEXT: lbu a5, 2(a1)
; RV64I-NEXT: lbu a1, 3(a1)
-; RV64I-NEXT: slli a5, a5, 8
-; RV64I-NEXT: or a5, a5, a6
-; RV64I-NEXT: slli a7, a7, 16
+; RV64I-NEXT: slli a7, a7, 8
+; RV64I-NEXT: or a6, a7, a6
+; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli a1, a1, 24
-; RV64I-NEXT: or a1, a1, a7
; RV64I-NEXT: or a1, a1, a5
+; RV64I-NEXT: or a1, a1, a6
; RV64I-NEXT: slli a1, a1, 3
; RV64I-NEXT: slli a4, a4, 35
; RV64I-NEXT: or a5, a4, a1
@@ -1122,17 +1122,17 @@ define void @shl_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: slli t0, t0, 16
; RV64I-NEXT: slli t1, t1, 24
; RV64I-NEXT: or a7, t1, t0
-; RV64I-NEXT: or a6, a7, a6
-; RV64I-NEXT: lbu a7, 13(a0)
; RV64I-NEXT: lbu t0, 12(a0)
-; RV64I-NEXT: lbu t1, 14(a0)
+; RV64I-NEXT: lbu t1, 13(a0)
+; RV64I-NEXT: or a6, a7, a6
+; RV64I-NEXT: lbu a7, 14(a0)
; RV64I-NEXT: lbu a0, 15(a0)
-; RV64I-NEXT: slli a7, a7, 8
-; RV64I-NEXT: or a7, a7, t0
-; RV64I-NEXT: slli t1, t1, 16
+; RV64I-NEXT: slli t1, t1, 8
+; RV64I-NEXT: or t0, t1, t0
+; RV64I-NEXT: slli a7, a7, 16
; RV64I-NEXT: slli a0, a0, 24
-; RV64I-NEXT: or a0, a0, t1
; RV64I-NEXT: or a0, a0, a7
+; RV64I-NEXT: or a0, a0, t0
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: or a0, a0, a6
; RV64I-NEXT: sll a0, a0, a5
@@ -1187,46 +1187,46 @@ define void @shl_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli a6, a6, 24
; RV32I-NEXT: or a4, a6, a5
-; RV32I-NEXT: or a3, a4, a3
-; RV32I-NEXT: lbu a4, 5(a0)
; RV32I-NEXT: lbu a5, 4(a0)
-; RV32I-NEXT: lbu a6, 6(a0)
+; RV32I-NEXT: lbu a6, 5(a0)
+; RV32I-NEXT: or a3, a4, a3
+; RV32I-NEXT: lbu a4, 6(a0)
; RV32I-NEXT: lbu a7, 7(a0)
-; RV32I-NEXT: slli a4, a4, 8
-; RV32I-NEXT: or a4, a4, a5
-; RV32I-NEXT: slli a6, a6, 16
+; RV32I-NEXT: slli a6, a6, 8
+; RV32I-NEXT: or a5, a6, a5
+; RV32I-NEXT: slli a4, a4, 16
; RV32I-NEXT: slli a7, a7, 24
-; RV32I-NEXT: or a5, a7, a6
-; RV32I-NEXT: or a4, a5, a4
-; RV32I-NEXT: lbu a5, 9(a0)
+; RV32I-NEXT: or a4, a7, a4
; RV32I-NEXT: lbu a6, 8(a0)
-; RV32I-NEXT: lbu a7, 10(a0)
+; RV32I-NEXT: lbu a7, 9(a0)
+; RV32I-NEXT: or a4, a4, a5
+; RV32I-NEXT: lbu a5, 10(a0)
; RV32I-NEXT: lbu t0, 11(a0)
-; RV32I-NEXT: slli a5, a5, 8
-; RV32I-NEXT: or a5, a5, a6
-; RV32I-NEXT: slli a7, a7, 16
+; RV32I-NEXT: slli a7, a7, 8
+; RV32I-NEXT: or a6, a7, a6
+; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli t0, t0, 24
-; RV32I-NEXT: or a6, t0, a7
-; RV32I-NEXT: or a5, a6, a5
-; RV32I-NEXT: lbu a6, 13(a0)
+; RV32I-NEXT: or a5, t0, a5
; RV32I-NEXT: lbu a7, 12(a0)
-; RV32I-NEXT: lbu t0, 14(a0)
+; RV32I-NEXT: lbu t0, 13(a0)
+; RV32I-NEXT: or a5, a5, a6
+; RV32I-NEXT: lbu a6, 14(a0)
; RV32I-NEXT: lbu a0, 15(a0)
-; RV32I-NEXT: slli a6, a6, 8
-; RV32I-NEXT: or a6, a6, a7
-; RV32I-NEXT: slli t0, t0, 16
+; RV32I-NEXT: slli t0, t0, 8
+; RV32I-NEXT: or a7, t0, a7
+; RV32I-NEXT: slli a6, a6, 16
; RV32I-NEXT: slli a0, a0, 24
-; RV32I-NEXT: or a0, a0, t0
; RV32I-NEXT: or a0, a0, a6
-; RV32I-NEXT: lbu a6, 1(a1)
-; RV32I-NEXT: lbu a7, 0(a1)
-; RV32I-NEXT: lbu t0, 2(a1)
+; RV32I-NEXT: lbu a6, 0(a1)
+; RV32I-NEXT: lbu t0, 1(a1)
+; RV32I-NEXT: or a0, a0, a7
+; RV32I-NEXT: lbu a7, 2(a1)
; RV32I-NEXT: lbu a1, 3(a1)
-; RV32I-NEXT: slli a6, a6, 8
-; RV32I-NEXT: or a6, a6, a7
-; RV32I-NEXT: slli t0, t0, 16
+; RV32I-NEXT: slli t0, t0, 8
+; RV32I-NEXT: or a6, t0, a6
+; RV32I-NEXT: slli a7, a7, 16
; RV32I-NEXT: slli a1, a1, 24
-; RV32I-NEXT: or a1, a1, t0
+; RV32I-NEXT: or a1, a1, a7
; RV32I-NEXT: or a1, a1, a6
; RV32I-NEXT: sw zero, 12(sp)
; RV32I-NEXT: sw zero, 8(sp)
@@ -1239,53 +1239,53 @@ define void @shl_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: andi a0, a1, 12
; RV32I-NEXT: addi a3, sp, 16
; RV32I-NEXT: sub a3, a3, a0
-; RV32I-NEXT: lw a0, 4(a3)
+; RV32I-NEXT: lw a0, 0(a3)
+; RV32I-NEXT: lw a4, 4(a3)
; RV32I-NEXT: slli a1, a1, 3
-; RV32I-NEXT: lw a4, 0(a3)
-; RV32I-NEXT: sll a5, a0, a1
-; RV32I-NEXT: andi a6, a1, 24
-; RV32I-NEXT: xori a6, a6, 31
-; RV32I-NEXT: srli a7, a4, 1
-; RV32I-NEXT: lw t0, 12(a3)
-; RV32I-NEXT: lw a3, 8(a3)
-; RV32I-NEXT: srl a7, a7, a6
-; RV32I-NEXT: or a7, a5, a7
-; RV32I-NEXT: sll t0, t0, a1
-; RV32I-NEXT: srli t1, a3, 1
-; RV32I-NEXT: srl t1, t1, a6
-; RV32I-NEXT: or t1, t0, t1
+; RV32I-NEXT: lw a5, 8(a3)
+; RV32I-NEXT: lw a3, 12(a3)
+; RV32I-NEXT: sll a6, a4, a1
+; RV32I-NEXT: andi a7, a1, 24
+; RV32I-NEXT: xori a7, a7, 31
+; RV32I-NEXT: srli t0, a0, 1
+; RV32I-NEXT: srl t0, t0, a7
+; RV32I-NEXT: or t0, a6, t0
; RV32I-NEXT: sll a3, a3, a1
-; RV32I-NEXT: srli a0, a0, 1
-; RV32I-NEXT: srl a0, a0, a6
-; RV32I-NEXT: or a0, a3, a0
-; RV32I-NEXT: sll a1, a4, a1
-; RV32I-NEXT: sb a1, 0(a2)
+; RV32I-NEXT: srli t1, a5, 1
+; RV32I-NEXT: srl t1, t1, a7
+; RV32I-NEXT: or t1, a3, t1
+; RV32I-NEXT: sll a5, a5, a1
+; RV32I-NEXT: srli a4, a4, 1
+; RV32I-NEXT: srl a4, a4, a7
+; RV32I-NEXT: or a4, a5, a4
+; RV32I-NEXT: sll a0, a0, a1
+; RV32I-NEXT: sb a0, 0(a2)
+; RV32I-NEXT: srli a5, a5, 24
+; RV32I-NEXT: sb a5, 11(a2)
; RV32I-NEXT: srli a3, a3, 24
-; RV32I-NEXT: sb a3, 11(a2)
-; RV32I-NEXT: srli a3, t0, 24
; RV32I-NEXT: sb a3, 15(a2)
-; RV32I-NEXT: srli a3, a1, 16
-; RV32I-NEXT: sb a3, 2(a2)
-; RV32I-NEXT: srli a3, a1, 24
-; RV32I-NEXT: sb a3, 3(a2)
-; RV32I-NEXT: srli a1, a1, 8
-; RV32I-NEXT: sb a1, 1(a2)
-; RV32I-NEXT: srli a5, a5, 24
-; RV32I-NEXT: sb a5, 7(a2)
-; RV32I-NEXT: sb a0, 8(a2)
-; RV32I-NEXT: sb t1, 12(a2)
-; RV32I-NEXT: sb a7, 4(a2)
; RV32I-NEXT: srli a1, a0, 16
-; RV32I-NEXT: sb a1, 10(a2)
+; RV32I-NEXT: sb a1, 2(a2)
+; RV32I-NEXT: srli a1, a0, 24
+; RV32I-NEXT: sb a1, 3(a2)
; RV32I-NEXT: srli a0, a0, 8
-; RV32I-NEXT: sb a0, 9(a2)
+; RV32I-NEXT: sb a0, 1(a2)
+; RV32I-NEXT: srli a0, a6, 24
+; RV32I-NEXT: sb a0, 7(a2)
+; RV32I-NEXT: sb a4, 8(a2)
+; RV32I-NEXT: sb t1, 12(a2)
+; RV32I-NEXT: sb t0, 4(a2)
+; RV32I-NEXT: srli a0, a4, 16
+; RV32I-NEXT: sb a0, 10(a2)
+; RV32I-NEXT: srli a4, a4, 8
+; RV32I-NEXT: sb a4, 9(a2)
; RV32I-NEXT: srli a0, t1, 16
; RV32I-NEXT: sb a0, 14(a2)
; RV32I-NEXT: srli a0, t1, 8
; RV32I-NEXT: sb a0, 13(a2)
-; RV32I-NEXT: srli a0, a7, 16
+; RV32I-NEXT: srli a0, t0, 16
; RV32I-NEXT: sb a0, 6(a2)
-; RV32I-NEXT: srli a0, a7, 8
+; RV32I-NEXT: srli a0, t0, 8
; RV32I-NEXT: sb a0, 5(a2)
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
@@ -1309,39 +1309,39 @@ define void @shl_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw
; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli a6, a6, 24
; RV64I-NEXT: or a4, a6, a5
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: lbu a4, 5(a0)
; RV64I-NEXT: lbu a5, 4(a0)
-; RV64I-NEXT: lbu a6, 6(a0)
+; RV64I-NEXT: lbu a6, 5(a0)
+; RV64I-NEXT: or a3, a4, a3
+; RV64I-NEXT: lbu a4, 6(a0)
; RV64I-NEXT: lbu a7, 7(a0)
-; RV64I-NEXT: slli a4, a4, 8
-; RV64I-NEXT: or a4, a4, a5
-; RV64I-NEXT: slli a6, a6, 16
+; RV64I-NEXT: slli a6, a6, 8
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: slli a4, a4, 16
; RV64I-NEXT: slli a7, a7, 24
-; RV64I-NEXT: or a5, a7, a6
-; RV64I-NEXT: or a4, a5, a4
+; RV64I-NEXT: or a4, a7, a4
+; RV64I-NEXT: or a4, a4, a5
; RV64I-NEXT: slli a4, a4, 32
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: lbu a4, 5(a1)
; RV64I-NEXT: lbu a5, 4(a1)
-; RV64I-NEXT: lbu a6, 6(a1)
+; RV64I-NEXT: lbu a6, 5(a1)
+; RV64I-NEXT: or a3, a4, a3
+; RV64I-NEXT: lbu a4, 6(a1)
; RV64I-NEXT: lbu a7, 7(a1)
-; RV64I-NEXT: slli a4, a4, 8
-; RV64I-NEXT: or a4, a4, a5
-; RV64I-NEXT: slli a6, a6, 16
+; RV64I-NEXT: slli a6, a6, 8
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: slli a4, a4, 16
; RV64I-NEXT: slli a7, a7, 24
-; RV64I-NEXT: or a5, a7, a6
-; RV64I-NEXT: or a4, a5, a4
-; RV64I-NEXT: lbu a5, 1(a1)
+; RV64I-NEXT: or a4, a7, a4
; RV64I-NEXT: lbu a6, 0(a1)
-; RV64I-NEXT: lbu a7, 2(a1)
+; RV64I-NEXT: lbu a7, 1(a1)
+; RV64I-NEXT: or a4, a4, a5
+; RV64I-NEXT: lbu a5, 2(a1)
; RV64I-NEXT: lbu a1, 3(a1)
-; RV64I-NEXT: slli a5, a5, 8
-; RV64I-NEXT: or a5, a5, a6
-; RV64I-NEXT: slli a7, a7, 16
+; RV64I-NEXT: slli a7, a7, 8
+; RV64I-NEXT: or a6, a7, a6
+; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli a1, a1, 24
-; RV64I-NEXT: or a1, a1, a7
; RV64I-NEXT: or a1, a1, a5
+; RV64I-NEXT: or a1, a1, a6
; RV64I-NEXT: slli a1, a1, 5
; RV64I-NEXT: slli a4, a4, 37
; RV64I-NEXT: or a5, a4, a1
@@ -1361,17 +1361,17 @@ define void @shl_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw
; RV64I-NEXT: slli t0, t0, 16
; RV64I-NEXT: slli t1, t1, 24
; RV64I-NEXT: or a7, t1, t0
-; RV64I-NEXT: or a6, a7, a6
-; RV64I-NEXT: lbu a7, 13(a0)
; RV64I-NEXT: lbu t0, 12(a0)
-; RV64I-NEXT: lbu t1, 14(a0)
+; RV64I-NEXT: lbu t1, 13(a0)
+; RV64I-NEXT: or a6, a7, a6
+; RV64I-NEXT: lbu a7, 14(a0)
; RV64I-NEXT: lbu a0, 15(a0)
-; RV64I-NEXT: slli a7, a7, 8
-; RV64I-NEXT: or a7, a7, t0
-; RV64I-NEXT: slli t1, t1, 16
+; RV64I-NEXT: slli t1, t1, 8
+; RV64I-NEXT: or t0, t1, t0
+; RV64I-NEXT: slli a7, a7, 16
; RV64I-NEXT: slli a0, a0, 24
-; RV64I-NEXT: or a0, a0, t1
; RV64I-NEXT: or a0, a0, a7
+; RV64I-NEXT: or a0, a0, t0
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: or a0, a0, a6
; RV64I-NEXT: sll a0, a0, a5
@@ -1426,37 +1426,37 @@ define void @shl_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw
; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli a6, a6, 24
; RV32I-NEXT: or a4, a6, a5
-; RV32I-NEXT: or a3, a4, a3
-; RV32I-NEXT: lbu a4, 5(a0)
; RV32I-NEXT: lbu a5, 4(a0)
-; RV32I-NEXT: lbu a6, 6(a0)
+; RV32I-NEXT: lbu a6, 5(a0)
+; RV32I-NEXT: or a3, a4, a3
+; RV32I-NEXT: lbu a4, 6(a0)
; RV32I-NEXT: lbu a7, 7(a0)
-; RV32I-NEXT: slli a4, a4, 8
-; RV32I-NEXT: or a4, a4, a5
-; RV32I-NEXT: slli a6, a6, 16
+; RV32I-NEXT: slli a6, a6, 8
+; RV32I-NEXT: or a5, a6, a5
+; RV32I-NEXT: slli a4, a4, 16
; RV32I-NEXT: slli a7, a7, 24
-; RV32I-NEXT: or a5, a7, a6
-; RV32I-NEXT: or a4, a5, a4
-; RV32I-NEXT: lbu a5, 9(a0)
+; RV32I-NEXT: or a4, a7, a4
; RV32I-NEXT: lbu a6, 8(a0)
-; RV32I-NEXT: lbu a7, 10(a0)
+; RV32I-NEXT: lbu a7, 9(a0)
+; RV32I-NEXT: or a4, a4, a5
+; RV32I-NEXT: lbu a5, 10(a0)
; RV32I-NEXT: lbu t0, 11(a0)
-; RV32I-NEXT: slli a5, a5, 8
-; RV32I-NEXT: or a5, a5, a6
-; RV32I-NEXT: slli a7, a7, 16
+; RV32I-NEXT: slli a7, a7, 8
+; RV32I-NEXT: or a6, a7, a6
+; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli t0, t0, 24
-; RV32I-NEXT: or a6, t0, a7
-; RV32I-NEXT: or a5, a6, a5
-; RV32I-NEXT: lbu a6, 13(a0)
+; RV32I-NEXT: or a5, t0, a5
; RV32I-NEXT: lbu a7, 12(a0)
-; RV32I-NEXT: lbu t0, 14(a0)
+; RV32I-NEXT: lbu t0, 13(a0)
+; RV32I-NEXT: or a5, a5, a6
+; RV32I-NEXT: lbu a6, 14(a0)
; RV32I-NEXT: lbu a0, 15(a0)
-; RV32I-NEXT: slli a6, a6, 8
-; RV32I-NEXT: or a6, a6, a7
-; RV32I-NEXT: slli t0, t0, 16
+; RV32I-NEXT: slli t0, t0, 8
+; RV32I-NEXT: or a7, t0, a7
+; RV32I-NEXT: slli a6, a6, 16
; RV32I-NEXT: slli a0, a0, 24
-; RV32I-NEXT: or a0, a0, t0
; RV32I-NEXT: or a0, a0, a6
+; RV32I-NEXT: or a0, a0, a7
; RV32I-NEXT: lbu a1, 0(a1)
; RV32I-NEXT: sw zero, 12(sp)
; RV32I-NEXT: sw zero, 8(sp)
@@ -1525,39 +1525,39 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli a6, a6, 24
; RV64I-NEXT: or a4, a6, a5
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: lbu a4, 13(a0)
; RV64I-NEXT: lbu a5, 12(a0)
-; RV64I-NEXT: lbu a6, 14(a0)
+; RV64I-NEXT: lbu a6, 13(a0)
+; RV64I-NEXT: or a3, a4, a3
+; RV64I-NEXT: lbu a4, 14(a0)
; RV64I-NEXT: lbu a7, 15(a0)
-; RV64I-NEXT: slli a4, a4, 8
-; RV64I-NEXT: or a4, a4, a5
-; RV64I-NEXT: slli a6, a6, 16
+; RV64I-NEXT: slli a6, a6, 8
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: slli a4, a4, 16
; RV64I-NEXT: slli a7, a7, 24
-; RV64I-NEXT: or a5, a7, a6
-; RV64I-NEXT: or a4, a5, a4
+; RV64I-NEXT: or a4, a7, a4
+; RV64I-NEXT: or a4, a4, a5
; RV64I-NEXT: slli a5, a4, 32
-; RV64I-NEXT: or a3, a5, a3
-; RV64I-NEXT: lbu a5, 5(a1)
; RV64I-NEXT: lbu a6, 4(a1)
-; RV64I-NEXT: lbu a7, 6(a1)
+; RV64I-NEXT: lbu a7, 5(a1)
+; RV64I-NEXT: or a3, a5, a3
+; RV64I-NEXT: lbu a5, 6(a1)
; RV64I-NEXT: lbu t0, 7(a1)
-; RV64I-NEXT: slli a5, a5, 8
-; RV64I-NEXT: or a5, a5, a6
-; RV64I-NEXT: slli a7, a7, 16
+; RV64I-NEXT: slli a7, a7, 8
+; RV64I-NEXT: or a6, a7, a6
+; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli t0, t0, 24
-; RV64I-NEXT: or a6, t0, a7
-; RV64I-NEXT: or a5, a6, a5
-; RV64I-NEXT: lbu a6, 1(a1)
+; RV64I-NEXT: or a5, t0, a5
; RV64I-NEXT: lbu a7, 0(a1)
-; RV64I-NEXT: lbu t0, 2(a1)
+; RV64I-NEXT: lbu t0, 1(a1)
+; RV64I-NEXT: or a5, a5, a6
+; RV64I-NEXT: lbu a6, 2(a1)
; RV64I-NEXT: lbu a1, 3(a1)
-; RV64I-NEXT: slli a6, a6, 8
-; RV64I-NEXT: or a6, a6, a7
-; RV64I-NEXT: slli t0, t0, 16
+; RV64I-NEXT: slli t0, t0, 8
+; RV64I-NEXT: or a7, t0, a7
+; RV64I-NEXT: slli a6, a6, 16
; RV64I-NEXT: slli a1, a1, 24
-; RV64I-NEXT: or a1, a1, t0
; RV64I-NEXT: or a1, a1, a6
+; RV64I-NEXT: or a1, a1, a7
; RV64I-NEXT: slli a1, a1, 3
; RV64I-NEXT: slli a5, a5, 35
; RV64I-NEXT: or a5, a5, a1
@@ -1579,17 +1579,17 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: slli a7, a7, 16
; RV64I-NEXT: slli t0, t0, 24
; RV64I-NEXT: or a6, t0, a7
-; RV64I-NEXT: or a4, a6, a4
-; RV64I-NEXT: lbu a6, 5(a0)
; RV64I-NEXT: lbu a7, 4(a0)
-; RV64I-NEXT: lbu t0, 6(a0)
+; RV64I-NEXT: lbu t0, 5(a0)
+; RV64I-NEXT: or a4, a6, a4
+; RV64I-NEXT: lbu a6, 6(a0)
; RV64I-NEXT: lbu a0, 7(a0)
-; RV64I-NEXT: slli a6, a6, 8
-; RV64I-NEXT: or a6, a6, a7
-; RV64I-NEXT: slli t0, t0, 16
+; RV64I-NEXT: slli t0, t0, 8
+; RV64I-NEXT: or a7, t0, a7
+; RV64I-NEXT: slli a6, a6, 16
; RV64I-NEXT: slli a0, a0, 24
-; RV64I-NEXT: or a0, a0, t0
; RV64I-NEXT: or a0, a0, a6
+; RV64I-NEXT: or a0, a0, a7
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: or a0, a0, a4
; RV64I-NEXT: srl a0, a0, a5
@@ -1642,47 +1642,47 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli a6, a6, 24
; RV32I-NEXT: or a4, a6, a5
-; RV32I-NEXT: or a3, a4, a3
-; RV32I-NEXT: lbu a4, 5(a0)
; RV32I-NEXT: lbu a5, 4(a0)
-; RV32I-NEXT: lbu a6, 6(a0)
+; RV32I-NEXT: lbu a6, 5(a0)
+; RV32I-NEXT: or a3, a4, a3
+; RV32I-NEXT: lbu a4, 6(a0)
; RV32I-NEXT: lbu a7, 7(a0)
-; RV32I-NEXT: slli a4, a4, 8
-; RV32I-NEXT: or a4, a4, a5
-; RV32I-NEXT: slli a6, a6, 16
+; RV32I-NEXT: slli a6, a6, 8
+; RV32I-NEXT: or a5, a6, a5
+; RV32I-NEXT: slli a4, a4, 16
; RV32I-NEXT: slli a7, a7, 24
-; RV32I-NEXT: or a5, a7, a6
-; RV32I-NEXT: or a4, a5, a4
-; RV32I-NEXT: lbu a5, 9(a0)
+; RV32I-NEXT: or a4, a7, a4
; RV32I-NEXT: lbu a6, 8(a0)
-; RV32I-NEXT: lbu a7, 10(a0)
+; RV32I-NEXT: lbu a7, 9(a0)
+; RV32I-NEXT: or a4, a4, a5
+; RV32I-NEXT: lbu a5, 10(a0)
; RV32I-NEXT: lbu t0, 11(a0)
-; RV32I-NEXT: slli a5, a5, 8
-; RV32I-NEXT: or a5, a5, a6
-; RV32I-NEXT: slli a7, a7, 16
+; RV32I-NEXT: slli a7, a7, 8
+; RV32I-NEXT: or a6, a7, a6
+; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli t0, t0, 24
-; RV32I-NEXT: or a6, t0, a7
-; RV32I-NEXT: or a5, a6, a5
-; RV32I-NEXT: lbu a6, 13(a0)
+; RV32I-NEXT: or a5, t0, a5
; RV32I-NEXT: lbu a7, 12(a0)
-; RV32I-NEXT: lbu t0, 14(a0)
+; RV32I-NEXT: lbu t0, 13(a0)
+; RV32I-NEXT: or a5, a5, a6
+; RV32I-NEXT: lbu a6, 14(a0)
; RV32I-NEXT: lbu a0, 15(a0)
-; RV32I-NEXT: slli a6, a6, 8
-; RV32I-NEXT: or a6, a6, a7
-; RV32I-NEXT: slli t0, t0, 16
+; RV32I-NEXT: slli t0, t0, 8
+; RV32I-NEXT: or a7, t0, a7
+; RV32I-NEXT: slli a6, a6, 16
; RV32I-NEXT: slli a0, a0, 24
-; RV32I-NEXT: or a7, a0, t0
-; RV32I-NEXT: or a6, a7, a6
-; RV32I-NEXT: lbu a7, 1(a1)
+; RV32I-NEXT: or a6, a0, a6
; RV32I-NEXT: lbu t0, 0(a1)
-; RV32I-NEXT: lbu t1, 2(a1)
+; RV32I-NEXT: lbu t1, 1(a1)
+; RV32I-NEXT: or a6, a6, a7
+; RV32I-NEXT: lbu a7, 2(a1)
; RV32I-NEXT: lbu a1, 3(a1)
-; RV32I-NEXT: slli a7, a7, 8
-; RV32I-NEXT: or a7, a7, t0
-; RV32I-NEXT: slli t1, t1, 16
+; RV32I-NEXT: slli t1, t1, 8
+; RV32I-NEXT: or t0, t1, t0
+; RV32I-NEXT: slli a7, a7, 16
; RV32I-NEXT: slli a1, a1, 24
-; RV32I-NEXT: or a1, a1, t1
; RV32I-NEXT: or a1, a1, a7
+; RV32I-NEXT: or a1, a1, t0
; RV32I-NEXT: srai a0, a0, 31
; RV32I-NEXT: sw a0, 28(sp)
; RV32I-NEXT: sw a0, 24(sp)
@@ -1695,48 +1695,48 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: andi a0, a1, 12
; RV32I-NEXT: mv a3, sp
; RV32I-NEXT: add a0, a3, a0
-; RV32I-NEXT: lw a3, 4(a0)
+; RV32I-NEXT: lw a3, 0(a0)
+; RV32I-NEXT: lw a4, 4(a0)
; RV32I-NEXT: slli a1, a1, 3
-; RV32I-NEXT: srl a4, a3, a1
; RV32I-NEXT: lw a5, 8(a0)
-; RV32I-NEXT: andi a6, a1, 24
-; RV32I-NEXT: xori a6, a6, 31
-; RV32I-NEXT: lw a7, 0(a0)
-; RV32I-NEXT: slli t0, a5, 1
-; RV32I-NEXT: sll t0, t0, a6
-; RV32I-NEXT: or t0, a4, t0
-; RV32I-NEXT: srl a7, a7, a1
-; RV32I-NEXT: slli a3, a3, 1
; RV32I-NEXT: lw a0, 12(a0)
-; RV32I-NEXT: sll a3, a3, a6
-; RV32I-NEXT: or a3, a7, a3
+; RV32I-NEXT: srl a6, a4, a1
+; RV32I-NEXT: andi a7, a1, 24
+; RV32I-NEXT: xori a7, a7, 31
+; RV32I-NEXT: slli t0, a5, 1
+; RV32I-NEXT: sll t0, t0, a7
+; RV32I-NEXT: or t0, a6, t0
+; RV32I-NEXT: srl a3, a3, a1
+; RV32I-NEXT: slli a4, a4, 1
+; RV32I-NEXT: sll a4, a4, a7
+; RV32I-NEXT: or a4, a3, a4
; RV32I-NEXT: srl a5, a5, a1
; RV32I-NEXT: slli t1, a0, 1
-; RV32I-NEXT: sll a6, t1, a6
-; RV32I-NEXT: or a6, a5, a6
+; RV32I-NEXT: sll a7, t1, a7
+; RV32I-NEXT: or a7, a5, a7
; RV32I-NEXT: sra a0, a0, a1
; RV32I-NEXT: sb a5, 8(a2)
; RV32I-NEXT: sb a0, 12(a2)
-; RV32I-NEXT: sb a7, 0(a2)
-; RV32I-NEXT: sb a4, 4(a2)
+; RV32I-NEXT: sb a3, 0(a2)
+; RV32I-NEXT: sb a6, 4(a2)
; RV32I-NEXT: srli a1, a0, 16
; RV32I-NEXT: sb a1, 14(a2)
; RV32I-NEXT: srli a1, a0, 24
; RV32I-NEXT: sb a1, 15(a2)
; RV32I-NEXT: srli a0, a0, 8
; RV32I-NEXT: sb a0, 13(a2)
-; RV32I-NEXT: srli a0, a6, 16
+; RV32I-NEXT: srli a0, a7, 16
; RV32I-NEXT: sb a0, 10(a2)
-; RV32I-NEXT: srli a0, a6, 24
+; RV32I-NEXT: srli a0, a7, 24
; RV32I-NEXT: sb a0, 11(a2)
-; RV32I-NEXT: srli a0, a6, 8
+; RV32I-NEXT: srli a0, a7, 8
; RV32I-NEXT: sb a0, 9(a2)
-; RV32I-NEXT: srli a0, a3, 16
+; RV32I-NEXT: srli a0, a4, 16
; RV32I-NEXT: sb a0, 2(a2)
-; RV32I-NEXT: srli a0, a3, 24
+; RV32I-NEXT: srli a0, a4, 24
; RV32I-NEXT: sb a0, 3(a2)
-; RV32I-NEXT: srli a3, a3, 8
-; RV32I-NEXT: sb a3, 1(a2)
+; RV32I-NEXT: srli a4, a4, 8
+; RV32I-NEXT: sb a4, 1(a2)
; RV32I-NEXT: srli a0, t0, 16
; RV32I-NEXT: sb a0, 6(a2)
; RV32I-NEXT: srli a0, t0, 24
@@ -1765,39 +1765,39 @@ define void @ashr_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun
; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli a6, a6, 24
; RV64I-NEXT: or a4, a6, a5
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: lbu a4, 13(a0)
; RV64I-NEXT: lbu a5, 12(a0)
-; RV64I-NEXT: lbu a6, 14(a0)
+; RV64I-NEXT: lbu a6, 13(a0)
+; RV64I-NEXT: or a3, a4, a3
+; RV64I-NEXT: lbu a4, 14(a0)
; RV64I-NEXT: lbu a7, 15(a0)
-; RV64I-NEXT: slli a4, a4, 8
-; RV64I-NEXT: or a4, a4, a5
-; RV64I-NEXT: slli a6, a6, 16
+; RV64I-NEXT: slli a6, a6, 8
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: slli a4, a4, 16
; RV64I-NEXT: slli a7, a7, 24
-; RV64I-NEXT: or a5, a7, a6
-; RV64I-NEXT: or a4, a5, a4
+; RV64I-NEXT: or a4, a7, a4
+; RV64I-NEXT: or a4, a4, a5
; RV64I-NEXT: slli a5, a4, 32
-; RV64I-NEXT: or a3, a5, a3
-; RV64I-NEXT: lbu a5, 5(a1)
; RV64I-NEXT: lbu a6, 4(a1)
-; RV64I-NEXT: lbu a7, 6(a1)
+; RV64I-NEXT: lbu a7, 5(a1)
+; RV64I-NEXT: or a3, a5, a3
+; RV64I-NEXT: lbu a5, 6(a1)
; RV64I-NEXT: lbu t0, 7(a1)
-; RV64I-NEXT: slli a5, a5, 8
-; RV64I-NEXT: or a5, a5, a6
-; RV64I-NEXT: slli a7, a7, 16
+; RV64I-NEXT: slli a7, a7, 8
+; RV64I-NEXT: or a6, a7, a6
+; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli t0, t0, 24
-; RV64I-NEXT: or a6, t0, a7
-; RV64I-NEXT: or a5, a6, a5
-; RV64I-NEXT: lbu a6, 1(a1)
+; RV64I-NEXT: or a5, t0, a5
; RV64I-NEXT: lbu a7, 0(a1)
-; RV64I-NEXT: lbu t0, 2(a1)
+; RV64I-NEXT: lbu t0, 1(a1)
+; RV64I-NEXT: or a5, a5, a6
+; RV64I-NEXT: lbu a6, 2(a1)
; RV64I-NEXT: lbu a1, 3(a1)
-; RV64I-NEXT: slli a6, a6, 8
-; RV64I-NEXT: or a6, a6, a7
-; RV64I-NEXT: slli t0, t0, 16
+; RV64I-NEXT: slli t0, t0, 8
+; RV64I-NEXT: or a7, t0, a7
+; RV64I-NEXT: slli a6, a6, 16
; RV64I-NEXT: slli a1, a1, 24
-; RV64I-NEXT: or a1, a1, t0
; RV64I-NEXT: or a1, a1, a6
+; RV64I-NEXT: or a1, a1, a7
; RV64I-NEXT: slli a1, a1, 5
; RV64I-NEXT: slli a5, a5, 37
; RV64I-NEXT: or a5, a5, a1
@@ -1819,17 +1819,17 @@ define void @ashr_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun
; RV64I-NEXT: slli a7, a7, 16
; RV64I-NEXT: slli t0, t0, 24
; RV64I-NEXT: or a6, t0, a7
-; RV64I-NEXT: or a4, a6, a4
-; RV64I-NEXT: lbu a6, 5(a0)
; RV64I-NEXT: lbu a7, 4(a0)
-; RV64I-NEXT: lbu t0, 6(a0)
+; RV64I-NEXT: lbu t0, 5(a0)
+; RV64I-NEXT: or a4, a6, a4
+; RV64I-NEXT: lbu a6, 6(a0)
; RV64I-NEXT: lbu a0, 7(a0)
-; RV64I-NEXT: slli a6, a6, 8
-; RV64I-NEXT: or a6, a6, a7
-; RV64I-NEXT: slli t0, t0, 16
+; RV64I-NEXT: slli t0, t0, 8
+; RV64I-NEXT: or a7, t0, a7
+; RV64I-NEXT: slli a6, a6, 16
; RV64I-NEXT: slli a0, a0, 24
-; RV64I-NEXT: or a0, a0, t0
; RV64I-NEXT: or a0, a0, a6
+; RV64I-NEXT: or a0, a0, a7
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: or a0, a0, a4
; RV64I-NEXT: srl a0, a0, a5
@@ -1882,37 +1882,37 @@ define void @ashr_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun
; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli a6, a6, 24
; RV32I-NEXT: or a4, a6, a5
-; RV32I-NEXT: or a3, a4, a3
-; RV32I-NEXT: lbu a4, 5(a0)
; RV32I-NEXT: lbu a5, 4(a0)
-; RV32I-NEXT: lbu a6, 6(a0)
+; RV32I-NEXT: lbu a6, 5(a0)
+; RV32I-NEXT: or a3, a4, a3
+; RV32I-NEXT: lbu a4, 6(a0)
; RV32I-NEXT: lbu a7, 7(a0)
-; RV32I-NEXT: slli a4, a4, 8
-; RV32I-NEXT: or a4, a4, a5
-; RV32I-NEXT: slli a6, a6, 16
+; RV32I-NEXT: slli a6, a6, 8
+; RV32I-NEXT: or a5, a6, a5
+; RV32I-NEXT: slli a4, a4, 16
; RV32I-NEXT: slli a7, a7, 24
-; RV32I-NEXT: or a5, a7, a6
-; RV32I-NEXT: or a4, a5, a4
-; RV32I-NEXT: lbu a5, 9(a0)
+; RV32I-NEXT: or a4, a7, a4
; RV32I-NEXT: lbu a6, 8(a0)
-; RV32I-NEXT: lbu a7, 10(a0)
+; RV32I-NEXT: lbu a7, 9(a0)
+; RV32I-NEXT: or a4, a4, a5
+; RV32I-NEXT: lbu a5, 10(a0)
; RV32I-NEXT: lbu t0, 11(a0)
-; RV32I-NEXT: slli a5, a5, 8
-; RV32I-NEXT: or a5, a5, a6
-; RV32I-NEXT: slli a7, a7, 16
+; RV32I-NEXT: slli a7, a7, 8
+; RV32I-NEXT: or a6, a7, a6
+; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli t0, t0, 24
-; RV32I-NEXT: or a6, t0, a7
-; RV32I-NEXT: or a5, a6, a5
-; RV32I-NEXT: lbu a6, 13(a0)
+; RV32I-NEXT: or a5, t0, a5
; RV32I-NEXT: lbu a7, 12(a0)
-; RV32I-NEXT: lbu t0, 14(a0)
+; RV32I-NEXT: lbu t0, 13(a0)
+; RV32I-NEXT: or a5, a5, a6
+; RV32I-NEXT: lbu a6, 14(a0)
; RV32I-NEXT: lbu a0, 15(a0)
-; RV32I-NEXT: slli a6, a6, 8
-; RV32I-NEXT: or a6, a6, a7
-; RV32I-NEXT: slli t0, t0, 16
+; RV32I-NEXT: slli t0, t0, 8
+; RV32I-NEXT: or a7, t0, a7
+; RV32I-NEXT: slli a6, a6, 16
; RV32I-NEXT: slli a0, a0, 24
-; RV32I-NEXT: or a7, a0, t0
-; RV32I-NEXT: or a6, a7, a6
+; RV32I-NEXT: or a6, a0, a6
+; RV32I-NEXT: or a6, a6, a7
; RV32I-NEXT: lbu a1, 0(a1)
; RV32I-NEXT: srai a0, a0, 31
; RV32I-NEXT: sw a0, 28(sp)
@@ -1982,105 +1982,105 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli a6, a6, 24
; RV64I-NEXT: or a4, a6, a5
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: lbu a4, 5(a0)
; RV64I-NEXT: lbu a5, 4(a0)
-; RV64I-NEXT: lbu a6, 6(a0)
+; RV64I-NEXT: lbu a6, 5(a0)
+; RV64I-NEXT: or a3, a4, a3
+; RV64I-NEXT: lbu a4, 6(a0)
; RV64I-NEXT: lbu a7, 7(a0)
-; RV64I-NEXT: slli a4, a4, 8
-; RV64I-NEXT: or a4, a4, a5
-; RV64I-NEXT: slli a6, a6, 16
+; RV64I-NEXT: slli a6, a6, 8
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: slli a4, a4, 16
; RV64I-NEXT: slli a7, a7, 24
-; RV64I-NEXT: or a5, a7, a6
-; RV64I-NEXT: or a4, a5, a4
+; RV64I-NEXT: or a4, a7, a4
+; RV64I-NEXT: or a4, a4, a5
; RV64I-NEXT: slli a4, a4, 32
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: lbu a4, 9(a0)
; RV64I-NEXT: lbu a5, 8(a0)
-; RV64I-NEXT: lbu a6, 10(a0)
+; RV64I-NEXT: lbu a6, 9(a0)
+; RV64I-NEXT: or a3, a4, a3
+; RV64I-NEXT: lbu a4, 10(a0)
; RV64I-NEXT: lbu a7, 11(a0)
-; RV64I-NEXT: slli a4, a4, 8
-; RV64I-NEXT: or a4, a4, a5
-; RV64I-NEXT: slli a6, a6, 16
+; RV64I-NEXT: slli a6, a6, 8
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: slli a4, a4, 16
; RV64I-NEXT: slli a7, a7, 24
-; RV64I-NEXT: or a5, a7, a6
-; RV64I-NEXT: or a4, a5, a4
-; RV64I-NEXT: lbu a5, 13(a0)
+; RV64I-NEXT: or a4, a7, a4
; RV64I-NEXT: lbu a6, 12(a0)
-; RV64I-NEXT: lbu a7, 14(a0)
+; RV64I-NEXT: lbu a7, 13(a0)
+; RV64I-NEXT: or a4, a4, a5
+; RV64I-NEXT: lbu a5, 14(a0)
; RV64I-NEXT: lbu t0, 15(a0)
-; RV64I-NEXT: slli a5, a5, 8
-; RV64I-NEXT: or a5, a5, a6
-; RV64I-NEXT: slli a7, a7, 16
+; RV64I-NEXT: slli a7, a7, 8
+; RV64I-NEXT: or a6, a7, a6
+; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli t0, t0, 24
-; RV64I-NEXT: or a6, t0, a7
-; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: or a5, t0, a5
+; RV64I-NEXT: or a5, a5, a6
; RV64I-NEXT: slli a5, a5, 32
-; RV64I-NEXT: or a4, a5, a4
-; RV64I-NEXT: lbu a5, 17(a0)
; RV64I-NEXT: lbu a6, 16(a0)
-; RV64I-NEXT: lbu a7, 18(a0)
+; RV64I-NEXT: lbu a7, 17(a0)
+; RV64I-NEXT: or a4, a5, a4
+; RV64I-NEXT: lbu a5, 18(a0)
; RV64I-NEXT: lbu t0, 19(a0)
-; RV64I-NEXT: slli a5, a5, 8
-; RV64I-NEXT: or a5, a5, a6
-; RV64I-NEXT: slli a7, a7, 16
+; RV64I-NEXT: slli a7, a7, 8
+; RV64I-NEXT: or a6, a7, a6
+; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli t0, t0, 24
-; RV64I-NEXT: or a6, t0, a7
-; RV64I-NEXT: or a5, a6, a5
-; RV64I-NEXT: lbu a6, 21(a0)
+; RV64I-NEXT: or a5, t0, a5
; RV64I-NEXT: lbu a7, 20(a0)
-; RV64I-NEXT: lbu t0, 22(a0)
+; RV64I-NEXT: lbu t0, 21(a0)
+; RV64I-NEXT: or a5, a5, a6
+; RV64I-NEXT: lbu a6, 22(a0)
; RV64I-NEXT: lbu t1, 23(a0)
-; RV64I-NEXT: slli a6, a6, 8
-; RV64I-NEXT: or a6, a6, a7
-; RV64I-NEXT: slli t0, t0, 16
+; RV64I-NEXT: slli t0, t0, 8
+; RV64I-NEXT: or a7, t0, a7
+; RV64I-NEXT: slli a6, a6, 16
; RV64I-NEXT: slli t1, t1, 24
-; RV64I-NEXT: or a7, t1, t0
-; RV64I-NEXT: or a6, a7, a6
+; RV64I-NEXT: or a6, t1, a6
+; RV64I-NEXT: or a6, a6, a7
; RV64I-NEXT: slli a6, a6, 32
-; RV64I-NEXT: or a5, a6, a5
-; RV64I-NEXT: lbu a6, 25(a0)
; RV64I-NEXT: lbu a7, 24(a0)
-; RV64I-NEXT: lbu t0, 26(a0)
+; RV64I-NEXT: lbu t0, 25(a0)
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: lbu a6, 26(a0)
; RV64I-NEXT: lbu t1, 27(a0)
-; RV64I-NEXT: slli a6, a6, 8
-; RV64I-NEXT: or a6, a6, a7
-; RV64I-NEXT: slli t0, t0, 16
+; RV64I-NEXT: slli t0, t0, 8
+; RV64I-NEXT: or a7, t0, a7
+; RV64I-NEXT: slli a6, a6, 16
; RV64I-NEXT: slli t1, t1, 24
-; RV64I-NEXT: or a7, t1, t0
-; RV64I-NEXT: or a6, a7, a6
-; RV64I-NEXT: lbu a7, 29(a0)
+; RV64I-NEXT: or a6, t1, a6
; RV64I-NEXT: lbu t0, 28(a0)
-; RV64I-NEXT: lbu t1, 30(a0)
+; RV64I-NEXT: lbu t1, 29(a0)
+; RV64I-NEXT: or a6, a6, a7
+; RV64I-NEXT: lbu a7, 30(a0)
; RV64I-NEXT: lbu a0, 31(a0)
-; RV64I-NEXT: slli a7, a7, 8
-; RV64I-NEXT: or a7, a7, t0
-; RV64I-NEXT: slli t1, t1, 16
+; RV64I-NEXT: slli t1, t1, 8
+; RV64I-NEXT: or t0, t1, t0
+; RV64I-NEXT: slli a7, a7, 16
; RV64I-NEXT: slli a0, a0, 24
-; RV64I-NEXT: or a0, a0, t1
; RV64I-NEXT: or a0, a0, a7
+; RV64I-NEXT: or a0, a0, t0
; RV64I-NEXT: slli a0, a0, 32
-; RV64I-NEXT: or a0, a0, a6
-; RV64I-NEXT: lbu a6, 1(a1)
; RV64I-NEXT: lbu a7, 0(a1)
-; RV64I-NEXT: lbu t0, 2(a1)
+; RV64I-NEXT: lbu t0, 1(a1)
+; RV64I-NEXT: or a0, a0, a6
+; RV64I-NEXT: lbu a6, 2(a1)
; RV64I-NEXT: lbu t1, 3(a1)
-; RV64I-NEXT: slli a6, a6, 8
-; RV64I-NEXT: or a6, a6, a7
-; RV64I-NEXT: slli t0, t0, 16
+; RV64I-NEXT: slli t0, t0, 8
+; RV64I-NEXT: or a7, t0, a7
+; RV64I-NEXT: slli a6, a6, 16
; RV64I-NEXT: slli t1, t1, 24
-; RV64I-NEXT: or a7, t1, t0
-; RV64I-NEXT: or a6, a7, a6
-; RV64I-NEXT: lbu a7, 5(a1)
+; RV64I-NEXT: or a6, t1, a6
; RV64I-NEXT: lbu t0, 4(a1)
-; RV64I-NEXT: lbu t1, 6(a1)
+; RV64I-NEXT: lbu t1, 5(a1)
+; RV64I-NEXT: or a6, a6, a7
+; RV64I-NEXT: lbu a7, 6(a1)
; RV64I-NEXT: lbu a1, 7(a1)
-; RV64I-NEXT: slli a7, a7, 8
-; RV64I-NEXT: or a7, a7, t0
-; RV64I-NEXT: slli t1, t1, 16
+; RV64I-NEXT: slli t1, t1, 8
+; RV64I-NEXT: or t0, t1, t0
+; RV64I-NEXT: slli a7, a7, 16
; RV64I-NEXT: slli a1, a1, 24
-; RV64I-NEXT: or a1, a1, t1
; RV64I-NEXT: or a1, a1, a7
+; RV64I-NEXT: or a1, a1, t0
; RV64I-NEXT: slli a1, a1, 32
; RV64I-NEXT: or a1, a1, a6
; RV64I-NEXT: sd zero, 56(sp)
@@ -2093,31 +2093,31 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: sd a3, 0(sp)
; RV64I-NEXT: andi a0, a1, 24
; RV64I-NEXT: mv a3, sp
-; RV64I-NEXT: add a3, a3, a0
-; RV64I-NEXT: ld a4, 8(a3)
+; RV64I-NEXT: add a0, a3, a0
+; RV64I-NEXT: ld a3, 0(a0)
+; RV64I-NEXT: ld a4, 8(a0)
; RV64I-NEXT: slli a1, a1, 3
-; RV64I-NEXT: srl a5, a4, a1
-; RV64I-NEXT: ld a6, 16(a3)
+; RV64I-NEXT: ld a5, 16(a0)
+; RV64I-NEXT: ld a6, 24(a0)
+; RV64I-NEXT: srl a7, a4, a1
; RV64I-NEXT: andi a0, a1, 56
-; RV64I-NEXT: xori a7, a0, 63
-; RV64I-NEXT: ld t0, 0(a3)
-; RV64I-NEXT: slli a0, a6, 1
-; RV64I-NEXT: sll a0, a0, a7
-; RV64I-NEXT: or a0, a5, a0
-; RV64I-NEXT: srl t0, t0, a1
+; RV64I-NEXT: xori t0, a0, 63
+; RV64I-NEXT: slli a0, a5, 1
+; RV64I-NEXT: sll a0, a0, t0
+; RV64I-NEXT: or a0, a7, a0
+; RV64I-NEXT: srl a3, a3, a1
; RV64I-NEXT: slli a4, a4, 1
-; RV64I-NEXT: ld a3, 24(a3)
-; RV64I-NEXT: sll a4, a4, a7
-; RV64I-NEXT: or a4, t0, a4
-; RV64I-NEXT: srl a6, a6, a1
-; RV64I-NEXT: slli t1, a3, 1
-; RV64I-NEXT: sll a7, t1, a7
-; RV64I-NEXT: or a7, a6, a7
-; RV64I-NEXT: srl a1, a3, a1
-; RV64I-NEXT: sb a6, 16(a2)
+; RV64I-NEXT: sll a4, a4, t0
+; RV64I-NEXT: or a4, a3, a4
+; RV64I-NEXT: srl a5, a5, a1
+; RV64I-NEXT: slli t1, a6, 1
+; RV64I-NEXT: sll t0, t1, t0
+; RV64I-NEXT: or t0, a5, t0
+; RV64I-NEXT: srl a1, a6, a1
+; RV64I-NEXT: sb a5, 16(a2)
; RV64I-NEXT: sb a1, 24(a2)
-; RV64I-NEXT: sb t0, 0(a2)
-; RV64I-NEXT: sb a5, 8(a2)
+; RV64I-NEXT: sb a3, 0(a2)
+; RV64I-NEXT: sb a7, 8(a2)
; RV64I-NEXT: srli a3, a1, 56
; RV64I-NEXT: sb a3, 31(a2)
; RV64I-NEXT: srli a3, a1, 48
@@ -2132,19 +2132,19 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: sb a3, 26(a2)
; RV64I-NEXT: srli a1, a1, 8
; RV64I-NEXT: sb a1, 25(a2)
-; RV64I-NEXT: srli a1, a7, 56
+; RV64I-NEXT: srli a1, t0, 56
; RV64I-NEXT: sb a1, 23(a2)
-; RV64I-NEXT: srli a1, a7, 48
+; RV64I-NEXT: srli a1, t0, 48
; RV64I-NEXT: sb a1, 22(a2)
-; RV64I-NEXT: srli a1, a7, 40
+; RV64I-NEXT: srli a1, t0, 40
; RV64I-NEXT: sb a1, 21(a2)
-; RV64I-NEXT: srli a1, a7, 32
+; RV64I-NEXT: srli a1, t0, 32
; RV64I-NEXT: sb a1, 20(a2)
-; RV64I-NEXT: srli a1, a7, 24
+; RV64I-NEXT: srli a1, t0, 24
; RV64I-NEXT: sb a1, 19(a2)
-; RV64I-NEXT: srli a1, a7, 16
+; RV64I-NEXT: srli a1, t0, 16
; RV64I-NEXT: sb a1, 18(a2)
-; RV64I-NEXT: srli a1, a7, 8
+; RV64I-NEXT: srli a1, t0, 8
; RV64I-NEXT: sb a1, 17(a2)
; RV64I-NEXT: srli a1, a4, 56
; RV64I-NEXT: sb a1, 7(a2)
@@ -2192,86 +2192,86 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli a6, a6, 24
; RV32I-NEXT: or a4, a6, a5
-; RV32I-NEXT: or a3, a4, a3
-; RV32I-NEXT: lbu a4, 5(a0)
; RV32I-NEXT: lbu a5, 4(a0)
-; RV32I-NEXT: lbu a6, 6(a0)
+; RV32I-NEXT: lbu a6, 5(a0)
+; RV32I-NEXT: or a3, a4, a3
+; RV32I-NEXT: lbu a4, 6(a0)
; RV32I-NEXT: lbu a7, 7(a0)
-; RV32I-NEXT: slli a4, a4, 8
-; RV32I-NEXT: or a4, a4, a5
-; RV32I-NEXT: slli a6, a6, 16
+; RV32I-NEXT: slli a6, a6, 8
+; RV32I-NEXT: or a5, a6, a5
+; RV32I-NEXT: slli a4, a4, 16
; RV32I-NEXT: slli a7, a7, 24
-; RV32I-NEXT: or a5, a7, a6
-; RV32I-NEXT: or a4, a5, a4
-; RV32I-NEXT: lbu a5, 9(a0)
+; RV32I-NEXT: or a4, a7, a4
; RV32I-NEXT: lbu a6, 8(a0)
-; RV32I-NEXT: lbu a7, 10(a0)
+; RV32I-NEXT: lbu a7, 9(a0)
+; RV32I-NEXT: or a4, a4, a5
+; RV32I-NEXT: lbu a5, 10(a0)
; RV32I-NEXT: lbu t0, 11(a0)
-; RV32I-NEXT: slli a5, a5, 8
-; RV32I-NEXT: or a5, a5, a6
-; RV32I-NEXT: slli a7, a7, 16
+; RV32I-NEXT: slli a7, a7, 8
+; RV32I-NEXT: or a6, a7, a6
+; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli t0, t0, 24
-; RV32I-NEXT: or a6, t0, a7
-; RV32I-NEXT: or a5, a6, a5
-; RV32I-NEXT: lbu a6, 13(a0)
+; RV32I-NEXT: or a5, t0, a5
; RV32I-NEXT: lbu a7, 12(a0)
-; RV32I-NEXT: lbu t0, 14(a0)
+; RV32I-NEXT: lbu t0, 13(a0)
+; RV32I-NEXT: or a5, a5, a6
+; RV32I-NEXT: lbu a6, 14(a0)
; RV32I-NEXT: lbu t1, 15(a0)
-; RV32I-NEXT: slli a6, a6, 8
-; RV32I-NEXT: or a6, a6, a7
-; RV32I-NEXT: slli t0, t0, 16
-; RV32I-NEXT: slli t1, t1, 24
-; RV32I-NEXT: or a7, t1, t0
-; RV32I-NEXT: or a6, a7, a6
-; RV32I-NEXT: lbu a7, 17(a0)
+; RV32I-NEXT: slli t0, t0, 8
+; RV32I-NEXT: or a7, t0, a7
+; RV32I-NEXT: slli a6, a6, 16
+; RV32I-NEXT: slli t1, t1, 24
+; RV32I-NEXT: or a6, t1, a6
; RV32I-NEXT: lbu t0, 16(a0)
-; RV32I-NEXT: lbu t1, 18(a0)
+; RV32I-NEXT: lbu t1, 17(a0)
+; RV32I-NEXT: or a6, a6, a7
+; RV32I-NEXT: lbu a7, 18(a0)
; RV32I-NEXT: lbu t2, 19(a0)
-; RV32I-NEXT: slli a7, a7, 8
-; RV32I-NEXT: or a7, a7, t0
-; RV32I-NEXT: slli t1, t1, 16
+; RV32I-NEXT: slli t1, t1, 8
+; RV32I-NEXT: or t0, t1, t0
+; RV32I-NEXT: slli a7, a7, 16
; RV32I-NEXT: slli t2, t2, 24
-; RV32I-NEXT: or t0, t2, t1
-; RV32I-NEXT: or a7, t0, a7
-; RV32I-NEXT: lbu t0, 21(a0)
+; RV32I-NEXT: or a7, t2, a7
; RV32I-NEXT: lbu t1, 20(a0)
-; RV32I-NEXT: lbu t2, 22(a0)
+; RV32I-NEXT: lbu t2, 21(a0)
+; RV32I-NEXT: or a7, a7, t0
+; RV32I-NEXT: lbu t0, 22(a0)
; RV32I-NEXT: lbu t3, 23(a0)
-; RV32I-NEXT: slli t0, t0, 8
-; RV32I-NEXT: or t0, t0, t1
-; RV32I-NEXT: slli t2, t2, 16
+; RV32I-NEXT: slli t2, t2, 8
+; RV32I-NEXT: or t1, t2, t1
+; RV32I-NEXT: slli t0, t0, 16
; RV32I-NEXT: slli t3, t3, 24
-; RV32I-NEXT: or t1, t3, t2
-; RV32I-NEXT: or t0, t1, t0
-; RV32I-NEXT: lbu t1, 25(a0)
+; RV32I-NEXT: or t0, t3, t0
; RV32I-NEXT: lbu t2, 24(a0)
-; RV32I-NEXT: lbu t3, 26(a0)
+; RV32I-NEXT: lbu t3, 25(a0)
+; RV32I-NEXT: or t0, t0, t1
+; RV32I-NEXT: lbu t1, 26(a0)
; RV32I-NEXT: lbu t4, 27(a0)
-; RV32I-NEXT: slli t1, t1, 8
-; RV32I-NEXT: or t1, t1, t2
-; RV32I-NEXT: slli t3, t3, 16
+; RV32I-NEXT: slli t3, t3, 8
+; RV32I-NEXT: or t2, t3, t2
+; RV32I-NEXT: slli t1, t1, 16
; RV32I-NEXT: slli t4, t4, 24
-; RV32I-NEXT: or t2, t4, t3
-; RV32I-NEXT: or t1, t2, t1
-; RV32I-NEXT: lbu t2, 29(a0)
+; RV32I-NEXT: or t1, t4, t1
; RV32I-NEXT: lbu t3, 28(a0)
-; RV32I-NEXT: lbu t4, 30(a0)
+; RV32I-NEXT: lbu t4, 29(a0)
+; RV32I-NEXT: or t1, t1, t2
+; RV32I-NEXT: lbu t2, 30(a0)
; RV32I-NEXT: lbu a0, 31(a0)
-; RV32I-NEXT: slli t2, t2, 8
-; RV32I-NEXT: or t2, t2, t3
-; RV32I-NEXT: slli t4, t4, 16
+; RV32I-NEXT: slli t4, t4, 8
+; RV32I-NEXT: or t3, t4, t3
+; RV32I-NEXT: slli t2, t2, 16
; RV32I-NEXT: slli a0, a0, 24
-; RV32I-NEXT: or a0, a0, t4
; RV32I-NEXT: or a0, a0, t2
-; RV32I-NEXT: lbu t2, 1(a1)
-; RV32I-NEXT: lbu t3, 0(a1)
-; RV32I-NEXT: lbu t4, 2(a1)
+; RV32I-NEXT: lbu t2, 0(a1)
+; RV32I-NEXT: lbu t4, 1(a1)
+; RV32I-NEXT: or a0, a0, t3
+; RV32I-NEXT: lbu t3, 2(a1)
; RV32I-NEXT: lbu a1, 3(a1)
-; RV32I-NEXT: slli t2, t2, 8
-; RV32I-NEXT: or t2, t2, t3
-; RV32I-NEXT: slli t4, t4, 16
+; RV32I-NEXT: slli t4, t4, 8
+; RV32I-NEXT: or t2, t4, t2
+; RV32I-NEXT: slli t3, t3, 16
; RV32I-NEXT: slli a1, a1, 24
-; RV32I-NEXT: or a1, a1, t4
+; RV32I-NEXT: or a1, a1, t3
; RV32I-NEXT: or a1, a1, t2
; RV32I-NEXT: sw zero, 60(sp)
; RV32I-NEXT: sw zero, 56(sp)
@@ -2291,54 +2291,54 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: sw a3, 0(sp)
; RV32I-NEXT: andi a0, a1, 28
; RV32I-NEXT: mv a3, sp
-; RV32I-NEXT: add a5, a3, a0
-; RV32I-NEXT: lw a3, 4(a5)
-; RV32I-NEXT: slli a6, a1, 3
-; RV32I-NEXT: srl a4, a3, a6
-; RV32I-NEXT: lw a7, 8(a5)
-; RV32I-NEXT: andi a0, a6, 24
-; RV32I-NEXT: xori t0, a0, 31
-; RV32I-NEXT: lw a1, 0(a5)
-; RV32I-NEXT: slli a0, a7, 1
-; RV32I-NEXT: sll a0, a0, t0
+; RV32I-NEXT: add a3, a3, a0
+; RV32I-NEXT: lw a6, 0(a3)
+; RV32I-NEXT: lw a7, 4(a3)
+; RV32I-NEXT: slli a5, a1, 3
+; RV32I-NEXT: lw t0, 8(a3)
+; RV32I-NEXT: lw t1, 12(a3)
+; RV32I-NEXT: srl a4, a7, a5
+; RV32I-NEXT: andi a0, a5, 24
+; RV32I-NEXT: xori t2, a0, 31
+; RV32I-NEXT: slli a0, t0, 1
+; RV32I-NEXT: sll a0, a0, t2
; RV32I-NEXT: or a0, a4, a0
-; RV32I-NEXT: srl t1, a1, a6
-; RV32I-NEXT: slli a3, a3, 1
-; RV32I-NEXT: lw t2, 12(a5)
-; RV32I-NEXT: lw t3, 16(a5)
-; RV32I-NEXT: sll a1, a3, t0
-; RV32I-NEXT: or a1, t1, a1
-; RV32I-NEXT: srl t4, t2, a6
+; RV32I-NEXT: srl a6, a6, a5
+; RV32I-NEXT: slli a7, a7, 1
+; RV32I-NEXT: sll a1, a7, t2
+; RV32I-NEXT: or a1, a6, a1
+; RV32I-NEXT: srl a7, t1, a5
+; RV32I-NEXT: lw t3, 16(a3)
+; RV32I-NEXT: lw t4, 20(a3)
+; RV32I-NEXT: lw t5, 24(a3)
+; RV32I-NEXT: lw t6, 28(a3)
; RV32I-NEXT: slli a3, t3, 1
-; RV32I-NEXT: sll a3, a3, t0
-; RV32I-NEXT: or a3, t4, a3
-; RV32I-NEXT: srl a7, a7, a6
-; RV32I-NEXT: slli t2, t2, 1
-; RV32I-NEXT: lw t5, 20(a5)
-; RV32I-NEXT: lw t6, 24(a5)
-; RV32I-NEXT: sll t2, t2, t0
-; RV32I-NEXT: or t2, a7, t2
-; RV32I-NEXT: srl s0, t5, a6
-; RV32I-NEXT: slli s1, t6, 1
-; RV32I-NEXT: sll s1, s1, t0
+; RV32I-NEXT: sll a3, a3, t2
+; RV32I-NEXT: or a3, a7, a3
+; RV32I-NEXT: srl t0, t0, a5
+; RV32I-NEXT: slli t1, t1, 1
+; RV32I-NEXT: sll t1, t1, t2
+; RV32I-NEXT: or t1, t0, t1
+; RV32I-NEXT: srl s0, t4, a5
+; RV32I-NEXT: slli s1, t5, 1
+; RV32I-NEXT: sll s1, s1, t2
; RV32I-NEXT: or s1, s0, s1
-; RV32I-NEXT: srl t3, t3, a6
-; RV32I-NEXT: slli t5, t5, 1
-; RV32I-NEXT: lw a5, 28(a5)
-; RV32I-NEXT: sll t5, t5, t0
-; RV32I-NEXT: or t5, t3, t5
-; RV32I-NEXT: srl t6, t6, a6
-; RV32I-NEXT: slli s2, a5, 1
-; RV32I-NEXT: sll t0, s2, t0
-; RV32I-NEXT: or t0, t6, t0
-; RV32I-NEXT: srl a5, a5, a6
-; RV32I-NEXT: sb t6, 24(a2)
+; RV32I-NEXT: srl t3, t3, a5
+; RV32I-NEXT: slli t4, t4, 1
+; RV32I-NEXT: sll t4, t4, t2
+; RV32I-NEXT: or t4, t3, t4
+; RV32I-NEXT: srl t5, t5, a5
+; RV32I-NEXT: slli s2, t6, 1
+; RV32I-NEXT: sll t2, s2, t2
+; RV32I-NEXT: or t2, t5, t2
+; RV32I-NEXT: srl a5, t6, a5
+; RV32I-NEXT: sb t5, 24(a2)
; RV32I-NEXT: sb a5, 28(a2)
; RV32I-NEXT: sb t3, 16(a2)
; RV32I-NEXT: sb s0, 20(a2)
-; RV32I-NEXT: sb a7, 8(a2)
-; RV32I-NEXT: sb t4, 12(a2)
-; RV32I-NEXT: sb t1, 0(a2)
+; RV32I-NEXT: sb t0, 8(a2)
+; RV32I-NEXT: sb a7, 12(a2)
+; RV32I-NEXT: sb a6, 0(a2)
; RV32I-NEXT: sb a4, 4(a2)
; RV32I-NEXT: srli a4, a5, 24
; RV32I-NEXT: sb a4, 31(a2)
@@ -2346,17 +2346,17 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: sb a4, 30(a2)
; RV32I-NEXT: srli a5, a5, 8
; RV32I-NEXT: sb a5, 29(a2)
-; RV32I-NEXT: srli a4, t0, 24
+; RV32I-NEXT: srli a4, t2, 24
; RV32I-NEXT: sb a4, 27(a2)
-; RV32I-NEXT: srli a4, t0, 16
+; RV32I-NEXT: srli a4, t2, 16
; RV32I-NEXT: sb a4, 26(a2)
-; RV32I-NEXT: srli a4, t0, 8
+; RV32I-NEXT: srli a4, t2, 8
; RV32I-NEXT: sb a4, 25(a2)
-; RV32I-NEXT: srli a4, t5, 24
+; RV32I-NEXT: srli a4, t4, 24
; RV32I-NEXT: sb a4, 19(a2)
-; RV32I-NEXT: srli a4, t5, 16
+; RV32I-NEXT: srli a4, t4, 16
; RV32I-NEXT: sb a4, 18(a2)
-; RV32I-NEXT: srli a4, t5, 8
+; RV32I-NEXT: srli a4, t4, 8
; RV32I-NEXT: sb a4, 17(a2)
; RV32I-NEXT: srli a4, s1, 24
; RV32I-NEXT: sb a4, 23(a2)
@@ -2364,11 +2364,11 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: sb a4, 22(a2)
; RV32I-NEXT: srli s1, s1, 8
; RV32I-NEXT: sb s1, 21(a2)
-; RV32I-NEXT: srli a4, t2, 24
+; RV32I-NEXT: srli a4, t1, 24
; RV32I-NEXT: sb a4, 11(a2)
-; RV32I-NEXT: srli a4, t2, 16
+; RV32I-NEXT: srli a4, t1, 16
; RV32I-NEXT: sb a4, 10(a2)
-; RV32I-NEXT: srli a4, t2, 8
+; RV32I-NEXT: srli a4, t1, 8
; RV32I-NEXT: sb a4, 9(a2)
; RV32I-NEXT: srli a4, a3, 24
; RV32I-NEXT: sb a4, 15(a2)
@@ -2414,105 +2414,105 @@ define void @lshr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun
; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli a6, a6, 24
; RV64I-NEXT: or a4, a6, a5
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: lbu a4, 5(a0)
; RV64I-NEXT: lbu a5, 4(a0)
-; RV64I-NEXT: lbu a6, 6(a0)
+; RV64I-NEXT: lbu a6, 5(a0)
+; RV64I-NEXT: or a3, a4, a3
+; RV64I-NEXT: lbu a4, 6(a0)
; RV64I-NEXT: lbu a7, 7(a0)
-; RV64I-NEXT: slli a4, a4, 8
-; RV64I-NEXT: or a4, a4, a5
-; RV64I-NEXT: slli a6, a6, 16
+; RV64I-NEXT: slli a6, a6, 8
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: slli a4, a4, 16
; RV64I-NEXT: slli a7, a7, 24
-; RV64I-NEXT: or a5, a7, a6
-; RV64I-NEXT: or a4, a5, a4
+; RV64I-NEXT: or a4, a7, a4
+; RV64I-NEXT: or a4, a4, a5
; RV64I-NEXT: slli a4, a4, 32
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: lbu a4, 9(a0)
; RV64I-NEXT: lbu a5, 8(a0)
-; RV64I-NEXT: lbu a6, 10(a0)
+; RV64I-NEXT: lbu a6, 9(a0)
+; RV64I-NEXT: or a3, a4, a3
+; RV64I-NEXT: lbu a4, 10(a0)
; RV64I-NEXT: lbu a7, 11(a0)
-; RV64I-NEXT: slli a4, a4, 8
-; RV64I-NEXT: or a4, a4, a5
-; RV64I-NEXT: slli a6, a6, 16
+; RV64I-NEXT: slli a6, a6, 8
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: slli a4, a4, 16
; RV64I-NEXT: slli a7, a7, 24
-; RV64I-NEXT: or a5, a7, a6
-; RV64I-NEXT: or a4, a5, a4
-; RV64I-NEXT: lbu a5, 13(a0)
+; RV64I-NEXT: or a4, a7, a4
; RV64I-NEXT: lbu a6, 12(a0)
-; RV64I-NEXT: lbu a7, 14(a0)
+; RV64I-NEXT: lbu a7, 13(a0)
+; RV64I-NEXT: or a4, a4, a5
+; RV64I-NEXT: lbu a5, 14(a0)
; RV64I-NEXT: lbu t0, 15(a0)
-; RV64I-NEXT: slli a5, a5, 8
-; RV64I-NEXT: or a5, a5, a6
-; RV64I-NEXT: slli a7, a7, 16
+; RV64I-NEXT: slli a7, a7, 8
+; RV64I-NEXT: or a6, a7, a6
+; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli t0, t0, 24
-; RV64I-NEXT: or a6, t0, a7
-; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: or a5, t0, a5
+; RV64I-NEXT: or a5, a5, a6
; RV64I-NEXT: slli a5, a5, 32
-; RV64I-NEXT: or a4, a5, a4
-; RV64I-NEXT: lbu a5, 17(a0)
; RV64I-NEXT: lbu a6, 16(a0)
-; RV64I-NEXT: lbu a7, 18(a0)
+; RV64I-NEXT: lbu a7, 17(a0)
+; RV64I-NEXT: or a4, a5, a4
+; RV64I-NEXT: lbu a5, 18(a0)
; RV64I-NEXT: lbu t0, 19(a0)
-; RV64I-NEXT: slli a5, a5, 8
-; RV64I-NEXT: or a5, a5, a6
-; RV64I-NEXT: slli a7, a7, 16
+; RV64I-NEXT: slli a7, a7, 8
+; RV64I-NEXT: or a6, a7, a6
+; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli t0, t0, 24
-; RV64I-NEXT: or a6, t0, a7
-; RV64I-NEXT: or a5, a6, a5
-; RV64I-NEXT: lbu a6, 21(a0)
+; RV64I-NEXT: or a5, t0, a5
; RV64I-NEXT: lbu a7, 20(a0)
-; RV64I-NEXT: lbu t0, 22(a0)
+; RV64I-NEXT: lbu t0, 21(a0)
+; RV64I-NEXT: or a5, a5, a6
+; RV64I-NEXT: lbu a6, 22(a0)
; RV64I-NEXT: lbu t1, 23(a0)
-; RV64I-NEXT: slli a6, a6, 8
-; RV64I-NEXT: or a6, a6, a7
-; RV64I-NEXT: slli t0, t0, 16
+; RV64I-NEXT: slli t0, t0, 8
+; RV64I-NEXT: or a7, t0, a7
+; RV64I-NEXT: slli a6, a6, 16
; RV64I-NEXT: slli t1, t1, 24
-; RV64I-NEXT: or a7, t1, t0
-; RV64I-NEXT: or a6, a7, a6
+; RV64I-NEXT: or a6, t1, a6
+; RV64I-NEXT: or a6, a6, a7
; RV64I-NEXT: slli a6, a6, 32
-; RV64I-NEXT: or a5, a6, a5
-; RV64I-NEXT: lbu a6, 25(a0)
; RV64I-NEXT: lbu a7, 24(a0)
-; RV64I-NEXT: lbu t0, 26(a0)
+; RV64I-NEXT: lbu t0, 25(a0)
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: lbu a6, 26(a0)
; RV64I-NEXT: lbu t1, 27(a0)
-; RV64I-NEXT: slli a6, a6, 8
-; RV64I-NEXT: or a6, a6, a7
-; RV64I-NEXT: slli t0, t0, 16
+; RV64I-NEXT: slli t0, t0, 8
+; RV64I-NEXT: or a7, t0, a7
+; RV64I-NEXT: slli a6, a6, 16
; RV64I-NEXT: slli t1, t1, 24
-; RV64I-NEXT: or a7, t1, t0
-; RV64I-NEXT: or a6, a7, a6
-; RV64I-NEXT: lbu a7, 29(a0)
+; RV64I-NEXT: or a6, t1, a6
; RV64I-NEXT: lbu t0, 28(a0)
-; RV64I-NEXT: lbu t1, 30(a0)
+; RV64I-NEXT: lbu t1, 29(a0)
+; RV64I-NEXT: or a6, a6, a7
+; RV64I-NEXT: lbu a7, 30(a0)
; RV64I-NEXT: lbu a0, 31(a0)
-; RV64I-NEXT: slli a7, a7, 8
-; RV64I-NEXT: or a7, a7, t0
-; RV64I-NEXT: slli t1, t1, 16
+; RV64I-NEXT: slli t1, t1, 8
+; RV64I-NEXT: or t0, t1, t0
+; RV64I-NEXT: slli a7, a7, 16
; RV64I-NEXT: slli a0, a0, 24
-; RV64I-NEXT: or a0, a0, t1
; RV64I-NEXT: or a0, a0, a7
+; RV64I-NEXT: or a0, a0, t0
; RV64I-NEXT: slli a0, a0, 32
-; RV64I-NEXT: or a0, a0, a6
-; RV64I-NEXT: lbu a6, 1(a1)
; RV64I-NEXT: lbu a7, 0(a1)
-; RV64I-NEXT: lbu t0, 2(a1)
+; RV64I-NEXT: lbu t0, 1(a1)
+; RV64I-NEXT: or a0, a0, a6
+; RV64I-NEXT: lbu a6, 2(a1)
; RV64I-NEXT: lbu t1, 3(a1)
-; RV64I-NEXT: slli a6, a6, 8
-; RV64I-NEXT: or a6, a6, a7
-; RV64I-NEXT: slli t0, t0, 16
+; RV64I-NEXT: slli t0, t0, 8
+; RV64I-NEXT: or a7, t0, a7
+; RV64I-NEXT: slli a6, a6, 16
; RV64I-NEXT: slli t1, t1, 24
-; RV64I-NEXT: or a7, t1, t0
-; RV64I-NEXT: or a6, a7, a6
-; RV64I-NEXT: lbu a7, 5(a1)
+; RV64I-NEXT: or a6, t1, a6
; RV64I-NEXT: lbu t0, 4(a1)
-; RV64I-NEXT: lbu t1, 6(a1)
+; RV64I-NEXT: lbu t1, 5(a1)
+; RV64I-NEXT: or a6, a6, a7
+; RV64I-NEXT: lbu a7, 6(a1)
; RV64I-NEXT: lbu a1, 7(a1)
-; RV64I-NEXT: slli a7, a7, 8
-; RV64I-NEXT: or a7, a7, t0
-; RV64I-NEXT: slli t1, t1, 16
+; RV64I-NEXT: slli t1, t1, 8
+; RV64I-NEXT: or t0, t1, t0
+; RV64I-NEXT: slli a7, a7, 16
; RV64I-NEXT: slli a1, a1, 24
-; RV64I-NEXT: or a1, a1, t1
; RV64I-NEXT: or a1, a1, a7
+; RV64I-NEXT: or a1, a1, t0
; RV64I-NEXT: slli a1, a1, 32
; RV64I-NEXT: or a1, a1, a6
; RV64I-NEXT: sd zero, 56(sp)
@@ -2526,70 +2526,70 @@ define void @lshr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun
; RV64I-NEXT: slli a0, a1, 2
; RV64I-NEXT: andi a0, a0, 24
; RV64I-NEXT: mv a3, sp
-; RV64I-NEXT: add a3, a3, a0
-; RV64I-NEXT: ld a4, 8(a3)
-; RV64I-NEXT: slli a5, a1, 5
-; RV64I-NEXT: srl a1, a4, a5
-; RV64I-NEXT: ld a6, 16(a3)
-; RV64I-NEXT: andi a0, a5, 32
-; RV64I-NEXT: xori a7, a0, 63
-; RV64I-NEXT: ld t0, 0(a3)
-; RV64I-NEXT: slli a0, a6, 1
-; RV64I-NEXT: sll a0, a0, a7
-; RV64I-NEXT: or a0, a1, a0
-; RV64I-NEXT: srl t0, t0, a5
+; RV64I-NEXT: add a0, a3, a0
+; RV64I-NEXT: ld a3, 0(a0)
+; RV64I-NEXT: ld a4, 8(a0)
+; RV64I-NEXT: slli a1, a1, 5
+; RV64I-NEXT: ld a5, 16(a0)
+; RV64I-NEXT: ld a6, 24(a0)
+; RV64I-NEXT: srl a7, a4, a1
+; RV64I-NEXT: andi a0, a1, 32
+; RV64I-NEXT: xori t0, a0, 63
+; RV64I-NEXT: slli a0, a5, 1
+; RV64I-NEXT: sll a0, a0, t0
+; RV64I-NEXT: or a0, a7, a0
+; RV64I-NEXT: srl a3, a3, a1
; RV64I-NEXT: slli a4, a4, 1
-; RV64I-NEXT: ld a3, 24(a3)
-; RV64I-NEXT: sll a4, a4, a7
-; RV64I-NEXT: or a4, t0, a4
-; RV64I-NEXT: srl a6, a6, a5
-; RV64I-NEXT: slli t1, a3, 1
-; RV64I-NEXT: sll a7, t1, a7
-; RV64I-NEXT: or a7, a6, a7
-; RV64I-NEXT: srl a3, a3, a5
-; RV64I-NEXT: sb a6, 16(a2)
-; RV64I-NEXT: sb a3, 24(a2)
-; RV64I-NEXT: sb t0, 0(a2)
-; RV64I-NEXT: sb a1, 8(a2)
-; RV64I-NEXT: srli a5, a6, 24
-; RV64I-NEXT: sb a5, 19(a2)
-; RV64I-NEXT: srli a5, a6, 16
-; RV64I-NEXT: sb a5, 18(a2)
-; RV64I-NEXT: srli a5, a6, 8
+; RV64I-NEXT: sll a4, a4, t0
+; RV64I-NEXT: or a4, a3, a4
+; RV64I-NEXT: srl a5, a5, a1
+; RV64I-NEXT: slli t1, a6, 1
+; RV64I-NEXT: sll t0, t1, t0
+; RV64I-NEXT: or t0, a5, t0
+; RV64I-NEXT: srl a1, a6, a1
+; RV64I-NEXT: sb a5, 16(a2)
+; RV64I-NEXT: sb a1, 24(a2)
+; RV64I-NEXT: sb a3, 0(a2)
+; RV64I-NEXT: sb a7, 8(a2)
+; RV64I-NEXT: srli a6, a5, 24
+; RV64I-NEXT: sb a6, 19(a2)
+; RV64I-NEXT: srli a6, a5, 16
+; RV64I-NEXT: sb a6, 18(a2)
+; RV64I-NEXT: srli a5, a5, 8
; RV64I-NEXT: sb a5, 17(a2)
-; RV64I-NEXT: srli a5, a3, 56
+; RV64I-NEXT: srli a5, a1, 56
; RV64I-NEXT: sb a5, 31(a2)
-; RV64I-NEXT: srli a5, a3, 48
+; RV64I-NEXT: srli a5, a1, 48
; RV64I-NEXT: sb a5, 30(a2)
-; RV64I-NEXT: srli a5, a3, 40
+; RV64I-NEXT: srli a5, a1, 40
; RV64I-NEXT: sb a5, 29(a2)
-; RV64I-NEXT: srli a5, a3, 32
+; RV64I-NEXT: srli a5, a1, 32
; RV64I-NEXT: sb a5, 28(a2)
-; RV64I-NEXT: srli a5, a3, 24
+; RV64I-NEXT: srli a5, a1, 24
; RV64I-NEXT: sb a5, 27(a2)
-; RV64I-NEXT: srli a5, a3, 16
+; RV64I-NEXT: srli a5, a1, 16
; RV64I-NEXT: sb a5, 26(a2)
+; RV64I-NEXT: srli a1, a1, 8
+; RV64I-NEXT: sb a1, 25(a2)
+; RV64I-NEXT: srli a1, a3, 24
+; RV64I-NEXT: sb a1, 3(a2)
+; RV64I-NEXT: srli a1, a3, 16
+; RV64I-NEXT: sb a1, 2(a2)
; RV64I-NEXT: srli a3, a3, 8
-; RV64I-NEXT: sb a3, 25(a2)
-; RV64I-NEXT: srli a3, t0, 24
-; RV64I-NEXT: sb a3, 3(a2)
-; RV64I-NEXT: srli a3, t0, 16
-; RV64I-NEXT: sb a3, 2(a2)
-; RV64I-NEXT: srli a3, t0, 8
; RV64I-NEXT: sb a3, 1(a2)
-; RV64I-NEXT: srli a3, a1, 24
-; RV64I-NEXT: sb a3, 11(a2)
-; RV64I-NEXT: srli a3, a1, 16
-; RV64I-NEXT: sb a3, 10(a2)
-; RV64I-NEXT: srli a1, a1, 8
+; RV64I-NEXT: srli a1, a7, 24
+; RV64I-NEXT: sb a1, 11(a2)
+; RV64I-NEXT: srli a1, a7, 16
+; RV64I-NEXT: sb a1, 10(a2)
+; RV64I-NEXT: srli a1, a7, 8
; RV64I-NEXT: sb a1, 9(a2)
-; RV64I-NEXT: srli a1, a7, 56
+; RV64I-NEXT: srli a1, t0, 56
; RV64I-NEXT: sb a1, 23(a2)
-; RV64I-NEXT: srli a1, a7, 48
+; RV64I-NEXT: srli a1, t0, 48
; RV64I-NEXT: sb a1, 22(a2)
-; RV64I-NEXT: srli a1, a7, 40
+; RV64I-NEXT: srli a1, t0, 40
; RV64I-NEXT: sb a1, 21(a2)
-; RV64I-NEXT: srli a1, a7, 32
+; RV64I-NEXT: srli a1, t0, 32
; RV64I-NEXT: sb a1, 20(a2)
; RV64I-NEXT: srli a1, a4, 56
; RV64I-NEXT: sb a1, 7(a2)
@@ -2622,77 +2622,77 @@ define void @lshr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun
; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli a6, a6, 24
; RV32I-NEXT: or a4, a6, a5
-; RV32I-NEXT: or a3, a4, a3
-; RV32I-NEXT: lbu a4, 5(a0)
; RV32I-NEXT: lbu a5, 4(a0)
-; RV32I-NEXT: lbu a6, 6(a0)
+; RV32I-NEXT: lbu a6, 5(a0)
+; RV32I-NEXT: or a3, a4, a3
+; RV32I-NEXT: lbu a4, 6(a0)
; RV32I-NEXT: lbu a7, 7(a0)
-; RV32I-NEXT: slli a4, a4, 8
-; RV32I-NEXT: or a4, a4, a5
-; RV32I-NEXT: slli a6, a6, 16
+; RV32I-NEXT: slli a6, a6, 8
+; RV32I-NEXT: or a5, a6, a5
+; RV32I-NEXT: slli a4, a4, 16
; RV32I-NEXT: slli a7, a7, 24
-; RV32I-NEXT: or a5, a7, a6
-; RV32I-NEXT: or a4, a5, a4
-; RV32I-NEXT: lbu a5, 9(a0)
+; RV32I-NEXT: or a4, a7, a4
; RV32I-NEXT: lbu a6, 8(a0)
-; RV32I-NEXT: lbu a7, 10(a0)
+; RV32I-NEXT: lbu a7, 9(a0)
+; RV32I-NEXT: or a4, a4, a5
+; RV32I-NEXT: lbu a5, 10(a0)
; RV32I-NEXT: lbu t0, 11(a0)
-; RV32I-NEXT: slli a5, a5, 8
-; RV32I-NEXT: or a5, a5, a6
-; RV32I-NEXT: slli a7, a7, 16
+; RV32I-NEXT: slli a7, a7, 8
+; RV32I-NEXT: or a6, a7, a6
+; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli t0, t0, 24
-; RV32I-NEXT: or a6, t0, a7
-; RV32I-NEXT: or a5, a6, a5
-; RV32I-NEXT: lbu a6, 13(a0)
+; RV32I-NEXT: or a5, t0, a5
; RV32I-NEXT: lbu a7, 12(a0)
-; RV32I-NEXT: lbu t0, 14(a0)
+; RV32I-NEXT: lbu t0, 13(a0)
+; RV32I-NEXT: or a5, a5, a6
+; RV32I-NEXT: lbu a6, 14(a0)
; RV32I-NEXT: lbu t1, 15(a0)
-; RV32I-NEXT: slli a6, a6, 8
-; RV32I-NEXT: or a6, a6, a7
-; RV32I-NEXT: slli t0, t0, 16
+; RV32I-NEXT: slli t0, t0, 8
+; RV32I-NEXT: or a7, t0, a7
+; RV32I-NEXT: slli a6, a6, 16
; RV32I-NEXT: slli t1, t1, 24
-; RV32I-NEXT: or a7, t1, t0
-; RV32I-NEXT: or a6, a7, a6
-; RV32I-NEXT: lbu a7, 17(a0)
+; RV32I-NEXT: or a6, t1, a6
; RV32I-NEXT: lbu t0, 16(a0)
-; RV32I-NEXT: lbu t1, 18(a0)
+; RV32I-NEXT: lbu t1, 17(a0)
+; RV32I-NEXT: or a6, a6, a7
+; RV32I-NEXT: lbu a7, 18(a0)
; RV32I-NEXT: lbu t2, 19(a0)
-; RV32I-NEXT: slli a7, a7, 8
-; RV32I-NEXT: or a7, a7, t0
-; RV32I-NEXT: slli t1, t1, 16
+; RV32I-NEXT: slli t1, t1, 8
+; RV32I-NEXT: or t0, t1, t0
+; RV32I-NEXT: slli a7, a7, 16
; RV32I-NEXT: slli t2, t2, 24
-; RV32I-NEXT: or t0, t2, t1
-; RV32I-NEXT: or a7, t0, a7
-; RV32I-NEXT: lbu t0, 21(a0)
+; RV32I-NEXT: or a7, t2, a7
; RV32I-NEXT: lbu t1, 20(a0)
-; RV32I-NEXT: lbu t2, 22(a0)
+; RV32I-NEXT: lbu t2, 21(a0)
+; RV32I-NEXT: or a7, a7, t0
+; RV32I-NEXT: lbu t0, 22(a0)
; RV32I-NEXT: lbu t3, 23(a0)
-; RV32I-NEXT: slli t0, t0, 8
-; RV32I-NEXT: or t0, t0, t1
-; RV32I-NEXT: slli t2, t2, 16
+; RV32I-NEXT: slli t2, t2, 8
+; RV32I-NEXT: or t1, t2, t1
+; RV32I-NEXT: slli t0, t0, 16
; RV32I-NEXT: slli t3, t3, 24
-; RV32I-NEXT: or t1, t3, t2
-; RV32I-NEXT: or t0, t1, t0
-; RV32I-NEXT: lbu t1, 25(a0)
+; RV32I-NEXT: or t0, t3, t0
; RV32I-NEXT: lbu t2, 24(a0)
-; RV32I-NEXT: lbu t3, 26(a0)
+; RV32I-NEXT: lbu t3, 25(a0)
+; RV32I-NEXT: or t0, t0, t1
+; RV32I-NEXT: lbu t1, 26(a0)
; RV32I-NEXT: lbu t4, 27(a0)
-; RV32I-NEXT: slli t1, t1, 8
-; RV32I-NEXT: or t1, t1, t2
-; RV32I-NEXT: slli t3, t3, 16
+; RV32I-NEXT: slli t3, t3, 8
+; RV32I-NEXT: or t2, t3, t2
+; RV32I-NEXT: slli t1, t1, 16
; RV32I-NEXT: slli t4, t4, 24
-; RV32I-NEXT: or t2, t4, t3
-; RV32I-NEXT: or t1, t2, t1
-; RV32I-NEXT: lbu t2, 29(a0)
+; RV32I-NEXT: or t1, t4, t1
; RV32I-NEXT: lbu t3, 28(a0)
-; RV32I-NEXT: lbu t4, 30(a0)
+; RV32I-NEXT: lbu t4, 29(a0)
+; RV32I-NEXT: or t1, t1, t2
+; RV32I-NEXT: lbu t2, 30(a0)
; RV32I-NEXT: lbu a0, 31(a0)
-; RV32I-NEXT: slli t2, t2, 8
-; RV32I-NEXT: or t2, t2, t3
-; RV32I-NEXT: slli t4, t4, 16
+; RV32I-NEXT: slli t4, t4, 8
+; RV32I-NEXT: or t3, t4, t3
+; RV32I-NEXT: slli t2, t2, 16
; RV32I-NEXT: slli a0, a0, 24
-; RV32I-NEXT: or a0, a0, t4
; RV32I-NEXT: or a0, a0, t2
+; RV32I-NEXT: or a0, a0, t3
; RV32I-NEXT: lbu a1, 0(a1)
; RV32I-NEXT: sw zero, 60(sp)
; RV32I-NEXT: sw zero, 56(sp)
@@ -2713,64 +2713,64 @@ define void @lshr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun
; RV32I-NEXT: slli a1, a1, 2
; RV32I-NEXT: andi a1, a1, 28
; RV32I-NEXT: mv a0, sp
-; RV32I-NEXT: add a3, a0, a1
-; RV32I-NEXT: lw a0, 4(a3)
-; RV32I-NEXT: lw a1, 0(a3)
-; RV32I-NEXT: lw a4, 12(a3)
-; RV32I-NEXT: lw a5, 8(a3)
-; RV32I-NEXT: lw a6, 24(a3)
-; RV32I-NEXT: lw a7, 28(a3)
-; RV32I-NEXT: lw t0, 16(a3)
-; RV32I-NEXT: lw a3, 20(a3)
-; RV32I-NEXT: sb a6, 24(a2)
-; RV32I-NEXT: sb a7, 28(a2)
-; RV32I-NEXT: sb t0, 16(a2)
-; RV32I-NEXT: sb a3, 20(a2)
-; RV32I-NEXT: sb a5, 8(a2)
-; RV32I-NEXT: sb a4, 12(a2)
-; RV32I-NEXT: sb a1, 0(a2)
+; RV32I-NEXT: add a1, a0, a1
+; RV32I-NEXT: lw a3, 16(a1)
+; RV32I-NEXT: lw a4, 20(a1)
+; RV32I-NEXT: lw a5, 24(a1)
+; RV32I-NEXT: lw a6, 28(a1)
+; RV32I-NEXT: lw a7, 0(a1)
+; RV32I-NEXT: lw a0, 4(a1)
+; RV32I-NEXT: lw t0, 8(a1)
+; RV32I-NEXT: lw a1, 12(a1)
+; RV32I-NEXT: sb a5, 24(a2)
+; RV32I-NEXT: sb a6, 28(a2)
+; RV32I-NEXT: sb a3, 16(a2)
+; RV32I-NEXT: sb a4, 20(a2)
+; RV32I-NEXT: sb t0, 8(a2)
+; RV32I-NEXT: sb a1, 12(a2)
+; RV32I-NEXT: sb a7, 0(a2)
; RV32I-NEXT: sb a0, 4(a2)
-; RV32I-NEXT: srli t1, a6, 24
+; RV32I-NEXT: srli t1, a5, 24
; RV32I-NEXT: sb t1, 27(a2)
-; RV32I-NEXT: srli t1, a6, 16
+; RV32I-NEXT: srli t1, a5, 16
; RV32I-NEXT: sb t1, 26(a2)
-; RV32I-NEXT: srli a6, a6, 8
-; RV32I-NEXT: sb a6, 25(a2)
-; RV32I-NEXT: srli a6, a7, 24
-; RV32I-NEXT: sb a6, 31(a2)
-; RV32I-NEXT: srli a6, a7, 16
-; RV32I-NEXT: sb a6, 30(a2)
-; RV32I-NEXT: srli a6, a7, 8
-; RV32I-NEXT: sb a6, 29(a2)
-; RV32I-NEXT: srli a6, t0, 24
-; RV32I-NEXT: sb a6, 19(a2)
-; RV32I-NEXT: srli a6, t0, 16
-; RV32I-NEXT: sb a6, 18(a2)
-; RV32I-NEXT: srli a6, t0, 8
-; RV32I-NEXT: sb a6, 17(a2)
-; RV32I-NEXT: srli a6, a3, 24
-; RV32I-NEXT: sb a6, 23(a2)
-; RV32I-NEXT: srli a6, a3, 16
-; RV32I-NEXT: sb a6, 22(a2)
-; RV32I-NEXT: srli a3, a3, 8
-; RV32I-NEXT: sb a3, 21(a2)
-; RV32I-NEXT: srli a3, a5, 24
-; RV32I-NEXT: sb a3, 11(a2)
-; RV32I-NEXT: srli a3, a5, 16
-; RV32I-NEXT: sb a3, 10(a2)
; RV32I-NEXT: srli a5, a5, 8
-; RV32I-NEXT: sb a5, 9(a2)
+; RV32I-NEXT: sb a5, 25(a2)
+; RV32I-NEXT: srli a5, a6, 24
+; RV32I-NEXT: sb a5, 31(a2)
+; RV32I-NEXT: srli a5, a6, 16
+; RV32I-NEXT: sb a5, 30(a2)
+; RV32I-NEXT: srli a5, a6, 8
+; RV32I-NEXT: sb a5, 29(a2)
+; RV32I-NEXT: srli a5, a3, 24
+; RV32I-NEXT: sb a5, 19(a2)
+; RV32I-NEXT: srli a5, a3, 16
+; RV32I-NEXT: sb a5, 18(a2)
+; RV32I-NEXT: srli a3, a3, 8
+; RV32I-NEXT: sb a3, 17(a2)
; RV32I-NEXT: srli a3, a4, 24
-; RV32I-NEXT: sb a3, 15(a2)
+; RV32I-NEXT: sb a3, 23(a2)
; RV32I-NEXT: srli a3, a4, 16
-; RV32I-NEXT: sb a3, 14(a2)
+; RV32I-NEXT: sb a3, 22(a2)
; RV32I-NEXT: srli a4, a4, 8
-; RV32I-NEXT: sb a4, 13(a2)
+; RV32I-NEXT: sb a4, 21(a2)
+; RV32I-NEXT: srli a3, t0, 24
+; RV32I-NEXT: sb a3, 11(a2)
+; RV32I-NEXT: srli a3, t0, 16
+; RV32I-NEXT: sb a3, 10(a2)
+; RV32I-NEXT: srli a3, t0, 8
+; RV32I-NEXT: sb a3, 9(a2)
; RV32I-NEXT: srli a3, a1, 24
-; RV32I-NEXT: sb a3, 3(a2)
+; RV32I-NEXT: sb a3, 15(a2)
; RV32I-NEXT: srli a3, a1, 16
-; RV32I-NEXT: sb a3, 2(a2)
+; RV32I-NEXT: sb a3, 14(a2)
; RV32I-NEXT: srli a1, a1, 8
+; RV32I-NEXT: sb a1, 13(a2)
+; RV32I-NEXT: srli a1, a7, 24
+; RV32I-NEXT: sb a1, 3(a2)
+; RV32I-NEXT: srli a1, a7, 16
+; RV32I-NEXT: sb a1, 2(a2)
+; RV32I-NEXT: srli a1, a7, 8
; RV32I-NEXT: sb a1, 1(a2)
; RV32I-NEXT: srli a1, a0, 24
; RV32I-NEXT: sb a1, 7(a2)
@@ -2801,83 +2801,83 @@ define void @lshr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no
; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli a6, a6, 24
; RV64I-NEXT: or a4, a6, a5
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: lbu a4, 5(a0)
; RV64I-NEXT: lbu a5, 4(a0)
-; RV64I-NEXT: lbu a6, 6(a0)
+; RV64I-NEXT: lbu a6, 5(a0)
+; RV64I-NEXT: or a3, a4, a3
+; RV64I-NEXT: lbu a4, 6(a0)
; RV64I-NEXT: lbu a7, 7(a0)
-; RV64I-NEXT: slli a4, a4, 8
-; RV64I-NEXT: or a4, a4, a5
-; RV64I-NEXT: slli a6, a6, 16
+; RV64I-NEXT: slli a6, a6, 8
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: slli a4, a4, 16
; RV64I-NEXT: slli a7, a7, 24
-; RV64I-NEXT: or a5, a7, a6
-; RV64I-NEXT: or a4, a5, a4
+; RV64I-NEXT: or a4, a7, a4
+; RV64I-NEXT: or a4, a4, a5
; RV64I-NEXT: slli a4, a4, 32
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: lbu a4, 9(a0)
; RV64I-NEXT: lbu a5, 8(a0)
-; RV64I-NEXT: lbu a6, 10(a0)
+; RV64I-NEXT: lbu a6, 9(a0)
+; RV64I-NEXT: or a3, a4, a3
+; RV64I-NEXT: lbu a4, 10(a0)
; RV64I-NEXT: lbu a7, 11(a0)
-; RV64I-NEXT: slli a4, a4, 8
-; RV64I-NEXT: or a4, a4, a5
-; RV64I-NEXT: slli a6, a6, 16
+; RV64I-NEXT: slli a6, a6, 8
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: slli a4, a4, 16
; RV64I-NEXT: slli a7, a7, 24
-; RV64I-NEXT: or a5, a7, a6
-; RV64I-NEXT: or a4, a5, a4
-; RV64I-NEXT: lbu a5, 13(a0)
+; RV64I-NEXT: or a4, a7, a4
; RV64I-NEXT: lbu a6, 12(a0)
-; RV64I-NEXT: lbu a7, 14(a0)
+; RV64I-NEXT: lbu a7, 13(a0)
+; RV64I-NEXT: or a4, a4, a5
+; RV64I-NEXT: lbu a5, 14(a0)
; RV64I-NEXT: lbu t0, 15(a0)
-; RV64I-NEXT: slli a5, a5, 8
-; RV64I-NEXT: or a5, a5, a6
-; RV64I-NEXT: slli a7, a7, 16
+; RV64I-NEXT: slli a7, a7, 8
+; RV64I-NEXT: or a6, a7, a6
+; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli t0, t0, 24
-; RV64I-NEXT: or a6, t0, a7
-; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: or a5, t0, a5
+; RV64I-NEXT: or a5, a5, a6
; RV64I-NEXT: slli a5, a5, 32
-; RV64I-NEXT: or a4, a5, a4
-; RV64I-NEXT: lbu a5, 17(a0)
; RV64I-NEXT: lbu a6, 16(a0)
-; RV64I-NEXT: lbu a7, 18(a0)
+; RV64I-NEXT: lbu a7, 17(a0)
+; RV64I-NEXT: or a4, a5, a4
+; RV64I-NEXT: lbu a5, 18(a0)
; RV64I-NEXT: lbu t0, 19(a0)
-; RV64I-NEXT: slli a5, a5, 8
-; RV64I-NEXT: or a5, a5, a6
-; RV64I-NEXT: slli a7, a7, 16
+; RV64I-NEXT: slli a7, a7, 8
+; RV64I-NEXT: or a6, a7, a6
+; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli t0, t0, 24
-; RV64I-NEXT: or a6, t0, a7
-; RV64I-NEXT: or a5, a6, a5
-; RV64I-NEXT: lbu a6, 21(a0)
+; RV64I-NEXT: or a5, t0, a5
; RV64I-NEXT: lbu a7, 20(a0)
-; RV64I-NEXT: lbu t0, 22(a0)
+; RV64I-NEXT: lbu t0, 21(a0)
+; RV64I-NEXT: or a5, a5, a6
+; RV64I-NEXT: lbu a6, 22(a0)
; RV64I-NEXT: lbu t1, 23(a0)
-; RV64I-NEXT: slli a6, a6, 8
-; RV64I-NEXT: or a6, a6, a7
-; RV64I-NEXT: slli t0, t0, 16
+; RV64I-NEXT: slli t0, t0, 8
+; RV64I-NEXT: or a7, t0, a7
+; RV64I-NEXT: slli a6, a6, 16
; RV64I-NEXT: slli t1, t1, 24
-; RV64I-NEXT: or a7, t1, t0
-; RV64I-NEXT: or a6, a7, a6
+; RV64I-NEXT: or a6, t1, a6
+; RV64I-NEXT: or a6, a6, a7
; RV64I-NEXT: slli a6, a6, 32
-; RV64I-NEXT: or a5, a6, a5
-; RV64I-NEXT: lbu a6, 25(a0)
; RV64I-NEXT: lbu a7, 24(a0)
-; RV64I-NEXT: lbu t0, 26(a0)
+; RV64I-NEXT: lbu t0, 25(a0)
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: lbu a6, 26(a0)
; RV64I-NEXT: lbu t1, 27(a0)
-; RV64I-NEXT: slli a6, a6, 8
-; RV64I-NEXT: or a6, a6, a7
-; RV64I-NEXT: slli t0, t0, 16
+; RV64I-NEXT: slli t0, t0, 8
+; RV64I-NEXT: or a7, t0, a7
+; RV64I-NEXT: slli a6, a6, 16
; RV64I-NEXT: slli t1, t1, 24
-; RV64I-NEXT: or a7, t1, t0
-; RV64I-NEXT: or a6, a7, a6
-; RV64I-NEXT: lbu a7, 29(a0)
+; RV64I-NEXT: or a6, t1, a6
; RV64I-NEXT: lbu t0, 28(a0)
-; RV64I-NEXT: lbu t1, 30(a0)
+; RV64I-NEXT: lbu t1, 29(a0)
+; RV64I-NEXT: or a6, a6, a7
+; RV64I-NEXT: lbu a7, 30(a0)
; RV64I-NEXT: lbu a0, 31(a0)
-; RV64I-NEXT: slli a7, a7, 8
-; RV64I-NEXT: or a7, a7, t0
-; RV64I-NEXT: slli t1, t1, 16
+; RV64I-NEXT: slli t1, t1, 8
+; RV64I-NEXT: or t0, t1, t0
+; RV64I-NEXT: slli a7, a7, 16
; RV64I-NEXT: slli a0, a0, 24
-; RV64I-NEXT: or a0, a0, t1
; RV64I-NEXT: or a0, a0, a7
+; RV64I-NEXT: or a0, a0, t0
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: or a0, a0, a6
; RV64I-NEXT: lbu a1, 0(a1)
@@ -2972,77 +2972,77 @@ define void @lshr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no
; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli a6, a6, 24
; RV32I-NEXT: or a4, a6, a5
-; RV32I-NEXT: or a3, a4, a3
-; RV32I-NEXT: lbu a4, 5(a0)
; RV32I-NEXT: lbu a5, 4(a0)
-; RV32I-NEXT: lbu a6, 6(a0)
+; RV32I-NEXT: lbu a6, 5(a0)
+; RV32I-NEXT: or a3, a4, a3
+; RV32I-NEXT: lbu a4, 6(a0)
; RV32I-NEXT: lbu a7, 7(a0)
-; RV32I-NEXT: slli a4, a4, 8
-; RV32I-NEXT: or a4, a4, a5
-; RV32I-NEXT: slli a6, a6, 16
+; RV32I-NEXT: slli a6, a6, 8
+; RV32I-NEXT: or a5, a6, a5
+; RV32I-NEXT: slli a4, a4, 16
; RV32I-NEXT: slli a7, a7, 24
-; RV32I-NEXT: or a5, a7, a6
-; RV32I-NEXT: or a4, a5, a4
-; RV32I-NEXT: lbu a5, 9(a0)
+; RV32I-NEXT: or a4, a7, a4
; RV32I-NEXT: lbu a6, 8(a0)
-; RV32I-NEXT: lbu a7, 10(a0)
+; RV32I-NEXT: lbu a7, 9(a0)
+; RV32I-NEXT: or a4, a4, a5
+; RV32I-NEXT: lbu a5, 10(a0)
; RV32I-NEXT: lbu t0, 11(a0)
-; RV32I-NEXT: slli a5, a5, 8
-; RV32I-NEXT: or a5, a5, a6
-; RV32I-NEXT: slli a7, a7, 16
+; RV32I-NEXT: slli a7, a7, 8
+; RV32I-NEXT: or a6, a7, a6
+; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli t0, t0, 24
-; RV32I-NEXT: or a6, t0, a7
-; RV32I-NEXT: or a5, a6, a5
-; RV32I-NEXT: lbu a6, 13(a0)
+; RV32I-NEXT: or a5, t0, a5
; RV32I-NEXT: lbu a7, 12(a0)
-; RV32I-NEXT: lbu t0, 14(a0)
+; RV32I-NEXT: lbu t0, 13(a0)
+; RV32I-NEXT: or a5, a5, a6
+; RV32I-NEXT: lbu a6, 14(a0)
; RV32I-NEXT: lbu t1, 15(a0)
-; RV32I-NEXT: slli a6, a6, 8
-; RV32I-NEXT: or a6, a6, a7
-; RV32I-NEXT: slli t0, t0, 16
+; RV32I-NEXT: slli t0, t0, 8
+; RV32I-NEXT: or a7, t0, a7
+; RV32I-NEXT: slli a6, a6, 16
; RV32I-NEXT: slli t1, t1, 24
-; RV32I-NEXT: or a7, t1, t0
-; RV32I-NEXT: or a6, a7, a6
-; RV32I-NEXT: lbu a7, 17(a0)
+; RV32I-NEXT: or a6, t1, a6
; RV32I-NEXT: lbu t0, 16(a0)
-; RV32I-NEXT: lbu t1, 18(a0)
+; RV32I-NEXT: lbu t1, 17(a0)
+; RV32I-NEXT: or a6, a6, a7
+; RV32I-NEXT: lbu a7, 18(a0)
; RV32I-NEXT: lbu t2, 19(a0)
-; RV32I-NEXT: slli a7, a7, 8
-; RV32I-NEXT: or a7, a7, t0
-; RV32I-NEXT: slli t1, t1, 16
+; RV32I-NEXT: slli t1, t1, 8
+; RV32I-NEXT: or t0, t1, t0
+; RV32I-NEXT: slli a7, a7, 16
; RV32I-NEXT: slli t2, t2, 24
-; RV32I-NEXT: or t0, t2, t1
-; RV32I-NEXT: or a7, t0, a7
-; RV32I-NEXT: lbu t0, 21(a0)
+; RV32I-NEXT: or a7, t2, a7
; RV32I-NEXT: lbu t1, 20(a0)
-; RV32I-NEXT: lbu t2, 22(a0)
+; RV32I-NEXT: lbu t2, 21(a0)
+; RV32I-NEXT: or a7, a7, t0
+; RV32I-NEXT: lbu t0, 22(a0)
; RV32I-NEXT: lbu t3, 23(a0)
-; RV32I-NEXT: slli t0, t0, 8
-; RV32I-NEXT: or t0, t0, t1
-; RV32I-NEXT: slli t2, t2, 16
+; RV32I-NEXT: slli t2, t2, 8
+; RV32I-NEXT: or t1, t2, t1
+; RV32I-NEXT: slli t0, t0, 16
; RV32I-NEXT: slli t3, t3, 24
-; RV32I-NEXT: or t1, t3, t2
-; RV32I-NEXT: or t0, t1, t0
-; RV32I-NEXT: lbu t1, 25(a0)
+; RV32I-NEXT: or t0, t3, t0
; RV32I-NEXT: lbu t2, 24(a0)
-; RV32I-NEXT: lbu t3, 26(a0)
+; RV32I-NEXT: lbu t3, 25(a0)
+; RV32I-NEXT: or t0, t0, t1
+; RV32I-NEXT: lbu t1, 26(a0)
; RV32I-NEXT: lbu t4, 27(a0)
-; RV32I-NEXT: slli t1, t1, 8
-; RV32I-NEXT: or t1, t1, t2
-; RV32I-NEXT: slli t3, t3, 16
+; RV32I-NEXT: slli t3, t3, 8
+; RV32I-NEXT: or t2, t3, t2
+; RV32I-NEXT: slli t1, t1, 16
; RV32I-NEXT: slli t4, t4, 24
-; RV32I-NEXT: or t2, t4, t3
-; RV32I-NEXT: or t1, t2, t1
-; RV32I-NEXT: lbu t2, 29(a0)
+; RV32I-NEXT: or t1, t4, t1
; RV32I-NEXT: lbu t3, 28(a0)
-; RV32I-NEXT: lbu t4, 30(a0)
+; RV32I-NEXT: lbu t4, 29(a0)
+; RV32I-NEXT: or t1, t1, t2
+; RV32I-NEXT: lbu t2, 30(a0)
; RV32I-NEXT: lbu a0, 31(a0)
-; RV32I-NEXT: slli t2, t2, 8
-; RV32I-NEXT: or t2, t2, t3
-; RV32I-NEXT: slli t4, t4, 16
+; RV32I-NEXT: slli t4, t4, 8
+; RV32I-NEXT: or t3, t4, t3
+; RV32I-NEXT: slli t2, t2, 16
; RV32I-NEXT: slli a0, a0, 24
-; RV32I-NEXT: or a0, a0, t4
; RV32I-NEXT: or a0, a0, t2
+; RV32I-NEXT: or a0, a0, t3
; RV32I-NEXT: lbu a1, 0(a1)
; RV32I-NEXT: sw zero, 60(sp)
; RV32I-NEXT: sw zero, 56(sp)
@@ -3063,64 +3063,64 @@ define void @lshr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no
; RV32I-NEXT: slli a1, a1, 3
; RV32I-NEXT: andi a1, a1, 24
; RV32I-NEXT: mv a0, sp
-; RV32I-NEXT: add a3, a0, a1
-; RV32I-NEXT: lw a0, 4(a3)
-; RV32I-NEXT: lw a1, 0(a3)
-; RV32I-NEXT: lw a4, 12(a3)
-; RV32I-NEXT: lw a5, 8(a3)
-; RV32I-NEXT: lw a6, 24(a3)
-; RV32I-NEXT: lw a7, 28(a3)
-; RV32I-NEXT: lw t0, 16(a3)
-; RV32I-NEXT: lw a3, 20(a3)
-; RV32I-NEXT: sb a6, 24(a2)
-; RV32I-NEXT: sb a7, 28(a2)
-; RV32I-NEXT: sb t0, 16(a2)
-; RV32I-NEXT: sb a3, 20(a2)
-; RV32I-NEXT: sb a5, 8(a2)
-; RV32I-NEXT: sb a4, 12(a2)
-; RV32I-NEXT: sb a1, 0(a2)
+; RV32I-NEXT: add a1, a0, a1
+; RV32I-NEXT: lw a3, 16(a1)
+; RV32I-NEXT: lw a4, 20(a1)
+; RV32I-NEXT: lw a5, 24(a1)
+; RV32I-NEXT: lw a6, 28(a1)
+; RV32I-NEXT: lw a7, 0(a1)
+; RV32I-NEXT: lw a0, 4(a1)
+; RV32I-NEXT: lw t0, 8(a1)
+; RV32I-NEXT: lw a1, 12(a1)
+; RV32I-NEXT: sb a5, 24(a2)
+; RV32I-NEXT: sb a6, 28(a2)
+; RV32I-NEXT: sb a3, 16(a2)
+; RV32I-NEXT: sb a4, 20(a2)
+; RV32I-NEXT: sb t0, 8(a2)
+; RV32I-NEXT: sb a1, 12(a2)
+; RV32I-NEXT: sb a7, 0(a2)
; RV32I-NEXT: sb a0, 4(a2)
-; RV32I-NEXT: srli t1, a6, 24
+; RV32I-NEXT: srli t1, a5, 24
; RV32I-NEXT: sb t1, 27(a2)
-; RV32I-NEXT: srli t1, a6, 16
+; RV32I-NEXT: srli t1, a5, 16
; RV32I-NEXT: sb t1, 26(a2)
-; RV32I-NEXT: srli a6, a6, 8
-; RV32I-NEXT: sb a6, 25(a2)
-; RV32I-NEXT: srli a6, a7, 24
-; RV32I-NEXT: sb a6, 31(a2)
-; RV32I-NEXT: srli a6, a7, 16
-; RV32I-NEXT: sb a6, 30(a2)
-; RV32I-NEXT: srli a6, a7, 8
-; RV32I-NEXT: sb a6, 29(a2)
-; RV32I-NEXT: srli a6, t0, 24
-; RV32I-NEXT: sb a6, 19(a2)
-; RV32I-NEXT: srli a6, t0, 16
-; RV32I-NEXT: sb a6, 18(a2)
-; RV32I-NEXT: srli a6, t0, 8
-; RV32I-NEXT: sb a6, 17(a2)
-; RV32I-NEXT: srli a6, a3, 24
-; RV32I-NEXT: sb a6, 23(a2)
-; RV32I-NEXT: srli a6, a3, 16
-; RV32I-NEXT: sb a6, 22(a2)
-; RV32I-NEXT: srli a3, a3, 8
-; RV32I-NEXT: sb a3, 21(a2)
-; RV32I-NEXT: srli a3, a5, 24
-; RV32I-NEXT: sb a3, 11(a2)
-; RV32I-NEXT: srli a3, a5, 16
-; RV32I-NEXT: sb a3, 10(a2)
; RV32I-NEXT: srli a5, a5, 8
-; RV32I-NEXT: sb a5, 9(a2)
+; RV32I-NEXT: sb a5, 25(a2)
+; RV32I-NEXT: srli a5, a6, 24
+; RV32I-NEXT: sb a5, 31(a2)
+; RV32I-NEXT: srli a5, a6, 16
+; RV32I-NEXT: sb a5, 30(a2)
+; RV32I-NEXT: srli a5, a6, 8
+; RV32I-NEXT: sb a5, 29(a2)
+; RV32I-NEXT: srli a5, a3, 24
+; RV32I-NEXT: sb a5, 19(a2)
+; RV32I-NEXT: srli a5, a3, 16
+; RV32I-NEXT: sb a5, 18(a2)
+; RV32I-NEXT: srli a3, a3, 8
+; RV32I-NEXT: sb a3, 17(a2)
; RV32I-NEXT: srli a3, a4, 24
-; RV32I-NEXT: sb a3, 15(a2)
+; RV32I-NEXT: sb a3, 23(a2)
; RV32I-NEXT: srli a3, a4, 16
-; RV32I-NEXT: sb a3, 14(a2)
+; RV32I-NEXT: sb a3, 22(a2)
; RV32I-NEXT: srli a4, a4, 8
-; RV32I-NEXT: sb a4, 13(a2)
+; RV32I-NEXT: sb a4, 21(a2)
+; RV32I-NEXT: srli a3, t0, 24
+; RV32I-NEXT: sb a3, 11(a2)
+; RV32I-NEXT: srli a3, t0, 16
+; RV32I-NEXT: sb a3, 10(a2)
+; RV32I-NEXT: srli a3, t0, 8
+; RV32I-NEXT: sb a3, 9(a2)
; RV32I-NEXT: srli a3, a1, 24
-; RV32I-NEXT: sb a3, 3(a2)
+; RV32I-NEXT: sb a3, 15(a2)
; RV32I-NEXT: srli a3, a1, 16
-; RV32I-NEXT: sb a3, 2(a2)
+; RV32I-NEXT: sb a3, 14(a2)
; RV32I-NEXT: srli a1, a1, 8
+; RV32I-NEXT: sb a1, 13(a2)
+; RV32I-NEXT: srli a1, a7, 24
+; RV32I-NEXT: sb a1, 3(a2)
+; RV32I-NEXT: srli a1, a7, 16
+; RV32I-NEXT: sb a1, 2(a2)
+; RV32I-NEXT: srli a1, a7, 8
; RV32I-NEXT: sb a1, 1(a2)
; RV32I-NEXT: srli a1, a0, 24
; RV32I-NEXT: sb a1, 7(a2)
@@ -3151,105 +3151,105 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli a6, a6, 24
; RV64I-NEXT: or a4, a6, a5
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: lbu a4, 5(a0)
; RV64I-NEXT: lbu a5, 4(a0)
-; RV64I-NEXT: lbu a6, 6(a0)
+; RV64I-NEXT: lbu a6, 5(a0)
+; RV64I-NEXT: or a3, a4, a3
+; RV64I-NEXT: lbu a4, 6(a0)
; RV64I-NEXT: lbu a7, 7(a0)
-; RV64I-NEXT: slli a4, a4, 8
-; RV64I-NEXT: or a4, a4, a5
-; RV64I-NEXT: slli a6, a6, 16
+; RV64I-NEXT: slli a6, a6, 8
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: slli a4, a4, 16
; RV64I-NEXT: slli a7, a7, 24
-; RV64I-NEXT: or a5, a7, a6
-; RV64I-NEXT: or a4, a5, a4
+; RV64I-NEXT: or a4, a7, a4
+; RV64I-NEXT: or a4, a4, a5
; RV64I-NEXT: slli a4, a4, 32
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: lbu a4, 9(a0)
; RV64I-NEXT: lbu a5, 8(a0)
-; RV64I-NEXT: lbu a6, 10(a0)
+; RV64I-NEXT: lbu a6, 9(a0)
+; RV64I-NEXT: or a3, a4, a3
+; RV64I-NEXT: lbu a4, 10(a0)
; RV64I-NEXT: lbu a7, 11(a0)
-; RV64I-NEXT: slli a4, a4, 8
-; RV64I-NEXT: or a4, a4, a5
-; RV64I-NEXT: slli a6, a6, 16
+; RV64I-NEXT: slli a6, a6, 8
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: slli a4, a4, 16
; RV64I-NEXT: slli a7, a7, 24
-; RV64I-NEXT: or a5, a7, a6
-; RV64I-NEXT: or a4, a5, a4
-; RV64I-NEXT: lbu a5, 13(a0)
+; RV64I-NEXT: or a4, a7, a4
; RV64I-NEXT: lbu a6, 12(a0)
-; RV64I-NEXT: lbu a7, 14(a0)
+; RV64I-NEXT: lbu a7, 13(a0)
+; RV64I-NEXT: or a4, a4, a5
+; RV64I-NEXT: lbu a5, 14(a0)
; RV64I-NEXT: lbu t0, 15(a0)
-; RV64I-NEXT: slli a5, a5, 8
-; RV64I-NEXT: or a5, a5, a6
-; RV64I-NEXT: slli a7, a7, 16
+; RV64I-NEXT: slli a7, a7, 8
+; RV64I-NEXT: or a6, a7, a6
+; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli t0, t0, 24
-; RV64I-NEXT: or a6, t0, a7
-; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: or a5, t0, a5
+; RV64I-NEXT: or a5, a5, a6
; RV64I-NEXT: slli a5, a5, 32
-; RV64I-NEXT: or a4, a5, a4
-; RV64I-NEXT: lbu a5, 17(a0)
; RV64I-NEXT: lbu a6, 16(a0)
-; RV64I-NEXT: lbu a7, 18(a0)
+; RV64I-NEXT: lbu a7, 17(a0)
+; RV64I-NEXT: or a4, a5, a4
+; RV64I-NEXT: lbu a5, 18(a0)
; RV64I-NEXT: lbu t0, 19(a0)
-; RV64I-NEXT: slli a5, a5, 8
-; RV64I-NEXT: or a5, a5, a6
-; RV64I-NEXT: slli a7, a7, 16
+; RV64I-NEXT: slli a7, a7, 8
+; RV64I-NEXT: or a6, a7, a6
+; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli t0, t0, 24
-; RV64I-NEXT: or a6, t0, a7
-; RV64I-NEXT: or a5, a6, a5
-; RV64I-NEXT: lbu a6, 21(a0)
+; RV64I-NEXT: or a5, t0, a5
; RV64I-NEXT: lbu a7, 20(a0)
-; RV64I-NEXT: lbu t0, 22(a0)
+; RV64I-NEXT: lbu t0, 21(a0)
+; RV64I-NEXT: or a5, a5, a6
+; RV64I-NEXT: lbu a6, 22(a0)
; RV64I-NEXT: lbu t1, 23(a0)
-; RV64I-NEXT: slli a6, a6, 8
-; RV64I-NEXT: or a6, a6, a7
-; RV64I-NEXT: slli t0, t0, 16
+; RV64I-NEXT: slli t0, t0, 8
+; RV64I-NEXT: or a7, t0, a7
+; RV64I-NEXT: slli a6, a6, 16
; RV64I-NEXT: slli t1, t1, 24
-; RV64I-NEXT: or a7, t1, t0
-; RV64I-NEXT: or a6, a7, a6
+; RV64I-NEXT: or a6, t1, a6
+; RV64I-NEXT: or a6, a6, a7
; RV64I-NEXT: slli a6, a6, 32
-; RV64I-NEXT: or a5, a6, a5
-; RV64I-NEXT: lbu a6, 25(a0)
; RV64I-NEXT: lbu a7, 24(a0)
-; RV64I-NEXT: lbu t0, 26(a0)
+; RV64I-NEXT: lbu t0, 25(a0)
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: lbu a6, 26(a0)
; RV64I-NEXT: lbu t1, 27(a0)
-; RV64I-NEXT: slli a6, a6, 8
-; RV64I-NEXT: or a6, a6, a7
-; RV64I-NEXT: slli t0, t0, 16
+; RV64I-NEXT: slli t0, t0, 8
+; RV64I-NEXT: or a7, t0, a7
+; RV64I-NEXT: slli a6, a6, 16
; RV64I-NEXT: slli t1, t1, 24
-; RV64I-NEXT: or a7, t1, t0
-; RV64I-NEXT: or a6, a7, a6
-; RV64I-NEXT: lbu a7, 29(a0)
+; RV64I-NEXT: or a6, t1, a6
; RV64I-NEXT: lbu t0, 28(a0)
-; RV64I-NEXT: lbu t1, 30(a0)
+; RV64I-NEXT: lbu t1, 29(a0)
+; RV64I-NEXT: or a6, a6, a7
+; RV64I-NEXT: lbu a7, 30(a0)
; RV64I-NEXT: lbu a0, 31(a0)
-; RV64I-NEXT: slli a7, a7, 8
-; RV64I-NEXT: or a7, a7, t0
-; RV64I-NEXT: slli t1, t1, 16
+; RV64I-NEXT: slli t1, t1, 8
+; RV64I-NEXT: or t0, t1, t0
+; RV64I-NEXT: slli a7, a7, 16
; RV64I-NEXT: slli a0, a0, 24
-; RV64I-NEXT: or a0, a0, t1
; RV64I-NEXT: or a0, a0, a7
+; RV64I-NEXT: or a0, a0, t0
; RV64I-NEXT: slli a0, a0, 32
-; RV64I-NEXT: or a0, a0, a6
-; RV64I-NEXT: lbu a6, 1(a1)
; RV64I-NEXT: lbu a7, 0(a1)
-; RV64I-NEXT: lbu t0, 2(a1)
+; RV64I-NEXT: lbu t0, 1(a1)
+; RV64I-NEXT: or a0, a0, a6
+; RV64I-NEXT: lbu a6, 2(a1)
; RV64I-NEXT: lbu t1, 3(a1)
-; RV64I-NEXT: slli a6, a6, 8
-; RV64I-NEXT: or a6, a6, a7
-; RV64I-NEXT: slli t0, t0, 16
+; RV64I-NEXT: slli t0, t0, 8
+; RV64I-NEXT: or a7, t0, a7
+; RV64I-NEXT: slli a6, a6, 16
; RV64I-NEXT: slli t1, t1, 24
-; RV64I-NEXT: or a7, t1, t0
-; RV64I-NEXT: or a6, a7, a6
-; RV64I-NEXT: lbu a7, 5(a1)
+; RV64I-NEXT: or a6, t1, a6
; RV64I-NEXT: lbu t0, 4(a1)
-; RV64I-NEXT: lbu t1, 6(a1)
+; RV64I-NEXT: lbu t1, 5(a1)
+; RV64I-NEXT: or a6, a6, a7
+; RV64I-NEXT: lbu a7, 6(a1)
; RV64I-NEXT: lbu a1, 7(a1)
-; RV64I-NEXT: slli a7, a7, 8
-; RV64I-NEXT: or a7, a7, t0
-; RV64I-NEXT: slli t1, t1, 16
+; RV64I-NEXT: slli t1, t1, 8
+; RV64I-NEXT: or t0, t1, t0
+; RV64I-NEXT: slli a7, a7, 16
; RV64I-NEXT: slli a1, a1, 24
-; RV64I-NEXT: or a1, a1, t1
; RV64I-NEXT: or a1, a1, a7
+; RV64I-NEXT: or a1, a1, t0
; RV64I-NEXT: slli a1, a1, 32
; RV64I-NEXT: or a1, a1, a6
; RV64I-NEXT: sd zero, 24(sp)
@@ -3263,30 +3263,30 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: andi a0, a1, 24
; RV64I-NEXT: addi a3, sp, 32
; RV64I-NEXT: sub a3, a3, a0
-; RV64I-NEXT: ld a4, 8(a3)
+; RV64I-NEXT: ld a4, 0(a3)
+; RV64I-NEXT: ld a5, 8(a3)
; RV64I-NEXT: slli a1, a1, 3
-; RV64I-NEXT: ld a5, 0(a3)
-; RV64I-NEXT: sll a6, a4, a1
+; RV64I-NEXT: ld a6, 16(a3)
+; RV64I-NEXT: ld a3, 24(a3)
+; RV64I-NEXT: sll a7, a5, a1
; RV64I-NEXT: andi a0, a1, 56
-; RV64I-NEXT: xori a7, a0, 63
-; RV64I-NEXT: srli a0, a5, 1
-; RV64I-NEXT: ld t0, 24(a3)
-; RV64I-NEXT: ld a3, 16(a3)
-; RV64I-NEXT: srl a0, a0, a7
-; RV64I-NEXT: or a0, a6, a0
-; RV64I-NEXT: sll t0, t0, a1
-; RV64I-NEXT: srli t1, a3, 1
-; RV64I-NEXT: srl t1, t1, a7
-; RV64I-NEXT: or t1, t0, t1
+; RV64I-NEXT: xori t0, a0, 63
+; RV64I-NEXT: srli a0, a4, 1
+; RV64I-NEXT: srl a0, a0, t0
+; RV64I-NEXT: or a0, a7, a0
; RV64I-NEXT: sll a3, a3, a1
-; RV64I-NEXT: srli a4, a4, 1
-; RV64I-NEXT: srl a4, a4, a7
-; RV64I-NEXT: or a4, a3, a4
-; RV64I-NEXT: sll a1, a5, a1
+; RV64I-NEXT: srli t1, a6, 1
+; RV64I-NEXT: srl t1, t1, t0
+; RV64I-NEXT: or t1, a3, t1
+; RV64I-NEXT: sll a6, a6, a1
+; RV64I-NEXT: srli a5, a5, 1
+; RV64I-NEXT: srl a5, a5, t0
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: sll a1, a4, a1
; RV64I-NEXT: sb a1, 0(a2)
+; RV64I-NEXT: srli a4, a6, 56
+; RV64I-NEXT: sb a4, 23(a2)
; RV64I-NEXT: srli a3, a3, 56
-; RV64I-NEXT: sb a3, 23(a2)
-; RV64I-NEXT: srli a3, t0, 56
; RV64I-NEXT: sb a3, 31(a2)
; RV64I-NEXT: srli a3, a1, 56
; RV64I-NEXT: sb a3, 7(a2)
@@ -3302,23 +3302,23 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: sb a3, 2(a2)
; RV64I-NEXT: srli a1, a1, 8
; RV64I-NEXT: sb a1, 1(a2)
-; RV64I-NEXT: srli a1, a6, 56
+; RV64I-NEXT: srli a1, a7, 56
; RV64I-NEXT: sb a1, 15(a2)
-; RV64I-NEXT: sb a4, 16(a2)
+; RV64I-NEXT: sb a5, 16(a2)
; RV64I-NEXT: sb t1, 24(a2)
; RV64I-NEXT: sb a0, 8(a2)
-; RV64I-NEXT: srli a1, a4, 48
+; RV64I-NEXT: srli a1, a5, 48
; RV64I-NEXT: sb a1, 22(a2)
-; RV64I-NEXT: srli a1, a4, 40
+; RV64I-NEXT: srli a1, a5, 40
; RV64I-NEXT: sb a1, 21(a2)
-; RV64I-NEXT: srli a1, a4, 32
+; RV64I-NEXT: srli a1, a5, 32
; RV64I-NEXT: sb a1, 20(a2)
-; RV64I-NEXT: srli a1, a4, 24
+; RV64I-NEXT: srli a1, a5, 24
; RV64I-NEXT: sb a1, 19(a2)
-; RV64I-NEXT: srli a1, a4, 16
+; RV64I-NEXT: srli a1, a5, 16
; RV64I-NEXT: sb a1, 18(a2)
-; RV64I-NEXT: srli a4, a4, 8
-; RV64I-NEXT: sb a4, 17(a2)
+; RV64I-NEXT: srli a5, a5, 8
+; RV64I-NEXT: sb a5, 17(a2)
; RV64I-NEXT: srli a1, t1, 48
; RV64I-NEXT: sb a1, 30(a2)
; RV64I-NEXT: srli a1, t1, 40
@@ -3361,86 +3361,86 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli a6, a6, 24
; RV32I-NEXT: or a4, a6, a5
-; RV32I-NEXT: or a3, a4, a3
-; RV32I-NEXT: lbu a4, 5(a0)
; RV32I-NEXT: lbu a5, 4(a0)
-; RV32I-NEXT: lbu a6, 6(a0)
+; RV32I-NEXT: lbu a6, 5(a0)
+; RV32I-NEXT: or a3, a4, a3
+; RV32I-NEXT: lbu a4, 6(a0)
; RV32I-NEXT: lbu a7, 7(a0)
-; RV32I-NEXT: slli a4, a4, 8
-; RV32I-NEXT: or a4, a4, a5
-; RV32I-NEXT: slli a6, a6, 16
+; RV32I-NEXT: slli a6, a6, 8
+; RV32I-NEXT: or a5, a6, a5
+; RV32I-NEXT: slli a4, a4, 16
; RV32I-NEXT: slli a7, a7, 24
-; RV32I-NEXT: or a5, a7, a6
-; RV32I-NEXT: or a4, a5, a4
-; RV32I-NEXT: lbu a5, 9(a0)
+; RV32I-NEXT: or a4, a7, a4
; RV32I-NEXT: lbu a6, 8(a0)
-; RV32I-NEXT: lbu a7, 10(a0)
+; RV32I-NEXT: lbu a7, 9(a0)
+; RV32I-NEXT: or a4, a4, a5
+; RV32I-NEXT: lbu a5, 10(a0)
; RV32I-NEXT: lbu t0, 11(a0)
-; RV32I-NEXT: slli a5, a5, 8
-; RV32I-NEXT: or a5, a5, a6
-; RV32I-NEXT: slli a7, a7, 16
+; RV32I-NEXT: slli a7, a7, 8
+; RV32I-NEXT: or a6, a7, a6
+; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli t0, t0, 24
-; RV32I-NEXT: or a6, t0, a7
-; RV32I-NEXT: or a5, a6, a5
-; RV32I-NEXT: lbu a6, 13(a0)
+; RV32I-NEXT: or a5, t0, a5
; RV32I-NEXT: lbu a7, 12(a0)
-; RV32I-NEXT: lbu t0, 14(a0)
+; RV32I-NEXT: lbu t0, 13(a0)
+; RV32I-NEXT: or a5, a5, a6
+; RV32I-NEXT: lbu a6, 14(a0)
; RV32I-NEXT: lbu t1, 15(a0)
-; RV32I-NEXT: slli a6, a6, 8
-; RV32I-NEXT: or a6, a6, a7
-; RV32I-NEXT: slli t0, t0, 16
+; RV32I-NEXT: slli t0, t0, 8
+; RV32I-NEXT: or a7, t0, a7
+; RV32I-NEXT: slli a6, a6, 16
; RV32I-NEXT: slli t1, t1, 24
-; RV32I-NEXT: or a7, t1, t0
-; RV32I-NEXT: or a6, a7, a6
-; RV32I-NEXT: lbu a7, 17(a0)
+; RV32I-NEXT: or a6, t1, a6
; RV32I-NEXT: lbu t0, 16(a0)
-; RV32I-NEXT: lbu t1, 18(a0)
+; RV32I-NEXT: lbu t1, 17(a0)
+; RV32I-NEXT: or a6, a6, a7
+; RV32I-NEXT: lbu a7, 18(a0)
; RV32I-NEXT: lbu t2, 19(a0)
-; RV32I-NEXT: slli a7, a7, 8
-; RV32I-NEXT: or a7, a7, t0
-; RV32I-NEXT: slli t1, t1, 16
+; RV32I-NEXT: slli t1, t1, 8
+; RV32I-NEXT: or t0, t1, t0
+; RV32I-NEXT: slli a7, a7, 16
; RV32I-NEXT: slli t2, t2, 24
-; RV32I-NEXT: or t0, t2, t1
-; RV32I-NEXT: or a7, t0, a7
-; RV32I-NEXT: lbu t0, 21(a0)
+; RV32I-NEXT: or a7, t2, a7
; RV32I-NEXT: lbu t1, 20(a0)
-; RV32I-NEXT: lbu t2, 22(a0)
+; RV32I-NEXT: lbu t2, 21(a0)
+; RV32I-NEXT: or a7, a7, t0
+; RV32I-NEXT: lbu t0, 22(a0)
; RV32I-NEXT: lbu t3, 23(a0)
-; RV32I-NEXT: slli t0, t0, 8
-; RV32I-NEXT: or t0, t0, t1
-; RV32I-NEXT: slli t2, t2, 16
+; RV32I-NEXT: slli t2, t2, 8
+; RV32I-NEXT: or t1, t2, t1
+; RV32I-NEXT: slli t0, t0, 16
; RV32I-NEXT: slli t3, t3, 24
-; RV32I-NEXT: or t1, t3, t2
-; RV32I-NEXT: or t0, t1, t0
-; RV32I-NEXT: lbu t1, 25(a0)
+; RV32I-NEXT: or t0, t3, t0
; RV32I-NEXT: lbu t2, 24(a0)
-; RV32I-NEXT: lbu t3, 26(a0)
+; RV32I-NEXT: lbu t3, 25(a0)
+; RV32I-NEXT: or t0, t0, t1
+; RV32I-NEXT: lbu t1, 26(a0)
; RV32I-NEXT: lbu t4, 27(a0)
-; RV32I-NEXT: slli t1, t1, 8
-; RV32I-NEXT: or t1, t1, t2
-; RV32I-NEXT: slli t3, t3, 16
+; RV32I-NEXT: slli t3, t3, 8
+; RV32I-NEXT: or t2, t3, t2
+; RV32I-NEXT: slli t1, t1, 16
; RV32I-NEXT: slli t4, t4, 24
-; RV32I-NEXT: or t2, t4, t3
-; RV32I-NEXT: or t1, t2, t1
-; RV32I-NEXT: lbu t2, 29(a0)
+; RV32I-NEXT: or t1, t4, t1
; RV32I-NEXT: lbu t3, 28(a0)
-; RV32I-NEXT: lbu t4, 30(a0)
+; RV32I-NEXT: lbu t4, 29(a0)
+; RV32I-NEXT: or t1, t1, t2
+; RV32I-NEXT: lbu t2, 30(a0)
; RV32I-NEXT: lbu a0, 31(a0)
-; RV32I-NEXT: slli t2, t2, 8
-; RV32I-NEXT: or t2, t2, t3
-; RV32I-NEXT: slli t4, t4, 16
+; RV32I-NEXT: slli t4, t4, 8
+; RV32I-NEXT: or t3, t4, t3
+; RV32I-NEXT: slli t2, t2, 16
; RV32I-NEXT: slli a0, a0, 24
-; RV32I-NEXT: or a0, a0, t4
; RV32I-NEXT: or a0, a0, t2
-; RV32I-NEXT: lbu t2, 1(a1)
-; RV32I-NEXT: lbu t3, 0(a1)
-; RV32I-NEXT: lbu t4, 2(a1)
+; RV32I-NEXT: lbu t2, 0(a1)
+; RV32I-NEXT: lbu t4, 1(a1)
+; RV32I-NEXT: or a0, a0, t3
+; RV32I-NEXT: lbu t3, 2(a1)
; RV32I-NEXT: lbu a1, 3(a1)
-; RV32I-NEXT: slli t2, t2, 8
-; RV32I-NEXT: or t2, t2, t3
-; RV32I-NEXT: slli t4, t4, 16
+; RV32I-NEXT: slli t4, t4, 8
+; RV32I-NEXT: or t2, t4, t2
+; RV32I-NEXT: slli t3, t3, 16
; RV32I-NEXT: slli a1, a1, 24
-; RV32I-NEXT: or a1, a1, t4
+; RV32I-NEXT: or a1, a1, t3
; RV32I-NEXT: or a1, a1, t2
; RV32I-NEXT: sw zero, 28(sp)
; RV32I-NEXT: sw zero, 24(sp)
@@ -3460,91 +3460,91 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: sw a3, 32(sp)
; RV32I-NEXT: andi a0, a1, 28
; RV32I-NEXT: addi a3, sp, 32
-; RV32I-NEXT: sub a6, a3, a0
-; RV32I-NEXT: lw a3, 4(a6)
+; RV32I-NEXT: sub a5, a3, a0
+; RV32I-NEXT: lw a6, 0(a5)
+; RV32I-NEXT: lw a3, 4(a5)
; RV32I-NEXT: slli a7, a1, 3
-; RV32I-NEXT: lw t0, 0(a6)
+; RV32I-NEXT: lw t0, 8(a5)
+; RV32I-NEXT: lw t1, 12(a5)
; RV32I-NEXT: sll a4, a3, a7
; RV32I-NEXT: andi a0, a7, 24
-; RV32I-NEXT: xori t1, a0, 31
-; RV32I-NEXT: srli a0, t0, 1
-; RV32I-NEXT: lw t2, 12(a6)
-; RV32I-NEXT: lw a5, 8(a6)
-; RV32I-NEXT: srl a0, a0, t1
+; RV32I-NEXT: xori t2, a0, 31
+; RV32I-NEXT: srli a0, a6, 1
+; RV32I-NEXT: srl a0, a0, t2
; RV32I-NEXT: or a0, a4, a0
-; RV32I-NEXT: sll t3, t2, a7
-; RV32I-NEXT: srli a1, a5, 1
-; RV32I-NEXT: srl a1, a1, t1
+; RV32I-NEXT: sll t3, t1, a7
+; RV32I-NEXT: srli a1, t0, 1
+; RV32I-NEXT: srl a1, a1, t2
; RV32I-NEXT: or a1, t3, a1
-; RV32I-NEXT: sll t4, a5, a7
+; RV32I-NEXT: sll t0, t0, a7
; RV32I-NEXT: srli a3, a3, 1
-; RV32I-NEXT: lw t5, 20(a6)
-; RV32I-NEXT: lw t6, 16(a6)
-; RV32I-NEXT: srl a3, a3, t1
-; RV32I-NEXT: or a3, t4, a3
+; RV32I-NEXT: srl a3, a3, t2
+; RV32I-NEXT: lw t4, 16(a5)
+; RV32I-NEXT: lw t5, 20(a5)
+; RV32I-NEXT: or a3, t0, a3
+; RV32I-NEXT: lw t6, 24(a5)
+; RV32I-NEXT: lw a5, 28(a5)
; RV32I-NEXT: sll s0, t5, a7
-; RV32I-NEXT: srli a5, t6, 1
-; RV32I-NEXT: srl a5, a5, t1
-; RV32I-NEXT: or a5, s0, a5
+; RV32I-NEXT: srli s1, t4, 1
+; RV32I-NEXT: srl s1, s1, t2
+; RV32I-NEXT: or s1, s0, s1
+; RV32I-NEXT: sll t4, t4, a7
+; RV32I-NEXT: srli t1, t1, 1
+; RV32I-NEXT: srl t1, t1, t2
+; RV32I-NEXT: or t1, t4, t1
+; RV32I-NEXT: sll a5, a5, a7
+; RV32I-NEXT: srli s2, t6, 1
+; RV32I-NEXT: srl s2, s2, t2
+; RV32I-NEXT: or s2, a5, s2
; RV32I-NEXT: sll t6, t6, a7
-; RV32I-NEXT: srli t2, t2, 1
-; RV32I-NEXT: lw s1, 28(a6)
-; RV32I-NEXT: lw a6, 24(a6)
-; RV32I-NEXT: srl t2, t2, t1
+; RV32I-NEXT: srli t5, t5, 1
+; RV32I-NEXT: srl t2, t5, t2
; RV32I-NEXT: or t2, t6, t2
-; RV32I-NEXT: sll s1, s1, a7
-; RV32I-NEXT: srli s2, a6, 1
-; RV32I-NEXT: srl s2, s2, t1
-; RV32I-NEXT: or s2, s1, s2
; RV32I-NEXT: sll a6, a6, a7
-; RV32I-NEXT: srli t5, t5, 1
-; RV32I-NEXT: srl t1, t5, t1
-; RV32I-NEXT: or t1, a6, t1
-; RV32I-NEXT: sll a7, t0, a7
-; RV32I-NEXT: sb a7, 0(a2)
-; RV32I-NEXT: srli a6, a6, 24
-; RV32I-NEXT: sb a6, 27(a2)
-; RV32I-NEXT: srli s1, s1, 24
-; RV32I-NEXT: sb s1, 31(a2)
-; RV32I-NEXT: srli a6, t6, 24
-; RV32I-NEXT: sb a6, 19(a2)
+; RV32I-NEXT: sb a6, 0(a2)
+; RV32I-NEXT: srli a7, t6, 24
+; RV32I-NEXT: sb a7, 27(a2)
+; RV32I-NEXT: srli a5, a5, 24
+; RV32I-NEXT: sb a5, 31(a2)
+; RV32I-NEXT: srli a5, t4, 24
+; RV32I-NEXT: sb a5, 19(a2)
; RV32I-NEXT: srli s0, s0, 24
; RV32I-NEXT: sb s0, 23(a2)
-; RV32I-NEXT: srli a6, t4, 24
-; RV32I-NEXT: sb a6, 11(a2)
-; RV32I-NEXT: srli a6, t3, 24
-; RV32I-NEXT: sb a6, 15(a2)
-; RV32I-NEXT: srli a6, a7, 24
-; RV32I-NEXT: sb a6, 3(a2)
-; RV32I-NEXT: srli a6, a7, 16
-; RV32I-NEXT: sb a6, 2(a2)
-; RV32I-NEXT: srli a6, a7, 8
-; RV32I-NEXT: sb a6, 1(a2)
+; RV32I-NEXT: srli a5, t0, 24
+; RV32I-NEXT: sb a5, 11(a2)
+; RV32I-NEXT: srli a5, t3, 24
+; RV32I-NEXT: sb a5, 15(a2)
+; RV32I-NEXT: srli a5, a6, 24
+; RV32I-NEXT: sb a5, 3(a2)
+; RV32I-NEXT: srli a5, a6, 16
+; RV32I-NEXT: sb a5, 2(a2)
+; RV32I-NEXT: srli a5, a6, 8
+; RV32I-NEXT: sb a5, 1(a2)
; RV32I-NEXT: srli a4, a4, 24
; RV32I-NEXT: sb a4, 7(a2)
-; RV32I-NEXT: sb t1, 24(a2)
+; RV32I-NEXT: sb t2, 24(a2)
; RV32I-NEXT: sb s2, 28(a2)
-; RV32I-NEXT: sb t2, 16(a2)
-; RV32I-NEXT: sb a5, 20(a2)
+; RV32I-NEXT: sb t1, 16(a2)
+; RV32I-NEXT: sb s1, 20(a2)
; RV32I-NEXT: sb a3, 8(a2)
; RV32I-NEXT: sb a1, 12(a2)
; RV32I-NEXT: sb a0, 4(a2)
-; RV32I-NEXT: srli a4, t1, 16
+; RV32I-NEXT: srli a4, t2, 16
; RV32I-NEXT: sb a4, 26(a2)
-; RV32I-NEXT: srli a4, t1, 8
+; RV32I-NEXT: srli a4, t2, 8
; RV32I-NEXT: sb a4, 25(a2)
; RV32I-NEXT: srli a4, s2, 16
; RV32I-NEXT: sb a4, 30(a2)
; RV32I-NEXT: srli a4, s2, 8
; RV32I-NEXT: sb a4, 29(a2)
-; RV32I-NEXT: srli a4, t2, 16
+; RV32I-NEXT: srli a4, t1, 16
; RV32I-NEXT: sb a4, 18(a2)
-; RV32I-NEXT: srli a4, t2, 8
+; RV32I-NEXT: srli a4, t1, 8
; RV32I-NEXT: sb a4, 17(a2)
-; RV32I-NEXT: srli a4, a5, 16
+; RV32I-NEXT: srli a4, s1, 16
; RV32I-NEXT: sb a4, 22(a2)
-; RV32I-NEXT: srli a5, a5, 8
-; RV32I-NEXT: sb a5, 21(a2)
+; RV32I-NEXT: srli s1, s1, 8
+; RV32I-NEXT: sb s1, 21(a2)
; RV32I-NEXT: srli a4, a3, 16
; RV32I-NEXT: sb a4, 10(a2)
; RV32I-NEXT: srli a3, a3, 8
@@ -3583,105 +3583,105 @@ define void @shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw
; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli a6, a6, 24
; RV64I-NEXT: or a4, a6, a5
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: lbu a4, 5(a0)
; RV64I-NEXT: lbu a5, 4(a0)
-; RV64I-NEXT: lbu a6, 6(a0)
+; RV64I-NEXT: lbu a6, 5(a0)
+; RV64I-NEXT: or a3, a4, a3
+; RV64I-NEXT: lbu a4, 6(a0)
; RV64I-NEXT: lbu a7, 7(a0)
-; RV64I-NEXT: slli a4, a4, 8
-; RV64I-NEXT: or a4, a4, a5
-; RV64I-NEXT: slli a6, a6, 16
+; RV64I-NEXT: slli a6, a6, 8
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: slli a4, a4, 16
; RV64I-NEXT: slli a7, a7, 24
-; RV64I-NEXT: or a5, a7, a6
-; RV64I-NEXT: or a4, a5, a4
+; RV64I-NEXT: or a4, a7, a4
+; RV64I-NEXT: or a4, a4, a5
; RV64I-NEXT: slli a4, a4, 32
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: lbu a4, 9(a0)
; RV64I-NEXT: lbu a5, 8(a0)
-; RV64I-NEXT: lbu a6, 10(a0)
+; RV64I-NEXT: lbu a6, 9(a0)
+; RV64I-NEXT: or a3, a4, a3
+; RV64I-NEXT: lbu a4, 10(a0)
; RV64I-NEXT: lbu a7, 11(a0)
-; RV64I-NEXT: slli a4, a4, 8
-; RV64I-NEXT: or a4, a4, a5
-; RV64I-NEXT: slli a6, a6, 16
+; RV64I-NEXT: slli a6, a6, 8
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: slli a4, a4, 16
; RV64I-NEXT: slli a7, a7, 24
-; RV64I-NEXT: or a5, a7, a6
-; RV64I-NEXT: or a4, a5, a4
-; RV64I-NEXT: lbu a5, 13(a0)
+; RV64I-NEXT: or a4, a7, a4
; RV64I-NEXT: lbu a6, 12(a0)
-; RV64I-NEXT: lbu a7, 14(a0)
+; RV64I-NEXT: lbu a7, 13(a0)
+; RV64I-NEXT: or a4, a4, a5
+; RV64I-NEXT: lbu a5, 14(a0)
; RV64I-NEXT: lbu t0, 15(a0)
-; RV64I-NEXT: slli a5, a5, 8
-; RV64I-NEXT: or a5, a5, a6
-; RV64I-NEXT: slli a7, a7, 16
+; RV64I-NEXT: slli a7, a7, 8
+; RV64I-NEXT: or a6, a7, a6
+; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli t0, t0, 24
-; RV64I-NEXT: or a6, t0, a7
-; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: or a5, t0, a5
+; RV64I-NEXT: or a5, a5, a6
; RV64I-NEXT: slli a5, a5, 32
-; RV64I-NEXT: or a4, a5, a4
-; RV64I-NEXT: lbu a5, 17(a0)
; RV64I-NEXT: lbu a6, 16(a0)
-; RV64I-NEXT: lbu a7, 18(a0)
+; RV64I-NEXT: lbu a7, 17(a0)
+; RV64I-NEXT: or a4, a5, a4
+; RV64I-NEXT: lbu a5, 18(a0)
; RV64I-NEXT: lbu t0, 19(a0)
-; RV64I-NEXT: slli a5, a5, 8
-; RV64I-NEXT: or a5, a5, a6
-; RV64I-NEXT: slli a7, a7, 16
+; RV64I-NEXT: slli a7, a7, 8
+; RV64I-NEXT: or a6, a7, a6
+; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli t0, t0, 24
-; RV64I-NEXT: or a6, t0, a7
-; RV64I-NEXT: or a5, a6, a5
-; RV64I-NEXT: lbu a6, 21(a0)
+; RV64I-NEXT: or a5, t0, a5
; RV64I-NEXT: lbu a7, 20(a0)
-; RV64I-NEXT: lbu t0, 22(a0)
+; RV64I-NEXT: lbu t0, 21(a0)
+; RV64I-NEXT: or a5, a5, a6
+; RV64I-NEXT: lbu a6, 22(a0)
; RV64I-NEXT: lbu t1, 23(a0)
-; RV64I-NEXT: slli a6, a6, 8
-; RV64I-NEXT: or a6, a6, a7
-; RV64I-NEXT: slli t0, t0, 16
+; RV64I-NEXT: slli t0, t0, 8
+; RV64I-NEXT: or a7, t0, a7
+; RV64I-NEXT: slli a6, a6, 16
; RV64I-NEXT: slli t1, t1, 24
-; RV64I-NEXT: or a7, t1, t0
-; RV64I-NEXT: or a6, a7, a6
+; RV64I-NEXT: or a6, t1, a6
+; RV64I-NEXT: or a6, a6, a7
; RV64I-NEXT: slli a6, a6, 32
-; RV64I-NEXT: or a5, a6, a5
-; RV64I-NEXT: lbu a6, 25(a0)
; RV64I-NEXT: lbu a7, 24(a0)
-; RV64I-NEXT: lbu t0, 26(a0)
+; RV64I-NEXT: lbu t0, 25(a0)
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: lbu a6, 26(a0)
; RV64I-NEXT: lbu t1, 27(a0)
-; RV64I-NEXT: slli a6, a6, 8
-; RV64I-NEXT: or a6, a6, a7
-; RV64I-NEXT: slli t0, t0, 16
-; RV64I-NEXT: slli t1, t1, 24
-; RV64I-NEXT: or a7, t1, t0
-; RV64I-NEXT: or a6, a7, a6
-; RV64I-NEXT: lbu a7, 29(a0)
+; RV64I-NEXT: slli t0, t0, 8
+; RV64I-NEXT: or a7, t0, a7
+; RV64I-NEXT: slli a6, a6, 16
+; RV64I-NEXT: slli t1, t1, 24
+; RV64I-NEXT: or a6, t1, a6
; RV64I-NEXT: lbu t0, 28(a0)
-; RV64I-NEXT: lbu t1, 30(a0)
+; RV64I-NEXT: lbu t1, 29(a0)
+; RV64I-NEXT: or a6, a6, a7
+; RV64I-NEXT: lbu a7, 30(a0)
; RV64I-NEXT: lbu a0, 31(a0)
-; RV64I-NEXT: slli a7, a7, 8
-; RV64I-NEXT: or a7, a7, t0
-; RV64I-NEXT: slli t1, t1, 16
+; RV64I-NEXT: slli t1, t1, 8
+; RV64I-NEXT: or t0, t1, t0
+; RV64I-NEXT: slli a7, a7, 16
; RV64I-NEXT: slli a0, a0, 24
-; RV64I-NEXT: or a0, a0, t1
; RV64I-NEXT: or a0, a0, a7
+; RV64I-NEXT: or a0, a0, t0
; RV64I-NEXT: slli a0, a0, 32
-; RV64I-NEXT: or a0, a0, a6
-; RV64I-NEXT: lbu a6, 1(a1)
; RV64I-NEXT: lbu a7, 0(a1)
-; RV64I-NEXT: lbu t0, 2(a1)
+; RV64I-NEXT: lbu t0, 1(a1)
+; RV64I-NEXT: or a0, a0, a6
+; RV64I-NEXT: lbu a6, 2(a1)
; RV64I-NEXT: lbu t1, 3(a1)
-; RV64I-NEXT: slli a6, a6, 8
-; RV64I-NEXT: or a6, a6, a7
-; RV64I-NEXT: slli t0, t0, 16
+; RV64I-NEXT: slli t0, t0, 8
+; RV64I-NEXT: or a7, t0, a7
+; RV64I-NEXT: slli a6, a6, 16
; RV64I-NEXT: slli t1, t1, 24
-; RV64I-NEXT: or a7, t1, t0
-; RV64I-NEXT: or a6, a7, a6
-; RV64I-NEXT: lbu a7, 5(a1)
+; RV64I-NEXT: or a6, t1, a6
; RV64I-NEXT: lbu t0, 4(a1)
-; RV64I-NEXT: lbu t1, 6(a1)
+; RV64I-NEXT: lbu t1, 5(a1)
+; RV64I-NEXT: or a6, a6, a7
+; RV64I-NEXT: lbu a7, 6(a1)
; RV64I-NEXT: lbu a1, 7(a1)
-; RV64I-NEXT: slli a7, a7, 8
-; RV64I-NEXT: or a7, a7, t0
-; RV64I-NEXT: slli t1, t1, 16
+; RV64I-NEXT: slli t1, t1, 8
+; RV64I-NEXT: or t0, t1, t0
+; RV64I-NEXT: slli a7, a7, 16
; RV64I-NEXT: slli a1, a1, 24
-; RV64I-NEXT: or a1, a1, t1
; RV64I-NEXT: or a1, a1, a7
+; RV64I-NEXT: or a1, a1, t0
; RV64I-NEXT: slli a1, a1, 32
; RV64I-NEXT: or a1, a1, a6
; RV64I-NEXT: sd zero, 24(sp)
@@ -3695,75 +3695,75 @@ define void @shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw
; RV64I-NEXT: slli a0, a1, 2
; RV64I-NEXT: andi a0, a0, 24
; RV64I-NEXT: addi a3, sp, 32
-; RV64I-NEXT: sub a0, a3, a0
-; RV64I-NEXT: ld a4, 8(a0)
-; RV64I-NEXT: slli a5, a1, 5
-; RV64I-NEXT: ld a6, 0(a0)
-; RV64I-NEXT: sll a3, a4, a5
-; RV64I-NEXT: andi a1, a5, 32
-; RV64I-NEXT: xori a7, a1, 63
-; RV64I-NEXT: srli a1, a6, 1
-; RV64I-NEXT: ld t0, 24(a0)
-; RV64I-NEXT: ld t1, 16(a0)
-; RV64I-NEXT: srl a0, a1, a7
+; RV64I-NEXT: sub a3, a3, a0
+; RV64I-NEXT: ld a4, 0(a3)
+; RV64I-NEXT: ld a5, 8(a3)
+; RV64I-NEXT: slli a6, a1, 5
+; RV64I-NEXT: ld a7, 16(a3)
+; RV64I-NEXT: ld a1, 24(a3)
+; RV64I-NEXT: sll a3, a5, a6
+; RV64I-NEXT: andi a0, a6, 32
+; RV64I-NEXT: xori t0, a0, 63
+; RV64I-NEXT: srli a0, a4, 1
+; RV64I-NEXT: srl a0, a0, t0
; RV64I-NEXT: or a0, a3, a0
-; RV64I-NEXT: sll t0, t0, a5
-; RV64I-NEXT: srli a1, t1, 1
-; RV64I-NEXT: srl a1, a1, a7
-; RV64I-NEXT: or a1, t0, a1
-; RV64I-NEXT: sll t1, t1, a5
-; RV64I-NEXT: srli a4, a4, 1
-; RV64I-NEXT: srl a4, a4, a7
-; RV64I-NEXT: or a4, t1, a4
-; RV64I-NEXT: sll a5, a6, a5
-; RV64I-NEXT: sb a5, 0(a2)
-; RV64I-NEXT: srli a6, t1, 56
+; RV64I-NEXT: sll t1, a1, a6
+; RV64I-NEXT: srli a1, a7, 1
+; RV64I-NEXT: srl a1, a1, t0
+; RV64I-NEXT: or a1, t1, a1
+; RV64I-NEXT: sll a7, a7, a6
+; RV64I-NEXT: srli a5, a5, 1
+; RV64I-NEXT: srl a5, a5, t0
+; RV64I-NEXT: or a5, a7, a5
+; RV64I-NEXT: sll a4, a4, a6
+; RV64I-NEXT: sb a4, 0(a2)
+; RV64I-NEXT: srli a6, a7, 56
; RV64I-NEXT: sb a6, 23(a2)
-; RV64I-NEXT: srli a6, t1, 48
+; RV64I-NEXT: srli a6, a7, 48
; RV64I-NEXT: sb a6, 22(a2)
-; RV64I-NEXT: srli a6, t1, 40
+; RV64I-NEXT: srli a6, a7, 40
; RV64I-NEXT: sb a6, 21(a2)
-; RV64I-NEXT: srli a6, t1, 32
+; RV64I-NEXT: srli a6, a7, 32
; RV64I-NEXT: sb a6, 20(a2)
-; RV64I-NEXT: srli a6, t0, 56
+; RV64I-NEXT: srli a6, t1, 56
; RV64I-NEXT: sb a6, 31(a2)
-; RV64I-NEXT: srli a6, t0, 48
+; RV64I-NEXT: srli a6, t1, 48
; RV64I-NEXT: sb a6, 30(a2)
-; RV64I-NEXT: srli a6, t0, 40
+; RV64I-NEXT: srli a6, t1, 40
; RV64I-NEXT: sb a6, 29(a2)
-; RV64I-NEXT: srli a6, t0, 32
+; RV64I-NEXT: srli a6, t1, 32
; RV64I-NEXT: sb a6, 28(a2)
-; RV64I-NEXT: srli a6, a5, 56
+; RV64I-NEXT: srli a6, a4, 56
; RV64I-NEXT: sb a6, 7(a2)
-; RV64I-NEXT: srli a6, a5, 48
+; RV64I-NEXT: srli a6, a4, 48
; RV64I-NEXT: sb a6, 6(a2)
-; RV64I-NEXT: srli a6, a5, 40
+; RV64I-NEXT: srli a6, a4, 40
; RV64I-NEXT: sb a6, 5(a2)
-; RV64I-NEXT: srli a6, a5, 32
+; RV64I-NEXT: srli a6, a4, 32
; RV64I-NEXT: sb a6, 4(a2)
-; RV64I-NEXT: srli a6, a5, 24
+; RV64I-NEXT: srli a6, a4, 24
; RV64I-NEXT: sb a6, 3(a2)
-; RV64I-NEXT: srli a6, a5, 16
+; RV64I-NEXT: srli a6, a4, 16
; RV64I-NEXT: sb a6, 2(a2)
-; RV64I-NEXT: srli a5, a5, 8
-; RV64I-NEXT: sb a5, 1(a2)
-; RV64I-NEXT: srli a5, a3, 56
-; RV64I-NEXT: sb a5, 15(a2)
-; RV64I-NEXT: srli a5, a3, 48
-; RV64I-NEXT: sb a5, 14(a2)
-; RV64I-NEXT: srli a5, a3, 40
-; RV64I-NEXT: sb a5, 13(a2)
+; RV64I-NEXT: srli a4, a4, 8
+; RV64I-NEXT: sb a4, 1(a2)
+; RV64I-NEXT: srli a4, a3, 56
+; RV64I-NEXT: sb a4, 15(a2)
+; RV64I-NEXT: srli a4, a3, 48
+; RV64I-NEXT: sb a4, 14(a2)
+; RV64I-NEXT: srli a4, a3, 40
+; RV64I-NEXT: sb a4, 13(a2)
; RV64I-NEXT: srli a3, a3, 32
; RV64I-NEXT: sb a3, 12(a2)
-; RV64I-NEXT: sb a4, 16(a2)
+; RV64I-NEXT: sb a5, 16(a2)
; RV64I-NEXT: sb a1, 24(a2)
; RV64I-NEXT: sb a0, 8(a2)
-; RV64I-NEXT: srli a3, a4, 24
+; RV64I-NEXT: srli a3, a5, 24
; RV64I-NEXT: sb a3, 19(a2)
-; RV64I-NEXT: srli a3, a4, 16
+; RV64I-NEXT: srli a3, a5, 16
; RV64I-NEXT: sb a3, 18(a2)
-; RV64I-NEXT: srli a4, a4, 8
-; RV64I-NEXT: sb a4, 17(a2)
+; RV64I-NEXT: srli a5, a5, 8
+; RV64I-NEXT: sb a5, 17(a2)
; RV64I-NEXT: srli a3, a1, 24
; RV64I-NEXT: sb a3, 27(a2)
; RV64I-NEXT: srli a3, a1, 16
@@ -3791,77 +3791,77 @@ define void @shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw
; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli a6, a6, 24
; RV32I-NEXT: or a4, a6, a5
-; RV32I-NEXT: or a3, a4, a3
-; RV32I-NEXT: lbu a4, 5(a0)
; RV32I-NEXT: lbu a5, 4(a0)
-; RV32I-NEXT: lbu a6, 6(a0)
+; RV32I-NEXT: lbu a6, 5(a0)
+; RV32I-NEXT: or a3, a4, a3
+; RV32I-NEXT: lbu a4, 6(a0)
; RV32I-NEXT: lbu a7, 7(a0)
-; RV32I-NEXT: slli a4, a4, 8
-; RV32I-NEXT: or a4, a4, a5
-; RV32I-NEXT: slli a6, a6, 16
+; RV32I-NEXT: slli a6, a6, 8
+; RV32I-NEXT: or a5, a6, a5
+; RV32I-NEXT: slli a4, a4, 16
; RV32I-NEXT: slli a7, a7, 24
-; RV32I-NEXT: or a5, a7, a6
-; RV32I-NEXT: or a4, a5, a4
-; RV32I-NEXT: lbu a5, 9(a0)
+; RV32I-NEXT: or a4, a7, a4
; RV32I-NEXT: lbu a6, 8(a0)
-; RV32I-NEXT: lbu a7, 10(a0)
+; RV32I-NEXT: lbu a7, 9(a0)
+; RV32I-NEXT: or a4, a4, a5
+; RV32I-NEXT: lbu a5, 10(a0)
; RV32I-NEXT: lbu t0, 11(a0)
-; RV32I-NEXT: slli a5, a5, 8
-; RV32I-NEXT: or a5, a5, a6
-; RV32I-NEXT: slli a7, a7, 16
+; RV32I-NEXT: slli a7, a7, 8
+; RV32I-NEXT: or a6, a7, a6
+; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli t0, t0, 24
-; RV32I-NEXT: or a6, t0, a7
-; RV32I-NEXT: or a5, a6, a5
-; RV32I-NEXT: lbu a6, 13(a0)
+; RV32I-NEXT: or a5, t0, a5
; RV32I-NEXT: lbu a7, 12(a0)
-; RV32I-NEXT: lbu t0, 14(a0)
+; RV32I-NEXT: lbu t0, 13(a0)
+; RV32I-NEXT: or a5, a5, a6
+; RV32I-NEXT: lbu a6, 14(a0)
; RV32I-NEXT: lbu t1, 15(a0)
-; RV32I-NEXT: slli a6, a6, 8
-; RV32I-NEXT: or a6, a6, a7
-; RV32I-NEXT: slli t0, t0, 16
+; RV32I-NEXT: slli t0, t0, 8
+; RV32I-NEXT: or a7, t0, a7
+; RV32I-NEXT: slli a6, a6, 16
; RV32I-NEXT: slli t1, t1, 24
-; RV32I-NEXT: or a7, t1, t0
-; RV32I-NEXT: or a6, a7, a6
-; RV32I-NEXT: lbu a7, 17(a0)
+; RV32I-NEXT: or a6, t1, a6
; RV32I-NEXT: lbu t0, 16(a0)
-; RV32I-NEXT: lbu t1, 18(a0)
+; RV32I-NEXT: lbu t1, 17(a0)
+; RV32I-NEXT: or a6, a6, a7
+; RV32I-NEXT: lbu a7, 18(a0)
; RV32I-NEXT: lbu t2, 19(a0)
-; RV32I-NEXT: slli a7, a7, 8
-; RV32I-NEXT: or a7, a7, t0
-; RV32I-NEXT: slli t1, t1, 16
+; RV32I-NEXT: slli t1, t1, 8
+; RV32I-NEXT: or t0, t1, t0
+; RV32I-NEXT: slli a7, a7, 16
; RV32I-NEXT: slli t2, t2, 24
-; RV32I-NEXT: or t0, t2, t1
-; RV32I-NEXT: or a7, t0, a7
-; RV32I-NEXT: lbu t0, 21(a0)
+; RV32I-NEXT: or a7, t2, a7
; RV32I-NEXT: lbu t1, 20(a0)
-; RV32I-NEXT: lbu t2, 22(a0)
+; RV32I-NEXT: lbu t2, 21(a0)
+; RV32I-NEXT: or a7, a7, t0
+; RV32I-NEXT: lbu t0, 22(a0)
; RV32I-NEXT: lbu t3, 23(a0)
-; RV32I-NEXT: slli t0, t0, 8
-; RV32I-NEXT: or t0, t0, t1
-; RV32I-NEXT: slli t2, t2, 16
+; RV32I-NEXT: slli t2, t2, 8
+; RV32I-NEXT: or t1, t2, t1
+; RV32I-NEXT: slli t0, t0, 16
; RV32I-NEXT: slli t3, t3, 24
-; RV32I-NEXT: or t1, t3, t2
-; RV32I-NEXT: or t0, t1, t0
-; RV32I-NEXT: lbu t1, 25(a0)
+; RV32I-NEXT: or t0, t3, t0
; RV32I-NEXT: lbu t2, 24(a0)
-; RV32I-NEXT: lbu t3, 26(a0)
+; RV32I-NEXT: lbu t3, 25(a0)
+; RV32I-NEXT: or t0, t0, t1
+; RV32I-NEXT: lbu t1, 26(a0)
; RV32I-NEXT: lbu t4, 27(a0)
-; RV32I-NEXT: slli t1, t1, 8
-; RV32I-NEXT: or t1, t1, t2
-; RV32I-NEXT: slli t3, t3, 16
+; RV32I-NEXT: slli t3, t3, 8
+; RV32I-NEXT: or t2, t3, t2
+; RV32I-NEXT: slli t1, t1, 16
; RV32I-NEXT: slli t4, t4, 24
-; RV32I-NEXT: or t2, t4, t3
-; RV32I-NEXT: or t1, t2, t1
-; RV32I-NEXT: lbu t2, 29(a0)
+; RV32I-NEXT: or t1, t4, t1
; RV32I-NEXT: lbu t3, 28(a0)
-; RV32I-NEXT: lbu t4, 30(a0)
+; RV32I-NEXT: lbu t4, 29(a0)
+; RV32I-NEXT: or t1, t1, t2
+; RV32I-NEXT: lbu t2, 30(a0)
; RV32I-NEXT: lbu a0, 31(a0)
-; RV32I-NEXT: slli t2, t2, 8
-; RV32I-NEXT: or t2, t2, t3
-; RV32I-NEXT: slli t4, t4, 16
+; RV32I-NEXT: slli t4, t4, 8
+; RV32I-NEXT: or t3, t4, t3
+; RV32I-NEXT: slli t2, t2, 16
; RV32I-NEXT: slli a0, a0, 24
-; RV32I-NEXT: or a0, a0, t4
; RV32I-NEXT: or a0, a0, t2
+; RV32I-NEXT: or a0, a0, t3
; RV32I-NEXT: lbu a1, 0(a1)
; RV32I-NEXT: sw zero, 28(sp)
; RV32I-NEXT: sw zero, 24(sp)
@@ -3882,64 +3882,64 @@ define void @shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw
; RV32I-NEXT: slli a1, a1, 2
; RV32I-NEXT: andi a1, a1, 28
; RV32I-NEXT: addi a0, sp, 32
-; RV32I-NEXT: sub a3, a0, a1
-; RV32I-NEXT: lw a0, 4(a3)
-; RV32I-NEXT: lw a1, 0(a3)
-; RV32I-NEXT: lw a4, 12(a3)
-; RV32I-NEXT: lw a5, 8(a3)
-; RV32I-NEXT: lw a6, 24(a3)
-; RV32I-NEXT: lw a7, 28(a3)
-; RV32I-NEXT: lw t0, 16(a3)
-; RV32I-NEXT: lw a3, 20(a3)
-; RV32I-NEXT: sb a6, 24(a2)
-; RV32I-NEXT: sb a7, 28(a2)
-; RV32I-NEXT: sb t0, 16(a2)
-; RV32I-NEXT: sb a3, 20(a2)
-; RV32I-NEXT: sb a5, 8(a2)
-; RV32I-NEXT: sb a4, 12(a2)
-; RV32I-NEXT: sb a1, 0(a2)
+; RV32I-NEXT: sub a1, a0, a1
+; RV32I-NEXT: lw a3, 16(a1)
+; RV32I-NEXT: lw a4, 20(a1)
+; RV32I-NEXT: lw a5, 24(a1)
+; RV32I-NEXT: lw a6, 28(a1)
+; RV32I-NEXT: lw a7, 0(a1)
+; RV32I-NEXT: lw a0, 4(a1)
+; RV32I-NEXT: lw t0, 8(a1)
+; RV32I-NEXT: lw a1, 12(a1)
+; RV32I-NEXT: sb a5, 24(a2)
+; RV32I-NEXT: sb a6, 28(a2)
+; RV32I-NEXT: sb a3, 16(a2)
+; RV32I-NEXT: sb a4, 20(a2)
+; RV32I-NEXT: sb t0, 8(a2)
+; RV32I-NEXT: sb a1, 12(a2)
+; RV32I-NEXT: sb a7, 0(a2)
; RV32I-NEXT: sb a0, 4(a2)
-; RV32I-NEXT: srli t1, a6, 24
+; RV32I-NEXT: srli t1, a5, 24
; RV32I-NEXT: sb t1, 27(a2)
-; RV32I-NEXT: srli t1, a6, 16
+; RV32I-NEXT: srli t1, a5, 16
; RV32I-NEXT: sb t1, 26(a2)
-; RV32I-NEXT: srli a6, a6, 8
-; RV32I-NEXT: sb a6, 25(a2)
-; RV32I-NEXT: srli a6, a7, 24
-; RV32I-NEXT: sb a6, 31(a2)
-; RV32I-NEXT: srli a6, a7, 16
-; RV32I-NEXT: sb a6, 30(a2)
-; RV32I-NEXT: srli a6, a7, 8
-; RV32I-NEXT: sb a6, 29(a2)
-; RV32I-NEXT: srli a6, t0, 24
-; RV32I-NEXT: sb a6, 19(a2)
-; RV32I-NEXT: srli a6, t0, 16
-; RV32I-NEXT: sb a6, 18(a2)
-; RV32I-NEXT: srli a6, t0, 8
-; RV32I-NEXT: sb a6, 17(a2)
-; RV32I-NEXT: srli a6, a3, 24
-; RV32I-NEXT: sb a6, 23(a2)
-; RV32I-NEXT: srli a6, a3, 16
-; RV32I-NEXT: sb a6, 22(a2)
-; RV32I-NEXT: srli a3, a3, 8
-; RV32I-NEXT: sb a3, 21(a2)
-; RV32I-NEXT: srli a3, a5, 24
-; RV32I-NEXT: sb a3, 11(a2)
-; RV32I-NEXT: srli a3, a5, 16
-; RV32I-NEXT: sb a3, 10(a2)
; RV32I-NEXT: srli a5, a5, 8
-; RV32I-NEXT: sb a5, 9(a2)
+; RV32I-NEXT: sb a5, 25(a2)
+; RV32I-NEXT: srli a5, a6, 24
+; RV32I-NEXT: sb a5, 31(a2)
+; RV32I-NEXT: srli a5, a6, 16
+; RV32I-NEXT: sb a5, 30(a2)
+; RV32I-NEXT: srli a5, a6, 8
+; RV32I-NEXT: sb a5, 29(a2)
+; RV32I-NEXT: srli a5, a3, 24
+; RV32I-NEXT: sb a5, 19(a2)
+; RV32I-NEXT: srli a5, a3, 16
+; RV32I-NEXT: sb a5, 18(a2)
+; RV32I-NEXT: srli a3, a3, 8
+; RV32I-NEXT: sb a3, 17(a2)
; RV32I-NEXT: srli a3, a4, 24
-; RV32I-NEXT: sb a3, 15(a2)
+; RV32I-NEXT: sb a3, 23(a2)
; RV32I-NEXT: srli a3, a4, 16
-; RV32I-NEXT: sb a3, 14(a2)
+; RV32I-NEXT: sb a3, 22(a2)
; RV32I-NEXT: srli a4, a4, 8
-; RV32I-NEXT: sb a4, 13(a2)
+; RV32I-NEXT: sb a4, 21(a2)
+; RV32I-NEXT: srli a3, t0, 24
+; RV32I-NEXT: sb a3, 11(a2)
+; RV32I-NEXT: srli a3, t0, 16
+; RV32I-NEXT: sb a3, 10(a2)
+; RV32I-NEXT: srli a3, t0, 8
+; RV32I-NEXT: sb a3, 9(a2)
; RV32I-NEXT: srli a3, a1, 24
-; RV32I-NEXT: sb a3, 3(a2)
+; RV32I-NEXT: sb a3, 15(a2)
; RV32I-NEXT: srli a3, a1, 16
-; RV32I-NEXT: sb a3, 2(a2)
+; RV32I-NEXT: sb a3, 14(a2)
; RV32I-NEXT: srli a1, a1, 8
+; RV32I-NEXT: sb a1, 13(a2)
+; RV32I-NEXT: srli a1, a7, 24
+; RV32I-NEXT: sb a1, 3(a2)
+; RV32I-NEXT: srli a1, a7, 16
+; RV32I-NEXT: sb a1, 2(a2)
+; RV32I-NEXT: srli a1, a7, 8
; RV32I-NEXT: sb a1, 1(a2)
; RV32I-NEXT: srli a1, a0, 24
; RV32I-NEXT: sb a1, 7(a2)
@@ -3970,83 +3970,83 @@ define void @shl_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nou
; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli a6, a6, 24
; RV64I-NEXT: or a4, a6, a5
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: lbu a4, 5(a0)
; RV64I-NEXT: lbu a5, 4(a0)
-; RV64I-NEXT: lbu a6, 6(a0)
+; RV64I-NEXT: lbu a6, 5(a0)
+; RV64I-NEXT: or a3, a4, a3
+; RV64I-NEXT: lbu a4, 6(a0)
; RV64I-NEXT: lbu a7, 7(a0)
-; RV64I-NEXT: slli a4, a4, 8
-; RV64I-NEXT: or a4, a4, a5
-; RV64I-NEXT: slli a6, a6, 16
+; RV64I-NEXT: slli a6, a6, 8
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: slli a4, a4, 16
; RV64I-NEXT: slli a7, a7, 24
-; RV64I-NEXT: or a5, a7, a6
-; RV64I-NEXT: or a4, a5, a4
+; RV64I-NEXT: or a4, a7, a4
+; RV64I-NEXT: or a4, a4, a5
; RV64I-NEXT: slli a4, a4, 32
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: lbu a4, 9(a0)
; RV64I-NEXT: lbu a5, 8(a0)
-; RV64I-NEXT: lbu a6, 10(a0)
+; RV64I-NEXT: lbu a6, 9(a0)
+; RV64I-NEXT: or a3, a4, a3
+; RV64I-NEXT: lbu a4, 10(a0)
; RV64I-NEXT: lbu a7, 11(a0)
-; RV64I-NEXT: slli a4, a4, 8
-; RV64I-NEXT: or a4, a4, a5
-; RV64I-NEXT: slli a6, a6, 16
+; RV64I-NEXT: slli a6, a6, 8
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: slli a4, a4, 16
; RV64I-NEXT: slli a7, a7, 24
-; RV64I-NEXT: or a5, a7, a6
-; RV64I-NEXT: or a4, a5, a4
-; RV64I-NEXT: lbu a5, 13(a0)
+; RV64I-NEXT: or a4, a7, a4
; RV64I-NEXT: lbu a6, 12(a0)
-; RV64I-NEXT: lbu a7, 14(a0)
+; RV64I-NEXT: lbu a7, 13(a0)
+; RV64I-NEXT: or a4, a4, a5
+; RV64I-NEXT: lbu a5, 14(a0)
; RV64I-NEXT: lbu t0, 15(a0)
-; RV64I-NEXT: slli a5, a5, 8
-; RV64I-NEXT: or a5, a5, a6
-; RV64I-NEXT: slli a7, a7, 16
+; RV64I-NEXT: slli a7, a7, 8
+; RV64I-NEXT: or a6, a7, a6
+; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli t0, t0, 24
-; RV64I-NEXT: or a6, t0, a7
-; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: or a5, t0, a5
+; RV64I-NEXT: or a5, a5, a6
; RV64I-NEXT: slli a5, a5, 32
-; RV64I-NEXT: or a4, a5, a4
-; RV64I-NEXT: lbu a5, 17(a0)
; RV64I-NEXT: lbu a6, 16(a0)
-; RV64I-NEXT: lbu a7, 18(a0)
+; RV64I-NEXT: lbu a7, 17(a0)
+; RV64I-NEXT: or a4, a5, a4
+; RV64I-NEXT: lbu a5, 18(a0)
; RV64I-NEXT: lbu t0, 19(a0)
-; RV64I-NEXT: slli a5, a5, 8
-; RV64I-NEXT: or a5, a5, a6
-; RV64I-NEXT: slli a7, a7, 16
+; RV64I-NEXT: slli a7, a7, 8
+; RV64I-NEXT: or a6, a7, a6
+; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli t0, t0, 24
-; RV64I-NEXT: or a6, t0, a7
-; RV64I-NEXT: or a5, a6, a5
-; RV64I-NEXT: lbu a6, 21(a0)
+; RV64I-NEXT: or a5, t0, a5
; RV64I-NEXT: lbu a7, 20(a0)
-; RV64I-NEXT: lbu t0, 22(a0)
+; RV64I-NEXT: lbu t0, 21(a0)
+; RV64I-NEXT: or a5, a5, a6
+; RV64I-NEXT: lbu a6, 22(a0)
; RV64I-NEXT: lbu t1, 23(a0)
-; RV64I-NEXT: slli a6, a6, 8
-; RV64I-NEXT: or a6, a6, a7
-; RV64I-NEXT: slli t0, t0, 16
+; RV64I-NEXT: slli t0, t0, 8
+; RV64I-NEXT: or a7, t0, a7
+; RV64I-NEXT: slli a6, a6, 16
; RV64I-NEXT: slli t1, t1, 24
-; RV64I-NEXT: or a7, t1, t0
-; RV64I-NEXT: or a6, a7, a6
+; RV64I-NEXT: or a6, t1, a6
+; RV64I-NEXT: or a6, a6, a7
; RV64I-NEXT: slli a6, a6, 32
-; RV64I-NEXT: or a5, a6, a5
-; RV64I-NEXT: lbu a6, 25(a0)
; RV64I-NEXT: lbu a7, 24(a0)
-; RV64I-NEXT: lbu t0, 26(a0)
+; RV64I-NEXT: lbu t0, 25(a0)
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: lbu a6, 26(a0)
; RV64I-NEXT: lbu t1, 27(a0)
-; RV64I-NEXT: slli a6, a6, 8
-; RV64I-NEXT: or a6, a6, a7
-; RV64I-NEXT: slli t0, t0, 16
+; RV64I-NEXT: slli t0, t0, 8
+; RV64I-NEXT: or a7, t0, a7
+; RV64I-NEXT: slli a6, a6, 16
; RV64I-NEXT: slli t1, t1, 24
-; RV64I-NEXT: or a7, t1, t0
-; RV64I-NEXT: or a6, a7, a6
-; RV64I-NEXT: lbu a7, 29(a0)
+; RV64I-NEXT: or a6, t1, a6
; RV64I-NEXT: lbu t0, 28(a0)
-; RV64I-NEXT: lbu t1, 30(a0)
+; RV64I-NEXT: lbu t1, 29(a0)
+; RV64I-NEXT: or a6, a6, a7
+; RV64I-NEXT: lbu a7, 30(a0)
; RV64I-NEXT: lbu a0, 31(a0)
-; RV64I-NEXT: slli a7, a7, 8
-; RV64I-NEXT: or a7, a7, t0
-; RV64I-NEXT: slli t1, t1, 16
+; RV64I-NEXT: slli t1, t1, 8
+; RV64I-NEXT: or t0, t1, t0
+; RV64I-NEXT: slli a7, a7, 16
; RV64I-NEXT: slli a0, a0, 24
-; RV64I-NEXT: or a0, a0, t1
; RV64I-NEXT: or a0, a0, a7
+; RV64I-NEXT: or a0, a0, t0
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: or a0, a0, a6
; RV64I-NEXT: lbu a1, 0(a1)
@@ -4141,77 +4141,77 @@ define void @shl_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nou
; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli a6, a6, 24
; RV32I-NEXT: or a4, a6, a5
-; RV32I-NEXT: or a3, a4, a3
-; RV32I-NEXT: lbu a4, 5(a0)
; RV32I-NEXT: lbu a5, 4(a0)
-; RV32I-NEXT: lbu a6, 6(a0)
+; RV32I-NEXT: lbu a6, 5(a0)
+; RV32I-NEXT: or a3, a4, a3
+; RV32I-NEXT: lbu a4, 6(a0)
; RV32I-NEXT: lbu a7, 7(a0)
-; RV32I-NEXT: slli a4, a4, 8
-; RV32I-NEXT: or a4, a4, a5
-; RV32I-NEXT: slli a6, a6, 16
+; RV32I-NEXT: slli a6, a6, 8
+; RV32I-NEXT: or a5, a6, a5
+; RV32I-NEXT: slli a4, a4, 16
; RV32I-NEXT: slli a7, a7, 24
-; RV32I-NEXT: or a5, a7, a6
-; RV32I-NEXT: or a4, a5, a4
-; RV32I-NEXT: lbu a5, 9(a0)
+; RV32I-NEXT: or a4, a7, a4
; RV32I-NEXT: lbu a6, 8(a0)
-; RV32I-NEXT: lbu a7, 10(a0)
+; RV32I-NEXT: lbu a7, 9(a0)
+; RV32I-NEXT: or a4, a4, a5
+; RV32I-NEXT: lbu a5, 10(a0)
; RV32I-NEXT: lbu t0, 11(a0)
-; RV32I-NEXT: slli a5, a5, 8
-; RV32I-NEXT: or a5, a5, a6
-; RV32I-NEXT: slli a7, a7, 16
+; RV32I-NEXT: slli a7, a7, 8
+; RV32I-NEXT: or a6, a7, a6
+; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli t0, t0, 24
-; RV32I-NEXT: or a6, t0, a7
-; RV32I-NEXT: or a5, a6, a5
-; RV32I-NEXT: lbu a6, 13(a0)
+; RV32I-NEXT: or a5, t0, a5
; RV32I-NEXT: lbu a7, 12(a0)
-; RV32I-NEXT: lbu t0, 14(a0)
+; RV32I-NEXT: lbu t0, 13(a0)
+; RV32I-NEXT: or a5, a5, a6
+; RV32I-NEXT: lbu a6, 14(a0)
; RV32I-NEXT: lbu t1, 15(a0)
-; RV32I-NEXT: slli a6, a6, 8
-; RV32I-NEXT: or a6, a6, a7
-; RV32I-NEXT: slli t0, t0, 16
+; RV32I-NEXT: slli t0, t0, 8
+; RV32I-NEXT: or a7, t0, a7
+; RV32I-NEXT: slli a6, a6, 16
; RV32I-NEXT: slli t1, t1, 24
-; RV32I-NEXT: or a7, t1, t0
-; RV32I-NEXT: or a6, a7, a6
-; RV32I-NEXT: lbu a7, 17(a0)
+; RV32I-NEXT: or a6, t1, a6
; RV32I-NEXT: lbu t0, 16(a0)
-; RV32I-NEXT: lbu t1, 18(a0)
+; RV32I-NEXT: lbu t1, 17(a0)
+; RV32I-NEXT: or a6, a6, a7
+; RV32I-NEXT: lbu a7, 18(a0)
; RV32I-NEXT: lbu t2, 19(a0)
-; RV32I-NEXT: slli a7, a7, 8
-; RV32I-NEXT: or a7, a7, t0
-; RV32I-NEXT: slli t1, t1, 16
+; RV32I-NEXT: slli t1, t1, 8
+; RV32I-NEXT: or t0, t1, t0
+; RV32I-NEXT: slli a7, a7, 16
; RV32I-NEXT: slli t2, t2, 24
-; RV32I-NEXT: or t0, t2, t1
-; RV32I-NEXT: or a7, t0, a7
-; RV32I-NEXT: lbu t0, 21(a0)
+; RV32I-NEXT: or a7, t2, a7
; RV32I-NEXT: lbu t1, 20(a0)
-; RV32I-NEXT: lbu t2, 22(a0)
+; RV32I-NEXT: lbu t2, 21(a0)
+; RV32I-NEXT: or a7, a7, t0
+; RV32I-NEXT: lbu t0, 22(a0)
; RV32I-NEXT: lbu t3, 23(a0)
-; RV32I-NEXT: slli t0, t0, 8
-; RV32I-NEXT: or t0, t0, t1
-; RV32I-NEXT: slli t2, t2, 16
+; RV32I-NEXT: slli t2, t2, 8
+; RV32I-NEXT: or t1, t2, t1
+; RV32I-NEXT: slli t0, t0, 16
; RV32I-NEXT: slli t3, t3, 24
-; RV32I-NEXT: or t1, t3, t2
-; RV32I-NEXT: or t0, t1, t0
-; RV32I-NEXT: lbu t1, 25(a0)
+; RV32I-NEXT: or t0, t3, t0
; RV32I-NEXT: lbu t2, 24(a0)
-; RV32I-NEXT: lbu t3, 26(a0)
+; RV32I-NEXT: lbu t3, 25(a0)
+; RV32I-NEXT: or t0, t0, t1
+; RV32I-NEXT: lbu t1, 26(a0)
; RV32I-NEXT: lbu t4, 27(a0)
-; RV32I-NEXT: slli t1, t1, 8
-; RV32I-NEXT: or t1, t1, t2
-; RV32I-NEXT: slli t3, t3, 16
+; RV32I-NEXT: slli t3, t3, 8
+; RV32I-NEXT: or t2, t3, t2
+; RV32I-NEXT: slli t1, t1, 16
; RV32I-NEXT: slli t4, t4, 24
-; RV32I-NEXT: or t2, t4, t3
-; RV32I-NEXT: or t1, t2, t1
-; RV32I-NEXT: lbu t2, 29(a0)
+; RV32I-NEXT: or t1, t4, t1
; RV32I-NEXT: lbu t3, 28(a0)
-; RV32I-NEXT: lbu t4, 30(a0)
+; RV32I-NEXT: lbu t4, 29(a0)
+; RV32I-NEXT: or t1, t1, t2
+; RV32I-NEXT: lbu t2, 30(a0)
; RV32I-NEXT: lbu a0, 31(a0)
-; RV32I-NEXT: slli t2, t2, 8
-; RV32I-NEXT: or t2, t2, t3
-; RV32I-NEXT: slli t4, t4, 16
+; RV32I-NEXT: slli t4, t4, 8
+; RV32I-NEXT: or t3, t4, t3
+; RV32I-NEXT: slli t2, t2, 16
; RV32I-NEXT: slli a0, a0, 24
-; RV32I-NEXT: or a0, a0, t4
; RV32I-NEXT: or a0, a0, t2
+; RV32I-NEXT: or a0, a0, t3
; RV32I-NEXT: lbu a1, 0(a1)
; RV32I-NEXT: sw zero, 28(sp)
; RV32I-NEXT: sw zero, 24(sp)
@@ -4232,64 +4232,64 @@ define void @shl_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nou
; RV32I-NEXT: slli a1, a1, 3
; RV32I-NEXT: andi a1, a1, 24
; RV32I-NEXT: addi a0, sp, 32
-; RV32I-NEXT: sub a3, a0, a1
-; RV32I-NEXT: lw a0, 4(a3)
-; RV32I-NEXT: lw a1, 0(a3)
-; RV32I-NEXT: lw a4, 12(a3)
-; RV32I-NEXT: lw a5, 8(a3)
-; RV32I-NEXT: lw a6, 24(a3)
-; RV32I-NEXT: lw a7, 28(a3)
-; RV32I-NEXT: lw t0, 16(a3)
-; RV32I-NEXT: lw a3, 20(a3)
-; RV32I-NEXT: sb a6, 24(a2)
-; RV32I-NEXT: sb a7, 28(a2)
-; RV32I-NEXT: sb t0, 16(a2)
-; RV32I-NEXT: sb a3, 20(a2)
-; RV32I-NEXT: sb a5, 8(a2)
-; RV32I-NEXT: sb a4, 12(a2)
-; RV32I-NEXT: sb a1, 0(a2)
+; RV32I-NEXT: sub a1, a0, a1
+; RV32I-NEXT: lw a3, 16(a1)
+; RV32I-NEXT: lw a4, 20(a1)
+; RV32I-NEXT: lw a5, 24(a1)
+; RV32I-NEXT: lw a6, 28(a1)
+; RV32I-NEXT: lw a7, 0(a1)
+; RV32I-NEXT: lw a0, 4(a1)
+; RV32I-NEXT: lw t0, 8(a1)
+; RV32I-NEXT: lw a1, 12(a1)
+; RV32I-NEXT: sb a5, 24(a2)
+; RV32I-NEXT: sb a6, 28(a2)
+; RV32I-NEXT: sb a3, 16(a2)
+; RV32I-NEXT: sb a4, 20(a2)
+; RV32I-NEXT: sb t0, 8(a2)
+; RV32I-NEXT: sb a1, 12(a2)
+; RV32I-NEXT: sb a7, 0(a2)
; RV32I-NEXT: sb a0, 4(a2)
-; RV32I-NEXT: srli t1, a6, 24
+; RV32I-NEXT: srli t1, a5, 24
; RV32I-NEXT: sb t1, 27(a2)
-; RV32I-NEXT: srli t1, a6, 16
+; RV32I-NEXT: srli t1, a5, 16
; RV32I-NEXT: sb t1, 26(a2)
-; RV32I-NEXT: srli a6, a6, 8
-; RV32I-NEXT: sb a6, 25(a2)
-; RV32I-NEXT: srli a6, a7, 24
-; RV32I-NEXT: sb a6, 31(a2)
-; RV32I-NEXT: srli a6, a7, 16
-; RV32I-NEXT: sb a6, 30(a2)
-; RV32I-NEXT: srli a6, a7, 8
-; RV32I-NEXT: sb a6, 29(a2)
-; RV32I-NEXT: srli a6, t0, 24
-; RV32I-NEXT: sb a6, 19(a2)
-; RV32I-NEXT: srli a6, t0, 16
-; RV32I-NEXT: sb a6, 18(a2)
-; RV32I-NEXT: srli a6, t0, 8
-; RV32I-NEXT: sb a6, 17(a2)
-; RV32I-NEXT: srli a6, a3, 24
-; RV32I-NEXT: sb a6, 23(a2)
-; RV32I-NEXT: srli a6, a3, 16
-; RV32I-NEXT: sb a6, 22(a2)
-; RV32I-NEXT: srli a3, a3, 8
-; RV32I-NEXT: sb a3, 21(a2)
-; RV32I-NEXT: srli a3, a5, 24
-; RV32I-NEXT: sb a3, 11(a2)
-; RV32I-NEXT: srli a3, a5, 16
-; RV32I-NEXT: sb a3, 10(a2)
; RV32I-NEXT: srli a5, a5, 8
-; RV32I-NEXT: sb a5, 9(a2)
+; RV32I-NEXT: sb a5, 25(a2)
+; RV32I-NEXT: srli a5, a6, 24
+; RV32I-NEXT: sb a5, 31(a2)
+; RV32I-NEXT: srli a5, a6, 16
+; RV32I-NEXT: sb a5, 30(a2)
+; RV32I-NEXT: srli a5, a6, 8
+; RV32I-NEXT: sb a5, 29(a2)
+; RV32I-NEXT: srli a5, a3, 24
+; RV32I-NEXT: sb a5, 19(a2)
+; RV32I-NEXT: srli a5, a3, 16
+; RV32I-NEXT: sb a5, 18(a2)
+; RV32I-NEXT: srli a3, a3, 8
+; RV32I-NEXT: sb a3, 17(a2)
; RV32I-NEXT: srli a3, a4, 24
-; RV32I-NEXT: sb a3, 15(a2)
+; RV32I-NEXT: sb a3, 23(a2)
; RV32I-NEXT: srli a3, a4, 16
-; RV32I-NEXT: sb a3, 14(a2)
+; RV32I-NEXT: sb a3, 22(a2)
; RV32I-NEXT: srli a4, a4, 8
-; RV32I-NEXT: sb a4, 13(a2)
+; RV32I-NEXT: sb a4, 21(a2)
+; RV32I-NEXT: srli a3, t0, 24
+; RV32I-NEXT: sb a3, 11(a2)
+; RV32I-NEXT: srli a3, t0, 16
+; RV32I-NEXT: sb a3, 10(a2)
+; RV32I-NEXT: srli a3, t0, 8
+; RV32I-NEXT: sb a3, 9(a2)
; RV32I-NEXT: srli a3, a1, 24
-; RV32I-NEXT: sb a3, 3(a2)
+; RV32I-NEXT: sb a3, 15(a2)
; RV32I-NEXT: srli a3, a1, 16
-; RV32I-NEXT: sb a3, 2(a2)
+; RV32I-NEXT: sb a3, 14(a2)
; RV32I-NEXT: srli a1, a1, 8
+; RV32I-NEXT: sb a1, 13(a2)
+; RV32I-NEXT: srli a1, a7, 24
+; RV32I-NEXT: sb a1, 3(a2)
+; RV32I-NEXT: srli a1, a7, 16
+; RV32I-NEXT: sb a1, 2(a2)
+; RV32I-NEXT: srli a1, a7, 8
; RV32I-NEXT: sb a1, 1(a2)
; RV32I-NEXT: srli a1, a0, 24
; RV32I-NEXT: sb a1, 7(a2)
@@ -4320,105 +4320,105 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli a6, a6, 24
; RV64I-NEXT: or a4, a6, a5
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: lbu a4, 5(a0)
; RV64I-NEXT: lbu a5, 4(a0)
-; RV64I-NEXT: lbu a6, 6(a0)
+; RV64I-NEXT: lbu a6, 5(a0)
+; RV64I-NEXT: or a3, a4, a3
+; RV64I-NEXT: lbu a4, 6(a0)
; RV64I-NEXT: lbu a7, 7(a0)
-; RV64I-NEXT: slli a4, a4, 8
-; RV64I-NEXT: or a4, a4, a5
-; RV64I-NEXT: slli a6, a6, 16
+; RV64I-NEXT: slli a6, a6, 8
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: slli a4, a4, 16
; RV64I-NEXT: slli a7, a7, 24
-; RV64I-NEXT: or a5, a7, a6
-; RV64I-NEXT: or a4, a5, a4
+; RV64I-NEXT: or a4, a7, a4
+; RV64I-NEXT: or a4, a4, a5
; RV64I-NEXT: slli a4, a4, 32
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: lbu a4, 9(a0)
; RV64I-NEXT: lbu a5, 8(a0)
-; RV64I-NEXT: lbu a6, 10(a0)
+; RV64I-NEXT: lbu a6, 9(a0)
+; RV64I-NEXT: or a3, a4, a3
+; RV64I-NEXT: lbu a4, 10(a0)
; RV64I-NEXT: lbu a7, 11(a0)
-; RV64I-NEXT: slli a4, a4, 8
-; RV64I-NEXT: or a4, a4, a5
-; RV64I-NEXT: slli a6, a6, 16
+; RV64I-NEXT: slli a6, a6, 8
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: slli a4, a4, 16
; RV64I-NEXT: slli a7, a7, 24
-; RV64I-NEXT: or a5, a7, a6
-; RV64I-NEXT: or a4, a5, a4
-; RV64I-NEXT: lbu a5, 13(a0)
+; RV64I-NEXT: or a4, a7, a4
; RV64I-NEXT: lbu a6, 12(a0)
-; RV64I-NEXT: lbu a7, 14(a0)
+; RV64I-NEXT: lbu a7, 13(a0)
+; RV64I-NEXT: or a4, a4, a5
+; RV64I-NEXT: lbu a5, 14(a0)
; RV64I-NEXT: lbu t0, 15(a0)
-; RV64I-NEXT: slli a5, a5, 8
-; RV64I-NEXT: or a5, a5, a6
-; RV64I-NEXT: slli a7, a7, 16
+; RV64I-NEXT: slli a7, a7, 8
+; RV64I-NEXT: or a6, a7, a6
+; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli t0, t0, 24
-; RV64I-NEXT: or a6, t0, a7
-; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: or a5, t0, a5
+; RV64I-NEXT: or a5, a5, a6
; RV64I-NEXT: slli a5, a5, 32
-; RV64I-NEXT: or a4, a5, a4
-; RV64I-NEXT: lbu a5, 17(a0)
; RV64I-NEXT: lbu a6, 16(a0)
-; RV64I-NEXT: lbu a7, 18(a0)
+; RV64I-NEXT: lbu a7, 17(a0)
+; RV64I-NEXT: or a4, a5, a4
+; RV64I-NEXT: lbu a5, 18(a0)
; RV64I-NEXT: lbu t0, 19(a0)
-; RV64I-NEXT: slli a5, a5, 8
-; RV64I-NEXT: or a5, a5, a6
-; RV64I-NEXT: slli a7, a7, 16
+; RV64I-NEXT: slli a7, a7, 8
+; RV64I-NEXT: or a6, a7, a6
+; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli t0, t0, 24
-; RV64I-NEXT: or a6, t0, a7
-; RV64I-NEXT: or a5, a6, a5
-; RV64I-NEXT: lbu a6, 21(a0)
+; RV64I-NEXT: or a5, t0, a5
; RV64I-NEXT: lbu a7, 20(a0)
-; RV64I-NEXT: lbu t0, 22(a0)
+; RV64I-NEXT: lbu t0, 21(a0)
+; RV64I-NEXT: or a5, a5, a6
+; RV64I-NEXT: lbu a6, 22(a0)
; RV64I-NEXT: lbu t1, 23(a0)
-; RV64I-NEXT: slli a6, a6, 8
-; RV64I-NEXT: or a6, a6, a7
-; RV64I-NEXT: slli t0, t0, 16
+; RV64I-NEXT: slli t0, t0, 8
+; RV64I-NEXT: or a7, t0, a7
+; RV64I-NEXT: slli a6, a6, 16
; RV64I-NEXT: slli t1, t1, 24
-; RV64I-NEXT: or a7, t1, t0
-; RV64I-NEXT: or a6, a7, a6
+; RV64I-NEXT: or a6, t1, a6
+; RV64I-NEXT: or a6, a6, a7
; RV64I-NEXT: slli a6, a6, 32
-; RV64I-NEXT: or a5, a6, a5
-; RV64I-NEXT: lbu a6, 25(a0)
; RV64I-NEXT: lbu a7, 24(a0)
-; RV64I-NEXT: lbu t0, 26(a0)
+; RV64I-NEXT: lbu t0, 25(a0)
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: lbu a6, 26(a0)
; RV64I-NEXT: lbu t1, 27(a0)
-; RV64I-NEXT: slli a6, a6, 8
-; RV64I-NEXT: or a6, a6, a7
-; RV64I-NEXT: slli t0, t0, 16
+; RV64I-NEXT: slli t0, t0, 8
+; RV64I-NEXT: or a7, t0, a7
+; RV64I-NEXT: slli a6, a6, 16
; RV64I-NEXT: slli t1, t1, 24
-; RV64I-NEXT: or a7, t1, t0
-; RV64I-NEXT: or a6, a7, a6
-; RV64I-NEXT: lbu a7, 29(a0)
+; RV64I-NEXT: or a6, t1, a6
; RV64I-NEXT: lbu t0, 28(a0)
-; RV64I-NEXT: lbu t1, 30(a0)
+; RV64I-NEXT: lbu t1, 29(a0)
+; RV64I-NEXT: or a6, a6, a7
+; RV64I-NEXT: lbu a7, 30(a0)
; RV64I-NEXT: lbu a0, 31(a0)
-; RV64I-NEXT: slli a7, a7, 8
-; RV64I-NEXT: or a7, a7, t0
-; RV64I-NEXT: slli t1, t1, 16
+; RV64I-NEXT: slli t1, t1, 8
+; RV64I-NEXT: or t0, t1, t0
+; RV64I-NEXT: slli a7, a7, 16
; RV64I-NEXT: slli a0, a0, 24
-; RV64I-NEXT: or a0, a0, t1
; RV64I-NEXT: or a0, a0, a7
+; RV64I-NEXT: or a0, a0, t0
; RV64I-NEXT: slli a7, a0, 32
-; RV64I-NEXT: or a6, a7, a6
-; RV64I-NEXT: lbu a7, 1(a1)
; RV64I-NEXT: lbu t0, 0(a1)
-; RV64I-NEXT: lbu t1, 2(a1)
+; RV64I-NEXT: lbu t1, 1(a1)
+; RV64I-NEXT: or a6, a7, a6
+; RV64I-NEXT: lbu a7, 2(a1)
; RV64I-NEXT: lbu t2, 3(a1)
-; RV64I-NEXT: slli a7, a7, 8
-; RV64I-NEXT: or a7, a7, t0
-; RV64I-NEXT: slli t1, t1, 16
+; RV64I-NEXT: slli t1, t1, 8
+; RV64I-NEXT: or t0, t1, t0
+; RV64I-NEXT: slli a7, a7, 16
; RV64I-NEXT: slli t2, t2, 24
-; RV64I-NEXT: or t0, t2, t1
-; RV64I-NEXT: or a7, t0, a7
-; RV64I-NEXT: lbu t0, 5(a1)
+; RV64I-NEXT: or a7, t2, a7
; RV64I-NEXT: lbu t1, 4(a1)
-; RV64I-NEXT: lbu t2, 6(a1)
+; RV64I-NEXT: lbu t2, 5(a1)
+; RV64I-NEXT: or a7, a7, t0
+; RV64I-NEXT: lbu t0, 6(a1)
; RV64I-NEXT: lbu a1, 7(a1)
-; RV64I-NEXT: slli t0, t0, 8
-; RV64I-NEXT: or t0, t0, t1
-; RV64I-NEXT: slli t2, t2, 16
+; RV64I-NEXT: slli t2, t2, 8
+; RV64I-NEXT: or t1, t2, t1
+; RV64I-NEXT: slli t0, t0, 16
; RV64I-NEXT: slli a1, a1, 24
-; RV64I-NEXT: or a1, a1, t2
; RV64I-NEXT: or a1, a1, t0
+; RV64I-NEXT: or a1, a1, t1
; RV64I-NEXT: slli a1, a1, 32
; RV64I-NEXT: or a1, a1, a7
; RV64I-NEXT: sraiw a0, a0, 31
@@ -4432,31 +4432,31 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: sd a3, 0(sp)
; RV64I-NEXT: andi a0, a1, 24
; RV64I-NEXT: mv a3, sp
-; RV64I-NEXT: add a3, a3, a0
-; RV64I-NEXT: ld a4, 8(a3)
+; RV64I-NEXT: add a0, a3, a0
+; RV64I-NEXT: ld a3, 0(a0)
+; RV64I-NEXT: ld a4, 8(a0)
; RV64I-NEXT: slli a1, a1, 3
-; RV64I-NEXT: srl a5, a4, a1
-; RV64I-NEXT: ld a6, 16(a3)
+; RV64I-NEXT: ld a5, 16(a0)
+; RV64I-NEXT: ld a6, 24(a0)
+; RV64I-NEXT: srl a7, a4, a1
; RV64I-NEXT: andi a0, a1, 56
-; RV64I-NEXT: xori a7, a0, 63
-; RV64I-NEXT: ld t0, 0(a3)
-; RV64I-NEXT: slli a0, a6, 1
-; RV64I-NEXT: sll a0, a0, a7
-; RV64I-NEXT: or a0, a5, a0
-; RV64I-NEXT: srl t0, t0, a1
+; RV64I-NEXT: xori t0, a0, 63
+; RV64I-NEXT: slli a0, a5, 1
+; RV64I-NEXT: sll a0, a0, t0
+; RV64I-NEXT: or a0, a7, a0
+; RV64I-NEXT: srl a3, a3, a1
; RV64I-NEXT: slli a4, a4, 1
-; RV64I-NEXT: ld a3, 24(a3)
-; RV64I-NEXT: sll a4, a4, a7
-; RV64I-NEXT: or a4, t0, a4
-; RV64I-NEXT: srl a6, a6, a1
-; RV64I-NEXT: slli t1, a3, 1
-; RV64I-NEXT: sll a7, t1, a7
-; RV64I-NEXT: or a7, a6, a7
-; RV64I-NEXT: sra a1, a3, a1
-; RV64I-NEXT: sb a6, 16(a2)
+; RV64I-NEXT: sll a4, a4, t0
+; RV64I-NEXT: or a4, a3, a4
+; RV64I-NEXT: srl a5, a5, a1
+; RV64I-NEXT: slli t1, a6, 1
+; RV64I-NEXT: sll t0, t1, t0
+; RV64I-NEXT: or t0, a5, t0
+; RV64I-NEXT: sra a1, a6, a1
+; RV64I-NEXT: sb a5, 16(a2)
; RV64I-NEXT: sb a1, 24(a2)
-; RV64I-NEXT: sb t0, 0(a2)
-; RV64I-NEXT: sb a5, 8(a2)
+; RV64I-NEXT: sb a3, 0(a2)
+; RV64I-NEXT: sb a7, 8(a2)
; RV64I-NEXT: srli a3, a1, 56
; RV64I-NEXT: sb a3, 31(a2)
; RV64I-NEXT: srli a3, a1, 48
@@ -4471,19 +4471,19 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: sb a3, 26(a2)
; RV64I-NEXT: srli a1, a1, 8
; RV64I-NEXT: sb a1, 25(a2)
-; RV64I-NEXT: srli a1, a7, 56
+; RV64I-NEXT: srli a1, t0, 56
; RV64I-NEXT: sb a1, 23(a2)
-; RV64I-NEXT: srli a1, a7, 48
+; RV64I-NEXT: srli a1, t0, 48
; RV64I-NEXT: sb a1, 22(a2)
-; RV64I-NEXT: srli a1, a7, 40
+; RV64I-NEXT: srli a1, t0, 40
; RV64I-NEXT: sb a1, 21(a2)
-; RV64I-NEXT: srli a1, a7, 32
+; RV64I-NEXT: srli a1, t0, 32
; RV64I-NEXT: sb a1, 20(a2)
-; RV64I-NEXT: srli a1, a7, 24
+; RV64I-NEXT: srli a1, t0, 24
; RV64I-NEXT: sb a1, 19(a2)
-; RV64I-NEXT: srli a1, a7, 16
+; RV64I-NEXT: srli a1, t0, 16
; RV64I-NEXT: sb a1, 18(a2)
-; RV64I-NEXT: srli a1, a7, 8
+; RV64I-NEXT: srli a1, t0, 8
; RV64I-NEXT: sb a1, 17(a2)
; RV64I-NEXT: srli a1, a4, 56
; RV64I-NEXT: sb a1, 7(a2)
@@ -4531,87 +4531,87 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli a6, a6, 24
; RV32I-NEXT: or a4, a6, a5
-; RV32I-NEXT: or a3, a4, a3
-; RV32I-NEXT: lbu a4, 5(a0)
; RV32I-NEXT: lbu a5, 4(a0)
-; RV32I-NEXT: lbu a6, 6(a0)
+; RV32I-NEXT: lbu a6, 5(a0)
+; RV32I-NEXT: or a3, a4, a3
+; RV32I-NEXT: lbu a4, 6(a0)
; RV32I-NEXT: lbu a7, 7(a0)
-; RV32I-NEXT: slli a4, a4, 8
-; RV32I-NEXT: or a4, a4, a5
-; RV32I-NEXT: slli a6, a6, 16
+; RV32I-NEXT: slli a6, a6, 8
+; RV32I-NEXT: or a5, a6, a5
+; RV32I-NEXT: slli a4, a4, 16
; RV32I-NEXT: slli a7, a7, 24
-; RV32I-NEXT: or a5, a7, a6
-; RV32I-NEXT: or a4, a5, a4
-; RV32I-NEXT: lbu a5, 9(a0)
+; RV32I-NEXT: or a4, a7, a4
; RV32I-NEXT: lbu a6, 8(a0)
-; RV32I-NEXT: lbu a7, 10(a0)
+; RV32I-NEXT: lbu a7, 9(a0)
+; RV32I-NEXT: or a4, a4, a5
+; RV32I-NEXT: lbu a5, 10(a0)
; RV32I-NEXT: lbu t0, 11(a0)
-; RV32I-NEXT: slli a5, a5, 8
-; RV32I-NEXT: or a5, a5, a6
-; RV32I-NEXT: slli a7, a7, 16
+; RV32I-NEXT: slli a7, a7, 8
+; RV32I-NEXT: or a6, a7, a6
+; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli t0, t0, 24
-; RV32I-NEXT: or a6, t0, a7
-; RV32I-NEXT: or a5, a6, a5
-; RV32I-NEXT: lbu a6, 13(a0)
+; RV32I-NEXT: or a5, t0, a5
; RV32I-NEXT: lbu a7, 12(a0)
-; RV32I-NEXT: lbu t0, 14(a0)
+; RV32I-NEXT: lbu t0, 13(a0)
+; RV32I-NEXT: or a5, a5, a6
+; RV32I-NEXT: lbu a6, 14(a0)
; RV32I-NEXT: lbu t1, 15(a0)
-; RV32I-NEXT: slli a6, a6, 8
-; RV32I-NEXT: or a6, a6, a7
-; RV32I-NEXT: slli t0, t0, 16
+; RV32I-NEXT: slli t0, t0, 8
+; RV32I-NEXT: or a7, t0, a7
+; RV32I-NEXT: slli a6, a6, 16
; RV32I-NEXT: slli t1, t1, 24
-; RV32I-NEXT: or a7, t1, t0
-; RV32I-NEXT: or a6, a7, a6
-; RV32I-NEXT: lbu a7, 17(a0)
+; RV32I-NEXT: or a6, t1, a6
; RV32I-NEXT: lbu t0, 16(a0)
-; RV32I-NEXT: lbu t1, 18(a0)
+; RV32I-NEXT: lbu t1, 17(a0)
+; RV32I-NEXT: or a6, a6, a7
+; RV32I-NEXT: lbu a7, 18(a0)
; RV32I-NEXT: lbu t2, 19(a0)
-; RV32I-NEXT: slli a7, a7, 8
-; RV32I-NEXT: or a7, a7, t0
-; RV32I-NEXT: slli t1, t1, 16
+; RV32I-NEXT: slli t1, t1, 8
+; RV32I-NEXT: or t0, t1, t0
+; RV32I-NEXT: slli a7, a7, 16
; RV32I-NEXT: slli t2, t2, 24
-; RV32I-NEXT: or t0, t2, t1
-; RV32I-NEXT: or a7, t0, a7
-; RV32I-NEXT: lbu t0, 21(a0)
+; RV32I-NEXT: or a7, t2, a7
; RV32I-NEXT: lbu t1, 20(a0)
-; RV32I-NEXT: lbu t2, 22(a0)
+; RV32I-NEXT: lbu t2, 21(a0)
+; RV32I-NEXT: or a7, a7, t0
+; RV32I-NEXT: lbu t0, 22(a0)
; RV32I-NEXT: lbu t3, 23(a0)
-; RV32I-NEXT: slli t0, t0, 8
-; RV32I-NEXT: or t0, t0, t1
-; RV32I-NEXT: slli t2, t2, 16
+; RV32I-NEXT: slli t2, t2, 8
+; RV32I-NEXT: or t1, t2, t1
+; RV32I-NEXT: slli t0, t0, 16
; RV32I-NEXT: slli t3, t3, 24
-; RV32I-NEXT: or t1, t3, t2
-; RV32I-NEXT: or t0, t1, t0
-; RV32I-NEXT: lbu t1, 25(a0)
+; RV32I-NEXT: or t0, t3, t0
; RV32I-NEXT: lbu t2, 24(a0)
-; RV32I-NEXT: lbu t3, 26(a0)
+; RV32I-NEXT: lbu t3, 25(a0)
+; RV32I-NEXT: or t0, t0, t1
+; RV32I-NEXT: lbu t1, 26(a0)
; RV32I-NEXT: lbu t4, 27(a0)
-; RV32I-NEXT: slli t1, t1, 8
-; RV32I-NEXT: or t1, t1, t2
-; RV32I-NEXT: slli t3, t3, 16
+; RV32I-NEXT: slli t3, t3, 8
+; RV32I-NEXT: or t2, t3, t2
+; RV32I-NEXT: slli t1, t1, 16
; RV32I-NEXT: slli t4, t4, 24
-; RV32I-NEXT: or t2, t4, t3
-; RV32I-NEXT: or t1, t2, t1
-; RV32I-NEXT: lbu t2, 29(a0)
+; RV32I-NEXT: or t1, t4, t1
; RV32I-NEXT: lbu t3, 28(a0)
-; RV32I-NEXT: lbu t4, 30(a0)
+; RV32I-NEXT: lbu t4, 29(a0)
+; RV32I-NEXT: or t1, t1, t2
+; RV32I-NEXT: lbu t2, 30(a0)
; RV32I-NEXT: lbu a0, 31(a0)
-; RV32I-NEXT: slli t2, t2, 8
-; RV32I-NEXT: or t2, t2, t3
-; RV32I-NEXT: slli t4, t4, 16
+; RV32I-NEXT: slli t4, t4, 8
+; RV32I-NEXT: or t3, t4, t3
+; RV32I-NEXT: slli t2, t2, 16
; RV32I-NEXT: slli a0, a0, 24
-; RV32I-NEXT: or t3, a0, t4
-; RV32I-NEXT: or t2, t3, t2
-; RV32I-NEXT: lbu t3, 1(a1)
+; RV32I-NEXT: or t2, a0, t2
; RV32I-NEXT: lbu t4, 0(a1)
-; RV32I-NEXT: lbu t5, 2(a1)
+; RV32I-NEXT: lbu t5, 1(a1)
+; RV32I-NEXT: or t2, t2, t3
+; RV32I-NEXT: lbu t3, 2(a1)
; RV32I-NEXT: lbu a1, 3(a1)
-; RV32I-NEXT: slli t3, t3, 8
-; RV32I-NEXT: or t3, t3, t4
-; RV32I-NEXT: slli t5, t5, 16
+; RV32I-NEXT: slli t5, t5, 8
+; RV32I-NEXT: or t4, t5, t4
+; RV32I-NEXT: slli t3, t3, 16
; RV32I-NEXT: slli a1, a1, 24
-; RV32I-NEXT: or a1, a1, t5
; RV32I-NEXT: or a1, a1, t3
+; RV32I-NEXT: or a1, a1, t4
; RV32I-NEXT: srai a0, a0, 31
; RV32I-NEXT: sw a0, 60(sp)
; RV32I-NEXT: sw a0, 56(sp)
@@ -4631,54 +4631,54 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: sw a3, 0(sp)
; RV32I-NEXT: andi a0, a1, 28
; RV32I-NEXT: mv a3, sp
-; RV32I-NEXT: add a5, a3, a0
-; RV32I-NEXT: lw a3, 4(a5)
-; RV32I-NEXT: slli a6, a1, 3
-; RV32I-NEXT: srl a4, a3, a6
-; RV32I-NEXT: lw a7, 8(a5)
-; RV32I-NEXT: andi a0, a6, 24
-; RV32I-NEXT: xori t0, a0, 31
-; RV32I-NEXT: lw a1, 0(a5)
-; RV32I-NEXT: slli a0, a7, 1
-; RV32I-NEXT: sll a0, a0, t0
+; RV32I-NEXT: add a3, a3, a0
+; RV32I-NEXT: lw a6, 0(a3)
+; RV32I-NEXT: lw a7, 4(a3)
+; RV32I-NEXT: slli a5, a1, 3
+; RV32I-NEXT: lw t0, 8(a3)
+; RV32I-NEXT: lw t1, 12(a3)
+; RV32I-NEXT: srl a4, a7, a5
+; RV32I-NEXT: andi a0, a5, 24
+; RV32I-NEXT: xori t2, a0, 31
+; RV32I-NEXT: slli a0, t0, 1
+; RV32I-NEXT: sll a0, a0, t2
; RV32I-NEXT: or a0, a4, a0
-; RV32I-NEXT: srl t1, a1, a6
-; RV32I-NEXT: slli a3, a3, 1
-; RV32I-NEXT: lw t2, 12(a5)
-; RV32I-NEXT: lw t3, 16(a5)
-; RV32I-NEXT: sll a1, a3, t0
-; RV32I-NEXT: or a1, t1, a1
-; RV32I-NEXT: srl t4, t2, a6
+; RV32I-NEXT: srl a6, a6, a5
+; RV32I-NEXT: slli a7, a7, 1
+; RV32I-NEXT: sll a1, a7, t2
+; RV32I-NEXT: or a1, a6, a1
+; RV32I-NEXT: srl a7, t1, a5
+; RV32I-NEXT: lw t3, 16(a3)
+; RV32I-NEXT: lw t4, 20(a3)
+; RV32I-NEXT: lw t5, 24(a3)
+; RV32I-NEXT: lw t6, 28(a3)
; RV32I-NEXT: slli a3, t3, 1
-; RV32I-NEXT: sll a3, a3, t0
-; RV32I-NEXT: or a3, t4, a3
-; RV32I-NEXT: srl a7, a7, a6
-; RV32I-NEXT: slli t2, t2, 1
-; RV32I-NEXT: lw t5, 20(a5)
-; RV32I-NEXT: lw t6, 24(a5)
-; RV32I-NEXT: sll t2, t2, t0
-; RV32I-NEXT: or t2, a7, t2
-; RV32I-NEXT: srl s0, t5, a6
-; RV32I-NEXT: slli s1, t6, 1
-; RV32I-NEXT: sll s1, s1, t0
+; RV32I-NEXT: sll a3, a3, t2
+; RV32I-NEXT: or a3, a7, a3
+; RV32I-NEXT: srl t0, t0, a5
+; RV32I-NEXT: slli t1, t1, 1
+; RV32I-NEXT: sll t1, t1, t2
+; RV32I-NEXT: or t1, t0, t1
+; RV32I-NEXT: srl s0, t4, a5
+; RV32I-NEXT: slli s1, t5, 1
+; RV32I-NEXT: sll s1, s1, t2
; RV32I-NEXT: or s1, s0, s1
-; RV32I-NEXT: srl t3, t3, a6
-; RV32I-NEXT: slli t5, t5, 1
-; RV32I-NEXT: lw a5, 28(a5)
-; RV32I-NEXT: sll t5, t5, t0
-; RV32I-NEXT: or t5, t3, t5
-; RV32I-NEXT: srl t6, t6, a6
-; RV32I-NEXT: slli s2, a5, 1
-; RV32I-NEXT: sll t0, s2, t0
-; RV32I-NEXT: or t0, t6, t0
-; RV32I-NEXT: sra a5, a5, a6
-; RV32I-NEXT: sb t6, 24(a2)
+; RV32I-NEXT: srl t3, t3, a5
+; RV32I-NEXT: slli t4, t4, 1
+; RV32I-NEXT: sll t4, t4, t2
+; RV32I-NEXT: or t4, t3, t4
+; RV32I-NEXT: srl t5, t5, a5
+; RV32I-NEXT: slli s2, t6, 1
+; RV32I-NEXT: sll t2, s2, t2
+; RV32I-NEXT: or t2, t5, t2
+; RV32I-NEXT: sra a5, t6, a5
+; RV32I-NEXT: sb t5, 24(a2)
; RV32I-NEXT: sb a5, 28(a2)
; RV32I-NEXT: sb t3, 16(a2)
; RV32I-NEXT: sb s0, 20(a2)
-; RV32I-NEXT: sb a7, 8(a2)
-; RV32I-NEXT: sb t4, 12(a2)
-; RV32I-NEXT: sb t1, 0(a2)
+; RV32I-NEXT: sb t0, 8(a2)
+; RV32I-NEXT: sb a7, 12(a2)
+; RV32I-NEXT: sb a6, 0(a2)
; RV32I-NEXT: sb a4, 4(a2)
; RV32I-NEXT: srli a4, a5, 24
; RV32I-NEXT: sb a4, 31(a2)
@@ -4686,17 +4686,17 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: sb a4, 30(a2)
; RV32I-NEXT: srli a5, a5, 8
; RV32I-NEXT: sb a5, 29(a2)
-; RV32I-NEXT: srli a4, t0, 24
+; RV32I-NEXT: srli a4, t2, 24
; RV32I-NEXT: sb a4, 27(a2)
-; RV32I-NEXT: srli a4, t0, 16
+; RV32I-NEXT: srli a4, t2, 16
; RV32I-NEXT: sb a4, 26(a2)
-; RV32I-NEXT: srli a4, t0, 8
+; RV32I-NEXT: srli a4, t2, 8
; RV32I-NEXT: sb a4, 25(a2)
-; RV32I-NEXT: srli a4, t5, 24
+; RV32I-NEXT: srli a4, t4, 24
; RV32I-NEXT: sb a4, 19(a2)
-; RV32I-NEXT: srli a4, t5, 16
+; RV32I-NEXT: srli a4, t4, 16
; RV32I-NEXT: sb a4, 18(a2)
-; RV32I-NEXT: srli a4, t5, 8
+; RV32I-NEXT: srli a4, t4, 8
; RV32I-NEXT: sb a4, 17(a2)
; RV32I-NEXT: srli a4, s1, 24
; RV32I-NEXT: sb a4, 23(a2)
@@ -4704,11 +4704,11 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: sb a4, 22(a2)
; RV32I-NEXT: srli s1, s1, 8
; RV32I-NEXT: sb s1, 21(a2)
-; RV32I-NEXT: srli a4, t2, 24
+; RV32I-NEXT: srli a4, t1, 24
; RV32I-NEXT: sb a4, 11(a2)
-; RV32I-NEXT: srli a4, t2, 16
+; RV32I-NEXT: srli a4, t1, 16
; RV32I-NEXT: sb a4, 10(a2)
-; RV32I-NEXT: srli a4, t2, 8
+; RV32I-NEXT: srli a4, t1, 8
; RV32I-NEXT: sb a4, 9(a2)
; RV32I-NEXT: srli a4, a3, 24
; RV32I-NEXT: sb a4, 15(a2)
@@ -4754,105 +4754,105 @@ define void @ashr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun
; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli a6, a6, 24
; RV64I-NEXT: or a4, a6, a5
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: lbu a4, 5(a0)
; RV64I-NEXT: lbu a5, 4(a0)
-; RV64I-NEXT: lbu a6, 6(a0)
+; RV64I-NEXT: lbu a6, 5(a0)
+; RV64I-NEXT: or a3, a4, a3
+; RV64I-NEXT: lbu a4, 6(a0)
; RV64I-NEXT: lbu a7, 7(a0)
-; RV64I-NEXT: slli a4, a4, 8
-; RV64I-NEXT: or a4, a4, a5
-; RV64I-NEXT: slli a6, a6, 16
+; RV64I-NEXT: slli a6, a6, 8
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: slli a4, a4, 16
; RV64I-NEXT: slli a7, a7, 24
-; RV64I-NEXT: or a5, a7, a6
-; RV64I-NEXT: or a4, a5, a4
+; RV64I-NEXT: or a4, a7, a4
+; RV64I-NEXT: or a4, a4, a5
; RV64I-NEXT: slli a4, a4, 32
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: lbu a4, 9(a0)
; RV64I-NEXT: lbu a5, 8(a0)
-; RV64I-NEXT: lbu a6, 10(a0)
+; RV64I-NEXT: lbu a6, 9(a0)
+; RV64I-NEXT: or a3, a4, a3
+; RV64I-NEXT: lbu a4, 10(a0)
; RV64I-NEXT: lbu a7, 11(a0)
-; RV64I-NEXT: slli a4, a4, 8
-; RV64I-NEXT: or a4, a4, a5
-; RV64I-NEXT: slli a6, a6, 16
+; RV64I-NEXT: slli a6, a6, 8
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: slli a4, a4, 16
; RV64I-NEXT: slli a7, a7, 24
-; RV64I-NEXT: or a5, a7, a6
-; RV64I-NEXT: or a4, a5, a4
-; RV64I-NEXT: lbu a5, 13(a0)
+; RV64I-NEXT: or a4, a7, a4
; RV64I-NEXT: lbu a6, 12(a0)
-; RV64I-NEXT: lbu a7, 14(a0)
+; RV64I-NEXT: lbu a7, 13(a0)
+; RV64I-NEXT: or a4, a4, a5
+; RV64I-NEXT: lbu a5, 14(a0)
; RV64I-NEXT: lbu t0, 15(a0)
-; RV64I-NEXT: slli a5, a5, 8
-; RV64I-NEXT: or a5, a5, a6
-; RV64I-NEXT: slli a7, a7, 16
+; RV64I-NEXT: slli a7, a7, 8
+; RV64I-NEXT: or a6, a7, a6
+; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli t0, t0, 24
-; RV64I-NEXT: or a6, t0, a7
-; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: or a5, t0, a5
+; RV64I-NEXT: or a5, a5, a6
; RV64I-NEXT: slli a5, a5, 32
-; RV64I-NEXT: or a4, a5, a4
-; RV64I-NEXT: lbu a5, 17(a0)
; RV64I-NEXT: lbu a6, 16(a0)
-; RV64I-NEXT: lbu a7, 18(a0)
+; RV64I-NEXT: lbu a7, 17(a0)
+; RV64I-NEXT: or a4, a5, a4
+; RV64I-NEXT: lbu a5, 18(a0)
; RV64I-NEXT: lbu t0, 19(a0)
-; RV64I-NEXT: slli a5, a5, 8
-; RV64I-NEXT: or a5, a5, a6
-; RV64I-NEXT: slli a7, a7, 16
+; RV64I-NEXT: slli a7, a7, 8
+; RV64I-NEXT: or a6, a7, a6
+; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli t0, t0, 24
-; RV64I-NEXT: or a6, t0, a7
-; RV64I-NEXT: or a5, a6, a5
-; RV64I-NEXT: lbu a6, 21(a0)
+; RV64I-NEXT: or a5, t0, a5
; RV64I-NEXT: lbu a7, 20(a0)
-; RV64I-NEXT: lbu t0, 22(a0)
+; RV64I-NEXT: lbu t0, 21(a0)
+; RV64I-NEXT: or a5, a5, a6
+; RV64I-NEXT: lbu a6, 22(a0)
; RV64I-NEXT: lbu t1, 23(a0)
-; RV64I-NEXT: slli a6, a6, 8
-; RV64I-NEXT: or a6, a6, a7
-; RV64I-NEXT: slli t0, t0, 16
+; RV64I-NEXT: slli t0, t0, 8
+; RV64I-NEXT: or a7, t0, a7
+; RV64I-NEXT: slli a6, a6, 16
; RV64I-NEXT: slli t1, t1, 24
-; RV64I-NEXT: or a7, t1, t0
-; RV64I-NEXT: or a6, a7, a6
+; RV64I-NEXT: or a6, t1, a6
+; RV64I-NEXT: or a6, a6, a7
; RV64I-NEXT: slli a6, a6, 32
-; RV64I-NEXT: or a5, a6, a5
-; RV64I-NEXT: lbu a6, 25(a0)
; RV64I-NEXT: lbu a7, 24(a0)
-; RV64I-NEXT: lbu t0, 26(a0)
+; RV64I-NEXT: lbu t0, 25(a0)
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: lbu a6, 26(a0)
; RV64I-NEXT: lbu t1, 27(a0)
-; RV64I-NEXT: slli a6, a6, 8
-; RV64I-NEXT: or a6, a6, a7
-; RV64I-NEXT: slli t0, t0, 16
+; RV64I-NEXT: slli t0, t0, 8
+; RV64I-NEXT: or a7, t0, a7
+; RV64I-NEXT: slli a6, a6, 16
; RV64I-NEXT: slli t1, t1, 24
-; RV64I-NEXT: or a7, t1, t0
-; RV64I-NEXT: or a6, a7, a6
-; RV64I-NEXT: lbu a7, 29(a0)
+; RV64I-NEXT: or a6, t1, a6
; RV64I-NEXT: lbu t0, 28(a0)
-; RV64I-NEXT: lbu t1, 30(a0)
+; RV64I-NEXT: lbu t1, 29(a0)
+; RV64I-NEXT: or a6, a6, a7
+; RV64I-NEXT: lbu a7, 30(a0)
; RV64I-NEXT: lbu a0, 31(a0)
-; RV64I-NEXT: slli a7, a7, 8
-; RV64I-NEXT: or a7, a7, t0
-; RV64I-NEXT: slli t1, t1, 16
+; RV64I-NEXT: slli t1, t1, 8
+; RV64I-NEXT: or t0, t1, t0
+; RV64I-NEXT: slli a7, a7, 16
; RV64I-NEXT: slli a0, a0, 24
-; RV64I-NEXT: or a0, a0, t1
; RV64I-NEXT: or a0, a0, a7
+; RV64I-NEXT: or a0, a0, t0
; RV64I-NEXT: slli a7, a0, 32
-; RV64I-NEXT: or a6, a7, a6
-; RV64I-NEXT: lbu a7, 1(a1)
; RV64I-NEXT: lbu t0, 0(a1)
-; RV64I-NEXT: lbu t1, 2(a1)
+; RV64I-NEXT: lbu t1, 1(a1)
+; RV64I-NEXT: or a6, a7, a6
+; RV64I-NEXT: lbu a7, 2(a1)
; RV64I-NEXT: lbu t2, 3(a1)
-; RV64I-NEXT: slli a7, a7, 8
-; RV64I-NEXT: or a7, a7, t0
-; RV64I-NEXT: slli t1, t1, 16
+; RV64I-NEXT: slli t1, t1, 8
+; RV64I-NEXT: or t0, t1, t0
+; RV64I-NEXT: slli a7, a7, 16
; RV64I-NEXT: slli t2, t2, 24
-; RV64I-NEXT: or t0, t2, t1
-; RV64I-NEXT: or a7, t0, a7
-; RV64I-NEXT: lbu t0, 5(a1)
+; RV64I-NEXT: or a7, t2, a7
; RV64I-NEXT: lbu t1, 4(a1)
-; RV64I-NEXT: lbu t2, 6(a1)
+; RV64I-NEXT: lbu t2, 5(a1)
+; RV64I-NEXT: or a7, a7, t0
+; RV64I-NEXT: lbu t0, 6(a1)
; RV64I-NEXT: lbu a1, 7(a1)
-; RV64I-NEXT: slli t0, t0, 8
-; RV64I-NEXT: or t0, t0, t1
-; RV64I-NEXT: slli t2, t2, 16
+; RV64I-NEXT: slli t2, t2, 8
+; RV64I-NEXT: or t1, t2, t1
+; RV64I-NEXT: slli t0, t0, 16
; RV64I-NEXT: slli a1, a1, 24
-; RV64I-NEXT: or a1, a1, t2
; RV64I-NEXT: or a1, a1, t0
+; RV64I-NEXT: or a1, a1, t1
; RV64I-NEXT: slli a1, a1, 32
; RV64I-NEXT: or a1, a1, a7
; RV64I-NEXT: sraiw a0, a0, 31
@@ -4867,70 +4867,70 @@ define void @ashr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun
; RV64I-NEXT: slli a0, a1, 2
; RV64I-NEXT: andi a0, a0, 24
; RV64I-NEXT: mv a3, sp
-; RV64I-NEXT: add a3, a3, a0
-; RV64I-NEXT: ld a4, 8(a3)
-; RV64I-NEXT: slli a5, a1, 5
-; RV64I-NEXT: srl a1, a4, a5
-; RV64I-NEXT: ld a6, 16(a3)
-; RV64I-NEXT: andi a0, a5, 32
-; RV64I-NEXT: xori a7, a0, 63
-; RV64I-NEXT: ld t0, 0(a3)
-; RV64I-NEXT: slli a0, a6, 1
-; RV64I-NEXT: sll a0, a0, a7
-; RV64I-NEXT: or a0, a1, a0
-; RV64I-NEXT: srl t0, t0, a5
-; RV64I-NEXT: slli a4, a4, 1
-; RV64I-NEXT: ld a3, 24(a3)
-; RV64I-NEXT: sll a4, a4, a7
-; RV64I-NEXT: or a4, t0, a4
-; RV64I-NEXT: srl a6, a6, a5
-; RV64I-NEXT: slli t1, a3, 1
-; RV64I-NEXT: sll a7, t1, a7
-; RV64I-NEXT: or a7, a6, a7
-; RV64I-NEXT: sra a3, a3, a5
-; RV64I-NEXT: sb a6, 16(a2)
-; RV64I-NEXT: sb a3, 24(a2)
-; RV64I-NEXT: sb t0, 0(a2)
-; RV64I-NEXT: sb a1, 8(a2)
-; RV64I-NEXT: srli a5, a6, 24
-; RV64I-NEXT: sb a5, 19(a2)
-; RV64I-NEXT: srli a5, a6, 16
-; RV64I-NEXT: sb a5, 18(a2)
-; RV64I-NEXT: srli a5, a6, 8
+; RV64I-NEXT: add a0, a3, a0
+; RV64I-NEXT: ld a3, 0(a0)
+; RV64I-NEXT: ld a4, 8(a0)
+; RV64I-NEXT: slli a1, a1, 5
+; RV64I-NEXT: ld a5, 16(a0)
+; RV64I-NEXT: ld a6, 24(a0)
+; RV64I-NEXT: srl a7, a4, a1
+; RV64I-NEXT: andi a0, a1, 32
+; RV64I-NEXT: xori t0, a0, 63
+; RV64I-NEXT: slli a0, a5, 1
+; RV64I-NEXT: sll a0, a0, t0
+; RV64I-NEXT: or a0, a7, a0
+; RV64I-NEXT: srl a3, a3, a1
+; RV64I-NEXT: slli a4, a4, 1
+; RV64I-NEXT: sll a4, a4, t0
+; RV64I-NEXT: or a4, a3, a4
+; RV64I-NEXT: srl a5, a5, a1
+; RV64I-NEXT: slli t1, a6, 1
+; RV64I-NEXT: sll t0, t1, t0
+; RV64I-NEXT: or t0, a5, t0
+; RV64I-NEXT: sra a1, a6, a1
+; RV64I-NEXT: sb a5, 16(a2)
+; RV64I-NEXT: sb a1, 24(a2)
+; RV64I-NEXT: sb a3, 0(a2)
+; RV64I-NEXT: sb a7, 8(a2)
+; RV64I-NEXT: srli a6, a5, 24
+; RV64I-NEXT: sb a6, 19(a2)
+; RV64I-NEXT: srli a6, a5, 16
+; RV64I-NEXT: sb a6, 18(a2)
+; RV64I-NEXT: srli a5, a5, 8
; RV64I-NEXT: sb a5, 17(a2)
-; RV64I-NEXT: srli a5, a3, 56
+; RV64I-NEXT: srli a5, a1, 56
; RV64I-NEXT: sb a5, 31(a2)
-; RV64I-NEXT: srli a5, a3, 48
+; RV64I-NEXT: srli a5, a1, 48
; RV64I-NEXT: sb a5, 30(a2)
-; RV64I-NEXT: srli a5, a3, 40
+; RV64I-NEXT: srli a5, a1, 40
; RV64I-NEXT: sb a5, 29(a2)
-; RV64I-NEXT: srli a5, a3, 32
+; RV64I-NEXT: srli a5, a1, 32
; RV64I-NEXT: sb a5, 28(a2)
-; RV64I-NEXT: srli a5, a3, 24
+; RV64I-NEXT: srli a5, a1, 24
; RV64I-NEXT: sb a5, 27(a2)
-; RV64I-NEXT: srli a5, a3, 16
+; RV64I-NEXT: srli a5, a1, 16
; RV64I-NEXT: sb a5, 26(a2)
+; RV64I-NEXT: srli a1, a1, 8
+; RV64I-NEXT: sb a1, 25(a2)
+; RV64I-NEXT: srli a1, a3, 24
+; RV64I-NEXT: sb a1, 3(a2)
+; RV64I-NEXT: srli a1, a3, 16
+; RV64I-NEXT: sb a1, 2(a2)
; RV64I-NEXT: srli a3, a3, 8
-; RV64I-NEXT: sb a3, 25(a2)
-; RV64I-NEXT: srli a3, t0, 24
-; RV64I-NEXT: sb a3, 3(a2)
-; RV64I-NEXT: srli a3, t0, 16
-; RV64I-NEXT: sb a3, 2(a2)
-; RV64I-NEXT: srli a3, t0, 8
; RV64I-NEXT: sb a3, 1(a2)
-; RV64I-NEXT: srli a3, a1, 24
-; RV64I-NEXT: sb a3, 11(a2)
-; RV64I-NEXT: srli a3, a1, 16
-; RV64I-NEXT: sb a3, 10(a2)
-; RV64I-NEXT: srli a1, a1, 8
+; RV64I-NEXT: srli a1, a7, 24
+; RV64I-NEXT: sb a1, 11(a2)
+; RV64I-NEXT: srli a1, a7, 16
+; RV64I-NEXT: sb a1, 10(a2)
+; RV64I-NEXT: srli a1, a7, 8
; RV64I-NEXT: sb a1, 9(a2)
-; RV64I-NEXT: srli a1, a7, 56
+; RV64I-NEXT: srli a1, t0, 56
; RV64I-NEXT: sb a1, 23(a2)
-; RV64I-NEXT: srli a1, a7, 48
+; RV64I-NEXT: srli a1, t0, 48
; RV64I-NEXT: sb a1, 22(a2)
-; RV64I-NEXT: srli a1, a7, 40
+; RV64I-NEXT: srli a1, t0, 40
; RV64I-NEXT: sb a1, 21(a2)
-; RV64I-NEXT: srli a1, a7, 32
+; RV64I-NEXT: srli a1, t0, 32
; RV64I-NEXT: sb a1, 20(a2)
; RV64I-NEXT: srli a1, a4, 56
; RV64I-NEXT: sb a1, 7(a2)
@@ -4963,77 +4963,77 @@ define void @ashr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun
; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli a6, a6, 24
; RV32I-NEXT: or a4, a6, a5
-; RV32I-NEXT: or a3, a4, a3
-; RV32I-NEXT: lbu a4, 5(a0)
; RV32I-NEXT: lbu a5, 4(a0)
-; RV32I-NEXT: lbu a6, 6(a0)
+; RV32I-NEXT: lbu a6, 5(a0)
+; RV32I-NEXT: or a3, a4, a3
+; RV32I-NEXT: lbu a4, 6(a0)
; RV32I-NEXT: lbu a7, 7(a0)
-; RV32I-NEXT: slli a4, a4, 8
-; RV32I-NEXT: or a4, a4, a5
-; RV32I-NEXT: slli a6, a6, 16
+; RV32I-NEXT: slli a6, a6, 8
+; RV32I-NEXT: or a5, a6, a5
+; RV32I-NEXT: slli a4, a4, 16
; RV32I-NEXT: slli a7, a7, 24
-; RV32I-NEXT: or a5, a7, a6
-; RV32I-NEXT: or a4, a5, a4
-; RV32I-NEXT: lbu a5, 9(a0)
+; RV32I-NEXT: or a4, a7, a4
; RV32I-NEXT: lbu a6, 8(a0)
-; RV32I-NEXT: lbu a7, 10(a0)
+; RV32I-NEXT: lbu a7, 9(a0)
+; RV32I-NEXT: or a4, a4, a5
+; RV32I-NEXT: lbu a5, 10(a0)
; RV32I-NEXT: lbu t0, 11(a0)
-; RV32I-NEXT: slli a5, a5, 8
-; RV32I-NEXT: or a5, a5, a6
-; RV32I-NEXT: slli a7, a7, 16
+; RV32I-NEXT: slli a7, a7, 8
+; RV32I-NEXT: or a6, a7, a6
+; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli t0, t0, 24
-; RV32I-NEXT: or a6, t0, a7
-; RV32I-NEXT: or a5, a6, a5
-; RV32I-NEXT: lbu a6, 13(a0)
+; RV32I-NEXT: or a5, t0, a5
; RV32I-NEXT: lbu a7, 12(a0)
-; RV32I-NEXT: lbu t0, 14(a0)
+; RV32I-NEXT: lbu t0, 13(a0)
+; RV32I-NEXT: or a5, a5, a6
+; RV32I-NEXT: lbu a6, 14(a0)
; RV32I-NEXT: lbu t1, 15(a0)
-; RV32I-NEXT: slli a6, a6, 8
-; RV32I-NEXT: or a6, a6, a7
-; RV32I-NEXT: slli t0, t0, 16
+; RV32I-NEXT: slli t0, t0, 8
+; RV32I-NEXT: or a7, t0, a7
+; RV32I-NEXT: slli a6, a6, 16
; RV32I-NEXT: slli t1, t1, 24
-; RV32I-NEXT: or a7, t1, t0
-; RV32I-NEXT: or a6, a7, a6
-; RV32I-NEXT: lbu a7, 17(a0)
+; RV32I-NEXT: or a6, t1, a6
; RV32I-NEXT: lbu t0, 16(a0)
-; RV32I-NEXT: lbu t1, 18(a0)
+; RV32I-NEXT: lbu t1, 17(a0)
+; RV32I-NEXT: or a6, a6, a7
+; RV32I-NEXT: lbu a7, 18(a0)
; RV32I-NEXT: lbu t2, 19(a0)
-; RV32I-NEXT: slli a7, a7, 8
-; RV32I-NEXT: or a7, a7, t0
-; RV32I-NEXT: slli t1, t1, 16
+; RV32I-NEXT: slli t1, t1, 8
+; RV32I-NEXT: or t0, t1, t0
+; RV32I-NEXT: slli a7, a7, 16
; RV32I-NEXT: slli t2, t2, 24
-; RV32I-NEXT: or t0, t2, t1
-; RV32I-NEXT: or a7, t0, a7
-; RV32I-NEXT: lbu t0, 21(a0)
+; RV32I-NEXT: or a7, t2, a7
; RV32I-NEXT: lbu t1, 20(a0)
-; RV32I-NEXT: lbu t2, 22(a0)
+; RV32I-NEXT: lbu t2, 21(a0)
+; RV32I-NEXT: or a7, a7, t0
+; RV32I-NEXT: lbu t0, 22(a0)
; RV32I-NEXT: lbu t3, 23(a0)
-; RV32I-NEXT: slli t0, t0, 8
-; RV32I-NEXT: or t0, t0, t1
-; RV32I-NEXT: slli t2, t2, 16
+; RV32I-NEXT: slli t2, t2, 8
+; RV32I-NEXT: or t1, t2, t1
+; RV32I-NEXT: slli t0, t0, 16
; RV32I-NEXT: slli t3, t3, 24
-; RV32I-NEXT: or t1, t3, t2
-; RV32I-NEXT: or t0, t1, t0
-; RV32I-NEXT: lbu t1, 25(a0)
+; RV32I-NEXT: or t0, t3, t0
; RV32I-NEXT: lbu t2, 24(a0)
-; RV32I-NEXT: lbu t3, 26(a0)
+; RV32I-NEXT: lbu t3, 25(a0)
+; RV32I-NEXT: or t0, t0, t1
+; RV32I-NEXT: lbu t1, 26(a0)
; RV32I-NEXT: lbu t4, 27(a0)
-; RV32I-NEXT: slli t1, t1, 8
-; RV32I-NEXT: or t1, t1, t2
-; RV32I-NEXT: slli t3, t3, 16
+; RV32I-NEXT: slli t3, t3, 8
+; RV32I-NEXT: or t2, t3, t2
+; RV32I-NEXT: slli t1, t1, 16
; RV32I-NEXT: slli t4, t4, 24
-; RV32I-NEXT: or t2, t4, t3
-; RV32I-NEXT: or t1, t2, t1
-; RV32I-NEXT: lbu t2, 29(a0)
+; RV32I-NEXT: or t1, t4, t1
; RV32I-NEXT: lbu t3, 28(a0)
-; RV32I-NEXT: lbu t4, 30(a0)
+; RV32I-NEXT: lbu t4, 29(a0)
+; RV32I-NEXT: or t1, t1, t2
+; RV32I-NEXT: lbu t2, 30(a0)
; RV32I-NEXT: lbu a0, 31(a0)
-; RV32I-NEXT: slli t2, t2, 8
-; RV32I-NEXT: or t2, t2, t3
-; RV32I-NEXT: slli t4, t4, 16
+; RV32I-NEXT: slli t4, t4, 8
+; RV32I-NEXT: or t3, t4, t3
+; RV32I-NEXT: slli t2, t2, 16
; RV32I-NEXT: slli a0, a0, 24
-; RV32I-NEXT: or t3, a0, t4
-; RV32I-NEXT: or t2, t3, t2
+; RV32I-NEXT: or t2, a0, t2
+; RV32I-NEXT: or t2, t2, t3
; RV32I-NEXT: lbu a1, 0(a1)
; RV32I-NEXT: srai a0, a0, 31
; RV32I-NEXT: sw a0, 60(sp)
@@ -5055,64 +5055,64 @@ define void @ashr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun
; RV32I-NEXT: slli a1, a1, 2
; RV32I-NEXT: andi a1, a1, 28
; RV32I-NEXT: mv a0, sp
-; RV32I-NEXT: add a3, a0, a1
-; RV32I-NEXT: lw a0, 4(a3)
-; RV32I-NEXT: lw a1, 0(a3)
-; RV32I-NEXT: lw a4, 12(a3)
-; RV32I-NEXT: lw a5, 8(a3)
-; RV32I-NEXT: lw a6, 24(a3)
-; RV32I-NEXT: lw a7, 28(a3)
-; RV32I-NEXT: lw t0, 16(a3)
-; RV32I-NEXT: lw a3, 20(a3)
-; RV32I-NEXT: sb a6, 24(a2)
-; RV32I-NEXT: sb a7, 28(a2)
-; RV32I-NEXT: sb t0, 16(a2)
-; RV32I-NEXT: sb a3, 20(a2)
-; RV32I-NEXT: sb a5, 8(a2)
-; RV32I-NEXT: sb a4, 12(a2)
-; RV32I-NEXT: sb a1, 0(a2)
+; RV32I-NEXT: add a1, a0, a1
+; RV32I-NEXT: lw a3, 16(a1)
+; RV32I-NEXT: lw a4, 20(a1)
+; RV32I-NEXT: lw a5, 24(a1)
+; RV32I-NEXT: lw a6, 28(a1)
+; RV32I-NEXT: lw a7, 0(a1)
+; RV32I-NEXT: lw a0, 4(a1)
+; RV32I-NEXT: lw t0, 8(a1)
+; RV32I-NEXT: lw a1, 12(a1)
+; RV32I-NEXT: sb a5, 24(a2)
+; RV32I-NEXT: sb a6, 28(a2)
+; RV32I-NEXT: sb a3, 16(a2)
+; RV32I-NEXT: sb a4, 20(a2)
+; RV32I-NEXT: sb t0, 8(a2)
+; RV32I-NEXT: sb a1, 12(a2)
+; RV32I-NEXT: sb a7, 0(a2)
; RV32I-NEXT: sb a0, 4(a2)
-; RV32I-NEXT: srli t1, a6, 24
+; RV32I-NEXT: srli t1, a5, 24
; RV32I-NEXT: sb t1, 27(a2)
-; RV32I-NEXT: srli t1, a6, 16
+; RV32I-NEXT: srli t1, a5, 16
; RV32I-NEXT: sb t1, 26(a2)
-; RV32I-NEXT: srli a6, a6, 8
-; RV32I-NEXT: sb a6, 25(a2)
-; RV32I-NEXT: srli a6, a7, 24
-; RV32I-NEXT: sb a6, 31(a2)
-; RV32I-NEXT: srli a6, a7, 16
-; RV32I-NEXT: sb a6, 30(a2)
-; RV32I-NEXT: srli a6, a7, 8
-; RV32I-NEXT: sb a6, 29(a2)
-; RV32I-NEXT: srli a6, t0, 24
-; RV32I-NEXT: sb a6, 19(a2)
-; RV32I-NEXT: srli a6, t0, 16
-; RV32I-NEXT: sb a6, 18(a2)
-; RV32I-NEXT: srli a6, t0, 8
-; RV32I-NEXT: sb a6, 17(a2)
-; RV32I-NEXT: srli a6, a3, 24
-; RV32I-NEXT: sb a6, 23(a2)
-; RV32I-NEXT: srli a6, a3, 16
-; RV32I-NEXT: sb a6, 22(a2)
-; RV32I-NEXT: srli a3, a3, 8
-; RV32I-NEXT: sb a3, 21(a2)
-; RV32I-NEXT: srli a3, a5, 24
-; RV32I-NEXT: sb a3, 11(a2)
-; RV32I-NEXT: srli a3, a5, 16
-; RV32I-NEXT: sb a3, 10(a2)
; RV32I-NEXT: srli a5, a5, 8
-; RV32I-NEXT: sb a5, 9(a2)
+; RV32I-NEXT: sb a5, 25(a2)
+; RV32I-NEXT: srli a5, a6, 24
+; RV32I-NEXT: sb a5, 31(a2)
+; RV32I-NEXT: srli a5, a6, 16
+; RV32I-NEXT: sb a5, 30(a2)
+; RV32I-NEXT: srli a5, a6, 8
+; RV32I-NEXT: sb a5, 29(a2)
+; RV32I-NEXT: srli a5, a3, 24
+; RV32I-NEXT: sb a5, 19(a2)
+; RV32I-NEXT: srli a5, a3, 16
+; RV32I-NEXT: sb a5, 18(a2)
+; RV32I-NEXT: srli a3, a3, 8
+; RV32I-NEXT: sb a3, 17(a2)
; RV32I-NEXT: srli a3, a4, 24
-; RV32I-NEXT: sb a3, 15(a2)
+; RV32I-NEXT: sb a3, 23(a2)
; RV32I-NEXT: srli a3, a4, 16
-; RV32I-NEXT: sb a3, 14(a2)
+; RV32I-NEXT: sb a3, 22(a2)
; RV32I-NEXT: srli a4, a4, 8
-; RV32I-NEXT: sb a4, 13(a2)
+; RV32I-NEXT: sb a4, 21(a2)
+; RV32I-NEXT: srli a3, t0, 24
+; RV32I-NEXT: sb a3, 11(a2)
+; RV32I-NEXT: srli a3, t0, 16
+; RV32I-NEXT: sb a3, 10(a2)
+; RV32I-NEXT: srli a3, t0, 8
+; RV32I-NEXT: sb a3, 9(a2)
; RV32I-NEXT: srli a3, a1, 24
-; RV32I-NEXT: sb a3, 3(a2)
+; RV32I-NEXT: sb a3, 15(a2)
; RV32I-NEXT: srli a3, a1, 16
-; RV32I-NEXT: sb a3, 2(a2)
+; RV32I-NEXT: sb a3, 14(a2)
; RV32I-NEXT: srli a1, a1, 8
+; RV32I-NEXT: sb a1, 13(a2)
+; RV32I-NEXT: srli a1, a7, 24
+; RV32I-NEXT: sb a1, 3(a2)
+; RV32I-NEXT: srli a1, a7, 16
+; RV32I-NEXT: sb a1, 2(a2)
+; RV32I-NEXT: srli a1, a7, 8
; RV32I-NEXT: sb a1, 1(a2)
; RV32I-NEXT: srli a1, a0, 24
; RV32I-NEXT: sb a1, 7(a2)
@@ -5143,83 +5143,83 @@ define void @ashr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no
; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli a6, a6, 24
; RV64I-NEXT: or a4, a6, a5
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: lbu a4, 5(a0)
; RV64I-NEXT: lbu a5, 4(a0)
-; RV64I-NEXT: lbu a6, 6(a0)
+; RV64I-NEXT: lbu a6, 5(a0)
+; RV64I-NEXT: or a3, a4, a3
+; RV64I-NEXT: lbu a4, 6(a0)
; RV64I-NEXT: lbu a7, 7(a0)
-; RV64I-NEXT: slli a4, a4, 8
-; RV64I-NEXT: or a4, a4, a5
-; RV64I-NEXT: slli a6, a6, 16
+; RV64I-NEXT: slli a6, a6, 8
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: slli a4, a4, 16
; RV64I-NEXT: slli a7, a7, 24
-; RV64I-NEXT: or a5, a7, a6
-; RV64I-NEXT: or a4, a5, a4
+; RV64I-NEXT: or a4, a7, a4
+; RV64I-NEXT: or a4, a4, a5
; RV64I-NEXT: slli a4, a4, 32
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: lbu a4, 9(a0)
; RV64I-NEXT: lbu a5, 8(a0)
-; RV64I-NEXT: lbu a6, 10(a0)
+; RV64I-NEXT: lbu a6, 9(a0)
+; RV64I-NEXT: or a3, a4, a3
+; RV64I-NEXT: lbu a4, 10(a0)
; RV64I-NEXT: lbu a7, 11(a0)
-; RV64I-NEXT: slli a4, a4, 8
-; RV64I-NEXT: or a4, a4, a5
-; RV64I-NEXT: slli a6, a6, 16
+; RV64I-NEXT: slli a6, a6, 8
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: slli a4, a4, 16
; RV64I-NEXT: slli a7, a7, 24
-; RV64I-NEXT: or a5, a7, a6
-; RV64I-NEXT: or a4, a5, a4
-; RV64I-NEXT: lbu a5, 13(a0)
+; RV64I-NEXT: or a4, a7, a4
; RV64I-NEXT: lbu a6, 12(a0)
-; RV64I-NEXT: lbu a7, 14(a0)
+; RV64I-NEXT: lbu a7, 13(a0)
+; RV64I-NEXT: or a4, a4, a5
+; RV64I-NEXT: lbu a5, 14(a0)
; RV64I-NEXT: lbu t0, 15(a0)
-; RV64I-NEXT: slli a5, a5, 8
-; RV64I-NEXT: or a5, a5, a6
-; RV64I-NEXT: slli a7, a7, 16
+; RV64I-NEXT: slli a7, a7, 8
+; RV64I-NEXT: or a6, a7, a6
+; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli t0, t0, 24
-; RV64I-NEXT: or a6, t0, a7
-; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: or a5, t0, a5
+; RV64I-NEXT: or a5, a5, a6
; RV64I-NEXT: slli a5, a5, 32
-; RV64I-NEXT: or a4, a5, a4
-; RV64I-NEXT: lbu a5, 17(a0)
; RV64I-NEXT: lbu a6, 16(a0)
-; RV64I-NEXT: lbu a7, 18(a0)
+; RV64I-NEXT: lbu a7, 17(a0)
+; RV64I-NEXT: or a4, a5, a4
+; RV64I-NEXT: lbu a5, 18(a0)
; RV64I-NEXT: lbu t0, 19(a0)
-; RV64I-NEXT: slli a5, a5, 8
-; RV64I-NEXT: or a5, a5, a6
-; RV64I-NEXT: slli a7, a7, 16
+; RV64I-NEXT: slli a7, a7, 8
+; RV64I-NEXT: or a6, a7, a6
+; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli t0, t0, 24
-; RV64I-NEXT: or a6, t0, a7
-; RV64I-NEXT: or a5, a6, a5
-; RV64I-NEXT: lbu a6, 21(a0)
+; RV64I-NEXT: or a5, t0, a5
; RV64I-NEXT: lbu a7, 20(a0)
-; RV64I-NEXT: lbu t0, 22(a0)
+; RV64I-NEXT: lbu t0, 21(a0)
+; RV64I-NEXT: or a5, a5, a6
+; RV64I-NEXT: lbu a6, 22(a0)
; RV64I-NEXT: lbu t1, 23(a0)
-; RV64I-NEXT: slli a6, a6, 8
-; RV64I-NEXT: or a6, a6, a7
-; RV64I-NEXT: slli t0, t0, 16
+; RV64I-NEXT: slli t0, t0, 8
+; RV64I-NEXT: or a7, t0, a7
+; RV64I-NEXT: slli a6, a6, 16
; RV64I-NEXT: slli t1, t1, 24
-; RV64I-NEXT: or a7, t1, t0
-; RV64I-NEXT: or a6, a7, a6
+; RV64I-NEXT: or a6, t1, a6
+; RV64I-NEXT: or a6, a6, a7
; RV64I-NEXT: slli a6, a6, 32
-; RV64I-NEXT: or a5, a6, a5
-; RV64I-NEXT: lbu a6, 25(a0)
; RV64I-NEXT: lbu a7, 24(a0)
-; RV64I-NEXT: lbu t0, 26(a0)
+; RV64I-NEXT: lbu t0, 25(a0)
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: lbu a6, 26(a0)
; RV64I-NEXT: lbu t1, 27(a0)
-; RV64I-NEXT: slli a6, a6, 8
-; RV64I-NEXT: or a6, a6, a7
-; RV64I-NEXT: slli t0, t0, 16
+; RV64I-NEXT: slli t0, t0, 8
+; RV64I-NEXT: or a7, t0, a7
+; RV64I-NEXT: slli a6, a6, 16
; RV64I-NEXT: slli t1, t1, 24
-; RV64I-NEXT: or a7, t1, t0
-; RV64I-NEXT: or a6, a7, a6
-; RV64I-NEXT: lbu a7, 29(a0)
+; RV64I-NEXT: or a6, t1, a6
; RV64I-NEXT: lbu t0, 28(a0)
-; RV64I-NEXT: lbu t1, 30(a0)
+; RV64I-NEXT: lbu t1, 29(a0)
+; RV64I-NEXT: or a6, a6, a7
+; RV64I-NEXT: lbu a7, 30(a0)
; RV64I-NEXT: lbu a0, 31(a0)
-; RV64I-NEXT: slli a7, a7, 8
-; RV64I-NEXT: or a7, a7, t0
-; RV64I-NEXT: slli t1, t1, 16
+; RV64I-NEXT: slli t1, t1, 8
+; RV64I-NEXT: or t0, t1, t0
+; RV64I-NEXT: slli a7, a7, 16
; RV64I-NEXT: slli a0, a0, 24
-; RV64I-NEXT: or a0, a0, t1
; RV64I-NEXT: or a0, a0, a7
+; RV64I-NEXT: or a0, a0, t0
; RV64I-NEXT: slli a7, a0, 32
; RV64I-NEXT: or a6, a7, a6
; RV64I-NEXT: lbu a1, 0(a1)
@@ -5315,77 +5315,77 @@ define void @ashr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no
; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli a6, a6, 24
; RV32I-NEXT: or a4, a6, a5
-; RV32I-NEXT: or a3, a4, a3
-; RV32I-NEXT: lbu a4, 5(a0)
; RV32I-NEXT: lbu a5, 4(a0)
-; RV32I-NEXT: lbu a6, 6(a0)
+; RV32I-NEXT: lbu a6, 5(a0)
+; RV32I-NEXT: or a3, a4, a3
+; RV32I-NEXT: lbu a4, 6(a0)
; RV32I-NEXT: lbu a7, 7(a0)
-; RV32I-NEXT: slli a4, a4, 8
-; RV32I-NEXT: or a4, a4, a5
-; RV32I-NEXT: slli a6, a6, 16
+; RV32I-NEXT: slli a6, a6, 8
+; RV32I-NEXT: or a5, a6, a5
+; RV32I-NEXT: slli a4, a4, 16
; RV32I-NEXT: slli a7, a7, 24
-; RV32I-NEXT: or a5, a7, a6
-; RV32I-NEXT: or a4, a5, a4
-; RV32I-NEXT: lbu a5, 9(a0)
+; RV32I-NEXT: or a4, a7, a4
; RV32I-NEXT: lbu a6, 8(a0)
-; RV32I-NEXT: lbu a7, 10(a0)
+; RV32I-NEXT: lbu a7, 9(a0)
+; RV32I-NEXT: or a4, a4, a5
+; RV32I-NEXT: lbu a5, 10(a0)
; RV32I-NEXT: lbu t0, 11(a0)
-; RV32I-NEXT: slli a5, a5, 8
-; RV32I-NEXT: or a5, a5, a6
-; RV32I-NEXT: slli a7, a7, 16
+; RV32I-NEXT: slli a7, a7, 8
+; RV32I-NEXT: or a6, a7, a6
+; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli t0, t0, 24
-; RV32I-NEXT: or a6, t0, a7
-; RV32I-NEXT: or a5, a6, a5
-; RV32I-NEXT: lbu a6, 13(a0)
+; RV32I-NEXT: or a5, t0, a5
; RV32I-NEXT: lbu a7, 12(a0)
-; RV32I-NEXT: lbu t0, 14(a0)
+; RV32I-NEXT: lbu t0, 13(a0)
+; RV32I-NEXT: or a5, a5, a6
+; RV32I-NEXT: lbu a6, 14(a0)
; RV32I-NEXT: lbu t1, 15(a0)
-; RV32I-NEXT: slli a6, a6, 8
-; RV32I-NEXT: or a6, a6, a7
-; RV32I-NEXT: slli t0, t0, 16
+; RV32I-NEXT: slli t0, t0, 8
+; RV32I-NEXT: or a7, t0, a7
+; RV32I-NEXT: slli a6, a6, 16
; RV32I-NEXT: slli t1, t1, 24
-; RV32I-NEXT: or a7, t1, t0
-; RV32I-NEXT: or a6, a7, a6
-; RV32I-NEXT: lbu a7, 17(a0)
+; RV32I-NEXT: or a6, t1, a6
; RV32I-NEXT: lbu t0, 16(a0)
-; RV32I-NEXT: lbu t1, 18(a0)
+; RV32I-NEXT: lbu t1, 17(a0)
+; RV32I-NEXT: or a6, a6, a7
+; RV32I-NEXT: lbu a7, 18(a0)
; RV32I-NEXT: lbu t2, 19(a0)
-; RV32I-NEXT: slli a7, a7, 8
-; RV32I-NEXT: or a7, a7, t0
-; RV32I-NEXT: slli t1, t1, 16
+; RV32I-NEXT: slli t1, t1, 8
+; RV32I-NEXT: or t0, t1, t0
+; RV32I-NEXT: slli a7, a7, 16
; RV32I-NEXT: slli t2, t2, 24
-; RV32I-NEXT: or t0, t2, t1
-; RV32I-NEXT: or a7, t0, a7
-; RV32I-NEXT: lbu t0, 21(a0)
+; RV32I-NEXT: or a7, t2, a7
; RV32I-NEXT: lbu t1, 20(a0)
-; RV32I-NEXT: lbu t2, 22(a0)
+; RV32I-NEXT: lbu t2, 21(a0)
+; RV32I-NEXT: or a7, a7, t0
+; RV32I-NEXT: lbu t0, 22(a0)
; RV32I-NEXT: lbu t3, 23(a0)
-; RV32I-NEXT: slli t0, t0, 8
-; RV32I-NEXT: or t0, t0, t1
-; RV32I-NEXT: slli t2, t2, 16
+; RV32I-NEXT: slli t2, t2, 8
+; RV32I-NEXT: or t1, t2, t1
+; RV32I-NEXT: slli t0, t0, 16
; RV32I-NEXT: slli t3, t3, 24
-; RV32I-NEXT: or t1, t3, t2
-; RV32I-NEXT: or t0, t1, t0
-; RV32I-NEXT: lbu t1, 25(a0)
+; RV32I-NEXT: or t0, t3, t0
; RV32I-NEXT: lbu t2, 24(a0)
-; RV32I-NEXT: lbu t3, 26(a0)
+; RV32I-NEXT: lbu t3, 25(a0)
+; RV32I-NEXT: or t0, t0, t1
+; RV32I-NEXT: lbu t1, 26(a0)
; RV32I-NEXT: lbu t4, 27(a0)
-; RV32I-NEXT: slli t1, t1, 8
-; RV32I-NEXT: or t1, t1, t2
-; RV32I-NEXT: slli t3, t3, 16
+; RV32I-NEXT: slli t3, t3, 8
+; RV32I-NEXT: or t2, t3, t2
+; RV32I-NEXT: slli t1, t1, 16
; RV32I-NEXT: slli t4, t4, 24
-; RV32I-NEXT: or t2, t4, t3
-; RV32I-NEXT: or t1, t2, t1
-; RV32I-NEXT: lbu t2, 29(a0)
+; RV32I-NEXT: or t1, t4, t1
; RV32I-NEXT: lbu t3, 28(a0)
-; RV32I-NEXT: lbu t4, 30(a0)
+; RV32I-NEXT: lbu t4, 29(a0)
+; RV32I-NEXT: or t1, t1, t2
+; RV32I-NEXT: lbu t2, 30(a0)
; RV32I-NEXT: lbu a0, 31(a0)
-; RV32I-NEXT: slli t2, t2, 8
-; RV32I-NEXT: or t2, t2, t3
-; RV32I-NEXT: slli t4, t4, 16
+; RV32I-NEXT: slli t4, t4, 8
+; RV32I-NEXT: or t3, t4, t3
+; RV32I-NEXT: slli t2, t2, 16
; RV32I-NEXT: slli a0, a0, 24
-; RV32I-NEXT: or t3, a0, t4
-; RV32I-NEXT: or t2, t3, t2
+; RV32I-NEXT: or t2, a0, t2
+; RV32I-NEXT: or t2, t2, t3
; RV32I-NEXT: lbu a1, 0(a1)
; RV32I-NEXT: srai a0, a0, 31
; RV32I-NEXT: sw a0, 60(sp)
@@ -5407,64 +5407,64 @@ define void @ashr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no
; RV32I-NEXT: slli a1, a1, 3
; RV32I-NEXT: andi a1, a1, 24
; RV32I-NEXT: mv a0, sp
-; RV32I-NEXT: add a3, a0, a1
-; RV32I-NEXT: lw a0, 4(a3)
-; RV32I-NEXT: lw a1, 0(a3)
-; RV32I-NEXT: lw a4, 12(a3)
-; RV32I-NEXT: lw a5, 8(a3)
-; RV32I-NEXT: lw a6, 24(a3)
-; RV32I-NEXT: lw a7, 28(a3)
-; RV32I-NEXT: lw t0, 16(a3)
-; RV32I-NEXT: lw a3, 20(a3)
-; RV32I-NEXT: sb a6, 24(a2)
-; RV32I-NEXT: sb a7, 28(a2)
-; RV32I-NEXT: sb t0, 16(a2)
-; RV32I-NEXT: sb a3, 20(a2)
-; RV32I-NEXT: sb a5, 8(a2)
-; RV32I-NEXT: sb a4, 12(a2)
-; RV32I-NEXT: sb a1, 0(a2)
+; RV32I-NEXT: add a1, a0, a1
+; RV32I-NEXT: lw a3, 16(a1)
+; RV32I-NEXT: lw a4, 20(a1)
+; RV32I-NEXT: lw a5, 24(a1)
+; RV32I-NEXT: lw a6, 28(a1)
+; RV32I-NEXT: lw a7, 0(a1)
+; RV32I-NEXT: lw a0, 4(a1)
+; RV32I-NEXT: lw t0, 8(a1)
+; RV32I-NEXT: lw a1, 12(a1)
+; RV32I-NEXT: sb a5, 24(a2)
+; RV32I-NEXT: sb a6, 28(a2)
+; RV32I-NEXT: sb a3, 16(a2)
+; RV32I-NEXT: sb a4, 20(a2)
+; RV32I-NEXT: sb t0, 8(a2)
+; RV32I-NEXT: sb a1, 12(a2)
+; RV32I-NEXT: sb a7, 0(a2)
; RV32I-NEXT: sb a0, 4(a2)
-; RV32I-NEXT: srli t1, a6, 24
+; RV32I-NEXT: srli t1, a5, 24
; RV32I-NEXT: sb t1, 27(a2)
-; RV32I-NEXT: srli t1, a6, 16
+; RV32I-NEXT: srli t1, a5, 16
; RV32I-NEXT: sb t1, 26(a2)
-; RV32I-NEXT: srli a6, a6, 8
-; RV32I-NEXT: sb a6, 25(a2)
-; RV32I-NEXT: srli a6, a7, 24
-; RV32I-NEXT: sb a6, 31(a2)
-; RV32I-NEXT: srli a6, a7, 16
-; RV32I-NEXT: sb a6, 30(a2)
-; RV32I-NEXT: srli a6, a7, 8
-; RV32I-NEXT: sb a6, 29(a2)
-; RV32I-NEXT: srli a6, t0, 24
-; RV32I-NEXT: sb a6, 19(a2)
-; RV32I-NEXT: srli a6, t0, 16
-; RV32I-NEXT: sb a6, 18(a2)
-; RV32I-NEXT: srli a6, t0, 8
-; RV32I-NEXT: sb a6, 17(a2)
-; RV32I-NEXT: srli a6, a3, 24
-; RV32I-NEXT: sb a6, 23(a2)
-; RV32I-NEXT: srli a6, a3, 16
-; RV32I-NEXT: sb a6, 22(a2)
-; RV32I-NEXT: srli a3, a3, 8
-; RV32I-NEXT: sb a3, 21(a2)
-; RV32I-NEXT: srli a3, a5, 24
-; RV32I-NEXT: sb a3, 11(a2)
-; RV32I-NEXT: srli a3, a5, 16
-; RV32I-NEXT: sb a3, 10(a2)
; RV32I-NEXT: srli a5, a5, 8
-; RV32I-NEXT: sb a5, 9(a2)
+; RV32I-NEXT: sb a5, 25(a2)
+; RV32I-NEXT: srli a5, a6, 24
+; RV32I-NEXT: sb a5, 31(a2)
+; RV32I-NEXT: srli a5, a6, 16
+; RV32I-NEXT: sb a5, 30(a2)
+; RV32I-NEXT: srli a5, a6, 8
+; RV32I-NEXT: sb a5, 29(a2)
+; RV32I-NEXT: srli a5, a3, 24
+; RV32I-NEXT: sb a5, 19(a2)
+; RV32I-NEXT: srli a5, a3, 16
+; RV32I-NEXT: sb a5, 18(a2)
+; RV32I-NEXT: srli a3, a3, 8
+; RV32I-NEXT: sb a3, 17(a2)
; RV32I-NEXT: srli a3, a4, 24
-; RV32I-NEXT: sb a3, 15(a2)
+; RV32I-NEXT: sb a3, 23(a2)
; RV32I-NEXT: srli a3, a4, 16
-; RV32I-NEXT: sb a3, 14(a2)
+; RV32I-NEXT: sb a3, 22(a2)
; RV32I-NEXT: srli a4, a4, 8
-; RV32I-NEXT: sb a4, 13(a2)
+; RV32I-NEXT: sb a4, 21(a2)
+; RV32I-NEXT: srli a3, t0, 24
+; RV32I-NEXT: sb a3, 11(a2)
+; RV32I-NEXT: srli a3, t0, 16
+; RV32I-NEXT: sb a3, 10(a2)
+; RV32I-NEXT: srli a3, t0, 8
+; RV32I-NEXT: sb a3, 9(a2)
; RV32I-NEXT: srli a3, a1, 24
-; RV32I-NEXT: sb a3, 3(a2)
+; RV32I-NEXT: sb a3, 15(a2)
; RV32I-NEXT: srli a3, a1, 16
-; RV32I-NEXT: sb a3, 2(a2)
+; RV32I-NEXT: sb a3, 14(a2)
; RV32I-NEXT: srli a1, a1, 8
+; RV32I-NEXT: sb a1, 13(a2)
+; RV32I-NEXT: srli a1, a7, 24
+; RV32I-NEXT: sb a1, 3(a2)
+; RV32I-NEXT: srli a1, a7, 16
+; RV32I-NEXT: sb a1, 2(a2)
+; RV32I-NEXT: srli a1, a7, 8
; RV32I-NEXT: sb a1, 1(a2)
; RV32I-NEXT: srli a1, a0, 24
; RV32I-NEXT: sb a1, 7(a2)
diff --git a/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll b/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll
index 7e879b137b4f0d..190d67a5d8c118 100644
--- a/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll
+++ b/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll
@@ -8,8 +8,8 @@ define void @lshr_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: lbu a3, 1(a0)
; RV64I-NEXT: lbu a4, 0(a0)
; RV64I-NEXT: lbu a5, 2(a0)
-; RV64I-NEXT: slli a3, a3, 8
; RV64I-NEXT: lb a0, 3(a0)
+; RV64I-NEXT: slli a3, a3, 8
; RV64I-NEXT: or a3, a3, a4
; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: lbu a1, 0(a1)
@@ -37,17 +37,17 @@ define void @lshr_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli a0, a0, 24
; RV32I-NEXT: or a0, a0, a5
-; RV32I-NEXT: or a0, a0, a3
-; RV32I-NEXT: lbu a3, 1(a1)
; RV32I-NEXT: lbu a4, 0(a1)
-; RV32I-NEXT: lbu a5, 2(a1)
+; RV32I-NEXT: lbu a5, 1(a1)
+; RV32I-NEXT: or a0, a0, a3
+; RV32I-NEXT: lbu a3, 2(a1)
; RV32I-NEXT: lbu a1, 3(a1)
-; RV32I-NEXT: slli a3, a3, 8
-; RV32I-NEXT: or a3, a3, a4
-; RV32I-NEXT: slli a5, a5, 16
+; RV32I-NEXT: slli a5, a5, 8
+; RV32I-NEXT: or a4, a5, a4
+; RV32I-NEXT: slli a3, a3, 16
; RV32I-NEXT: slli a1, a1, 24
-; RV32I-NEXT: or a1, a1, a5
; RV32I-NEXT: or a1, a1, a3
+; RV32I-NEXT: or a1, a1, a4
; RV32I-NEXT: srl a0, a0, a1
; RV32I-NEXT: sb a0, 0(a2)
; RV32I-NEXT: srli a1, a0, 16
@@ -69,8 +69,8 @@ define void @shl_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: lbu a3, 1(a0)
; RV64I-NEXT: lbu a4, 0(a0)
; RV64I-NEXT: lbu a5, 2(a0)
-; RV64I-NEXT: slli a3, a3, 8
; RV64I-NEXT: lb a0, 3(a0)
+; RV64I-NEXT: slli a3, a3, 8
; RV64I-NEXT: or a3, a3, a4
; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: lbu a1, 0(a1)
@@ -98,17 +98,17 @@ define void @shl_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli a0, a0, 24
; RV32I-NEXT: or a0, a0, a5
-; RV32I-NEXT: or a0, a0, a3
-; RV32I-NEXT: lbu a3, 1(a1)
; RV32I-NEXT: lbu a4, 0(a1)
-; RV32I-NEXT: lbu a5, 2(a1)
+; RV32I-NEXT: lbu a5, 1(a1)
+; RV32I-NEXT: or a0, a0, a3
+; RV32I-NEXT: lbu a3, 2(a1)
; RV32I-NEXT: lbu a1, 3(a1)
-; RV32I-NEXT: slli a3, a3, 8
-; RV32I-NEXT: or a3, a3, a4
-; RV32I-NEXT: slli a5, a5, 16
+; RV32I-NEXT: slli a5, a5, 8
+; RV32I-NEXT: or a4, a5, a4
+; RV32I-NEXT: slli a3, a3, 16
; RV32I-NEXT: slli a1, a1, 24
-; RV32I-NEXT: or a1, a1, a5
; RV32I-NEXT: or a1, a1, a3
+; RV32I-NEXT: or a1, a1, a4
; RV32I-NEXT: sll a0, a0, a1
; RV32I-NEXT: sb a0, 0(a2)
; RV32I-NEXT: srli a1, a0, 16
@@ -130,8 +130,8 @@ define void @ashr_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: lbu a3, 1(a0)
; RV64I-NEXT: lbu a4, 0(a0)
; RV64I-NEXT: lbu a5, 2(a0)
-; RV64I-NEXT: slli a3, a3, 8
; RV64I-NEXT: lb a0, 3(a0)
+; RV64I-NEXT: slli a3, a3, 8
; RV64I-NEXT: or a3, a3, a4
; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: lbu a1, 0(a1)
@@ -159,17 +159,17 @@ define void @ashr_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli a0, a0, 24
; RV32I-NEXT: or a0, a0, a5
-; RV32I-NEXT: or a0, a0, a3
-; RV32I-NEXT: lbu a3, 1(a1)
; RV32I-NEXT: lbu a4, 0(a1)
-; RV32I-NEXT: lbu a5, 2(a1)
+; RV32I-NEXT: lbu a5, 1(a1)
+; RV32I-NEXT: or a0, a0, a3
+; RV32I-NEXT: lbu a3, 2(a1)
; RV32I-NEXT: lbu a1, 3(a1)
-; RV32I-NEXT: slli a3, a3, 8
-; RV32I-NEXT: or a3, a3, a4
-; RV32I-NEXT: slli a5, a5, 16
+; RV32I-NEXT: slli a5, a5, 8
+; RV32I-NEXT: or a4, a5, a4
+; RV32I-NEXT: slli a3, a3, 16
; RV32I-NEXT: slli a1, a1, 24
-; RV32I-NEXT: or a1, a1, a5
; RV32I-NEXT: or a1, a1, a3
+; RV32I-NEXT: or a1, a1, a4
; RV32I-NEXT: sra a0, a0, a1
; RV32I-NEXT: sb a0, 0(a2)
; RV32I-NEXT: srli a1, a0, 16
@@ -198,39 +198,39 @@ define void @lshr_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli a6, a6, 24
; RV64I-NEXT: or a4, a6, a5
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: lbu a4, 5(a0)
; RV64I-NEXT: lbu a5, 4(a0)
-; RV64I-NEXT: lbu a6, 6(a0)
+; RV64I-NEXT: lbu a6, 5(a0)
+; RV64I-NEXT: or a3, a4, a3
+; RV64I-NEXT: lbu a4, 6(a0)
; RV64I-NEXT: lbu a0, 7(a0)
-; RV64I-NEXT: slli a4, a4, 8
-; RV64I-NEXT: or a4, a4, a5
-; RV64I-NEXT: slli a6, a6, 16
+; RV64I-NEXT: slli a6, a6, 8
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: slli a4, a4, 16
; RV64I-NEXT: slli a0, a0, 24
-; RV64I-NEXT: or a0, a0, a6
; RV64I-NEXT: or a0, a0, a4
+; RV64I-NEXT: or a0, a0, a5
; RV64I-NEXT: slli a0, a0, 32
-; RV64I-NEXT: or a0, a0, a3
-; RV64I-NEXT: lbu a3, 1(a1)
; RV64I-NEXT: lbu a4, 0(a1)
-; RV64I-NEXT: lbu a5, 2(a1)
+; RV64I-NEXT: lbu a5, 1(a1)
+; RV64I-NEXT: or a0, a0, a3
+; RV64I-NEXT: lbu a3, 2(a1)
; RV64I-NEXT: lbu a6, 3(a1)
-; RV64I-NEXT: slli a3, a3, 8
-; RV64I-NEXT: or a3, a3, a4
-; RV64I-NEXT: slli a5, a5, 16
+; RV64I-NEXT: slli a5, a5, 8
+; RV64I-NEXT: or a4, a5, a4
+; RV64I-NEXT: slli a3, a3, 16
; RV64I-NEXT: slli a6, a6, 24
-; RV64I-NEXT: or a4, a6, a5
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: lbu a4, 5(a1)
+; RV64I-NEXT: or a3, a6, a3
; RV64I-NEXT: lbu a5, 4(a1)
-; RV64I-NEXT: lbu a6, 6(a1)
+; RV64I-NEXT: lbu a6, 5(a1)
+; RV64I-NEXT: or a3, a3, a4
+; RV64I-NEXT: lbu a4, 6(a1)
; RV64I-NEXT: lbu a1, 7(a1)
-; RV64I-NEXT: slli a4, a4, 8
-; RV64I-NEXT: or a4, a4, a5
-; RV64I-NEXT: slli a6, a6, 16
+; RV64I-NEXT: slli a6, a6, 8
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: slli a4, a4, 16
; RV64I-NEXT: slli a1, a1, 24
-; RV64I-NEXT: or a1, a1, a6
; RV64I-NEXT: or a1, a1, a4
+; RV64I-NEXT: or a1, a1, a5
; RV64I-NEXT: slli a1, a1, 32
; RV64I-NEXT: or a1, a1, a3
; RV64I-NEXT: srl a0, a0, a1
@@ -262,17 +262,17 @@ define void @lshr_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli a6, a6, 24
; RV32I-NEXT: or a4, a6, a5
-; RV32I-NEXT: or a3, a4, a3
-; RV32I-NEXT: lbu a4, 1(a1)
; RV32I-NEXT: lbu a5, 0(a1)
-; RV32I-NEXT: lbu a6, 2(a1)
+; RV32I-NEXT: lbu a6, 1(a1)
+; RV32I-NEXT: or a3, a4, a3
+; RV32I-NEXT: lbu a4, 2(a1)
; RV32I-NEXT: lbu a1, 3(a1)
-; RV32I-NEXT: slli a4, a4, 8
-; RV32I-NEXT: or a4, a4, a5
-; RV32I-NEXT: slli a6, a6, 16
+; RV32I-NEXT: slli a6, a6, 8
+; RV32I-NEXT: or a5, a6, a5
+; RV32I-NEXT: slli a4, a4, 16
; RV32I-NEXT: slli a1, a1, 24
-; RV32I-NEXT: or a5, a1, a6
-; RV32I-NEXT: or a5, a5, a4
+; RV32I-NEXT: or a1, a1, a4
+; RV32I-NEXT: or a5, a1, a5
; RV32I-NEXT: addi a4, a5, -32
; RV32I-NEXT: srl a1, a3, a5
; RV32I-NEXT: bltz a4, .LBB3_2
@@ -331,39 +331,39 @@ define void @shl_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli a6, a6, 24
; RV64I-NEXT: or a4, a6, a5
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: lbu a4, 5(a0)
; RV64I-NEXT: lbu a5, 4(a0)
-; RV64I-NEXT: lbu a6, 6(a0)
+; RV64I-NEXT: lbu a6, 5(a0)
+; RV64I-NEXT: or a3, a4, a3
+; RV64I-NEXT: lbu a4, 6(a0)
; RV64I-NEXT: lbu a0, 7(a0)
-; RV64I-NEXT: slli a4, a4, 8
-; RV64I-NEXT: or a4, a4, a5
-; RV64I-NEXT: slli a6, a6, 16
+; RV64I-NEXT: slli a6, a6, 8
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: slli a4, a4, 16
; RV64I-NEXT: slli a0, a0, 24
-; RV64I-NEXT: or a0, a0, a6
; RV64I-NEXT: or a0, a0, a4
+; RV64I-NEXT: or a0, a0, a5
; RV64I-NEXT: slli a0, a0, 32
-; RV64I-NEXT: or a0, a0, a3
-; RV64I-NEXT: lbu a3, 1(a1)
; RV64I-NEXT: lbu a4, 0(a1)
-; RV64I-NEXT: lbu a5, 2(a1)
+; RV64I-NEXT: lbu a5, 1(a1)
+; RV64I-NEXT: or a0, a0, a3
+; RV64I-NEXT: lbu a3, 2(a1)
; RV64I-NEXT: lbu a6, 3(a1)
-; RV64I-NEXT: slli a3, a3, 8
-; RV64I-NEXT: or a3, a3, a4
-; RV64I-NEXT: slli a5, a5, 16
+; RV64I-NEXT: slli a5, a5, 8
+; RV64I-NEXT: or a4, a5, a4
+; RV64I-NEXT: slli a3, a3, 16
; RV64I-NEXT: slli a6, a6, 24
-; RV64I-NEXT: or a4, a6, a5
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: lbu a4, 5(a1)
+; RV64I-NEXT: or a3, a6, a3
; RV64I-NEXT: lbu a5, 4(a1)
-; RV64I-NEXT: lbu a6, 6(a1)
+; RV64I-NEXT: lbu a6, 5(a1)
+; RV64I-NEXT: or a3, a3, a4
+; RV64I-NEXT: lbu a4, 6(a1)
; RV64I-NEXT: lbu a1, 7(a1)
-; RV64I-NEXT: slli a4, a4, 8
-; RV64I-NEXT: or a4, a4, a5
-; RV64I-NEXT: slli a6, a6, 16
+; RV64I-NEXT: slli a6, a6, 8
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: slli a4, a4, 16
; RV64I-NEXT: slli a1, a1, 24
-; RV64I-NEXT: or a1, a1, a6
; RV64I-NEXT: or a1, a1, a4
+; RV64I-NEXT: or a1, a1, a5
; RV64I-NEXT: slli a1, a1, 32
; RV64I-NEXT: or a1, a1, a3
; RV64I-NEXT: sll a0, a0, a1
@@ -395,17 +395,17 @@ define void @shl_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli a6, a6, 24
; RV32I-NEXT: or a4, a6, a5
-; RV32I-NEXT: or a3, a4, a3
-; RV32I-NEXT: lbu a4, 1(a1)
; RV32I-NEXT: lbu a5, 0(a1)
-; RV32I-NEXT: lbu a6, 2(a1)
+; RV32I-NEXT: lbu a6, 1(a1)
+; RV32I-NEXT: or a3, a4, a3
+; RV32I-NEXT: lbu a4, 2(a1)
; RV32I-NEXT: lbu a1, 3(a1)
-; RV32I-NEXT: slli a4, a4, 8
-; RV32I-NEXT: or a4, a4, a5
-; RV32I-NEXT: slli a6, a6, 16
+; RV32I-NEXT: slli a6, a6, 8
+; RV32I-NEXT: or a5, a6, a5
+; RV32I-NEXT: slli a4, a4, 16
; RV32I-NEXT: slli a1, a1, 24
-; RV32I-NEXT: or a5, a1, a6
-; RV32I-NEXT: or a5, a5, a4
+; RV32I-NEXT: or a1, a1, a4
+; RV32I-NEXT: or a5, a1, a5
; RV32I-NEXT: addi a4, a5, -32
; RV32I-NEXT: sll a1, a3, a5
; RV32I-NEXT: bltz a4, .LBB4_2
@@ -464,39 +464,39 @@ define void @ashr_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli a6, a6, 24
; RV64I-NEXT: or a4, a6, a5
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: lbu a4, 5(a0)
; RV64I-NEXT: lbu a5, 4(a0)
-; RV64I-NEXT: lbu a6, 6(a0)
+; RV64I-NEXT: lbu a6, 5(a0)
+; RV64I-NEXT: or a3, a4, a3
+; RV64I-NEXT: lbu a4, 6(a0)
; RV64I-NEXT: lbu a0, 7(a0)
-; RV64I-NEXT: slli a4, a4, 8
-; RV64I-NEXT: or a4, a4, a5
-; RV64I-NEXT: slli a6, a6, 16
+; RV64I-NEXT: slli a6, a6, 8
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: slli a4, a4, 16
; RV64I-NEXT: slli a0, a0, 24
-; RV64I-NEXT: or a0, a0, a6
; RV64I-NEXT: or a0, a0, a4
+; RV64I-NEXT: or a0, a0, a5
; RV64I-NEXT: slli a0, a0, 32
-; RV64I-NEXT: or a0, a0, a3
-; RV64I-NEXT: lbu a3, 1(a1)
; RV64I-NEXT: lbu a4, 0(a1)
-; RV64I-NEXT: lbu a5, 2(a1)
+; RV64I-NEXT: lbu a5, 1(a1)
+; RV64I-NEXT: or a0, a0, a3
+; RV64I-NEXT: lbu a3, 2(a1)
; RV64I-NEXT: lbu a6, 3(a1)
-; RV64I-NEXT: slli a3, a3, 8
-; RV64I-NEXT: or a3, a3, a4
-; RV64I-NEXT: slli a5, a5, 16
+; RV64I-NEXT: slli a5, a5, 8
+; RV64I-NEXT: or a4, a5, a4
+; RV64I-NEXT: slli a3, a3, 16
; RV64I-NEXT: slli a6, a6, 24
-; RV64I-NEXT: or a4, a6, a5
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: lbu a4, 5(a1)
+; RV64I-NEXT: or a3, a6, a3
; RV64I-NEXT: lbu a5, 4(a1)
-; RV64I-NEXT: lbu a6, 6(a1)
+; RV64I-NEXT: lbu a6, 5(a1)
+; RV64I-NEXT: or a3, a3, a4
+; RV64I-NEXT: lbu a4, 6(a1)
; RV64I-NEXT: lbu a1, 7(a1)
-; RV64I-NEXT: slli a4, a4, 8
-; RV64I-NEXT: or a4, a4, a5
-; RV64I-NEXT: slli a6, a6, 16
+; RV64I-NEXT: slli a6, a6, 8
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: slli a4, a4, 16
; RV64I-NEXT: slli a1, a1, 24
-; RV64I-NEXT: or a1, a1, a6
; RV64I-NEXT: or a1, a1, a4
+; RV64I-NEXT: or a1, a1, a5
; RV64I-NEXT: slli a1, a1, 32
; RV64I-NEXT: or a1, a1, a3
; RV64I-NEXT: sra a0, a0, a1
@@ -528,17 +528,17 @@ define void @ashr_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli a4, a6, 24
; RV32I-NEXT: or a5, a4, a5
-; RV32I-NEXT: or a3, a5, a3
-; RV32I-NEXT: lbu a5, 1(a1)
; RV32I-NEXT: lbu a6, 0(a1)
-; RV32I-NEXT: lbu a7, 2(a1)
+; RV32I-NEXT: lbu a7, 1(a1)
+; RV32I-NEXT: or a3, a5, a3
+; RV32I-NEXT: lbu a5, 2(a1)
; RV32I-NEXT: lbu a1, 3(a1)
-; RV32I-NEXT: slli a5, a5, 8
-; RV32I-NEXT: or a5, a5, a6
-; RV32I-NEXT: slli a7, a7, 16
+; RV32I-NEXT: slli a7, a7, 8
+; RV32I-NEXT: or a6, a7, a6
+; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli a1, a1, 24
-; RV32I-NEXT: or a1, a1, a7
-; RV32I-NEXT: or a5, a1, a5
+; RV32I-NEXT: or a1, a1, a5
+; RV32I-NEXT: or a5, a1, a6
; RV32I-NEXT: addi a6, a5, -32
; RV32I-NEXT: sra a1, a3, a5
; RV32I-NEXT: bltz a6, .LBB5_2
@@ -598,39 +598,39 @@ define void @lshr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli a6, a6, 24
; RV64I-NEXT: or a4, a6, a5
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: lbu a4, 13(a0)
; RV64I-NEXT: lbu a5, 12(a0)
-; RV64I-NEXT: lbu a6, 14(a0)
+; RV64I-NEXT: lbu a6, 13(a0)
+; RV64I-NEXT: or a3, a4, a3
+; RV64I-NEXT: lbu a4, 14(a0)
; RV64I-NEXT: lbu a7, 15(a0)
-; RV64I-NEXT: slli a4, a4, 8
-; RV64I-NEXT: or a4, a4, a5
-; RV64I-NEXT: slli a6, a6, 16
+; RV64I-NEXT: slli a6, a6, 8
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: slli a4, a4, 16
; RV64I-NEXT: slli a7, a7, 24
-; RV64I-NEXT: or a5, a7, a6
-; RV64I-NEXT: or a4, a5, a4
+; RV64I-NEXT: or a4, a7, a4
+; RV64I-NEXT: or a4, a4, a5
; RV64I-NEXT: slli a4, a4, 32
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: lbu a4, 1(a1)
; RV64I-NEXT: lbu a5, 0(a1)
-; RV64I-NEXT: lbu a6, 2(a1)
+; RV64I-NEXT: lbu a6, 1(a1)
+; RV64I-NEXT: or a3, a4, a3
+; RV64I-NEXT: lbu a4, 2(a1)
; RV64I-NEXT: lbu a7, 3(a1)
-; RV64I-NEXT: slli a4, a4, 8
-; RV64I-NEXT: or a4, a4, a5
-; RV64I-NEXT: slli a6, a6, 16
+; RV64I-NEXT: slli a6, a6, 8
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: slli a4, a4, 16
; RV64I-NEXT: slli a7, a7, 24
-; RV64I-NEXT: or a5, a7, a6
-; RV64I-NEXT: or a4, a5, a4
-; RV64I-NEXT: lbu a5, 5(a1)
+; RV64I-NEXT: or a4, a7, a4
; RV64I-NEXT: lbu a6, 4(a1)
-; RV64I-NEXT: lbu a7, 6(a1)
+; RV64I-NEXT: lbu a7, 5(a1)
+; RV64I-NEXT: or a4, a4, a5
+; RV64I-NEXT: lbu a5, 6(a1)
; RV64I-NEXT: lbu a1, 7(a1)
-; RV64I-NEXT: slli a5, a5, 8
-; RV64I-NEXT: or a5, a5, a6
-; RV64I-NEXT: slli a7, a7, 16
+; RV64I-NEXT: slli a7, a7, 8
+; RV64I-NEXT: or a6, a7, a6
+; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli a1, a1, 24
-; RV64I-NEXT: or a1, a1, a7
; RV64I-NEXT: or a1, a1, a5
+; RV64I-NEXT: or a1, a1, a6
; RV64I-NEXT: slli a1, a1, 32
; RV64I-NEXT: or a5, a1, a4
; RV64I-NEXT: addi a4, a5, -64
@@ -649,17 +649,17 @@ define void @lshr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: slli t0, t0, 16
; RV64I-NEXT: slli t1, t1, 24
; RV64I-NEXT: or a7, t1, t0
-; RV64I-NEXT: or a6, a7, a6
-; RV64I-NEXT: lbu a7, 5(a0)
; RV64I-NEXT: lbu t0, 4(a0)
-; RV64I-NEXT: lbu t1, 6(a0)
+; RV64I-NEXT: lbu t1, 5(a0)
+; RV64I-NEXT: or a6, a7, a6
+; RV64I-NEXT: lbu a7, 6(a0)
; RV64I-NEXT: lbu a0, 7(a0)
-; RV64I-NEXT: slli a7, a7, 8
-; RV64I-NEXT: or a7, a7, t0
-; RV64I-NEXT: slli t1, t1, 16
+; RV64I-NEXT: slli t1, t1, 8
+; RV64I-NEXT: or t0, t1, t0
+; RV64I-NEXT: slli a7, a7, 16
; RV64I-NEXT: slli a0, a0, 24
-; RV64I-NEXT: or a0, a0, t1
; RV64I-NEXT: or a0, a0, a7
+; RV64I-NEXT: or a0, a0, t0
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: or a0, a0, a6
; RV64I-NEXT: srl a0, a0, a5
@@ -714,46 +714,46 @@ define void @lshr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli a6, a6, 24
; RV32I-NEXT: or a4, a6, a5
-; RV32I-NEXT: or a3, a4, a3
-; RV32I-NEXT: lbu a4, 5(a0)
; RV32I-NEXT: lbu a5, 4(a0)
-; RV32I-NEXT: lbu a6, 6(a0)
+; RV32I-NEXT: lbu a6, 5(a0)
+; RV32I-NEXT: or a3, a4, a3
+; RV32I-NEXT: lbu a4, 6(a0)
; RV32I-NEXT: lbu a7, 7(a0)
-; RV32I-NEXT: slli a4, a4, 8
-; RV32I-NEXT: or a4, a4, a5
-; RV32I-NEXT: slli a6, a6, 16
+; RV32I-NEXT: slli a6, a6, 8
+; RV32I-NEXT: or a5, a6, a5
+; RV32I-NEXT: slli a4, a4, 16
; RV32I-NEXT: slli a7, a7, 24
-; RV32I-NEXT: or a5, a7, a6
-; RV32I-NEXT: or a4, a5, a4
-; RV32I-NEXT: lbu a5, 9(a0)
+; RV32I-NEXT: or a4, a7, a4
; RV32I-NEXT: lbu a6, 8(a0)
-; RV32I-NEXT: lbu a7, 10(a0)
+; RV32I-NEXT: lbu a7, 9(a0)
+; RV32I-NEXT: or a4, a4, a5
+; RV32I-NEXT: lbu a5, 10(a0)
; RV32I-NEXT: lbu t0, 11(a0)
-; RV32I-NEXT: slli a5, a5, 8
-; RV32I-NEXT: or a5, a5, a6
-; RV32I-NEXT: slli a7, a7, 16
+; RV32I-NEXT: slli a7, a7, 8
+; RV32I-NEXT: or a6, a7, a6
+; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli t0, t0, 24
-; RV32I-NEXT: or a6, t0, a7
-; RV32I-NEXT: or a5, a6, a5
-; RV32I-NEXT: lbu a6, 13(a0)
+; RV32I-NEXT: or a5, t0, a5
; RV32I-NEXT: lbu a7, 12(a0)
-; RV32I-NEXT: lbu t0, 14(a0)
+; RV32I-NEXT: lbu t0, 13(a0)
+; RV32I-NEXT: or a5, a5, a6
+; RV32I-NEXT: lbu a6, 14(a0)
; RV32I-NEXT: lbu a0, 15(a0)
-; RV32I-NEXT: slli a6, a6, 8
-; RV32I-NEXT: or a6, a6, a7
-; RV32I-NEXT: slli t0, t0, 16
+; RV32I-NEXT: slli t0, t0, 8
+; RV32I-NEXT: or a7, t0, a7
+; RV32I-NEXT: slli a6, a6, 16
; RV32I-NEXT: slli a0, a0, 24
-; RV32I-NEXT: or a0, a0, t0
; RV32I-NEXT: or a0, a0, a6
-; RV32I-NEXT: lbu a6, 1(a1)
-; RV32I-NEXT: lbu a7, 0(a1)
-; RV32I-NEXT: lbu t0, 2(a1)
+; RV32I-NEXT: lbu a6, 0(a1)
+; RV32I-NEXT: lbu t0, 1(a1)
+; RV32I-NEXT: or a0, a0, a7
+; RV32I-NEXT: lbu a7, 2(a1)
; RV32I-NEXT: lbu a1, 3(a1)
-; RV32I-NEXT: slli a6, a6, 8
-; RV32I-NEXT: or a6, a6, a7
-; RV32I-NEXT: slli t0, t0, 16
+; RV32I-NEXT: slli t0, t0, 8
+; RV32I-NEXT: or a6, t0, a6
+; RV32I-NEXT: slli a7, a7, 16
; RV32I-NEXT: slli a1, a1, 24
-; RV32I-NEXT: or a1, a1, t0
+; RV32I-NEXT: or a1, a1, a7
; RV32I-NEXT: or a1, a1, a6
; RV32I-NEXT: sw zero, 28(sp)
; RV32I-NEXT: sw zero, 24(sp)
@@ -768,23 +768,23 @@ define void @lshr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: mv a3, sp
; RV32I-NEXT: add a0, a3, a0
; RV32I-NEXT: lw a3, 4(a0)
-; RV32I-NEXT: srl a4, a3, a1
+; RV32I-NEXT: lw a4, 0(a0)
; RV32I-NEXT: lw a5, 8(a0)
-; RV32I-NEXT: andi a6, a1, 31
-; RV32I-NEXT: xori a6, a6, 31
-; RV32I-NEXT: lw a7, 0(a0)
+; RV32I-NEXT: lw a0, 12(a0)
+; RV32I-NEXT: srl a6, a3, a1
+; RV32I-NEXT: andi a7, a1, 31
+; RV32I-NEXT: xori a7, a7, 31
; RV32I-NEXT: slli t0, a5, 1
-; RV32I-NEXT: sll t0, t0, a6
-; RV32I-NEXT: or a4, a4, t0
-; RV32I-NEXT: srl a7, a7, a1
+; RV32I-NEXT: sll t0, t0, a7
+; RV32I-NEXT: or a6, a6, t0
+; RV32I-NEXT: srl a4, a4, a1
; RV32I-NEXT: slli a3, a3, 1
-; RV32I-NEXT: lw a0, 12(a0)
-; RV32I-NEXT: sll a3, a3, a6
-; RV32I-NEXT: or a3, a7, a3
-; RV32I-NEXT: srl a5, a5, a1
-; RV32I-NEXT: slli a7, a0, 1
-; RV32I-NEXT: sll a6, a7, a6
-; RV32I-NEXT: or a5, a5, a6
+; RV32I-NEXT: sll a3, a3, a7
+; RV32I-NEXT: or a3, a4, a3
+; RV32I-NEXT: srl a4, a5, a1
+; RV32I-NEXT: slli a5, a0, 1
+; RV32I-NEXT: sll a5, a5, a7
+; RV32I-NEXT: or a4, a4, a5
; RV32I-NEXT: srl a0, a0, a1
; RV32I-NEXT: sb a0, 12(a2)
; RV32I-NEXT: srli a1, a0, 16
@@ -793,27 +793,27 @@ define void @lshr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: sb a1, 15(a2)
; RV32I-NEXT: srli a0, a0, 8
; RV32I-NEXT: sb a0, 13(a2)
-; RV32I-NEXT: sb a5, 8(a2)
+; RV32I-NEXT: sb a4, 8(a2)
; RV32I-NEXT: sb a3, 0(a2)
-; RV32I-NEXT: sb a4, 4(a2)
-; RV32I-NEXT: srli a0, a5, 16
+; RV32I-NEXT: sb a6, 4(a2)
+; RV32I-NEXT: srli a0, a4, 16
; RV32I-NEXT: sb a0, 10(a2)
-; RV32I-NEXT: srli a0, a5, 24
+; RV32I-NEXT: srli a0, a4, 24
; RV32I-NEXT: sb a0, 11(a2)
-; RV32I-NEXT: srli a5, a5, 8
-; RV32I-NEXT: sb a5, 9(a2)
+; RV32I-NEXT: srli a4, a4, 8
+; RV32I-NEXT: sb a4, 9(a2)
; RV32I-NEXT: srli a0, a3, 16
; RV32I-NEXT: sb a0, 2(a2)
; RV32I-NEXT: srli a0, a3, 24
; RV32I-NEXT: sb a0, 3(a2)
; RV32I-NEXT: srli a3, a3, 8
; RV32I-NEXT: sb a3, 1(a2)
-; RV32I-NEXT: srli a0, a4, 16
+; RV32I-NEXT: srli a0, a6, 16
; RV32I-NEXT: sb a0, 6(a2)
-; RV32I-NEXT: srli a0, a4, 24
+; RV32I-NEXT: srli a0, a6, 24
; RV32I-NEXT: sb a0, 7(a2)
-; RV32I-NEXT: srli a4, a4, 8
-; RV32I-NEXT: sb a4, 5(a2)
+; RV32I-NEXT: srli a0, a6, 8
+; RV32I-NEXT: sb a0, 5(a2)
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
%src = load i128, ptr %src.ptr, align 1
@@ -834,39 +834,39 @@ define void @shl_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli a6, a6, 24
; RV64I-NEXT: or a4, a6, a5
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: lbu a4, 5(a0)
; RV64I-NEXT: lbu a5, 4(a0)
-; RV64I-NEXT: lbu a6, 6(a0)
+; RV64I-NEXT: lbu a6, 5(a0)
+; RV64I-NEXT: or a3, a4, a3
+; RV64I-NEXT: lbu a4, 6(a0)
; RV64I-NEXT: lbu a7, 7(a0)
-; RV64I-NEXT: slli a4, a4, 8
-; RV64I-NEXT: or a4, a4, a5
-; RV64I-NEXT: slli a6, a6, 16
+; RV64I-NEXT: slli a6, a6, 8
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: slli a4, a4, 16
; RV64I-NEXT: slli a7, a7, 24
-; RV64I-NEXT: or a5, a7, a6
-; RV64I-NEXT: or a4, a5, a4
+; RV64I-NEXT: or a4, a7, a4
+; RV64I-NEXT: or a4, a4, a5
; RV64I-NEXT: slli a4, a4, 32
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: lbu a4, 1(a1)
; RV64I-NEXT: lbu a5, 0(a1)
-; RV64I-NEXT: lbu a6, 2(a1)
+; RV64I-NEXT: lbu a6, 1(a1)
+; RV64I-NEXT: or a3, a4, a3
+; RV64I-NEXT: lbu a4, 2(a1)
; RV64I-NEXT: lbu a7, 3(a1)
-; RV64I-NEXT: slli a4, a4, 8
-; RV64I-NEXT: or a4, a4, a5
-; RV64I-NEXT: slli a6, a6, 16
+; RV64I-NEXT: slli a6, a6, 8
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: slli a4, a4, 16
; RV64I-NEXT: slli a7, a7, 24
-; RV64I-NEXT: or a5, a7, a6
-; RV64I-NEXT: or a4, a5, a4
-; RV64I-NEXT: lbu a5, 5(a1)
+; RV64I-NEXT: or a4, a7, a4
; RV64I-NEXT: lbu a6, 4(a1)
-; RV64I-NEXT: lbu a7, 6(a1)
+; RV64I-NEXT: lbu a7, 5(a1)
+; RV64I-NEXT: or a4, a4, a5
+; RV64I-NEXT: lbu a5, 6(a1)
; RV64I-NEXT: lbu a1, 7(a1)
-; RV64I-NEXT: slli a5, a5, 8
-; RV64I-NEXT: or a5, a5, a6
-; RV64I-NEXT: slli a7, a7, 16
+; RV64I-NEXT: slli a7, a7, 8
+; RV64I-NEXT: or a6, a7, a6
+; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli a1, a1, 24
-; RV64I-NEXT: or a1, a1, a7
; RV64I-NEXT: or a1, a1, a5
+; RV64I-NEXT: or a1, a1, a6
; RV64I-NEXT: slli a1, a1, 32
; RV64I-NEXT: or a5, a1, a4
; RV64I-NEXT: addi a4, a5, -64
@@ -885,17 +885,17 @@ define void @shl_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: slli t0, t0, 16
; RV64I-NEXT: slli t1, t1, 24
; RV64I-NEXT: or a7, t1, t0
-; RV64I-NEXT: or a6, a7, a6
-; RV64I-NEXT: lbu a7, 13(a0)
; RV64I-NEXT: lbu t0, 12(a0)
-; RV64I-NEXT: lbu t1, 14(a0)
+; RV64I-NEXT: lbu t1, 13(a0)
+; RV64I-NEXT: or a6, a7, a6
+; RV64I-NEXT: lbu a7, 14(a0)
; RV64I-NEXT: lbu a0, 15(a0)
-; RV64I-NEXT: slli a7, a7, 8
-; RV64I-NEXT: or a7, a7, t0
-; RV64I-NEXT: slli t1, t1, 16
+; RV64I-NEXT: slli t1, t1, 8
+; RV64I-NEXT: or t0, t1, t0
+; RV64I-NEXT: slli a7, a7, 16
; RV64I-NEXT: slli a0, a0, 24
-; RV64I-NEXT: or a0, a0, t1
; RV64I-NEXT: or a0, a0, a7
+; RV64I-NEXT: or a0, a0, t0
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: or a0, a0, a6
; RV64I-NEXT: sll a0, a0, a5
@@ -950,46 +950,46 @@ define void @shl_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli a6, a6, 24
; RV32I-NEXT: or a4, a6, a5
-; RV32I-NEXT: or a3, a4, a3
-; RV32I-NEXT: lbu a4, 5(a0)
; RV32I-NEXT: lbu a5, 4(a0)
-; RV32I-NEXT: lbu a6, 6(a0)
+; RV32I-NEXT: lbu a6, 5(a0)
+; RV32I-NEXT: or a3, a4, a3
+; RV32I-NEXT: lbu a4, 6(a0)
; RV32I-NEXT: lbu a7, 7(a0)
-; RV32I-NEXT: slli a4, a4, 8
-; RV32I-NEXT: or a4, a4, a5
-; RV32I-NEXT: slli a6, a6, 16
+; RV32I-NEXT: slli a6, a6, 8
+; RV32I-NEXT: or a5, a6, a5
+; RV32I-NEXT: slli a4, a4, 16
; RV32I-NEXT: slli a7, a7, 24
-; RV32I-NEXT: or a5, a7, a6
-; RV32I-NEXT: or a4, a5, a4
-; RV32I-NEXT: lbu a5, 9(a0)
+; RV32I-NEXT: or a4, a7, a4
; RV32I-NEXT: lbu a6, 8(a0)
-; RV32I-NEXT: lbu a7, 10(a0)
+; RV32I-NEXT: lbu a7, 9(a0)
+; RV32I-NEXT: or a4, a4, a5
+; RV32I-NEXT: lbu a5, 10(a0)
; RV32I-NEXT: lbu t0, 11(a0)
-; RV32I-NEXT: slli a5, a5, 8
-; RV32I-NEXT: or a5, a5, a6
-; RV32I-NEXT: slli a7, a7, 16
+; RV32I-NEXT: slli a7, a7, 8
+; RV32I-NEXT: or a6, a7, a6
+; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli t0, t0, 24
-; RV32I-NEXT: or a6, t0, a7
-; RV32I-NEXT: or a5, a6, a5
-; RV32I-NEXT: lbu a6, 13(a0)
+; RV32I-NEXT: or a5, t0, a5
; RV32I-NEXT: lbu a7, 12(a0)
-; RV32I-NEXT: lbu t0, 14(a0)
+; RV32I-NEXT: lbu t0, 13(a0)
+; RV32I-NEXT: or a5, a5, a6
+; RV32I-NEXT: lbu a6, 14(a0)
; RV32I-NEXT: lbu a0, 15(a0)
-; RV32I-NEXT: slli a6, a6, 8
-; RV32I-NEXT: or a6, a6, a7
-; RV32I-NEXT: slli t0, t0, 16
+; RV32I-NEXT: slli t0, t0, 8
+; RV32I-NEXT: or a7, t0, a7
+; RV32I-NEXT: slli a6, a6, 16
; RV32I-NEXT: slli a0, a0, 24
-; RV32I-NEXT: or a0, a0, t0
; RV32I-NEXT: or a0, a0, a6
-; RV32I-NEXT: lbu a6, 1(a1)
-; RV32I-NEXT: lbu a7, 0(a1)
-; RV32I-NEXT: lbu t0, 2(a1)
+; RV32I-NEXT: lbu a6, 0(a1)
+; RV32I-NEXT: lbu t0, 1(a1)
+; RV32I-NEXT: or a0, a0, a7
+; RV32I-NEXT: lbu a7, 2(a1)
; RV32I-NEXT: lbu a1, 3(a1)
-; RV32I-NEXT: slli a6, a6, 8
-; RV32I-NEXT: or a6, a6, a7
-; RV32I-NEXT: slli t0, t0, 16
+; RV32I-NEXT: slli t0, t0, 8
+; RV32I-NEXT: or a6, t0, a6
+; RV32I-NEXT: slli a7, a7, 16
; RV32I-NEXT: slli a1, a1, 24
-; RV32I-NEXT: or a1, a1, t0
+; RV32I-NEXT: or a1, a1, a7
; RV32I-NEXT: or a1, a1, a6
; RV32I-NEXT: sw zero, 12(sp)
; RV32I-NEXT: sw zero, 8(sp)
@@ -1005,51 +1005,51 @@ define void @shl_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: sub a3, a3, a0
; RV32I-NEXT: lw a0, 4(a3)
; RV32I-NEXT: lw a4, 0(a3)
-; RV32I-NEXT: sll a5, a0, a1
-; RV32I-NEXT: andi a6, a1, 31
-; RV32I-NEXT: xori a6, a6, 31
-; RV32I-NEXT: srli a7, a4, 1
-; RV32I-NEXT: lw t0, 12(a3)
-; RV32I-NEXT: lw a3, 8(a3)
-; RV32I-NEXT: srl a7, a7, a6
-; RV32I-NEXT: or a5, a5, a7
-; RV32I-NEXT: sll a7, t0, a1
-; RV32I-NEXT: srli t0, a3, 1
-; RV32I-NEXT: srl t0, t0, a6
-; RV32I-NEXT: or a7, a7, t0
+; RV32I-NEXT: lw a5, 8(a3)
+; RV32I-NEXT: lw a3, 12(a3)
+; RV32I-NEXT: sll a6, a0, a1
+; RV32I-NEXT: andi a7, a1, 31
+; RV32I-NEXT: xori a7, a7, 31
+; RV32I-NEXT: srli t0, a4, 1
+; RV32I-NEXT: srl t0, t0, a7
+; RV32I-NEXT: or a6, a6, t0
; RV32I-NEXT: sll a3, a3, a1
+; RV32I-NEXT: srli t0, a5, 1
+; RV32I-NEXT: srl t0, t0, a7
+; RV32I-NEXT: or a3, a3, t0
+; RV32I-NEXT: sll a5, a5, a1
; RV32I-NEXT: srli a0, a0, 1
-; RV32I-NEXT: srl a0, a0, a6
-; RV32I-NEXT: or a0, a3, a0
+; RV32I-NEXT: srl a0, a0, a7
+; RV32I-NEXT: or a0, a5, a0
; RV32I-NEXT: sll a1, a4, a1
; RV32I-NEXT: sb a1, 0(a2)
-; RV32I-NEXT: srli a3, a1, 16
-; RV32I-NEXT: sb a3, 2(a2)
-; RV32I-NEXT: srli a3, a1, 24
-; RV32I-NEXT: sb a3, 3(a2)
+; RV32I-NEXT: srli a4, a1, 16
+; RV32I-NEXT: sb a4, 2(a2)
+; RV32I-NEXT: srli a4, a1, 24
+; RV32I-NEXT: sb a4, 3(a2)
; RV32I-NEXT: srli a1, a1, 8
; RV32I-NEXT: sb a1, 1(a2)
; RV32I-NEXT: sb a0, 8(a2)
-; RV32I-NEXT: sb a7, 12(a2)
-; RV32I-NEXT: sb a5, 4(a2)
+; RV32I-NEXT: sb a3, 12(a2)
+; RV32I-NEXT: sb a6, 4(a2)
; RV32I-NEXT: srli a1, a0, 16
; RV32I-NEXT: sb a1, 10(a2)
; RV32I-NEXT: srli a1, a0, 24
; RV32I-NEXT: sb a1, 11(a2)
; RV32I-NEXT: srli a0, a0, 8
; RV32I-NEXT: sb a0, 9(a2)
-; RV32I-NEXT: srli a0, a7, 16
+; RV32I-NEXT: srli a0, a3, 16
; RV32I-NEXT: sb a0, 14(a2)
-; RV32I-NEXT: srli a0, a7, 24
+; RV32I-NEXT: srli a0, a3, 24
; RV32I-NEXT: sb a0, 15(a2)
-; RV32I-NEXT: srli a0, a7, 8
-; RV32I-NEXT: sb a0, 13(a2)
-; RV32I-NEXT: srli a0, a5, 16
+; RV32I-NEXT: srli a3, a3, 8
+; RV32I-NEXT: sb a3, 13(a2)
+; RV32I-NEXT: srli a0, a6, 16
; RV32I-NEXT: sb a0, 6(a2)
-; RV32I-NEXT: srli a0, a5, 24
+; RV32I-NEXT: srli a0, a6, 24
; RV32I-NEXT: sb a0, 7(a2)
-; RV32I-NEXT: srli a5, a5, 8
-; RV32I-NEXT: sb a5, 5(a2)
+; RV32I-NEXT: srli a0, a6, 8
+; RV32I-NEXT: sb a0, 5(a2)
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
%src = load i128, ptr %src.ptr, align 1
@@ -1070,39 +1070,39 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli a6, a6, 24
; RV64I-NEXT: or a4, a6, a5
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: lbu a4, 13(a0)
; RV64I-NEXT: lbu a5, 12(a0)
-; RV64I-NEXT: lbu a6, 14(a0)
+; RV64I-NEXT: lbu a6, 13(a0)
+; RV64I-NEXT: or a3, a4, a3
+; RV64I-NEXT: lbu a4, 14(a0)
; RV64I-NEXT: lbu a7, 15(a0)
-; RV64I-NEXT: slli a4, a4, 8
-; RV64I-NEXT: or a4, a4, a5
-; RV64I-NEXT: slli a6, a6, 16
+; RV64I-NEXT: slli a6, a6, 8
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: slli a4, a4, 16
; RV64I-NEXT: slli a7, a7, 24
-; RV64I-NEXT: or a5, a7, a6
-; RV64I-NEXT: or a4, a5, a4
+; RV64I-NEXT: or a4, a7, a4
+; RV64I-NEXT: or a4, a4, a5
; RV64I-NEXT: slli a5, a4, 32
-; RV64I-NEXT: or a3, a5, a3
-; RV64I-NEXT: lbu a5, 1(a1)
; RV64I-NEXT: lbu a6, 0(a1)
-; RV64I-NEXT: lbu a7, 2(a1)
+; RV64I-NEXT: lbu a7, 1(a1)
+; RV64I-NEXT: or a3, a5, a3
+; RV64I-NEXT: lbu a5, 2(a1)
; RV64I-NEXT: lbu t0, 3(a1)
-; RV64I-NEXT: slli a5, a5, 8
-; RV64I-NEXT: or a5, a5, a6
-; RV64I-NEXT: slli a7, a7, 16
+; RV64I-NEXT: slli a7, a7, 8
+; RV64I-NEXT: or a6, a7, a6
+; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli t0, t0, 24
-; RV64I-NEXT: or a6, t0, a7
-; RV64I-NEXT: or a5, a6, a5
-; RV64I-NEXT: lbu a6, 5(a1)
+; RV64I-NEXT: or a5, t0, a5
; RV64I-NEXT: lbu a7, 4(a1)
-; RV64I-NEXT: lbu t0, 6(a1)
+; RV64I-NEXT: lbu t0, 5(a1)
+; RV64I-NEXT: or a5, a5, a6
+; RV64I-NEXT: lbu a6, 6(a1)
; RV64I-NEXT: lbu a1, 7(a1)
-; RV64I-NEXT: slli a6, a6, 8
-; RV64I-NEXT: or a6, a6, a7
-; RV64I-NEXT: slli t0, t0, 16
+; RV64I-NEXT: slli t0, t0, 8
+; RV64I-NEXT: or a7, t0, a7
+; RV64I-NEXT: slli a6, a6, 16
; RV64I-NEXT: slli a1, a1, 24
-; RV64I-NEXT: or a1, a1, t0
; RV64I-NEXT: or a1, a1, a6
+; RV64I-NEXT: or a1, a1, a7
; RV64I-NEXT: slli a1, a1, 32
; RV64I-NEXT: or a5, a1, a5
; RV64I-NEXT: addi a6, a5, -64
@@ -1123,17 +1123,17 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: slli a7, a7, 16
; RV64I-NEXT: slli t0, t0, 24
; RV64I-NEXT: or a6, t0, a7
-; RV64I-NEXT: or a4, a6, a4
-; RV64I-NEXT: lbu a6, 5(a0)
; RV64I-NEXT: lbu a7, 4(a0)
-; RV64I-NEXT: lbu t0, 6(a0)
+; RV64I-NEXT: lbu t0, 5(a0)
+; RV64I-NEXT: or a4, a6, a4
+; RV64I-NEXT: lbu a6, 6(a0)
; RV64I-NEXT: lbu a0, 7(a0)
-; RV64I-NEXT: slli a6, a6, 8
-; RV64I-NEXT: or a6, a6, a7
-; RV64I-NEXT: slli t0, t0, 16
+; RV64I-NEXT: slli t0, t0, 8
+; RV64I-NEXT: or a7, t0, a7
+; RV64I-NEXT: slli a6, a6, 16
; RV64I-NEXT: slli a0, a0, 24
-; RV64I-NEXT: or a0, a0, t0
; RV64I-NEXT: or a0, a0, a6
+; RV64I-NEXT: or a0, a0, a7
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: or a0, a0, a4
; RV64I-NEXT: srl a0, a0, a5
@@ -1186,47 +1186,47 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli a6, a6, 24
; RV32I-NEXT: or a4, a6, a5
-; RV32I-NEXT: or a3, a4, a3
-; RV32I-NEXT: lbu a4, 5(a0)
; RV32I-NEXT: lbu a5, 4(a0)
-; RV32I-NEXT: lbu a6, 6(a0)
+; RV32I-NEXT: lbu a6, 5(a0)
+; RV32I-NEXT: or a3, a4, a3
+; RV32I-NEXT: lbu a4, 6(a0)
; RV32I-NEXT: lbu a7, 7(a0)
-; RV32I-NEXT: slli a4, a4, 8
-; RV32I-NEXT: or a4, a4, a5
-; RV32I-NEXT: slli a6, a6, 16
+; RV32I-NEXT: slli a6, a6, 8
+; RV32I-NEXT: or a5, a6, a5
+; RV32I-NEXT: slli a4, a4, 16
; RV32I-NEXT: slli a7, a7, 24
-; RV32I-NEXT: or a5, a7, a6
-; RV32I-NEXT: or a4, a5, a4
-; RV32I-NEXT: lbu a5, 9(a0)
+; RV32I-NEXT: or a4, a7, a4
; RV32I-NEXT: lbu a6, 8(a0)
-; RV32I-NEXT: lbu a7, 10(a0)
+; RV32I-NEXT: lbu a7, 9(a0)
+; RV32I-NEXT: or a4, a4, a5
+; RV32I-NEXT: lbu a5, 10(a0)
; RV32I-NEXT: lbu t0, 11(a0)
-; RV32I-NEXT: slli a5, a5, 8
-; RV32I-NEXT: or a5, a5, a6
-; RV32I-NEXT: slli a7, a7, 16
+; RV32I-NEXT: slli a7, a7, 8
+; RV32I-NEXT: or a6, a7, a6
+; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli t0, t0, 24
-; RV32I-NEXT: or a6, t0, a7
-; RV32I-NEXT: or a5, a6, a5
-; RV32I-NEXT: lbu a6, 13(a0)
+; RV32I-NEXT: or a5, t0, a5
; RV32I-NEXT: lbu a7, 12(a0)
-; RV32I-NEXT: lbu t0, 14(a0)
+; RV32I-NEXT: lbu t0, 13(a0)
+; RV32I-NEXT: or a5, a5, a6
+; RV32I-NEXT: lbu a6, 14(a0)
; RV32I-NEXT: lbu a0, 15(a0)
-; RV32I-NEXT: slli a6, a6, 8
-; RV32I-NEXT: or a6, a6, a7
-; RV32I-NEXT: slli t0, t0, 16
+; RV32I-NEXT: slli t0, t0, 8
+; RV32I-NEXT: or a7, t0, a7
+; RV32I-NEXT: slli a6, a6, 16
; RV32I-NEXT: slli a0, a0, 24
-; RV32I-NEXT: or a7, a0, t0
-; RV32I-NEXT: or a6, a7, a6
-; RV32I-NEXT: lbu a7, 1(a1)
+; RV32I-NEXT: or a6, a0, a6
; RV32I-NEXT: lbu t0, 0(a1)
-; RV32I-NEXT: lbu t1, 2(a1)
+; RV32I-NEXT: lbu t1, 1(a1)
+; RV32I-NEXT: or a6, a6, a7
+; RV32I-NEXT: lbu a7, 2(a1)
; RV32I-NEXT: lbu a1, 3(a1)
-; RV32I-NEXT: slli a7, a7, 8
-; RV32I-NEXT: or a7, a7, t0
-; RV32I-NEXT: slli t1, t1, 16
+; RV32I-NEXT: slli t1, t1, 8
+; RV32I-NEXT: or t0, t1, t0
+; RV32I-NEXT: slli a7, a7, 16
; RV32I-NEXT: slli a1, a1, 24
-; RV32I-NEXT: or a1, a1, t1
; RV32I-NEXT: or a1, a1, a7
+; RV32I-NEXT: or a1, a1, t0
; RV32I-NEXT: srai a0, a0, 31
; RV32I-NEXT: sw a0, 28(sp)
; RV32I-NEXT: sw a0, 24(sp)
@@ -1241,23 +1241,23 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: mv a3, sp
; RV32I-NEXT: add a0, a3, a0
; RV32I-NEXT: lw a3, 4(a0)
-; RV32I-NEXT: srl a4, a3, a1
+; RV32I-NEXT: lw a4, 0(a0)
; RV32I-NEXT: lw a5, 8(a0)
-; RV32I-NEXT: andi a6, a1, 31
-; RV32I-NEXT: xori a6, a6, 31
-; RV32I-NEXT: lw a7, 0(a0)
+; RV32I-NEXT: lw a0, 12(a0)
+; RV32I-NEXT: srl a6, a3, a1
+; RV32I-NEXT: andi a7, a1, 31
+; RV32I-NEXT: xori a7, a7, 31
; RV32I-NEXT: slli t0, a5, 1
-; RV32I-NEXT: sll t0, t0, a6
-; RV32I-NEXT: or a4, a4, t0
-; RV32I-NEXT: srl a7, a7, a1
+; RV32I-NEXT: sll t0, t0, a7
+; RV32I-NEXT: or a6, a6, t0
+; RV32I-NEXT: srl a4, a4, a1
; RV32I-NEXT: slli a3, a3, 1
-; RV32I-NEXT: lw a0, 12(a0)
-; RV32I-NEXT: sll a3, a3, a6
-; RV32I-NEXT: or a3, a7, a3
-; RV32I-NEXT: srl a5, a5, a1
-; RV32I-NEXT: slli a7, a0, 1
-; RV32I-NEXT: sll a6, a7, a6
-; RV32I-NEXT: or a5, a5, a6
+; RV32I-NEXT: sll a3, a3, a7
+; RV32I-NEXT: or a3, a4, a3
+; RV32I-NEXT: srl a4, a5, a1
+; RV32I-NEXT: slli a5, a0, 1
+; RV32I-NEXT: sll a5, a5, a7
+; RV32I-NEXT: or a4, a4, a5
; RV32I-NEXT: sra a0, a0, a1
; RV32I-NEXT: sb a0, 12(a2)
; RV32I-NEXT: srli a1, a0, 16
@@ -1266,27 +1266,27 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: sb a1, 15(a2)
; RV32I-NEXT: srli a0, a0, 8
; RV32I-NEXT: sb a0, 13(a2)
-; RV32I-NEXT: sb a5, 8(a2)
+; RV32I-NEXT: sb a4, 8(a2)
; RV32I-NEXT: sb a3, 0(a2)
-; RV32I-NEXT: sb a4, 4(a2)
-; RV32I-NEXT: srli a0, a5, 16
+; RV32I-NEXT: sb a6, 4(a2)
+; RV32I-NEXT: srli a0, a4, 16
; RV32I-NEXT: sb a0, 10(a2)
-; RV32I-NEXT: srli a0, a5, 24
+; RV32I-NEXT: srli a0, a4, 24
; RV32I-NEXT: sb a0, 11(a2)
-; RV32I-NEXT: srli a5, a5, 8
-; RV32I-NEXT: sb a5, 9(a2)
+; RV32I-NEXT: srli a4, a4, 8
+; RV32I-NEXT: sb a4, 9(a2)
; RV32I-NEXT: srli a0, a3, 16
; RV32I-NEXT: sb a0, 2(a2)
; RV32I-NEXT: srli a0, a3, 24
; RV32I-NEXT: sb a0, 3(a2)
; RV32I-NEXT: srli a3, a3, 8
; RV32I-NEXT: sb a3, 1(a2)
-; RV32I-NEXT: srli a0, a4, 16
+; RV32I-NEXT: srli a0, a6, 16
; RV32I-NEXT: sb a0, 6(a2)
-; RV32I-NEXT: srli a0, a4, 24
+; RV32I-NEXT: srli a0, a6, 24
; RV32I-NEXT: sb a0, 7(a2)
-; RV32I-NEXT: srli a4, a4, 8
-; RV32I-NEXT: sb a4, 5(a2)
+; RV32I-NEXT: srli a0, a6, 8
+; RV32I-NEXT: sb a0, 5(a2)
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
%src = load i128, ptr %src.ptr, align 1
@@ -1309,105 +1309,105 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli a6, a6, 24
; RV64I-NEXT: or a4, a6, a5
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: lbu a4, 5(a0)
; RV64I-NEXT: lbu a5, 4(a0)
-; RV64I-NEXT: lbu a6, 6(a0)
+; RV64I-NEXT: lbu a6, 5(a0)
+; RV64I-NEXT: or a3, a4, a3
+; RV64I-NEXT: lbu a4, 6(a0)
; RV64I-NEXT: lbu a7, 7(a0)
-; RV64I-NEXT: slli a4, a4, 8
-; RV64I-NEXT: or a4, a4, a5
-; RV64I-NEXT: slli a6, a6, 16
+; RV64I-NEXT: slli a6, a6, 8
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: slli a4, a4, 16
; RV64I-NEXT: slli a7, a7, 24
-; RV64I-NEXT: or a5, a7, a6
-; RV64I-NEXT: or a4, a5, a4
+; RV64I-NEXT: or a4, a7, a4
+; RV64I-NEXT: or a4, a4, a5
; RV64I-NEXT: slli a4, a4, 32
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: lbu a4, 9(a0)
; RV64I-NEXT: lbu a5, 8(a0)
-; RV64I-NEXT: lbu a6, 10(a0)
+; RV64I-NEXT: lbu a6, 9(a0)
+; RV64I-NEXT: or a3, a4, a3
+; RV64I-NEXT: lbu a4, 10(a0)
; RV64I-NEXT: lbu a7, 11(a0)
-; RV64I-NEXT: slli a4, a4, 8
-; RV64I-NEXT: or a4, a4, a5
-; RV64I-NEXT: slli a6, a6, 16
+; RV64I-NEXT: slli a6, a6, 8
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: slli a4, a4, 16
; RV64I-NEXT: slli a7, a7, 24
-; RV64I-NEXT: or a5, a7, a6
-; RV64I-NEXT: or a4, a5, a4
-; RV64I-NEXT: lbu a5, 13(a0)
+; RV64I-NEXT: or a4, a7, a4
; RV64I-NEXT: lbu a6, 12(a0)
-; RV64I-NEXT: lbu a7, 14(a0)
+; RV64I-NEXT: lbu a7, 13(a0)
+; RV64I-NEXT: or a4, a4, a5
+; RV64I-NEXT: lbu a5, 14(a0)
; RV64I-NEXT: lbu t0, 15(a0)
-; RV64I-NEXT: slli a5, a5, 8
-; RV64I-NEXT: or a5, a5, a6
-; RV64I-NEXT: slli a7, a7, 16
+; RV64I-NEXT: slli a7, a7, 8
+; RV64I-NEXT: or a6, a7, a6
+; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli t0, t0, 24
-; RV64I-NEXT: or a6, t0, a7
-; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: or a5, t0, a5
+; RV64I-NEXT: or a5, a5, a6
; RV64I-NEXT: slli a5, a5, 32
-; RV64I-NEXT: or a4, a5, a4
-; RV64I-NEXT: lbu a5, 17(a0)
; RV64I-NEXT: lbu a6, 16(a0)
-; RV64I-NEXT: lbu a7, 18(a0)
+; RV64I-NEXT: lbu a7, 17(a0)
+; RV64I-NEXT: or a4, a5, a4
+; RV64I-NEXT: lbu a5, 18(a0)
; RV64I-NEXT: lbu t0, 19(a0)
-; RV64I-NEXT: slli a5, a5, 8
-; RV64I-NEXT: or a5, a5, a6
-; RV64I-NEXT: slli a7, a7, 16
+; RV64I-NEXT: slli a7, a7, 8
+; RV64I-NEXT: or a6, a7, a6
+; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli t0, t0, 24
-; RV64I-NEXT: or a6, t0, a7
-; RV64I-NEXT: or a5, a6, a5
-; RV64I-NEXT: lbu a6, 21(a0)
+; RV64I-NEXT: or a5, t0, a5
; RV64I-NEXT: lbu a7, 20(a0)
-; RV64I-NEXT: lbu t0, 22(a0)
+; RV64I-NEXT: lbu t0, 21(a0)
+; RV64I-NEXT: or a5, a5, a6
+; RV64I-NEXT: lbu a6, 22(a0)
; RV64I-NEXT: lbu t1, 23(a0)
-; RV64I-NEXT: slli a6, a6, 8
-; RV64I-NEXT: or a6, a6, a7
-; RV64I-NEXT: slli t0, t0, 16
+; RV64I-NEXT: slli t0, t0, 8
+; RV64I-NEXT: or a7, t0, a7
+; RV64I-NEXT: slli a6, a6, 16
; RV64I-NEXT: slli t1, t1, 24
-; RV64I-NEXT: or a7, t1, t0
-; RV64I-NEXT: or a6, a7, a6
+; RV64I-NEXT: or a6, t1, a6
+; RV64I-NEXT: or a6, a6, a7
; RV64I-NEXT: slli a6, a6, 32
-; RV64I-NEXT: or a5, a6, a5
-; RV64I-NEXT: lbu a6, 25(a0)
; RV64I-NEXT: lbu a7, 24(a0)
-; RV64I-NEXT: lbu t0, 26(a0)
+; RV64I-NEXT: lbu t0, 25(a0)
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: lbu a6, 26(a0)
; RV64I-NEXT: lbu t1, 27(a0)
-; RV64I-NEXT: slli a6, a6, 8
-; RV64I-NEXT: or a6, a6, a7
-; RV64I-NEXT: slli t0, t0, 16
+; RV64I-NEXT: slli t0, t0, 8
+; RV64I-NEXT: or a7, t0, a7
+; RV64I-NEXT: slli a6, a6, 16
; RV64I-NEXT: slli t1, t1, 24
-; RV64I-NEXT: or a7, t1, t0
-; RV64I-NEXT: or a6, a7, a6
-; RV64I-NEXT: lbu a7, 29(a0)
+; RV64I-NEXT: or a6, t1, a6
; RV64I-NEXT: lbu t0, 28(a0)
-; RV64I-NEXT: lbu t1, 30(a0)
+; RV64I-NEXT: lbu t1, 29(a0)
+; RV64I-NEXT: or a6, a6, a7
+; RV64I-NEXT: lbu a7, 30(a0)
; RV64I-NEXT: lbu a0, 31(a0)
-; RV64I-NEXT: slli a7, a7, 8
-; RV64I-NEXT: or a7, a7, t0
-; RV64I-NEXT: slli t1, t1, 16
+; RV64I-NEXT: slli t1, t1, 8
+; RV64I-NEXT: or t0, t1, t0
+; RV64I-NEXT: slli a7, a7, 16
; RV64I-NEXT: slli a0, a0, 24
-; RV64I-NEXT: or a0, a0, t1
; RV64I-NEXT: or a0, a0, a7
+; RV64I-NEXT: or a0, a0, t0
; RV64I-NEXT: slli a0, a0, 32
-; RV64I-NEXT: or a0, a0, a6
-; RV64I-NEXT: lbu a6, 1(a1)
; RV64I-NEXT: lbu a7, 0(a1)
-; RV64I-NEXT: lbu t0, 2(a1)
+; RV64I-NEXT: lbu t0, 1(a1)
+; RV64I-NEXT: or a0, a0, a6
+; RV64I-NEXT: lbu a6, 2(a1)
; RV64I-NEXT: lbu t1, 3(a1)
-; RV64I-NEXT: slli a6, a6, 8
-; RV64I-NEXT: or a6, a6, a7
-; RV64I-NEXT: slli t0, t0, 16
+; RV64I-NEXT: slli t0, t0, 8
+; RV64I-NEXT: or a7, t0, a7
+; RV64I-NEXT: slli a6, a6, 16
; RV64I-NEXT: slli t1, t1, 24
-; RV64I-NEXT: or a7, t1, t0
-; RV64I-NEXT: or a6, a7, a6
-; RV64I-NEXT: lbu a7, 5(a1)
+; RV64I-NEXT: or a6, t1, a6
; RV64I-NEXT: lbu t0, 4(a1)
-; RV64I-NEXT: lbu t1, 6(a1)
+; RV64I-NEXT: lbu t1, 5(a1)
+; RV64I-NEXT: or a6, a6, a7
+; RV64I-NEXT: lbu a7, 6(a1)
; RV64I-NEXT: lbu a1, 7(a1)
-; RV64I-NEXT: slli a7, a7, 8
-; RV64I-NEXT: or a7, a7, t0
-; RV64I-NEXT: slli t1, t1, 16
+; RV64I-NEXT: slli t1, t1, 8
+; RV64I-NEXT: or t0, t1, t0
+; RV64I-NEXT: slli a7, a7, 16
; RV64I-NEXT: slli a1, a1, 24
-; RV64I-NEXT: or a1, a1, t1
; RV64I-NEXT: or a1, a1, a7
+; RV64I-NEXT: or a1, a1, t0
; RV64I-NEXT: slli a1, a1, 32
; RV64I-NEXT: or a1, a1, a6
; RV64I-NEXT: sd zero, 56(sp)
@@ -1421,72 +1421,72 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: srli a0, a1, 3
; RV64I-NEXT: andi a0, a0, 24
; RV64I-NEXT: mv a3, sp
-; RV64I-NEXT: add a3, a3, a0
-; RV64I-NEXT: ld a4, 8(a3)
-; RV64I-NEXT: srl a0, a4, a1
-; RV64I-NEXT: ld a5, 16(a3)
-; RV64I-NEXT: andi a6, a1, 63
-; RV64I-NEXT: xori a6, a6, 63
-; RV64I-NEXT: ld a7, 0(a3)
+; RV64I-NEXT: add a0, a3, a0
+; RV64I-NEXT: ld a3, 8(a0)
+; RV64I-NEXT: ld a4, 0(a0)
+; RV64I-NEXT: ld a5, 16(a0)
+; RV64I-NEXT: ld a6, 24(a0)
+; RV64I-NEXT: srl a0, a3, a1
+; RV64I-NEXT: andi a7, a1, 63
+; RV64I-NEXT: xori a7, a7, 63
; RV64I-NEXT: slli t0, a5, 1
-; RV64I-NEXT: sll t0, t0, a6
+; RV64I-NEXT: sll t0, t0, a7
; RV64I-NEXT: or a0, a0, t0
-; RV64I-NEXT: srl a7, a7, a1
-; RV64I-NEXT: slli a4, a4, 1
-; RV64I-NEXT: ld a3, 24(a3)
-; RV64I-NEXT: sll a4, a4, a6
-; RV64I-NEXT: or a4, a7, a4
-; RV64I-NEXT: srl a5, a5, a1
-; RV64I-NEXT: slli a7, a3, 1
-; RV64I-NEXT: sll a6, a7, a6
-; RV64I-NEXT: or a5, a5, a6
-; RV64I-NEXT: srl a1, a3, a1
+; RV64I-NEXT: srl a4, a4, a1
+; RV64I-NEXT: slli a3, a3, 1
+; RV64I-NEXT: sll a3, a3, a7
+; RV64I-NEXT: or a3, a4, a3
+; RV64I-NEXT: srl a4, a5, a1
+; RV64I-NEXT: slli a5, a6, 1
+; RV64I-NEXT: sll a5, a5, a7
+; RV64I-NEXT: or a4, a4, a5
+; RV64I-NEXT: srl a1, a6, a1
; RV64I-NEXT: sb a1, 24(a2)
-; RV64I-NEXT: srli a3, a1, 56
-; RV64I-NEXT: sb a3, 31(a2)
-; RV64I-NEXT: srli a3, a1, 48
-; RV64I-NEXT: sb a3, 30(a2)
-; RV64I-NEXT: srli a3, a1, 40
-; RV64I-NEXT: sb a3, 29(a2)
-; RV64I-NEXT: srli a3, a1, 32
-; RV64I-NEXT: sb a3, 28(a2)
-; RV64I-NEXT: srli a3, a1, 24
-; RV64I-NEXT: sb a3, 27(a2)
-; RV64I-NEXT: srli a3, a1, 16
-; RV64I-NEXT: sb a3, 26(a2)
+; RV64I-NEXT: srli a5, a1, 56
+; RV64I-NEXT: sb a5, 31(a2)
+; RV64I-NEXT: srli a5, a1, 48
+; RV64I-NEXT: sb a5, 30(a2)
+; RV64I-NEXT: srli a5, a1, 40
+; RV64I-NEXT: sb a5, 29(a2)
+; RV64I-NEXT: srli a5, a1, 32
+; RV64I-NEXT: sb a5, 28(a2)
+; RV64I-NEXT: srli a5, a1, 24
+; RV64I-NEXT: sb a5, 27(a2)
+; RV64I-NEXT: srli a5, a1, 16
+; RV64I-NEXT: sb a5, 26(a2)
; RV64I-NEXT: srli a1, a1, 8
; RV64I-NEXT: sb a1, 25(a2)
-; RV64I-NEXT: sb a5, 16(a2)
-; RV64I-NEXT: sb a4, 0(a2)
+; RV64I-NEXT: sb a4, 16(a2)
+; RV64I-NEXT: sb a3, 0(a2)
; RV64I-NEXT: sb a0, 8(a2)
-; RV64I-NEXT: srli a1, a5, 56
+; RV64I-NEXT: srli a1, a4, 56
; RV64I-NEXT: sb a1, 23(a2)
-; RV64I-NEXT: srli a1, a5, 48
+; RV64I-NEXT: srli a1, a4, 48
; RV64I-NEXT: sb a1, 22(a2)
-; RV64I-NEXT: srli a1, a5, 40
+; RV64I-NEXT: srli a1, a4, 40
; RV64I-NEXT: sb a1, 21(a2)
-; RV64I-NEXT: srli a1, a5, 32
+; RV64I-NEXT: srli a1, a4, 32
; RV64I-NEXT: sb a1, 20(a2)
-; RV64I-NEXT: srli a1, a5, 24
+; RV64I-NEXT: srli a1, a4, 24
; RV64I-NEXT: sb a1, 19(a2)
-; RV64I-NEXT: srli a1, a5, 16
+; RV64I-NEXT: srli a1, a4, 16
; RV64I-NEXT: sb a1, 18(a2)
-; RV64I-NEXT: srli a5, a5, 8
-; RV64I-NEXT: sb a5, 17(a2)
-; RV64I-NEXT: srli a1, a4, 56
+; RV64I-NEXT: srli a4, a4, 8
+; RV64I-NEXT: sb a4, 17(a2)
+; RV64I-NEXT: srli a1, a3, 56
; RV64I-NEXT: sb a1, 7(a2)
-; RV64I-NEXT: srli a1, a4, 48
+; RV64I-NEXT: srli a1, a3, 48
; RV64I-NEXT: sb a1, 6(a2)
-; RV64I-NEXT: srli a1, a4, 40
+; RV64I-NEXT: srli a1, a3, 40
; RV64I-NEXT: sb a1, 5(a2)
-; RV64I-NEXT: srli a1, a4, 32
+; RV64I-NEXT: srli a1, a3, 32
; RV64I-NEXT: sb a1, 4(a2)
-; RV64I-NEXT: srli a1, a4, 24
+; RV64I-NEXT: srli a1, a3, 24
; RV64I-NEXT: sb a1, 3(a2)
-; RV64I-NEXT: srli a1, a4, 16
+; RV64I-NEXT: srli a1, a3, 16
; RV64I-NEXT: sb a1, 2(a2)
-; RV64I-NEXT: srli a4, a4, 8
-; RV64I-NEXT: sb a4, 1(a2)
+; RV64I-NEXT: srli a3, a3, 8
+; RV64I-NEXT: sb a3, 1(a2)
; RV64I-NEXT: srli a1, a0, 56
; RV64I-NEXT: sb a1, 15(a2)
; RV64I-NEXT: srli a1, a0, 48
@@ -1516,87 +1516,87 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli a6, a6, 24
; RV32I-NEXT: or a4, a6, a5
-; RV32I-NEXT: or a3, a4, a3
-; RV32I-NEXT: lbu a4, 5(a0)
; RV32I-NEXT: lbu a5, 4(a0)
-; RV32I-NEXT: lbu a6, 6(a0)
+; RV32I-NEXT: lbu a6, 5(a0)
+; RV32I-NEXT: or a3, a4, a3
+; RV32I-NEXT: lbu a4, 6(a0)
; RV32I-NEXT: lbu a7, 7(a0)
-; RV32I-NEXT: slli a4, a4, 8
-; RV32I-NEXT: or a4, a4, a5
-; RV32I-NEXT: slli a6, a6, 16
+; RV32I-NEXT: slli a6, a6, 8
+; RV32I-NEXT: or a5, a6, a5
+; RV32I-NEXT: slli a4, a4, 16
; RV32I-NEXT: slli a7, a7, 24
-; RV32I-NEXT: or a5, a7, a6
-; RV32I-NEXT: or a4, a5, a4
-; RV32I-NEXT: lbu a5, 9(a0)
+; RV32I-NEXT: or a4, a7, a4
; RV32I-NEXT: lbu a6, 8(a0)
-; RV32I-NEXT: lbu a7, 10(a0)
+; RV32I-NEXT: lbu a7, 9(a0)
+; RV32I-NEXT: or a4, a4, a5
+; RV32I-NEXT: lbu a5, 10(a0)
; RV32I-NEXT: lbu t0, 11(a0)
-; RV32I-NEXT: slli a5, a5, 8
-; RV32I-NEXT: or a5, a5, a6
-; RV32I-NEXT: slli a7, a7, 16
+; RV32I-NEXT: slli a7, a7, 8
+; RV32I-NEXT: or a6, a7, a6
+; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli t0, t0, 24
-; RV32I-NEXT: or a6, t0, a7
-; RV32I-NEXT: or a5, a6, a5
-; RV32I-NEXT: lbu a6, 13(a0)
+; RV32I-NEXT: or a5, t0, a5
; RV32I-NEXT: lbu a7, 12(a0)
-; RV32I-NEXT: lbu t0, 14(a0)
+; RV32I-NEXT: lbu t0, 13(a0)
+; RV32I-NEXT: or a5, a5, a6
+; RV32I-NEXT: lbu a6, 14(a0)
; RV32I-NEXT: lbu t1, 15(a0)
-; RV32I-NEXT: slli a6, a6, 8
-; RV32I-NEXT: or a6, a6, a7
-; RV32I-NEXT: slli t0, t0, 16
+; RV32I-NEXT: slli t0, t0, 8
+; RV32I-NEXT: or a7, t0, a7
+; RV32I-NEXT: slli a6, a6, 16
; RV32I-NEXT: slli t1, t1, 24
-; RV32I-NEXT: or a7, t1, t0
-; RV32I-NEXT: or a6, a7, a6
-; RV32I-NEXT: lbu a7, 17(a0)
+; RV32I-NEXT: or a6, t1, a6
; RV32I-NEXT: lbu t0, 16(a0)
-; RV32I-NEXT: lbu t1, 18(a0)
+; RV32I-NEXT: lbu t1, 17(a0)
+; RV32I-NEXT: or a7, a6, a7
+; RV32I-NEXT: lbu a6, 18(a0)
; RV32I-NEXT: lbu t2, 19(a0)
-; RV32I-NEXT: slli a7, a7, 8
-; RV32I-NEXT: or a7, a7, t0
-; RV32I-NEXT: slli t1, t1, 16
+; RV32I-NEXT: slli t1, t1, 8
+; RV32I-NEXT: or t0, t1, t0
+; RV32I-NEXT: slli a6, a6, 16
; RV32I-NEXT: slli t2, t2, 24
-; RV32I-NEXT: or t0, t2, t1
-; RV32I-NEXT: or t0, t0, a7
-; RV32I-NEXT: lbu a7, 21(a0)
+; RV32I-NEXT: or a6, t2, a6
; RV32I-NEXT: lbu t1, 20(a0)
-; RV32I-NEXT: lbu t2, 22(a0)
+; RV32I-NEXT: lbu t2, 21(a0)
+; RV32I-NEXT: or t0, a6, t0
+; RV32I-NEXT: lbu a6, 22(a0)
; RV32I-NEXT: lbu t3, 23(a0)
-; RV32I-NEXT: slli a7, a7, 8
-; RV32I-NEXT: or a7, a7, t1
-; RV32I-NEXT: slli t2, t2, 16
+; RV32I-NEXT: slli t2, t2, 8
+; RV32I-NEXT: or t1, t2, t1
+; RV32I-NEXT: slli a6, a6, 16
; RV32I-NEXT: slli t3, t3, 24
-; RV32I-NEXT: or t1, t3, t2
-; RV32I-NEXT: or t1, t1, a7
-; RV32I-NEXT: lbu a7, 25(a0)
+; RV32I-NEXT: or a6, t3, a6
; RV32I-NEXT: lbu t2, 24(a0)
-; RV32I-NEXT: lbu t3, 26(a0)
+; RV32I-NEXT: lbu t3, 25(a0)
+; RV32I-NEXT: or t1, a6, t1
+; RV32I-NEXT: lbu a6, 26(a0)
; RV32I-NEXT: lbu t4, 27(a0)
-; RV32I-NEXT: slli a7, a7, 8
-; RV32I-NEXT: or a7, a7, t2
-; RV32I-NEXT: slli t3, t3, 16
+; RV32I-NEXT: slli t3, t3, 8
+; RV32I-NEXT: or t2, t3, t2
+; RV32I-NEXT: slli a6, a6, 16
; RV32I-NEXT: slli t4, t4, 24
-; RV32I-NEXT: or t2, t4, t3
-; RV32I-NEXT: or t2, t2, a7
-; RV32I-NEXT: lbu a7, 29(a0)
+; RV32I-NEXT: or a6, t4, a6
; RV32I-NEXT: lbu t3, 28(a0)
-; RV32I-NEXT: lbu t4, 30(a0)
+; RV32I-NEXT: lbu t4, 29(a0)
+; RV32I-NEXT: or t2, a6, t2
+; RV32I-NEXT: lbu a6, 30(a0)
; RV32I-NEXT: lbu a0, 31(a0)
-; RV32I-NEXT: slli a7, a7, 8
-; RV32I-NEXT: or a7, a7, t3
-; RV32I-NEXT: slli t4, t4, 16
+; RV32I-NEXT: slli t4, t4, 8
+; RV32I-NEXT: or t3, t4, t3
+; RV32I-NEXT: slli a6, a6, 16
; RV32I-NEXT: slli a0, a0, 24
-; RV32I-NEXT: or a0, a0, t4
-; RV32I-NEXT: or a0, a0, a7
-; RV32I-NEXT: lbu a7, 1(a1)
-; RV32I-NEXT: lbu t3, 0(a1)
-; RV32I-NEXT: lbu t4, 2(a1)
+; RV32I-NEXT: or a0, a0, a6
+; RV32I-NEXT: lbu a6, 0(a1)
+; RV32I-NEXT: lbu t4, 1(a1)
+; RV32I-NEXT: or a0, a0, t3
+; RV32I-NEXT: lbu t3, 2(a1)
; RV32I-NEXT: lbu a1, 3(a1)
-; RV32I-NEXT: slli a7, a7, 8
-; RV32I-NEXT: or a7, a7, t3
-; RV32I-NEXT: slli t4, t4, 16
+; RV32I-NEXT: slli t4, t4, 8
+; RV32I-NEXT: or a6, t4, a6
+; RV32I-NEXT: slli t3, t3, 16
; RV32I-NEXT: slli a1, a1, 24
-; RV32I-NEXT: or a1, a1, t4
-; RV32I-NEXT: or a7, a1, a7
+; RV32I-NEXT: or a1, a1, t3
+; RV32I-NEXT: or a6, a1, a6
; RV32I-NEXT: sw zero, 60(sp)
; RV32I-NEXT: sw zero, 56(sp)
; RV32I-NEXT: sw zero, 52(sp)
@@ -1609,91 +1609,91 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: sw t2, 24(sp)
; RV32I-NEXT: sw t1, 20(sp)
; RV32I-NEXT: sw t0, 16(sp)
-; RV32I-NEXT: sw a6, 12(sp)
+; RV32I-NEXT: sw a7, 12(sp)
; RV32I-NEXT: sw a5, 8(sp)
; RV32I-NEXT: sw a4, 4(sp)
; RV32I-NEXT: sw a3, 0(sp)
-; RV32I-NEXT: srli a0, a7, 3
+; RV32I-NEXT: srli a0, a6, 3
; RV32I-NEXT: andi a0, a0, 28
; RV32I-NEXT: mv a1, sp
-; RV32I-NEXT: add a4, a1, a0
-; RV32I-NEXT: lw a1, 4(a4)
-; RV32I-NEXT: srl a0, a1, a7
-; RV32I-NEXT: lw a5, 8(a4)
-; RV32I-NEXT: andi a3, a7, 31
-; RV32I-NEXT: xori a6, a3, 31
-; RV32I-NEXT: lw a3, 0(a4)
-; RV32I-NEXT: slli t0, a5, 1
-; RV32I-NEXT: sll t0, t0, a6
-; RV32I-NEXT: or a0, a0, t0
-; RV32I-NEXT: srl a3, a3, a7
+; RV32I-NEXT: add a3, a1, a0
+; RV32I-NEXT: lw a1, 4(a3)
+; RV32I-NEXT: lw a4, 0(a3)
+; RV32I-NEXT: lw a5, 8(a3)
+; RV32I-NEXT: lw a7, 12(a3)
+; RV32I-NEXT: srl a0, a1, a6
+; RV32I-NEXT: andi t0, a6, 31
+; RV32I-NEXT: xori t0, t0, 31
+; RV32I-NEXT: slli t1, a5, 1
+; RV32I-NEXT: sll t1, t1, t0
+; RV32I-NEXT: or a0, a0, t1
+; RV32I-NEXT: srl a4, a4, a6
; RV32I-NEXT: slli a1, a1, 1
-; RV32I-NEXT: lw t0, 12(a4)
-; RV32I-NEXT: lw t1, 16(a4)
-; RV32I-NEXT: sll a1, a1, a6
-; RV32I-NEXT: or a1, a3, a1
-; RV32I-NEXT: srl a3, t0, a7
-; RV32I-NEXT: slli t2, t1, 1
-; RV32I-NEXT: sll t2, t2, a6
-; RV32I-NEXT: or a3, a3, t2
-; RV32I-NEXT: srl a5, a5, a7
-; RV32I-NEXT: slli t0, t0, 1
-; RV32I-NEXT: lw t2, 20(a4)
-; RV32I-NEXT: lw t3, 24(a4)
-; RV32I-NEXT: sll t0, t0, a6
-; RV32I-NEXT: or a5, a5, t0
-; RV32I-NEXT: srl t0, t2, a7
-; RV32I-NEXT: slli t4, t3, 1
-; RV32I-NEXT: sll t4, t4, a6
-; RV32I-NEXT: or t0, t0, t4
-; RV32I-NEXT: srl t1, t1, a7
+; RV32I-NEXT: sll a1, a1, t0
+; RV32I-NEXT: or a1, a4, a1
+; RV32I-NEXT: srl a4, a7, a6
+; RV32I-NEXT: lw t1, 16(a3)
+; RV32I-NEXT: lw t2, 20(a3)
+; RV32I-NEXT: lw t3, 24(a3)
+; RV32I-NEXT: lw t4, 28(a3)
+; RV32I-NEXT: slli a3, t1, 1
+; RV32I-NEXT: sll a3, a3, t0
+; RV32I-NEXT: or a3, a4, a3
+; RV32I-NEXT: srl a4, a5, a6
+; RV32I-NEXT: slli a7, a7, 1
+; RV32I-NEXT: sll a5, a7, t0
+; RV32I-NEXT: or a4, a4, a5
+; RV32I-NEXT: srl a5, t2, a6
+; RV32I-NEXT: slli a7, t3, 1
+; RV32I-NEXT: sll a7, a7, t0
+; RV32I-NEXT: or a5, a5, a7
+; RV32I-NEXT: srl a7, t1, a6
; RV32I-NEXT: slli t2, t2, 1
-; RV32I-NEXT: lw a4, 28(a4)
-; RV32I-NEXT: sll t2, t2, a6
-; RV32I-NEXT: or t1, t1, t2
-; RV32I-NEXT: srl t2, t3, a7
-; RV32I-NEXT: slli t3, a4, 1
-; RV32I-NEXT: sll a6, t3, a6
-; RV32I-NEXT: or a6, t2, a6
-; RV32I-NEXT: srl a4, a4, a7
-; RV32I-NEXT: sb a4, 28(a2)
-; RV32I-NEXT: srli a7, a4, 24
-; RV32I-NEXT: sb a7, 31(a2)
-; RV32I-NEXT: srli a7, a4, 16
-; RV32I-NEXT: sb a7, 30(a2)
-; RV32I-NEXT: srli a4, a4, 8
-; RV32I-NEXT: sb a4, 29(a2)
-; RV32I-NEXT: sb a6, 24(a2)
-; RV32I-NEXT: sb t1, 16(a2)
-; RV32I-NEXT: sb t0, 20(a2)
-; RV32I-NEXT: sb a5, 8(a2)
+; RV32I-NEXT: sll t1, t2, t0
+; RV32I-NEXT: or a7, a7, t1
+; RV32I-NEXT: srl t1, t3, a6
+; RV32I-NEXT: slli t2, t4, 1
+; RV32I-NEXT: sll t0, t2, t0
+; RV32I-NEXT: or t0, t1, t0
+; RV32I-NEXT: srl a6, t4, a6
+; RV32I-NEXT: sb a6, 28(a2)
+; RV32I-NEXT: srli t1, a6, 24
+; RV32I-NEXT: sb t1, 31(a2)
+; RV32I-NEXT: srli t1, a6, 16
+; RV32I-NEXT: sb t1, 30(a2)
+; RV32I-NEXT: srli a6, a6, 8
+; RV32I-NEXT: sb a6, 29(a2)
+; RV32I-NEXT: sb t0, 24(a2)
+; RV32I-NEXT: sb a7, 16(a2)
+; RV32I-NEXT: sb a5, 20(a2)
+; RV32I-NEXT: sb a4, 8(a2)
; RV32I-NEXT: sb a3, 12(a2)
; RV32I-NEXT: sb a1, 0(a2)
; RV32I-NEXT: sb a0, 4(a2)
-; RV32I-NEXT: srli a4, a6, 24
-; RV32I-NEXT: sb a4, 27(a2)
-; RV32I-NEXT: srli a4, a6, 16
-; RV32I-NEXT: sb a4, 26(a2)
-; RV32I-NEXT: srli a4, a6, 8
-; RV32I-NEXT: sb a4, 25(a2)
-; RV32I-NEXT: srli a4, t1, 24
-; RV32I-NEXT: sb a4, 19(a2)
-; RV32I-NEXT: srli a4, t1, 16
-; RV32I-NEXT: sb a4, 18(a2)
-; RV32I-NEXT: srli a4, t1, 8
-; RV32I-NEXT: sb a4, 17(a2)
-; RV32I-NEXT: srli a4, t0, 24
-; RV32I-NEXT: sb a4, 23(a2)
-; RV32I-NEXT: srli a4, t0, 16
-; RV32I-NEXT: sb a4, 22(a2)
-; RV32I-NEXT: srli a4, t0, 8
-; RV32I-NEXT: sb a4, 21(a2)
-; RV32I-NEXT: srli a4, a5, 24
-; RV32I-NEXT: sb a4, 11(a2)
-; RV32I-NEXT: srli a4, a5, 16
-; RV32I-NEXT: sb a4, 10(a2)
+; RV32I-NEXT: srli a6, t0, 24
+; RV32I-NEXT: sb a6, 27(a2)
+; RV32I-NEXT: srli a6, t0, 16
+; RV32I-NEXT: sb a6, 26(a2)
+; RV32I-NEXT: srli a6, t0, 8
+; RV32I-NEXT: sb a6, 25(a2)
+; RV32I-NEXT: srli a6, a7, 24
+; RV32I-NEXT: sb a6, 19(a2)
+; RV32I-NEXT: srli a6, a7, 16
+; RV32I-NEXT: sb a6, 18(a2)
+; RV32I-NEXT: srli a6, a7, 8
+; RV32I-NEXT: sb a6, 17(a2)
+; RV32I-NEXT: srli a6, a5, 24
+; RV32I-NEXT: sb a6, 23(a2)
+; RV32I-NEXT: srli a6, a5, 16
+; RV32I-NEXT: sb a6, 22(a2)
; RV32I-NEXT: srli a5, a5, 8
-; RV32I-NEXT: sb a5, 9(a2)
+; RV32I-NEXT: sb a5, 21(a2)
+; RV32I-NEXT: srli a5, a4, 24
+; RV32I-NEXT: sb a5, 11(a2)
+; RV32I-NEXT: srli a5, a4, 16
+; RV32I-NEXT: sb a5, 10(a2)
+; RV32I-NEXT: srli a4, a4, 8
+; RV32I-NEXT: sb a4, 9(a2)
; RV32I-NEXT: srli a4, a3, 24
; RV32I-NEXT: sb a4, 15(a2)
; RV32I-NEXT: srli a4, a3, 16
@@ -1733,105 +1733,105 @@ define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli a6, a6, 24
; RV64I-NEXT: or a4, a6, a5
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: lbu a4, 5(a0)
; RV64I-NEXT: lbu a5, 4(a0)
-; RV64I-NEXT: lbu a6, 6(a0)
+; RV64I-NEXT: lbu a6, 5(a0)
+; RV64I-NEXT: or a3, a4, a3
+; RV64I-NEXT: lbu a4, 6(a0)
; RV64I-NEXT: lbu a7, 7(a0)
-; RV64I-NEXT: slli a4, a4, 8
-; RV64I-NEXT: or a4, a4, a5
-; RV64I-NEXT: slli a6, a6, 16
+; RV64I-NEXT: slli a6, a6, 8
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: slli a4, a4, 16
; RV64I-NEXT: slli a7, a7, 24
-; RV64I-NEXT: or a5, a7, a6
-; RV64I-NEXT: or a4, a5, a4
+; RV64I-NEXT: or a4, a7, a4
+; RV64I-NEXT: or a4, a4, a5
; RV64I-NEXT: slli a4, a4, 32
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: lbu a4, 9(a0)
; RV64I-NEXT: lbu a5, 8(a0)
-; RV64I-NEXT: lbu a6, 10(a0)
+; RV64I-NEXT: lbu a6, 9(a0)
+; RV64I-NEXT: or a3, a4, a3
+; RV64I-NEXT: lbu a4, 10(a0)
; RV64I-NEXT: lbu a7, 11(a0)
-; RV64I-NEXT: slli a4, a4, 8
-; RV64I-NEXT: or a4, a4, a5
-; RV64I-NEXT: slli a6, a6, 16
+; RV64I-NEXT: slli a6, a6, 8
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: slli a4, a4, 16
; RV64I-NEXT: slli a7, a7, 24
-; RV64I-NEXT: or a5, a7, a6
-; RV64I-NEXT: or a4, a5, a4
-; RV64I-NEXT: lbu a5, 13(a0)
+; RV64I-NEXT: or a4, a7, a4
; RV64I-NEXT: lbu a6, 12(a0)
-; RV64I-NEXT: lbu a7, 14(a0)
+; RV64I-NEXT: lbu a7, 13(a0)
+; RV64I-NEXT: or a4, a4, a5
+; RV64I-NEXT: lbu a5, 14(a0)
; RV64I-NEXT: lbu t0, 15(a0)
-; RV64I-NEXT: slli a5, a5, 8
-; RV64I-NEXT: or a5, a5, a6
-; RV64I-NEXT: slli a7, a7, 16
+; RV64I-NEXT: slli a7, a7, 8
+; RV64I-NEXT: or a6, a7, a6
+; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli t0, t0, 24
-; RV64I-NEXT: or a6, t0, a7
-; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: or a5, t0, a5
+; RV64I-NEXT: or a5, a5, a6
; RV64I-NEXT: slli a5, a5, 32
-; RV64I-NEXT: or a4, a5, a4
-; RV64I-NEXT: lbu a5, 17(a0)
; RV64I-NEXT: lbu a6, 16(a0)
-; RV64I-NEXT: lbu a7, 18(a0)
+; RV64I-NEXT: lbu a7, 17(a0)
+; RV64I-NEXT: or a4, a5, a4
+; RV64I-NEXT: lbu a5, 18(a0)
; RV64I-NEXT: lbu t0, 19(a0)
-; RV64I-NEXT: slli a5, a5, 8
-; RV64I-NEXT: or a5, a5, a6
-; RV64I-NEXT: slli a7, a7, 16
+; RV64I-NEXT: slli a7, a7, 8
+; RV64I-NEXT: or a6, a7, a6
+; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli t0, t0, 24
-; RV64I-NEXT: or a6, t0, a7
-; RV64I-NEXT: or a5, a6, a5
-; RV64I-NEXT: lbu a6, 21(a0)
+; RV64I-NEXT: or a5, t0, a5
; RV64I-NEXT: lbu a7, 20(a0)
-; RV64I-NEXT: lbu t0, 22(a0)
+; RV64I-NEXT: lbu t0, 21(a0)
+; RV64I-NEXT: or a5, a5, a6
+; RV64I-NEXT: lbu a6, 22(a0)
; RV64I-NEXT: lbu t1, 23(a0)
-; RV64I-NEXT: slli a6, a6, 8
-; RV64I-NEXT: or a6, a6, a7
-; RV64I-NEXT: slli t0, t0, 16
+; RV64I-NEXT: slli t0, t0, 8
+; RV64I-NEXT: or a7, t0, a7
+; RV64I-NEXT: slli a6, a6, 16
; RV64I-NEXT: slli t1, t1, 24
-; RV64I-NEXT: or a7, t1, t0
-; RV64I-NEXT: or a6, a7, a6
+; RV64I-NEXT: or a6, t1, a6
+; RV64I-NEXT: or a6, a6, a7
; RV64I-NEXT: slli a6, a6, 32
-; RV64I-NEXT: or a5, a6, a5
-; RV64I-NEXT: lbu a6, 25(a0)
; RV64I-NEXT: lbu a7, 24(a0)
-; RV64I-NEXT: lbu t0, 26(a0)
+; RV64I-NEXT: lbu t0, 25(a0)
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: lbu a6, 26(a0)
; RV64I-NEXT: lbu t1, 27(a0)
-; RV64I-NEXT: slli a6, a6, 8
-; RV64I-NEXT: or a6, a6, a7
-; RV64I-NEXT: slli t0, t0, 16
+; RV64I-NEXT: slli t0, t0, 8
+; RV64I-NEXT: or a7, t0, a7
+; RV64I-NEXT: slli a6, a6, 16
; RV64I-NEXT: slli t1, t1, 24
-; RV64I-NEXT: or a7, t1, t0
-; RV64I-NEXT: or a6, a7, a6
-; RV64I-NEXT: lbu a7, 29(a0)
+; RV64I-NEXT: or a6, t1, a6
; RV64I-NEXT: lbu t0, 28(a0)
-; RV64I-NEXT: lbu t1, 30(a0)
+; RV64I-NEXT: lbu t1, 29(a0)
+; RV64I-NEXT: or a6, a6, a7
+; RV64I-NEXT: lbu a7, 30(a0)
; RV64I-NEXT: lbu a0, 31(a0)
-; RV64I-NEXT: slli a7, a7, 8
-; RV64I-NEXT: or a7, a7, t0
-; RV64I-NEXT: slli t1, t1, 16
+; RV64I-NEXT: slli t1, t1, 8
+; RV64I-NEXT: or t0, t1, t0
+; RV64I-NEXT: slli a7, a7, 16
; RV64I-NEXT: slli a0, a0, 24
-; RV64I-NEXT: or a0, a0, t1
; RV64I-NEXT: or a0, a0, a7
+; RV64I-NEXT: or a0, a0, t0
; RV64I-NEXT: slli a0, a0, 32
-; RV64I-NEXT: or a0, a0, a6
-; RV64I-NEXT: lbu a6, 1(a1)
; RV64I-NEXT: lbu a7, 0(a1)
-; RV64I-NEXT: lbu t0, 2(a1)
+; RV64I-NEXT: lbu t0, 1(a1)
+; RV64I-NEXT: or a0, a0, a6
+; RV64I-NEXT: lbu a6, 2(a1)
; RV64I-NEXT: lbu t1, 3(a1)
-; RV64I-NEXT: slli a6, a6, 8
-; RV64I-NEXT: or a6, a6, a7
-; RV64I-NEXT: slli t0, t0, 16
+; RV64I-NEXT: slli t0, t0, 8
+; RV64I-NEXT: or a7, t0, a7
+; RV64I-NEXT: slli a6, a6, 16
; RV64I-NEXT: slli t1, t1, 24
-; RV64I-NEXT: or a7, t1, t0
-; RV64I-NEXT: or a6, a7, a6
-; RV64I-NEXT: lbu a7, 5(a1)
+; RV64I-NEXT: or a6, t1, a6
; RV64I-NEXT: lbu t0, 4(a1)
-; RV64I-NEXT: lbu t1, 6(a1)
+; RV64I-NEXT: lbu t1, 5(a1)
+; RV64I-NEXT: or a6, a6, a7
+; RV64I-NEXT: lbu a7, 6(a1)
; RV64I-NEXT: lbu a1, 7(a1)
-; RV64I-NEXT: slli a7, a7, 8
-; RV64I-NEXT: or a7, a7, t0
-; RV64I-NEXT: slli t1, t1, 16
+; RV64I-NEXT: slli t1, t1, 8
+; RV64I-NEXT: or t0, t1, t0
+; RV64I-NEXT: slli a7, a7, 16
; RV64I-NEXT: slli a1, a1, 24
-; RV64I-NEXT: or a1, a1, t1
; RV64I-NEXT: or a1, a1, a7
+; RV64I-NEXT: or a1, a1, t0
; RV64I-NEXT: slli a1, a1, 32
; RV64I-NEXT: or a1, a1, a6
; RV64I-NEXT: sd zero, 24(sp)
@@ -1848,69 +1848,69 @@ define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: sub a3, a3, a0
; RV64I-NEXT: ld a4, 8(a3)
; RV64I-NEXT: ld a5, 0(a3)
+; RV64I-NEXT: ld a6, 16(a3)
+; RV64I-NEXT: ld a3, 24(a3)
; RV64I-NEXT: sll a0, a4, a1
-; RV64I-NEXT: andi a6, a1, 63
-; RV64I-NEXT: xori a6, a6, 63
-; RV64I-NEXT: srli a7, a5, 1
-; RV64I-NEXT: ld t0, 24(a3)
-; RV64I-NEXT: ld a3, 16(a3)
-; RV64I-NEXT: srl a7, a7, a6
-; RV64I-NEXT: or a0, a0, a7
-; RV64I-NEXT: sll a7, t0, a1
-; RV64I-NEXT: srli t0, a3, 1
-; RV64I-NEXT: srl t0, t0, a6
-; RV64I-NEXT: or a7, a7, t0
+; RV64I-NEXT: andi a7, a1, 63
+; RV64I-NEXT: xori a7, a7, 63
+; RV64I-NEXT: srli t0, a5, 1
+; RV64I-NEXT: srl t0, t0, a7
+; RV64I-NEXT: or a0, a0, t0
; RV64I-NEXT: sll a3, a3, a1
+; RV64I-NEXT: srli t0, a6, 1
+; RV64I-NEXT: srl t0, t0, a7
+; RV64I-NEXT: or a3, a3, t0
+; RV64I-NEXT: sll a6, a6, a1
; RV64I-NEXT: srli a4, a4, 1
-; RV64I-NEXT: srl a4, a4, a6
-; RV64I-NEXT: or a3, a3, a4
+; RV64I-NEXT: srl a4, a4, a7
+; RV64I-NEXT: or a4, a6, a4
; RV64I-NEXT: sll a1, a5, a1
; RV64I-NEXT: sb a1, 0(a2)
-; RV64I-NEXT: srli a4, a1, 56
-; RV64I-NEXT: sb a4, 7(a2)
-; RV64I-NEXT: srli a4, a1, 48
-; RV64I-NEXT: sb a4, 6(a2)
-; RV64I-NEXT: srli a4, a1, 40
-; RV64I-NEXT: sb a4, 5(a2)
-; RV64I-NEXT: srli a4, a1, 32
-; RV64I-NEXT: sb a4, 4(a2)
-; RV64I-NEXT: srli a4, a1, 24
-; RV64I-NEXT: sb a4, 3(a2)
-; RV64I-NEXT: srli a4, a1, 16
-; RV64I-NEXT: sb a4, 2(a2)
+; RV64I-NEXT: srli a5, a1, 56
+; RV64I-NEXT: sb a5, 7(a2)
+; RV64I-NEXT: srli a5, a1, 48
+; RV64I-NEXT: sb a5, 6(a2)
+; RV64I-NEXT: srli a5, a1, 40
+; RV64I-NEXT: sb a5, 5(a2)
+; RV64I-NEXT: srli a5, a1, 32
+; RV64I-NEXT: sb a5, 4(a2)
+; RV64I-NEXT: srli a5, a1, 24
+; RV64I-NEXT: sb a5, 3(a2)
+; RV64I-NEXT: srli a5, a1, 16
+; RV64I-NEXT: sb a5, 2(a2)
; RV64I-NEXT: srli a1, a1, 8
; RV64I-NEXT: sb a1, 1(a2)
-; RV64I-NEXT: sb a3, 16(a2)
-; RV64I-NEXT: sb a7, 24(a2)
+; RV64I-NEXT: sb a4, 16(a2)
+; RV64I-NEXT: sb a3, 24(a2)
; RV64I-NEXT: sb a0, 8(a2)
-; RV64I-NEXT: srli a1, a3, 56
+; RV64I-NEXT: srli a1, a4, 56
; RV64I-NEXT: sb a1, 23(a2)
-; RV64I-NEXT: srli a1, a3, 48
+; RV64I-NEXT: srli a1, a4, 48
; RV64I-NEXT: sb a1, 22(a2)
-; RV64I-NEXT: srli a1, a3, 40
+; RV64I-NEXT: srli a1, a4, 40
; RV64I-NEXT: sb a1, 21(a2)
-; RV64I-NEXT: srli a1, a3, 32
+; RV64I-NEXT: srli a1, a4, 32
; RV64I-NEXT: sb a1, 20(a2)
-; RV64I-NEXT: srli a1, a3, 24
+; RV64I-NEXT: srli a1, a4, 24
; RV64I-NEXT: sb a1, 19(a2)
-; RV64I-NEXT: srli a1, a3, 16
+; RV64I-NEXT: srli a1, a4, 16
; RV64I-NEXT: sb a1, 18(a2)
-; RV64I-NEXT: srli a3, a3, 8
-; RV64I-NEXT: sb a3, 17(a2)
-; RV64I-NEXT: srli a1, a7, 56
+; RV64I-NEXT: srli a4, a4, 8
+; RV64I-NEXT: sb a4, 17(a2)
+; RV64I-NEXT: srli a1, a3, 56
; RV64I-NEXT: sb a1, 31(a2)
-; RV64I-NEXT: srli a1, a7, 48
+; RV64I-NEXT: srli a1, a3, 48
; RV64I-NEXT: sb a1, 30(a2)
-; RV64I-NEXT: srli a1, a7, 40
+; RV64I-NEXT: srli a1, a3, 40
; RV64I-NEXT: sb a1, 29(a2)
-; RV64I-NEXT: srli a1, a7, 32
+; RV64I-NEXT: srli a1, a3, 32
; RV64I-NEXT: sb a1, 28(a2)
-; RV64I-NEXT: srli a1, a7, 24
+; RV64I-NEXT: srli a1, a3, 24
; RV64I-NEXT: sb a1, 27(a2)
-; RV64I-NEXT: srli a1, a7, 16
+; RV64I-NEXT: srli a1, a3, 16
; RV64I-NEXT: sb a1, 26(a2)
-; RV64I-NEXT: srli a1, a7, 8
-; RV64I-NEXT: sb a1, 25(a2)
+; RV64I-NEXT: srli a3, a3, 8
+; RV64I-NEXT: sb a3, 25(a2)
; RV64I-NEXT: srli a1, a0, 56
; RV64I-NEXT: sb a1, 15(a2)
; RV64I-NEXT: srli a1, a0, 48
@@ -1940,86 +1940,86 @@ define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli a6, a6, 24
; RV32I-NEXT: or a4, a6, a5
-; RV32I-NEXT: or a3, a4, a3
-; RV32I-NEXT: lbu a4, 5(a0)
; RV32I-NEXT: lbu a5, 4(a0)
-; RV32I-NEXT: lbu a6, 6(a0)
+; RV32I-NEXT: lbu a6, 5(a0)
+; RV32I-NEXT: or a3, a4, a3
+; RV32I-NEXT: lbu a4, 6(a0)
; RV32I-NEXT: lbu a7, 7(a0)
-; RV32I-NEXT: slli a4, a4, 8
-; RV32I-NEXT: or a4, a4, a5
-; RV32I-NEXT: slli a6, a6, 16
+; RV32I-NEXT: slli a6, a6, 8
+; RV32I-NEXT: or a5, a6, a5
+; RV32I-NEXT: slli a4, a4, 16
; RV32I-NEXT: slli a7, a7, 24
-; RV32I-NEXT: or a5, a7, a6
-; RV32I-NEXT: or a4, a5, a4
-; RV32I-NEXT: lbu a5, 9(a0)
+; RV32I-NEXT: or a4, a7, a4
; RV32I-NEXT: lbu a6, 8(a0)
-; RV32I-NEXT: lbu a7, 10(a0)
+; RV32I-NEXT: lbu a7, 9(a0)
+; RV32I-NEXT: or a4, a4, a5
+; RV32I-NEXT: lbu a5, 10(a0)
; RV32I-NEXT: lbu t0, 11(a0)
-; RV32I-NEXT: slli a5, a5, 8
-; RV32I-NEXT: or a5, a5, a6
-; RV32I-NEXT: slli a7, a7, 16
+; RV32I-NEXT: slli a7, a7, 8
+; RV32I-NEXT: or a6, a7, a6
+; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli t0, t0, 24
-; RV32I-NEXT: or a6, t0, a7
-; RV32I-NEXT: or a5, a6, a5
-; RV32I-NEXT: lbu a6, 13(a0)
+; RV32I-NEXT: or a5, t0, a5
; RV32I-NEXT: lbu a7, 12(a0)
-; RV32I-NEXT: lbu t0, 14(a0)
+; RV32I-NEXT: lbu t0, 13(a0)
+; RV32I-NEXT: or a5, a5, a6
+; RV32I-NEXT: lbu a6, 14(a0)
; RV32I-NEXT: lbu t1, 15(a0)
-; RV32I-NEXT: slli a6, a6, 8
-; RV32I-NEXT: or a6, a6, a7
-; RV32I-NEXT: slli t0, t0, 16
+; RV32I-NEXT: slli t0, t0, 8
+; RV32I-NEXT: or a7, t0, a7
+; RV32I-NEXT: slli a6, a6, 16
; RV32I-NEXT: slli t1, t1, 24
-; RV32I-NEXT: or a7, t1, t0
-; RV32I-NEXT: or a6, a7, a6
-; RV32I-NEXT: lbu a7, 17(a0)
+; RV32I-NEXT: or a6, t1, a6
; RV32I-NEXT: lbu t0, 16(a0)
-; RV32I-NEXT: lbu t1, 18(a0)
+; RV32I-NEXT: lbu t1, 17(a0)
+; RV32I-NEXT: or a6, a6, a7
+; RV32I-NEXT: lbu a7, 18(a0)
; RV32I-NEXT: lbu t2, 19(a0)
-; RV32I-NEXT: slli a7, a7, 8
-; RV32I-NEXT: or a7, a7, t0
-; RV32I-NEXT: slli t1, t1, 16
+; RV32I-NEXT: slli t1, t1, 8
+; RV32I-NEXT: or t0, t1, t0
+; RV32I-NEXT: slli a7, a7, 16
; RV32I-NEXT: slli t2, t2, 24
-; RV32I-NEXT: or t0, t2, t1
-; RV32I-NEXT: or t0, t0, a7
-; RV32I-NEXT: lbu a7, 21(a0)
+; RV32I-NEXT: or a7, t2, a7
; RV32I-NEXT: lbu t1, 20(a0)
-; RV32I-NEXT: lbu t2, 22(a0)
+; RV32I-NEXT: lbu t2, 21(a0)
+; RV32I-NEXT: or t0, a7, t0
+; RV32I-NEXT: lbu a7, 22(a0)
; RV32I-NEXT: lbu t3, 23(a0)
-; RV32I-NEXT: slli a7, a7, 8
-; RV32I-NEXT: or a7, a7, t1
-; RV32I-NEXT: slli t2, t2, 16
+; RV32I-NEXT: slli t2, t2, 8
+; RV32I-NEXT: or t1, t2, t1
+; RV32I-NEXT: slli a7, a7, 16
; RV32I-NEXT: slli t3, t3, 24
-; RV32I-NEXT: or t1, t3, t2
-; RV32I-NEXT: or t1, t1, a7
-; RV32I-NEXT: lbu a7, 25(a0)
+; RV32I-NEXT: or a7, t3, a7
; RV32I-NEXT: lbu t2, 24(a0)
-; RV32I-NEXT: lbu t3, 26(a0)
+; RV32I-NEXT: lbu t3, 25(a0)
+; RV32I-NEXT: or t1, a7, t1
+; RV32I-NEXT: lbu a7, 26(a0)
; RV32I-NEXT: lbu t4, 27(a0)
-; RV32I-NEXT: slli a7, a7, 8
-; RV32I-NEXT: or a7, a7, t2
-; RV32I-NEXT: slli t3, t3, 16
+; RV32I-NEXT: slli t3, t3, 8
+; RV32I-NEXT: or t2, t3, t2
+; RV32I-NEXT: slli a7, a7, 16
; RV32I-NEXT: slli t4, t4, 24
-; RV32I-NEXT: or t2, t4, t3
-; RV32I-NEXT: or t2, t2, a7
-; RV32I-NEXT: lbu a7, 29(a0)
+; RV32I-NEXT: or a7, t4, a7
; RV32I-NEXT: lbu t3, 28(a0)
-; RV32I-NEXT: lbu t4, 30(a0)
+; RV32I-NEXT: lbu t4, 29(a0)
+; RV32I-NEXT: or t2, a7, t2
+; RV32I-NEXT: lbu a7, 30(a0)
; RV32I-NEXT: lbu a0, 31(a0)
-; RV32I-NEXT: slli a7, a7, 8
-; RV32I-NEXT: or a7, a7, t3
-; RV32I-NEXT: slli t4, t4, 16
+; RV32I-NEXT: slli t4, t4, 8
+; RV32I-NEXT: or t3, t4, t3
+; RV32I-NEXT: slli a7, a7, 16
; RV32I-NEXT: slli a0, a0, 24
-; RV32I-NEXT: or a0, a0, t4
; RV32I-NEXT: or a0, a0, a7
-; RV32I-NEXT: lbu a7, 1(a1)
-; RV32I-NEXT: lbu t3, 0(a1)
-; RV32I-NEXT: lbu t4, 2(a1)
+; RV32I-NEXT: lbu a7, 0(a1)
+; RV32I-NEXT: lbu t4, 1(a1)
+; RV32I-NEXT: or a0, a0, t3
+; RV32I-NEXT: lbu t3, 2(a1)
; RV32I-NEXT: lbu a1, 3(a1)
-; RV32I-NEXT: slli a7, a7, 8
-; RV32I-NEXT: or a7, a7, t3
-; RV32I-NEXT: slli t4, t4, 16
+; RV32I-NEXT: slli t4, t4, 8
+; RV32I-NEXT: or a7, t4, a7
+; RV32I-NEXT: slli t3, t3, 16
; RV32I-NEXT: slli a1, a1, 24
-; RV32I-NEXT: or a1, a1, t4
+; RV32I-NEXT: or a1, a1, t3
; RV32I-NEXT: or a7, a1, a7
; RV32I-NEXT: sw zero, 28(sp)
; RV32I-NEXT: sw zero, 24(sp)
@@ -2043,68 +2043,68 @@ define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: sub a4, a1, a0
; RV32I-NEXT: lw a3, 4(a4)
; RV32I-NEXT: lw a5, 0(a4)
+; RV32I-NEXT: lw a6, 8(a4)
+; RV32I-NEXT: lw t0, 12(a4)
; RV32I-NEXT: sll a0, a3, a7
; RV32I-NEXT: andi a1, a7, 31
-; RV32I-NEXT: xori a6, a1, 31
+; RV32I-NEXT: xori t1, a1, 31
; RV32I-NEXT: srli a1, a5, 1
-; RV32I-NEXT: lw t0, 12(a4)
-; RV32I-NEXT: lw t1, 8(a4)
-; RV32I-NEXT: srl a1, a1, a6
+; RV32I-NEXT: srl a1, a1, t1
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: sll a1, t0, a7
-; RV32I-NEXT: srli t2, t1, 1
-; RV32I-NEXT: srl t2, t2, a6
+; RV32I-NEXT: srli t2, a6, 1
+; RV32I-NEXT: srl t2, t2, t1
; RV32I-NEXT: or a1, a1, t2
-; RV32I-NEXT: sll t1, t1, a7
+; RV32I-NEXT: sll a6, a6, a7
; RV32I-NEXT: srli a3, a3, 1
-; RV32I-NEXT: lw t2, 20(a4)
-; RV32I-NEXT: lw t3, 16(a4)
-; RV32I-NEXT: srl a3, a3, a6
-; RV32I-NEXT: or a3, t1, a3
-; RV32I-NEXT: sll t1, t2, a7
-; RV32I-NEXT: srli t4, t3, 1
-; RV32I-NEXT: srl t4, t4, a6
-; RV32I-NEXT: or t1, t1, t4
-; RV32I-NEXT: sll t3, t3, a7
+; RV32I-NEXT: srl a3, a3, t1
+; RV32I-NEXT: lw t2, 16(a4)
+; RV32I-NEXT: lw t3, 20(a4)
+; RV32I-NEXT: or a3, a6, a3
+; RV32I-NEXT: lw a6, 24(a4)
+; RV32I-NEXT: lw a4, 28(a4)
+; RV32I-NEXT: sll t4, t3, a7
+; RV32I-NEXT: srli t5, t2, 1
+; RV32I-NEXT: srl t5, t5, t1
+; RV32I-NEXT: or t4, t4, t5
+; RV32I-NEXT: sll t2, t2, a7
; RV32I-NEXT: srli t0, t0, 1
-; RV32I-NEXT: lw t4, 28(a4)
-; RV32I-NEXT: lw a4, 24(a4)
-; RV32I-NEXT: srl t0, t0, a6
-; RV32I-NEXT: or t0, t3, t0
-; RV32I-NEXT: sll t3, t4, a7
-; RV32I-NEXT: srli t4, a4, 1
-; RV32I-NEXT: srl t4, t4, a6
-; RV32I-NEXT: or t3, t3, t4
+; RV32I-NEXT: srl t0, t0, t1
+; RV32I-NEXT: or t0, t2, t0
; RV32I-NEXT: sll a4, a4, a7
-; RV32I-NEXT: srli t2, t2, 1
-; RV32I-NEXT: srl a6, t2, a6
-; RV32I-NEXT: or a4, a4, a6
+; RV32I-NEXT: srli t2, a6, 1
+; RV32I-NEXT: srl t2, t2, t1
+; RV32I-NEXT: or a4, a4, t2
+; RV32I-NEXT: sll a6, a6, a7
+; RV32I-NEXT: srli t2, t3, 1
+; RV32I-NEXT: srl t1, t2, t1
+; RV32I-NEXT: or a6, a6, t1
; RV32I-NEXT: sll a5, a5, a7
; RV32I-NEXT: sb a5, 0(a2)
-; RV32I-NEXT: srli a6, a5, 24
-; RV32I-NEXT: sb a6, 3(a2)
-; RV32I-NEXT: srli a6, a5, 16
-; RV32I-NEXT: sb a6, 2(a2)
+; RV32I-NEXT: srli a7, a5, 24
+; RV32I-NEXT: sb a7, 3(a2)
+; RV32I-NEXT: srli a7, a5, 16
+; RV32I-NEXT: sb a7, 2(a2)
; RV32I-NEXT: srli a5, a5, 8
; RV32I-NEXT: sb a5, 1(a2)
-; RV32I-NEXT: sb a4, 24(a2)
-; RV32I-NEXT: sb t3, 28(a2)
+; RV32I-NEXT: sb a6, 24(a2)
+; RV32I-NEXT: sb a4, 28(a2)
; RV32I-NEXT: sb t0, 16(a2)
-; RV32I-NEXT: sb t1, 20(a2)
+; RV32I-NEXT: sb t4, 20(a2)
; RV32I-NEXT: sb a3, 8(a2)
; RV32I-NEXT: sb a1, 12(a2)
; RV32I-NEXT: sb a0, 4(a2)
-; RV32I-NEXT: srli a5, a4, 24
+; RV32I-NEXT: srli a5, a6, 24
; RV32I-NEXT: sb a5, 27(a2)
-; RV32I-NEXT: srli a5, a4, 16
+; RV32I-NEXT: srli a5, a6, 16
; RV32I-NEXT: sb a5, 26(a2)
+; RV32I-NEXT: srli a5, a6, 8
+; RV32I-NEXT: sb a5, 25(a2)
+; RV32I-NEXT: srli a5, a4, 24
+; RV32I-NEXT: sb a5, 31(a2)
+; RV32I-NEXT: srli a5, a4, 16
+; RV32I-NEXT: sb a5, 30(a2)
; RV32I-NEXT: srli a4, a4, 8
-; RV32I-NEXT: sb a4, 25(a2)
-; RV32I-NEXT: srli a4, t3, 24
-; RV32I-NEXT: sb a4, 31(a2)
-; RV32I-NEXT: srli a4, t3, 16
-; RV32I-NEXT: sb a4, 30(a2)
-; RV32I-NEXT: srli a4, t3, 8
; RV32I-NEXT: sb a4, 29(a2)
; RV32I-NEXT: srli a4, t0, 24
; RV32I-NEXT: sb a4, 19(a2)
@@ -2112,11 +2112,11 @@ define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: sb a4, 18(a2)
; RV32I-NEXT: srli a4, t0, 8
; RV32I-NEXT: sb a4, 17(a2)
-; RV32I-NEXT: srli a4, t1, 24
+; RV32I-NEXT: srli a4, t4, 24
; RV32I-NEXT: sb a4, 23(a2)
-; RV32I-NEXT: srli a4, t1, 16
+; RV32I-NEXT: srli a4, t4, 16
; RV32I-NEXT: sb a4, 22(a2)
-; RV32I-NEXT: srli a4, t1, 8
+; RV32I-NEXT: srli a4, t4, 8
; RV32I-NEXT: sb a4, 21(a2)
; RV32I-NEXT: srli a4, a3, 24
; RV32I-NEXT: sb a4, 11(a2)
@@ -2157,105 +2157,105 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli a6, a6, 24
; RV64I-NEXT: or a4, a6, a5
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: lbu a4, 5(a0)
; RV64I-NEXT: lbu a5, 4(a0)
-; RV64I-NEXT: lbu a6, 6(a0)
+; RV64I-NEXT: lbu a6, 5(a0)
+; RV64I-NEXT: or a3, a4, a3
+; RV64I-NEXT: lbu a4, 6(a0)
; RV64I-NEXT: lbu a7, 7(a0)
-; RV64I-NEXT: slli a4, a4, 8
-; RV64I-NEXT: or a4, a4, a5
-; RV64I-NEXT: slli a6, a6, 16
+; RV64I-NEXT: slli a6, a6, 8
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: slli a4, a4, 16
; RV64I-NEXT: slli a7, a7, 24
-; RV64I-NEXT: or a5, a7, a6
-; RV64I-NEXT: or a4, a5, a4
+; RV64I-NEXT: or a4, a7, a4
+; RV64I-NEXT: or a4, a4, a5
; RV64I-NEXT: slli a4, a4, 32
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: lbu a4, 9(a0)
; RV64I-NEXT: lbu a5, 8(a0)
-; RV64I-NEXT: lbu a6, 10(a0)
+; RV64I-NEXT: lbu a6, 9(a0)
+; RV64I-NEXT: or a3, a4, a3
+; RV64I-NEXT: lbu a4, 10(a0)
; RV64I-NEXT: lbu a7, 11(a0)
-; RV64I-NEXT: slli a4, a4, 8
-; RV64I-NEXT: or a4, a4, a5
-; RV64I-NEXT: slli a6, a6, 16
+; RV64I-NEXT: slli a6, a6, 8
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: slli a4, a4, 16
; RV64I-NEXT: slli a7, a7, 24
-; RV64I-NEXT: or a5, a7, a6
-; RV64I-NEXT: or a4, a5, a4
-; RV64I-NEXT: lbu a5, 13(a0)
+; RV64I-NEXT: or a4, a7, a4
; RV64I-NEXT: lbu a6, 12(a0)
-; RV64I-NEXT: lbu a7, 14(a0)
+; RV64I-NEXT: lbu a7, 13(a0)
+; RV64I-NEXT: or a4, a4, a5
+; RV64I-NEXT: lbu a5, 14(a0)
; RV64I-NEXT: lbu t0, 15(a0)
-; RV64I-NEXT: slli a5, a5, 8
-; RV64I-NEXT: or a5, a5, a6
-; RV64I-NEXT: slli a7, a7, 16
+; RV64I-NEXT: slli a7, a7, 8
+; RV64I-NEXT: or a6, a7, a6
+; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli t0, t0, 24
-; RV64I-NEXT: or a6, t0, a7
-; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: or a5, t0, a5
+; RV64I-NEXT: or a5, a5, a6
; RV64I-NEXT: slli a5, a5, 32
-; RV64I-NEXT: or a4, a5, a4
-; RV64I-NEXT: lbu a5, 17(a0)
; RV64I-NEXT: lbu a6, 16(a0)
-; RV64I-NEXT: lbu a7, 18(a0)
+; RV64I-NEXT: lbu a7, 17(a0)
+; RV64I-NEXT: or a4, a5, a4
+; RV64I-NEXT: lbu a5, 18(a0)
; RV64I-NEXT: lbu t0, 19(a0)
-; RV64I-NEXT: slli a5, a5, 8
-; RV64I-NEXT: or a5, a5, a6
-; RV64I-NEXT: slli a7, a7, 16
+; RV64I-NEXT: slli a7, a7, 8
+; RV64I-NEXT: or a6, a7, a6
+; RV64I-NEXT: slli a5, a5, 16
; RV64I-NEXT: slli t0, t0, 24
-; RV64I-NEXT: or a6, t0, a7
-; RV64I-NEXT: or a5, a6, a5
-; RV64I-NEXT: lbu a6, 21(a0)
+; RV64I-NEXT: or a5, t0, a5
; RV64I-NEXT: lbu a7, 20(a0)
-; RV64I-NEXT: lbu t0, 22(a0)
+; RV64I-NEXT: lbu t0, 21(a0)
+; RV64I-NEXT: or a5, a5, a6
+; RV64I-NEXT: lbu a6, 22(a0)
; RV64I-NEXT: lbu t1, 23(a0)
-; RV64I-NEXT: slli a6, a6, 8
-; RV64I-NEXT: or a6, a6, a7
-; RV64I-NEXT: slli t0, t0, 16
+; RV64I-NEXT: slli t0, t0, 8
+; RV64I-NEXT: or a7, t0, a7
+; RV64I-NEXT: slli a6, a6, 16
; RV64I-NEXT: slli t1, t1, 24
-; RV64I-NEXT: or a7, t1, t0
-; RV64I-NEXT: or a6, a7, a6
+; RV64I-NEXT: or a6, t1, a6
+; RV64I-NEXT: or a6, a6, a7
; RV64I-NEXT: slli a6, a6, 32
-; RV64I-NEXT: or a5, a6, a5
-; RV64I-NEXT: lbu a6, 25(a0)
; RV64I-NEXT: lbu a7, 24(a0)
-; RV64I-NEXT: lbu t0, 26(a0)
+; RV64I-NEXT: lbu t0, 25(a0)
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: lbu a6, 26(a0)
; RV64I-NEXT: lbu t1, 27(a0)
-; RV64I-NEXT: slli a6, a6, 8
-; RV64I-NEXT: or a6, a6, a7
-; RV64I-NEXT: slli t0, t0, 16
+; RV64I-NEXT: slli t0, t0, 8
+; RV64I-NEXT: or a7, t0, a7
+; RV64I-NEXT: slli a6, a6, 16
; RV64I-NEXT: slli t1, t1, 24
-; RV64I-NEXT: or a7, t1, t0
-; RV64I-NEXT: or a6, a7, a6
-; RV64I-NEXT: lbu a7, 29(a0)
+; RV64I-NEXT: or a6, t1, a6
; RV64I-NEXT: lbu t0, 28(a0)
-; RV64I-NEXT: lbu t1, 30(a0)
+; RV64I-NEXT: lbu t1, 29(a0)
+; RV64I-NEXT: or a6, a6, a7
+; RV64I-NEXT: lbu a7, 30(a0)
; RV64I-NEXT: lbu a0, 31(a0)
-; RV64I-NEXT: slli a7, a7, 8
-; RV64I-NEXT: or a7, a7, t0
-; RV64I-NEXT: slli t1, t1, 16
+; RV64I-NEXT: slli t1, t1, 8
+; RV64I-NEXT: or t0, t1, t0
+; RV64I-NEXT: slli a7, a7, 16
; RV64I-NEXT: slli a0, a0, 24
-; RV64I-NEXT: or a0, a0, t1
; RV64I-NEXT: or a0, a0, a7
+; RV64I-NEXT: or a0, a0, t0
; RV64I-NEXT: slli a7, a0, 32
-; RV64I-NEXT: or a6, a7, a6
-; RV64I-NEXT: lbu a7, 1(a1)
; RV64I-NEXT: lbu t0, 0(a1)
-; RV64I-NEXT: lbu t1, 2(a1)
+; RV64I-NEXT: lbu t1, 1(a1)
+; RV64I-NEXT: or a6, a7, a6
+; RV64I-NEXT: lbu a7, 2(a1)
; RV64I-NEXT: lbu t2, 3(a1)
-; RV64I-NEXT: slli a7, a7, 8
-; RV64I-NEXT: or a7, a7, t0
-; RV64I-NEXT: slli t1, t1, 16
+; RV64I-NEXT: slli t1, t1, 8
+; RV64I-NEXT: or t0, t1, t0
+; RV64I-NEXT: slli a7, a7, 16
; RV64I-NEXT: slli t2, t2, 24
-; RV64I-NEXT: or t0, t2, t1
-; RV64I-NEXT: or a7, t0, a7
-; RV64I-NEXT: lbu t0, 5(a1)
+; RV64I-NEXT: or a7, t2, a7
; RV64I-NEXT: lbu t1, 4(a1)
-; RV64I-NEXT: lbu t2, 6(a1)
+; RV64I-NEXT: lbu t2, 5(a1)
+; RV64I-NEXT: or a7, a7, t0
+; RV64I-NEXT: lbu t0, 6(a1)
; RV64I-NEXT: lbu a1, 7(a1)
-; RV64I-NEXT: slli t0, t0, 8
-; RV64I-NEXT: or t0, t0, t1
-; RV64I-NEXT: slli t2, t2, 16
+; RV64I-NEXT: slli t2, t2, 8
+; RV64I-NEXT: or t1, t2, t1
+; RV64I-NEXT: slli t0, t0, 16
; RV64I-NEXT: slli a1, a1, 24
-; RV64I-NEXT: or a1, a1, t2
; RV64I-NEXT: or a1, a1, t0
+; RV64I-NEXT: or a1, a1, t1
; RV64I-NEXT: slli a1, a1, 32
; RV64I-NEXT: or a1, a1, a7
; RV64I-NEXT: sraiw a0, a0, 31
@@ -2270,72 +2270,72 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: srli a0, a1, 3
; RV64I-NEXT: andi a0, a0, 24
; RV64I-NEXT: mv a3, sp
-; RV64I-NEXT: add a3, a3, a0
-; RV64I-NEXT: ld a4, 8(a3)
-; RV64I-NEXT: srl a0, a4, a1
-; RV64I-NEXT: ld a5, 16(a3)
-; RV64I-NEXT: andi a6, a1, 63
-; RV64I-NEXT: xori a6, a6, 63
-; RV64I-NEXT: ld a7, 0(a3)
+; RV64I-NEXT: add a0, a3, a0
+; RV64I-NEXT: ld a3, 8(a0)
+; RV64I-NEXT: ld a4, 0(a0)
+; RV64I-NEXT: ld a5, 16(a0)
+; RV64I-NEXT: ld a6, 24(a0)
+; RV64I-NEXT: srl a0, a3, a1
+; RV64I-NEXT: andi a7, a1, 63
+; RV64I-NEXT: xori a7, a7, 63
; RV64I-NEXT: slli t0, a5, 1
-; RV64I-NEXT: sll t0, t0, a6
+; RV64I-NEXT: sll t0, t0, a7
; RV64I-NEXT: or a0, a0, t0
-; RV64I-NEXT: srl a7, a7, a1
-; RV64I-NEXT: slli a4, a4, 1
-; RV64I-NEXT: ld a3, 24(a3)
-; RV64I-NEXT: sll a4, a4, a6
-; RV64I-NEXT: or a4, a7, a4
-; RV64I-NEXT: srl a5, a5, a1
-; RV64I-NEXT: slli a7, a3, 1
-; RV64I-NEXT: sll a6, a7, a6
-; RV64I-NEXT: or a5, a5, a6
-; RV64I-NEXT: sra a1, a3, a1
+; RV64I-NEXT: srl a4, a4, a1
+; RV64I-NEXT: slli a3, a3, 1
+; RV64I-NEXT: sll a3, a3, a7
+; RV64I-NEXT: or a3, a4, a3
+; RV64I-NEXT: srl a4, a5, a1
+; RV64I-NEXT: slli a5, a6, 1
+; RV64I-NEXT: sll a5, a5, a7
+; RV64I-NEXT: or a4, a4, a5
+; RV64I-NEXT: sra a1, a6, a1
; RV64I-NEXT: sb a1, 24(a2)
-; RV64I-NEXT: srli a3, a1, 56
-; RV64I-NEXT: sb a3, 31(a2)
-; RV64I-NEXT: srli a3, a1, 48
-; RV64I-NEXT: sb a3, 30(a2)
-; RV64I-NEXT: srli a3, a1, 40
-; RV64I-NEXT: sb a3, 29(a2)
-; RV64I-NEXT: srli a3, a1, 32
-; RV64I-NEXT: sb a3, 28(a2)
-; RV64I-NEXT: srli a3, a1, 24
-; RV64I-NEXT: sb a3, 27(a2)
-; RV64I-NEXT: srli a3, a1, 16
-; RV64I-NEXT: sb a3, 26(a2)
+; RV64I-NEXT: srli a5, a1, 56
+; RV64I-NEXT: sb a5, 31(a2)
+; RV64I-NEXT: srli a5, a1, 48
+; RV64I-NEXT: sb a5, 30(a2)
+; RV64I-NEXT: srli a5, a1, 40
+; RV64I-NEXT: sb a5, 29(a2)
+; RV64I-NEXT: srli a5, a1, 32
+; RV64I-NEXT: sb a5, 28(a2)
+; RV64I-NEXT: srli a5, a1, 24
+; RV64I-NEXT: sb a5, 27(a2)
+; RV64I-NEXT: srli a5, a1, 16
+; RV64I-NEXT: sb a5, 26(a2)
; RV64I-NEXT: srli a1, a1, 8
; RV64I-NEXT: sb a1, 25(a2)
-; RV64I-NEXT: sb a5, 16(a2)
-; RV64I-NEXT: sb a4, 0(a2)
+; RV64I-NEXT: sb a4, 16(a2)
+; RV64I-NEXT: sb a3, 0(a2)
; RV64I-NEXT: sb a0, 8(a2)
-; RV64I-NEXT: srli a1, a5, 56
+; RV64I-NEXT: srli a1, a4, 56
; RV64I-NEXT: sb a1, 23(a2)
-; RV64I-NEXT: srli a1, a5, 48
+; RV64I-NEXT: srli a1, a4, 48
; RV64I-NEXT: sb a1, 22(a2)
-; RV64I-NEXT: srli a1, a5, 40
+; RV64I-NEXT: srli a1, a4, 40
; RV64I-NEXT: sb a1, 21(a2)
-; RV64I-NEXT: srli a1, a5, 32
+; RV64I-NEXT: srli a1, a4, 32
; RV64I-NEXT: sb a1, 20(a2)
-; RV64I-NEXT: srli a1, a5, 24
+; RV64I-NEXT: srli a1, a4, 24
; RV64I-NEXT: sb a1, 19(a2)
-; RV64I-NEXT: srli a1, a5, 16
+; RV64I-NEXT: srli a1, a4, 16
; RV64I-NEXT: sb a1, 18(a2)
-; RV64I-NEXT: srli a5, a5, 8
-; RV64I-NEXT: sb a5, 17(a2)
-; RV64I-NEXT: srli a1, a4, 56
+; RV64I-NEXT: srli a4, a4, 8
+; RV64I-NEXT: sb a4, 17(a2)
+; RV64I-NEXT: srli a1, a3, 56
; RV64I-NEXT: sb a1, 7(a2)
-; RV64I-NEXT: srli a1, a4, 48
+; RV64I-NEXT: srli a1, a3, 48
; RV64I-NEXT: sb a1, 6(a2)
-; RV64I-NEXT: srli a1, a4, 40
+; RV64I-NEXT: srli a1, a3, 40
; RV64I-NEXT: sb a1, 5(a2)
-; RV64I-NEXT: srli a1, a4, 32
+; RV64I-NEXT: srli a1, a3, 32
; RV64I-NEXT: sb a1, 4(a2)
-; RV64I-NEXT: srli a1, a4, 24
+; RV64I-NEXT: srli a1, a3, 24
; RV64I-NEXT: sb a1, 3(a2)
-; RV64I-NEXT: srli a1, a4, 16
+; RV64I-NEXT: srli a1, a3, 16
; RV64I-NEXT: sb a1, 2(a2)
-; RV64I-NEXT: srli a4, a4, 8
-; RV64I-NEXT: sb a4, 1(a2)
+; RV64I-NEXT: srli a3, a3, 8
+; RV64I-NEXT: sb a3, 1(a2)
; RV64I-NEXT: srli a1, a0, 56
; RV64I-NEXT: sb a1, 15(a2)
; RV64I-NEXT: srli a1, a0, 48
@@ -2365,87 +2365,87 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli a6, a6, 24
; RV32I-NEXT: or a4, a6, a5
-; RV32I-NEXT: or a3, a4, a3
-; RV32I-NEXT: lbu a4, 5(a0)
; RV32I-NEXT: lbu a5, 4(a0)
-; RV32I-NEXT: lbu a6, 6(a0)
+; RV32I-NEXT: lbu a6, 5(a0)
+; RV32I-NEXT: or a3, a4, a3
+; RV32I-NEXT: lbu a4, 6(a0)
; RV32I-NEXT: lbu a7, 7(a0)
-; RV32I-NEXT: slli a4, a4, 8
-; RV32I-NEXT: or a4, a4, a5
-; RV32I-NEXT: slli a6, a6, 16
+; RV32I-NEXT: slli a6, a6, 8
+; RV32I-NEXT: or a5, a6, a5
+; RV32I-NEXT: slli a4, a4, 16
; RV32I-NEXT: slli a7, a7, 24
-; RV32I-NEXT: or a5, a7, a6
-; RV32I-NEXT: or a4, a5, a4
-; RV32I-NEXT: lbu a5, 9(a0)
+; RV32I-NEXT: or a4, a7, a4
; RV32I-NEXT: lbu a6, 8(a0)
-; RV32I-NEXT: lbu a7, 10(a0)
+; RV32I-NEXT: lbu a7, 9(a0)
+; RV32I-NEXT: or a4, a4, a5
+; RV32I-NEXT: lbu a5, 10(a0)
; RV32I-NEXT: lbu t0, 11(a0)
-; RV32I-NEXT: slli a5, a5, 8
-; RV32I-NEXT: or a5, a5, a6
-; RV32I-NEXT: slli a7, a7, 16
+; RV32I-NEXT: slli a7, a7, 8
+; RV32I-NEXT: or a6, a7, a6
+; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli t0, t0, 24
-; RV32I-NEXT: or a6, t0, a7
-; RV32I-NEXT: or a5, a6, a5
-; RV32I-NEXT: lbu a6, 13(a0)
+; RV32I-NEXT: or a5, t0, a5
; RV32I-NEXT: lbu a7, 12(a0)
-; RV32I-NEXT: lbu t0, 14(a0)
+; RV32I-NEXT: lbu t0, 13(a0)
+; RV32I-NEXT: or a5, a5, a6
+; RV32I-NEXT: lbu a6, 14(a0)
; RV32I-NEXT: lbu t1, 15(a0)
-; RV32I-NEXT: slli a6, a6, 8
-; RV32I-NEXT: or a6, a6, a7
-; RV32I-NEXT: slli t0, t0, 16
+; RV32I-NEXT: slli t0, t0, 8
+; RV32I-NEXT: or a7, t0, a7
+; RV32I-NEXT: slli a6, a6, 16
; RV32I-NEXT: slli t1, t1, 24
-; RV32I-NEXT: or a7, t1, t0
-; RV32I-NEXT: or a6, a7, a6
-; RV32I-NEXT: lbu a7, 17(a0)
+; RV32I-NEXT: or a6, t1, a6
; RV32I-NEXT: lbu t0, 16(a0)
-; RV32I-NEXT: lbu t1, 18(a0)
+; RV32I-NEXT: lbu t1, 17(a0)
+; RV32I-NEXT: or a7, a6, a7
+; RV32I-NEXT: lbu a6, 18(a0)
; RV32I-NEXT: lbu t2, 19(a0)
-; RV32I-NEXT: slli a7, a7, 8
-; RV32I-NEXT: or a7, a7, t0
-; RV32I-NEXT: slli t1, t1, 16
+; RV32I-NEXT: slli t1, t1, 8
+; RV32I-NEXT: or t0, t1, t0
+; RV32I-NEXT: slli a6, a6, 16
; RV32I-NEXT: slli t2, t2, 24
-; RV32I-NEXT: or t0, t2, t1
-; RV32I-NEXT: or t0, t0, a7
-; RV32I-NEXT: lbu a7, 21(a0)
+; RV32I-NEXT: or a6, t2, a6
; RV32I-NEXT: lbu t1, 20(a0)
-; RV32I-NEXT: lbu t2, 22(a0)
+; RV32I-NEXT: lbu t2, 21(a0)
+; RV32I-NEXT: or t0, a6, t0
+; RV32I-NEXT: lbu a6, 22(a0)
; RV32I-NEXT: lbu t3, 23(a0)
-; RV32I-NEXT: slli a7, a7, 8
-; RV32I-NEXT: or a7, a7, t1
-; RV32I-NEXT: slli t2, t2, 16
+; RV32I-NEXT: slli t2, t2, 8
+; RV32I-NEXT: or t1, t2, t1
+; RV32I-NEXT: slli a6, a6, 16
; RV32I-NEXT: slli t3, t3, 24
-; RV32I-NEXT: or t1, t3, t2
-; RV32I-NEXT: or t1, t1, a7
-; RV32I-NEXT: lbu a7, 25(a0)
+; RV32I-NEXT: or a6, t3, a6
; RV32I-NEXT: lbu t2, 24(a0)
-; RV32I-NEXT: lbu t3, 26(a0)
+; RV32I-NEXT: lbu t3, 25(a0)
+; RV32I-NEXT: or t1, a6, t1
+; RV32I-NEXT: lbu a6, 26(a0)
; RV32I-NEXT: lbu t4, 27(a0)
-; RV32I-NEXT: slli a7, a7, 8
-; RV32I-NEXT: or a7, a7, t2
-; RV32I-NEXT: slli t3, t3, 16
+; RV32I-NEXT: slli t3, t3, 8
+; RV32I-NEXT: or t2, t3, t2
+; RV32I-NEXT: slli a6, a6, 16
; RV32I-NEXT: slli t4, t4, 24
-; RV32I-NEXT: or t2, t4, t3
-; RV32I-NEXT: or t2, t2, a7
-; RV32I-NEXT: lbu a7, 29(a0)
+; RV32I-NEXT: or a6, t4, a6
; RV32I-NEXT: lbu t3, 28(a0)
-; RV32I-NEXT: lbu t4, 30(a0)
+; RV32I-NEXT: lbu t4, 29(a0)
+; RV32I-NEXT: or t2, a6, t2
+; RV32I-NEXT: lbu a6, 30(a0)
; RV32I-NEXT: lbu a0, 31(a0)
-; RV32I-NEXT: slli a7, a7, 8
-; RV32I-NEXT: or a7, a7, t3
-; RV32I-NEXT: slli t4, t4, 16
+; RV32I-NEXT: slli t4, t4, 8
+; RV32I-NEXT: or t3, t4, t3
+; RV32I-NEXT: slli a6, a6, 16
; RV32I-NEXT: slli a0, a0, 24
-; RV32I-NEXT: or t3, a0, t4
-; RV32I-NEXT: or t3, t3, a7
-; RV32I-NEXT: lbu a7, 1(a1)
+; RV32I-NEXT: or a6, a0, a6
; RV32I-NEXT: lbu t4, 0(a1)
-; RV32I-NEXT: lbu t5, 2(a1)
+; RV32I-NEXT: lbu t5, 1(a1)
+; RV32I-NEXT: or t3, a6, t3
+; RV32I-NEXT: lbu a6, 2(a1)
; RV32I-NEXT: lbu a1, 3(a1)
-; RV32I-NEXT: slli a7, a7, 8
-; RV32I-NEXT: or a7, a7, t4
-; RV32I-NEXT: slli t5, t5, 16
+; RV32I-NEXT: slli t5, t5, 8
+; RV32I-NEXT: or t4, t5, t4
+; RV32I-NEXT: slli a6, a6, 16
; RV32I-NEXT: slli a1, a1, 24
-; RV32I-NEXT: or a1, a1, t5
-; RV32I-NEXT: or a7, a1, a7
+; RV32I-NEXT: or a1, a1, a6
+; RV32I-NEXT: or a6, a1, t4
; RV32I-NEXT: srai a0, a0, 31
; RV32I-NEXT: sw a0, 60(sp)
; RV32I-NEXT: sw a0, 56(sp)
@@ -2459,91 +2459,91 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: sw t2, 24(sp)
; RV32I-NEXT: sw t1, 20(sp)
; RV32I-NEXT: sw t0, 16(sp)
-; RV32I-NEXT: sw a6, 12(sp)
+; RV32I-NEXT: sw a7, 12(sp)
; RV32I-NEXT: sw a5, 8(sp)
; RV32I-NEXT: sw a4, 4(sp)
; RV32I-NEXT: sw a3, 0(sp)
-; RV32I-NEXT: srli a0, a7, 3
+; RV32I-NEXT: srli a0, a6, 3
; RV32I-NEXT: andi a0, a0, 28
; RV32I-NEXT: mv a1, sp
-; RV32I-NEXT: add a4, a1, a0
-; RV32I-NEXT: lw a1, 4(a4)
-; RV32I-NEXT: srl a0, a1, a7
-; RV32I-NEXT: lw a5, 8(a4)
-; RV32I-NEXT: andi a3, a7, 31
-; RV32I-NEXT: xori a6, a3, 31
-; RV32I-NEXT: lw a3, 0(a4)
-; RV32I-NEXT: slli t0, a5, 1
-; RV32I-NEXT: sll t0, t0, a6
-; RV32I-NEXT: or a0, a0, t0
-; RV32I-NEXT: srl a3, a3, a7
+; RV32I-NEXT: add a3, a1, a0
+; RV32I-NEXT: lw a1, 4(a3)
+; RV32I-NEXT: lw a4, 0(a3)
+; RV32I-NEXT: lw a5, 8(a3)
+; RV32I-NEXT: lw a7, 12(a3)
+; RV32I-NEXT: srl a0, a1, a6
+; RV32I-NEXT: andi t0, a6, 31
+; RV32I-NEXT: xori t0, t0, 31
+; RV32I-NEXT: slli t1, a5, 1
+; RV32I-NEXT: sll t1, t1, t0
+; RV32I-NEXT: or a0, a0, t1
+; RV32I-NEXT: srl a4, a4, a6
; RV32I-NEXT: slli a1, a1, 1
-; RV32I-NEXT: lw t0, 12(a4)
-; RV32I-NEXT: lw t1, 16(a4)
-; RV32I-NEXT: sll a1, a1, a6
-; RV32I-NEXT: or a1, a3, a1
-; RV32I-NEXT: srl a3, t0, a7
-; RV32I-NEXT: slli t2, t1, 1
-; RV32I-NEXT: sll t2, t2, a6
-; RV32I-NEXT: or a3, a3, t2
-; RV32I-NEXT: srl a5, a5, a7
-; RV32I-NEXT: slli t0, t0, 1
-; RV32I-NEXT: lw t2, 20(a4)
-; RV32I-NEXT: lw t3, 24(a4)
-; RV32I-NEXT: sll t0, t0, a6
-; RV32I-NEXT: or a5, a5, t0
-; RV32I-NEXT: srl t0, t2, a7
-; RV32I-NEXT: slli t4, t3, 1
-; RV32I-NEXT: sll t4, t4, a6
-; RV32I-NEXT: or t0, t0, t4
-; RV32I-NEXT: srl t1, t1, a7
+; RV32I-NEXT: sll a1, a1, t0
+; RV32I-NEXT: or a1, a4, a1
+; RV32I-NEXT: srl a4, a7, a6
+; RV32I-NEXT: lw t1, 16(a3)
+; RV32I-NEXT: lw t2, 20(a3)
+; RV32I-NEXT: lw t3, 24(a3)
+; RV32I-NEXT: lw t4, 28(a3)
+; RV32I-NEXT: slli a3, t1, 1
+; RV32I-NEXT: sll a3, a3, t0
+; RV32I-NEXT: or a3, a4, a3
+; RV32I-NEXT: srl a4, a5, a6
+; RV32I-NEXT: slli a7, a7, 1
+; RV32I-NEXT: sll a5, a7, t0
+; RV32I-NEXT: or a4, a4, a5
+; RV32I-NEXT: srl a5, t2, a6
+; RV32I-NEXT: slli a7, t3, 1
+; RV32I-NEXT: sll a7, a7, t0
+; RV32I-NEXT: or a5, a5, a7
+; RV32I-NEXT: srl a7, t1, a6
; RV32I-NEXT: slli t2, t2, 1
-; RV32I-NEXT: lw a4, 28(a4)
-; RV32I-NEXT: sll t2, t2, a6
-; RV32I-NEXT: or t1, t1, t2
-; RV32I-NEXT: srl t2, t3, a7
-; RV32I-NEXT: slli t3, a4, 1
-; RV32I-NEXT: sll a6, t3, a6
-; RV32I-NEXT: or a6, t2, a6
-; RV32I-NEXT: sra a4, a4, a7
-; RV32I-NEXT: sb a4, 28(a2)
-; RV32I-NEXT: srli a7, a4, 24
-; RV32I-NEXT: sb a7, 31(a2)
-; RV32I-NEXT: srli a7, a4, 16
-; RV32I-NEXT: sb a7, 30(a2)
-; RV32I-NEXT: srli a4, a4, 8
-; RV32I-NEXT: sb a4, 29(a2)
-; RV32I-NEXT: sb a6, 24(a2)
-; RV32I-NEXT: sb t1, 16(a2)
-; RV32I-NEXT: sb t0, 20(a2)
-; RV32I-NEXT: sb a5, 8(a2)
+; RV32I-NEXT: sll t1, t2, t0
+; RV32I-NEXT: or a7, a7, t1
+; RV32I-NEXT: srl t1, t3, a6
+; RV32I-NEXT: slli t2, t4, 1
+; RV32I-NEXT: sll t0, t2, t0
+; RV32I-NEXT: or t0, t1, t0
+; RV32I-NEXT: sra a6, t4, a6
+; RV32I-NEXT: sb a6, 28(a2)
+; RV32I-NEXT: srli t1, a6, 24
+; RV32I-NEXT: sb t1, 31(a2)
+; RV32I-NEXT: srli t1, a6, 16
+; RV32I-NEXT: sb t1, 30(a2)
+; RV32I-NEXT: srli a6, a6, 8
+; RV32I-NEXT: sb a6, 29(a2)
+; RV32I-NEXT: sb t0, 24(a2)
+; RV32I-NEXT: sb a7, 16(a2)
+; RV32I-NEXT: sb a5, 20(a2)
+; RV32I-NEXT: sb a4, 8(a2)
; RV32I-NEXT: sb a3, 12(a2)
; RV32I-NEXT: sb a1, 0(a2)
; RV32I-NEXT: sb a0, 4(a2)
-; RV32I-NEXT: srli a4, a6, 24
-; RV32I-NEXT: sb a4, 27(a2)
-; RV32I-NEXT: srli a4, a6, 16
-; RV32I-NEXT: sb a4, 26(a2)
-; RV32I-NEXT: srli a4, a6, 8
-; RV32I-NEXT: sb a4, 25(a2)
-; RV32I-NEXT: srli a4, t1, 24
-; RV32I-NEXT: sb a4, 19(a2)
-; RV32I-NEXT: srli a4, t1, 16
-; RV32I-NEXT: sb a4, 18(a2)
-; RV32I-NEXT: srli a4, t1, 8
-; RV32I-NEXT: sb a4, 17(a2)
-; RV32I-NEXT: srli a4, t0, 24
-; RV32I-NEXT: sb a4, 23(a2)
-; RV32I-NEXT: srli a4, t0, 16
-; RV32I-NEXT: sb a4, 22(a2)
-; RV32I-NEXT: srli a4, t0, 8
-; RV32I-NEXT: sb a4, 21(a2)
-; RV32I-NEXT: srli a4, a5, 24
-; RV32I-NEXT: sb a4, 11(a2)
-; RV32I-NEXT: srli a4, a5, 16
-; RV32I-NEXT: sb a4, 10(a2)
+; RV32I-NEXT: srli a6, t0, 24
+; RV32I-NEXT: sb a6, 27(a2)
+; RV32I-NEXT: srli a6, t0, 16
+; RV32I-NEXT: sb a6, 26(a2)
+; RV32I-NEXT: srli a6, t0, 8
+; RV32I-NEXT: sb a6, 25(a2)
+; RV32I-NEXT: srli a6, a7, 24
+; RV32I-NEXT: sb a6, 19(a2)
+; RV32I-NEXT: srli a6, a7, 16
+; RV32I-NEXT: sb a6, 18(a2)
+; RV32I-NEXT: srli a6, a7, 8
+; RV32I-NEXT: sb a6, 17(a2)
+; RV32I-NEXT: srli a6, a5, 24
+; RV32I-NEXT: sb a6, 23(a2)
+; RV32I-NEXT: srli a6, a5, 16
+; RV32I-NEXT: sb a6, 22(a2)
; RV32I-NEXT: srli a5, a5, 8
-; RV32I-NEXT: sb a5, 9(a2)
+; RV32I-NEXT: sb a5, 21(a2)
+; RV32I-NEXT: srli a5, a4, 24
+; RV32I-NEXT: sb a5, 11(a2)
+; RV32I-NEXT: srli a5, a4, 16
+; RV32I-NEXT: sb a5, 10(a2)
+; RV32I-NEXT: srli a4, a4, 8
+; RV32I-NEXT: sb a4, 9(a2)
; RV32I-NEXT: srli a4, a3, 24
; RV32I-NEXT: sb a4, 15(a2)
; RV32I-NEXT: srli a4, a3, 16
diff --git a/llvm/test/CodeGen/RISCV/xtheadmempair.ll b/llvm/test/CodeGen/RISCV/xtheadmempair.ll
index 333fd4c0472427..3a74bb66d9ec25 100644
--- a/llvm/test/CodeGen/RISCV/xtheadmempair.ll
+++ b/llvm/test/CodeGen/RISCV/xtheadmempair.ll
@@ -57,14 +57,14 @@ define i64 @lwud(ptr %a) {
define i64 @ldd(ptr %a) {
; RV32XTHEADMEMPAIR-LABEL: ldd:
; RV32XTHEADMEMPAIR: # %bb.0:
-; RV32XTHEADMEMPAIR-NEXT: lw a1, 32(a0)
-; RV32XTHEADMEMPAIR-NEXT: lw a2, 36(a0)
-; RV32XTHEADMEMPAIR-NEXT: lw a3, 44(a0)
+; RV32XTHEADMEMPAIR-NEXT: lw a1, 44(a0)
+; RV32XTHEADMEMPAIR-NEXT: lw a2, 32(a0)
+; RV32XTHEADMEMPAIR-NEXT: lw a3, 36(a0)
; RV32XTHEADMEMPAIR-NEXT: lw a0, 40(a0)
-; RV32XTHEADMEMPAIR-NEXT: add a2, a2, a3
-; RV32XTHEADMEMPAIR-NEXT: add a0, a1, a0
-; RV32XTHEADMEMPAIR-NEXT: sltu a1, a0, a1
-; RV32XTHEADMEMPAIR-NEXT: add a1, a2, a1
+; RV32XTHEADMEMPAIR-NEXT: add a1, a3, a1
+; RV32XTHEADMEMPAIR-NEXT: add a0, a2, a0
+; RV32XTHEADMEMPAIR-NEXT: sltu a2, a0, a2
+; RV32XTHEADMEMPAIR-NEXT: add a1, a1, a2
; RV32XTHEADMEMPAIR-NEXT: ret
;
; RV64XTHEADMEMPAIR-LABEL: ldd:
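For readers skimming the check-line churn, the ldd hunk above is the smallest self-contained example of the scheduling change: the computation is unchanged, only the order in which the four lw instructions are issued (and the resulting register assignment) differs, with the adjacent 32/36/40 offsets now grouped in ascending order. The following is a minimal IR sketch of a function in the spirit of @ldd, not the actual test source; the getelementptr indices are assumptions inferred from the 32- and 40-byte lw offsets in the checks.

; Hypothetical sketch: two adjacent i64 loads from the same base pointer.
; On RV32 each i64 load lowers to a pair of lw, giving the four loads that
; the load-clustering mutation can now reorder as a group.
define i64 @ldd_sketch(ptr %a) {
  %p0 = getelementptr inbounds i64, ptr %a, i64 4   ; byte offset 32
  %p1 = getelementptr inbounds i64, ptr %a, i64 5   ; byte offset 40
  %v0 = load i64, ptr %p0, align 8
  %v1 = load i64, ptr %p1, align 8
  %sum = add i64 %v0, %v1
  ret i64 %sum
}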