[llvm] [RISCV] Enable (non trivial) remat for most scalar instructions (PR #162311)

Philip Reames via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 7 09:03:56 PDT 2025


https://github.com/preames created https://github.com/llvm/llvm-project/pull/162311

This is a follow-up to the recent infrastructure work to generally support non-trivial rematerialization.  This is the first in a small series to enable non-trivial remat aggressively for the RISC-V backend.  It deliberately avoids both vector instructions and loads, as those seem most likely to expose unexpected interactions.

Note that this isn't ready to land just yet.  We need to collect both compile-time numbers (in progress) and more perf numbers/stats on at least e.g. spec2017/test-suite.  I'm posting it mostly as a placeholder since multiple people were talking about this and I want us to avoid duplicating work.
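
For context on the mechanism: classic "trivial" rematerialization only recomputes instructions whose inputs are always available (e.g. constant materialization), while non-trivial remat also allows instructions with register operands, provided those operands are still live at the point of recomputation.  A minimal hypothetical sketch of the intended effect on a cheap scalar op like SLLI (illustration only, not output from this patch; register names and stack offsets are made up):

    # Without remat: the SLLI result is spilled across a high-pressure region.
    slli    a1, a0, 3
    sw      a1, 12(sp)          # spill
    ...                         # region with high register pressure
    lw      a1, 12(sp)          # reload
    add     a2, a2, a1

    # With remat (assuming a0 is still live at the use): the allocator
    # re-executes the cheap SLLI instead, avoiding the spill/reload pair.
    slli    a1, a0, 3
    ...
    slli    a1, a0, 3           # rematerialized
    add     a2, a2, a1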

>From 9a097cf9731c089644a3bf13ace4aa8ea63b5c9f Mon Sep 17 00:00:00 2001
From: Philip Reames <preames at rivosinc.com>
Date: Sat, 30 Aug 2025 12:36:12 -0700
Subject: [PATCH] [RISCV] Enable (non trivial) remat for most scalar
 instructions

This is a follow-up to the recent infrastructure work to generally
support non-trivial rematerialization.  This is the first in a small
series to enable non-trivial remat aggressively for the RISC-V
backend.  It deliberately avoids both vector instructions and loads,
as those seem most likely to expose unexpected interactions.

Note that this isn't ready to land just yet.  We need to collect
both compile-time numbers (in progress) and more perf numbers/stats
on at least e.g. spec2017/test-suite.  I'm posting it mostly as
a placeholder since multiple people were talking about this and
I want us to avoid duplicating work.
---
 llvm/lib/Target/RISCV/RISCVInstrInfo.td       |   20 +-
 ...lar-shift-by-byte-multiple-legalization.ll | 8925 +++++++++--------
 llvm/test/CodeGen/RISCV/add-before-shl.ll     |   10 +-
 llvm/test/CodeGen/RISCV/pr69586.ll            |  283 +-
 .../RISCV/rvv/nontemporal-vp-scalable.ll      |  410 +-
 .../RISCV/rvv/vxrm-insert-out-of-loop.ll      |   42 +-
 6 files changed, 4893 insertions(+), 4797 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index 9855c47a63392..f1ac3a5b7e9a5 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -780,21 +780,18 @@ def SB : Store_rri<0b000, "sb">, Sched<[WriteSTB, ReadStoreData, ReadMemBase]>;
 def SH : Store_rri<0b001, "sh">, Sched<[WriteSTH, ReadStoreData, ReadMemBase]>;
 def SW : Store_rri<0b010, "sw">, Sched<[WriteSTW, ReadStoreData, ReadMemBase]>;
 
-// ADDI isn't always rematerializable, but isReMaterializable will be used as
-// a hint which is verified in isReMaterializableImpl.
-let isReMaterializable = 1, isAsCheapAsAMove = 1 in
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
 def ADDI  : ALU_ri<0b000, "addi">;
+def XORI  : ALU_ri<0b100, "xori">;
+def ORI   : ALU_ri<0b110, "ori">;
+}
 
-let IsSignExtendingOpW = 1 in {
+let IsSignExtendingOpW = 1, isReMaterializable = 1 in {
 def SLTI  : ALU_ri<0b010, "slti">;
 def SLTIU : ALU_ri<0b011, "sltiu">;
 }
 
-let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
-def XORI  : ALU_ri<0b100, "xori">;
-def ORI   : ALU_ri<0b110, "ori">;
-}
-
+let isReMaterializable = 1 in {
 def ANDI  : ALU_ri<0b111, "andi">;
 
 def SLLI : Shift_ri<0b00000, 0b001, "slli">,
@@ -826,6 +823,7 @@ def OR   : ALU_rr<0b0000000, 0b110, "or", Commutable=1>,
            Sched<[WriteIALU, ReadIALU, ReadIALU]>;
 def AND  : ALU_rr<0b0000000, 0b111, "and", Commutable=1>,
            Sched<[WriteIALU, ReadIALU, ReadIALU]>;
+}
 
 let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in {
 def FENCE : RVInstI<0b000, OPC_MISC_MEM, (outs),
@@ -893,7 +891,7 @@ def LWU   : Load_ri<0b110, "lwu">, Sched<[WriteLDW, ReadMemBase]>;
 def LD    : Load_ri<0b011, "ld">, Sched<[WriteLDD, ReadMemBase]>;
 def SD    : Store_rri<0b011, "sd">, Sched<[WriteSTD, ReadStoreData, ReadMemBase]>;
 
-let IsSignExtendingOpW = 1 in {
+let IsSignExtendingOpW = 1, isReMaterializable = 1 in {
 let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
 def ADDIW : RVInstI<0b000, OPC_OP_IMM_32, (outs GPR:$rd),
                     (ins GPR:$rs1, simm12_lo:$imm12),
@@ -917,7 +915,7 @@ def SRLW  : ALUW_rr<0b0000000, 0b101, "srlw">,
             Sched<[WriteShiftReg32, ReadShiftReg32, ReadShiftReg32]>;
 def SRAW  : ALUW_rr<0b0100000, 0b101, "sraw">,
             Sched<[WriteShiftReg32, ReadShiftReg32, ReadShiftReg32]>;
-} // IsSignExtendingOpW = 1
+} // IsSignExtendingOpW = 1, isReMaterializable = 1
 } // Predicates = [IsRV64]
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/wide-scalar-shift-by-byte-multiple-legalization.ll b/llvm/test/CodeGen/RISCV/GlobalISel/wide-scalar-shift-by-byte-multiple-legalization.ll
index ca9f7637388f7..74c31a229dad4 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/wide-scalar-shift-by-byte-multiple-legalization.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/wide-scalar-shift-by-byte-multiple-legalization.ll
@@ -3000,9 +3000,9 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
 ; RV32I-NEXT:    sw s9, 20(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s10, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s11, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    li a4, 0
+; RV32I-NEXT:    li a5, 0
 ; RV32I-NEXT:    lbu a3, 0(a0)
-; RV32I-NEXT:    lbu a5, 1(a0)
+; RV32I-NEXT:    lbu a4, 1(a0)
 ; RV32I-NEXT:    lbu a6, 2(a0)
 ; RV32I-NEXT:    lbu a7, 3(a0)
 ; RV32I-NEXT:    lbu t0, 4(a0)
@@ -3013,736 +3013,750 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
 ; RV32I-NEXT:    lbu t5, 9(a0)
 ; RV32I-NEXT:    lbu t6, 10(a0)
 ; RV32I-NEXT:    lbu s0, 11(a0)
-; RV32I-NEXT:    slli a5, a5, 8
+; RV32I-NEXT:    slli a4, a4, 8
 ; RV32I-NEXT:    slli a7, a7, 8
 ; RV32I-NEXT:    slli t1, t1, 8
-; RV32I-NEXT:    or a3, a5, a3
-; RV32I-NEXT:    or a7, a7, a6
-; RV32I-NEXT:    or t1, t1, t0
-; RV32I-NEXT:    lbu a6, 13(a0)
-; RV32I-NEXT:    lbu a5, 14(a0)
-; RV32I-NEXT:    lbu s1, 15(a0)
+; RV32I-NEXT:    or a3, a4, a3
+; RV32I-NEXT:    or a4, a7, a6
+; RV32I-NEXT:    or a7, t1, t0
+; RV32I-NEXT:    lbu t0, 13(a0)
+; RV32I-NEXT:    lbu a6, 14(a0)
+; RV32I-NEXT:    lbu t1, 15(a0)
 ; RV32I-NEXT:    slli t3, t3, 8
 ; RV32I-NEXT:    slli t5, t5, 8
 ; RV32I-NEXT:    slli s0, s0, 8
-; RV32I-NEXT:    or t3, t3, t2
-; RV32I-NEXT:    or t0, t5, t4
-; RV32I-NEXT:    or t5, s0, t6
-; RV32I-NEXT:    lbu t2, 1(a1)
-; RV32I-NEXT:    lbu t4, 0(a1)
+; RV32I-NEXT:    or s1, t3, t2
+; RV32I-NEXT:    or t2, t5, t4
+; RV32I-NEXT:    or t4, s0, t6
+; RV32I-NEXT:    lbu t3, 1(a1)
+; RV32I-NEXT:    lbu t5, 0(a1)
 ; RV32I-NEXT:    lbu t6, 2(a1)
 ; RV32I-NEXT:    lbu a1, 3(a1)
-; RV32I-NEXT:    slli t2, t2, 8
-; RV32I-NEXT:    or s0, t2, t4
-; RV32I-NEXT:    slli t2, s1, 8
+; RV32I-NEXT:    slli t3, t3, 8
+; RV32I-NEXT:    or t5, t3, t5
+; RV32I-NEXT:    slli t3, t1, 8
 ; RV32I-NEXT:    slli a1, a1, 8
 ; RV32I-NEXT:    or a1, a1, t6
-; RV32I-NEXT:    slli t4, a7, 16
-; RV32I-NEXT:    slli a7, t3, 16
-; RV32I-NEXT:    slli t3, t5, 16
-; RV32I-NEXT:    slli t5, a1, 16
-; RV32I-NEXT:    or a1, a7, t1
-; RV32I-NEXT:    or a7, t5, s0
+; RV32I-NEXT:    slli a4, a4, 16
+; RV32I-NEXT:    slli s1, s1, 16
+; RV32I-NEXT:    slli t4, t4, 16
+; RV32I-NEXT:    slli t1, a1, 16
+; RV32I-NEXT:    or s5, s1, a7
+; RV32I-NEXT:    or a7, t1, t5
 ; RV32I-NEXT:    slli a7, a7, 3
 ; RV32I-NEXT:    srli t1, a7, 5
 ; RV32I-NEXT:    andi t5, a7, 31
 ; RV32I-NEXT:    neg s3, t5
 ; RV32I-NEXT:    beqz t5, .LBB12_2
 ; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    sll a4, a1, s3
+; RV32I-NEXT:    sll a5, s5, s3
 ; RV32I-NEXT:  .LBB12_2:
-; RV32I-NEXT:    or s7, t4, a3
-; RV32I-NEXT:    lbu t4, 12(a0)
-; RV32I-NEXT:    lbu t6, 19(a0)
-; RV32I-NEXT:    slli s1, a6, 8
-; RV32I-NEXT:    or a5, t2, a5
-; RV32I-NEXT:    or a3, t3, t0
+; RV32I-NEXT:    or a4, a4, a3
+; RV32I-NEXT:    lbu t6, 12(a0)
+; RV32I-NEXT:    lbu s0, 19(a0)
+; RV32I-NEXT:    slli s1, t0, 8
+; RV32I-NEXT:    or t0, t3, a6
+; RV32I-NEXT:    or a1, t4, t2
 ; RV32I-NEXT:    beqz t1, .LBB12_4
 ; RV32I-NEXT:  # %bb.3:
-; RV32I-NEXT:    li s0, 0
+; RV32I-NEXT:    mv s11, a4
+; RV32I-NEXT:    li a4, 0
 ; RV32I-NEXT:    j .LBB12_5
 ; RV32I-NEXT:  .LBB12_4:
-; RV32I-NEXT:    srl s0, s7, a7
-; RV32I-NEXT:    or s0, s0, a4
+; RV32I-NEXT:    mv s11, a4
+; RV32I-NEXT:    srl a6, a4, a7
+; RV32I-NEXT:    or a4, a6, a5
 ; RV32I-NEXT:  .LBB12_5:
 ; RV32I-NEXT:    li a6, 0
-; RV32I-NEXT:    lbu t0, 17(a0)
-; RV32I-NEXT:    lbu a4, 18(a0)
-; RV32I-NEXT:    slli s4, t6, 8
-; RV32I-NEXT:    or s2, s1, t4
-; RV32I-NEXT:    slli a5, a5, 16
-; RV32I-NEXT:    li s5, 1
-; RV32I-NEXT:    sll t6, a3, s3
+; RV32I-NEXT:    lbu s2, 17(a0)
+; RV32I-NEXT:    lbu a5, 18(a0)
+; RV32I-NEXT:    slli s4, s0, 8
+; RV32I-NEXT:    or s1, s1, t6
+; RV32I-NEXT:    slli t0, t0, 16
+; RV32I-NEXT:    li t3, 1
+; RV32I-NEXT:    sll s6, a1, s3
 ; RV32I-NEXT:    beqz t5, .LBB12_7
 ; RV32I-NEXT:  # %bb.6:
-; RV32I-NEXT:    mv a6, t6
+; RV32I-NEXT:    mv a6, s6
 ; RV32I-NEXT:  .LBB12_7:
 ; RV32I-NEXT:    lbu t2, 16(a0)
-; RV32I-NEXT:    lbu t3, 23(a0)
-; RV32I-NEXT:    slli s1, t0, 8
-; RV32I-NEXT:    or t4, s4, a4
-; RV32I-NEXT:    srl a4, a1, a7
-; RV32I-NEXT:    or a5, a5, s2
-; RV32I-NEXT:    bne t1, s5, .LBB12_9
+; RV32I-NEXT:    lbu t4, 23(a0)
+; RV32I-NEXT:    slli s0, s2, 8
+; RV32I-NEXT:    or t6, s4, a5
+; RV32I-NEXT:    srl a3, s5, a7
+; RV32I-NEXT:    or a5, t0, s1
+; RV32I-NEXT:    sw a3, 0(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    bne t1, t3, .LBB12_9
 ; RV32I-NEXT:  # %bb.8:
-; RV32I-NEXT:    or s0, a4, a6
+; RV32I-NEXT:    or a4, a3, a6
 ; RV32I-NEXT:  .LBB12_9:
 ; RV32I-NEXT:    li t0, 0
-; RV32I-NEXT:    lbu s5, 21(a0)
+; RV32I-NEXT:    lbu s2, 21(a0)
 ; RV32I-NEXT:    lbu a6, 22(a0)
-; RV32I-NEXT:    slli s4, t3, 8
-; RV32I-NEXT:    or t2, s1, t2
-; RV32I-NEXT:    slli s6, t4, 16
-; RV32I-NEXT:    li s8, 2
-; RV32I-NEXT:    sll t3, a5, s3
+; RV32I-NEXT:    slli s1, t4, 8
+; RV32I-NEXT:    or t2, s0, t2
+; RV32I-NEXT:    slli s4, t6, 16
+; RV32I-NEXT:    li a3, 2
+; RV32I-NEXT:    sll s8, a5, s3
 ; RV32I-NEXT:    beqz t5, .LBB12_11
 ; RV32I-NEXT:  # %bb.10:
-; RV32I-NEXT:    mv t0, t3
+; RV32I-NEXT:    mv t0, s8
 ; RV32I-NEXT:  .LBB12_11:
-; RV32I-NEXT:    lbu s1, 20(a0)
-; RV32I-NEXT:    lbu s2, 27(a0)
-; RV32I-NEXT:    slli s5, s5, 8
-; RV32I-NEXT:    or s4, s4, a6
-; RV32I-NEXT:    srl t4, a3, a7
-; RV32I-NEXT:    or a6, s6, t2
-; RV32I-NEXT:    bne t1, s8, .LBB12_13
+; RV32I-NEXT:    lbu t6, 20(a0)
+; RV32I-NEXT:    lbu s0, 27(a0)
+; RV32I-NEXT:    slli s2, s2, 8
+; RV32I-NEXT:    or s1, s1, a6
+; RV32I-NEXT:    srl t3, a1, a7
+; RV32I-NEXT:    or a6, s4, t2
+; RV32I-NEXT:    sw s5, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    bne t1, a3, .LBB12_13
 ; RV32I-NEXT:  # %bb.12:
-; RV32I-NEXT:    or s0, t4, t0
+; RV32I-NEXT:    or a4, t3, t0
 ; RV32I-NEXT:  .LBB12_13:
-; RV32I-NEXT:    sw s7, 4(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    li t2, 0
-; RV32I-NEXT:    lbu s6, 25(a0)
+; RV32I-NEXT:    lbu s4, 25(a0)
 ; RV32I-NEXT:    lbu t0, 26(a0)
-; RV32I-NEXT:    slli s8, s2, 8
-; RV32I-NEXT:    or s7, s5, s1
-; RV32I-NEXT:    slli s9, s4, 16
-; RV32I-NEXT:    sll s11, a6, s3
+; RV32I-NEXT:    slli s7, s0, 8
+; RV32I-NEXT:    or s5, s2, t6
+; RV32I-NEXT:    slli s9, s1, 16
+; RV32I-NEXT:    li t6, 3
+; RV32I-NEXT:    sll t4, a6, s3
 ; RV32I-NEXT:    beqz t5, .LBB12_15
 ; RV32I-NEXT:  # %bb.14:
-; RV32I-NEXT:    mv t2, s11
+; RV32I-NEXT:    mv t2, t4
 ; RV32I-NEXT:  .LBB12_15:
-; RV32I-NEXT:    lbu s1, 24(a0)
-; RV32I-NEXT:    lbu s2, 31(a0)
-; RV32I-NEXT:    slli s5, s6, 8
-; RV32I-NEXT:    or s4, s8, t0
-; RV32I-NEXT:    srl ra, a5, a7
-; RV32I-NEXT:    or t0, s9, s7
-; RV32I-NEXT:    li s6, 3
-; RV32I-NEXT:    bne t1, s6, .LBB12_17
+; RV32I-NEXT:    lbu s0, 24(a0)
+; RV32I-NEXT:    lbu s1, 31(a0)
+; RV32I-NEXT:    slli s4, s4, 8
+; RV32I-NEXT:    or s2, s7, t0
+; RV32I-NEXT:    srl a3, a5, a7
+; RV32I-NEXT:    or t0, s9, s5
+; RV32I-NEXT:    li s9, 3
+; RV32I-NEXT:    bne t1, t6, .LBB12_17
 ; RV32I-NEXT:  # %bb.16:
-; RV32I-NEXT:    or s0, ra, t2
+; RV32I-NEXT:    or a4, a3, t2
 ; RV32I-NEXT:  .LBB12_17:
+; RV32I-NEXT:    mv t6, t3
 ; RV32I-NEXT:    li t2, 0
 ; RV32I-NEXT:    lbu s7, 29(a0)
-; RV32I-NEXT:    lbu s6, 30(a0)
-; RV32I-NEXT:    slli s8, s2, 8
-; RV32I-NEXT:    or s2, s5, s1
-; RV32I-NEXT:    slli s5, s4, 16
-; RV32I-NEXT:    li s9, 4
-; RV32I-NEXT:    sll s1, t0, s3
-; RV32I-NEXT:    sw s1, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lbu s5, 30(a0)
+; RV32I-NEXT:    slli s1, s1, 8
+; RV32I-NEXT:    or s10, s4, s0
+; RV32I-NEXT:    slli s2, s2, 16
+; RV32I-NEXT:    li a3, 4
+; RV32I-NEXT:    sll s0, t0, s3
 ; RV32I-NEXT:    beqz t5, .LBB12_19
 ; RV32I-NEXT:  # %bb.18:
-; RV32I-NEXT:    lw t2, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    mv t2, s0
 ; RV32I-NEXT:  .LBB12_19:
-; RV32I-NEXT:    lbu s1, 28(a0)
+; RV32I-NEXT:    lbu t3, 28(a0)
 ; RV32I-NEXT:    slli s7, s7, 8
-; RV32I-NEXT:    or s4, s8, s6
-; RV32I-NEXT:    srl s10, a6, a7
-; RV32I-NEXT:    or a0, s5, s2
-; RV32I-NEXT:    bne t1, s9, .LBB12_21
+; RV32I-NEXT:    or s4, s1, s5
+; RV32I-NEXT:    srl s1, a6, a7
+; RV32I-NEXT:    or a0, s2, s10
+; RV32I-NEXT:    beq t1, a3, .LBB12_21
 ; RV32I-NEXT:  # %bb.20:
-; RV32I-NEXT:    or s0, s10, t2
+; RV32I-NEXT:    mv a3, s1
+; RV32I-NEXT:    j .LBB12_22
 ; RV32I-NEXT:  .LBB12_21:
+; RV32I-NEXT:    mv a3, s1
+; RV32I-NEXT:    or a4, s1, t2
+; RV32I-NEXT:  .LBB12_22:
+; RV32I-NEXT:    li s10, 1
 ; RV32I-NEXT:    li s2, 0
-; RV32I-NEXT:    or t2, s7, s1
+; RV32I-NEXT:    or t2, s7, t3
 ; RV32I-NEXT:    slli s4, s4, 16
-; RV32I-NEXT:    li s9, 5
+; RV32I-NEXT:    li s1, 5
 ; RV32I-NEXT:    sll s7, a0, s3
-; RV32I-NEXT:    beqz t5, .LBB12_23
-; RV32I-NEXT:  # %bb.22:
+; RV32I-NEXT:    beqz t5, .LBB12_24
+; RV32I-NEXT:  # %bb.23:
 ; RV32I-NEXT:    mv s2, s7
-; RV32I-NEXT:  .LBB12_23:
-; RV32I-NEXT:    srl s8, t0, a7
+; RV32I-NEXT:  .LBB12_24:
+; RV32I-NEXT:    sw a1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    srl t3, t0, a7
 ; RV32I-NEXT:    or t2, s4, t2
-; RV32I-NEXT:    bne t1, s9, .LBB12_25
-; RV32I-NEXT:  # %bb.24:
-; RV32I-NEXT:    or s0, s8, s2
-; RV32I-NEXT:  .LBB12_25:
-; RV32I-NEXT:    li s4, 0
+; RV32I-NEXT:    beq t1, s1, .LBB12_26
+; RV32I-NEXT:  # %bb.25:
+; RV32I-NEXT:    mv a1, t3
+; RV32I-NEXT:    j .LBB12_27
+; RV32I-NEXT:  .LBB12_26:
+; RV32I-NEXT:    mv a1, t3
+; RV32I-NEXT:    or a4, t3, s2
+; RV32I-NEXT:  .LBB12_27:
+; RV32I-NEXT:    li t3, 0
 ; RV32I-NEXT:    li s2, 6
 ; RV32I-NEXT:    sll s5, t2, s3
-; RV32I-NEXT:    beqz t5, .LBB12_27
-; RV32I-NEXT:  # %bb.26:
-; RV32I-NEXT:    mv s4, s5
-; RV32I-NEXT:  .LBB12_27:
-; RV32I-NEXT:    srl s6, a0, a7
-; RV32I-NEXT:    bne t1, s2, .LBB12_29
+; RV32I-NEXT:    beqz t5, .LBB12_29
 ; RV32I-NEXT:  # %bb.28:
-; RV32I-NEXT:    or s0, s6, s4
+; RV32I-NEXT:    mv t3, s5
 ; RV32I-NEXT:  .LBB12_29:
-; RV32I-NEXT:    li s3, 7
-; RV32I-NEXT:    srl s1, t2, a7
-; RV32I-NEXT:    mv s4, s1
-; RV32I-NEXT:    bne t1, s3, .LBB12_34
+; RV32I-NEXT:    srl s3, a0, a7
+; RV32I-NEXT:    beq t1, s2, .LBB12_31
 ; RV32I-NEXT:  # %bb.30:
-; RV32I-NEXT:    bnez a7, .LBB12_35
+; RV32I-NEXT:    mv ra, s3
+; RV32I-NEXT:    j .LBB12_32
 ; RV32I-NEXT:  .LBB12_31:
-; RV32I-NEXT:    li s0, 0
-; RV32I-NEXT:    bnez t5, .LBB12_36
+; RV32I-NEXT:    mv ra, s3
+; RV32I-NEXT:    or a4, s3, t3
 ; RV32I-NEXT:  .LBB12_32:
-; RV32I-NEXT:    li s4, 2
-; RV32I-NEXT:    beqz t1, .LBB12_37
-; RV32I-NEXT:  .LBB12_33:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    j .LBB12_38
+; RV32I-NEXT:    li s3, 7
+; RV32I-NEXT:    srl s4, t2, a7
+; RV32I-NEXT:    mv t3, s4
+; RV32I-NEXT:    beq t1, s3, .LBB12_34
+; RV32I-NEXT:  # %bb.33:
+; RV32I-NEXT:    mv t3, a4
 ; RV32I-NEXT:  .LBB12_34:
-; RV32I-NEXT:    mv s4, s0
-; RV32I-NEXT:    beqz a7, .LBB12_31
-; RV32I-NEXT:  .LBB12_35:
-; RV32I-NEXT:    sw s4, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    li s0, 0
-; RV32I-NEXT:    beqz t5, .LBB12_32
+; RV32I-NEXT:    mv a4, s11
+; RV32I-NEXT:    beqz a7, .LBB12_36
+; RV32I-NEXT:  # %bb.35:
+; RV32I-NEXT:    mv a4, t3
 ; RV32I-NEXT:  .LBB12_36:
-; RV32I-NEXT:    mv s0, t6
-; RV32I-NEXT:    li s4, 2
-; RV32I-NEXT:    bnez t1, .LBB12_33
-; RV32I-NEXT:  .LBB12_37:
-; RV32I-NEXT:    or a4, a4, s0
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    li s11, 2
+; RV32I-NEXT:    beqz t5, .LBB12_38
+; RV32I-NEXT:  # %bb.37:
+; RV32I-NEXT:    mv t3, s6
 ; RV32I-NEXT:  .LBB12_38:
-; RV32I-NEXT:    li s0, 1
-; RV32I-NEXT:    li t6, 0
-; RV32I-NEXT:    bnez t5, .LBB12_57
+; RV32I-NEXT:    beqz t1, .LBB12_40
 ; RV32I-NEXT:  # %bb.39:
-; RV32I-NEXT:    beq t1, s0, .LBB12_58
+; RV32I-NEXT:    li s6, 0
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    bnez t5, .LBB12_41
+; RV32I-NEXT:    j .LBB12_42
 ; RV32I-NEXT:  .LBB12_40:
-; RV32I-NEXT:    li t6, 0
-; RV32I-NEXT:    bnez t5, .LBB12_59
+; RV32I-NEXT:    lw s6, 0(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    or s6, s6, t3
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beqz t5, .LBB12_42
 ; RV32I-NEXT:  .LBB12_41:
-; RV32I-NEXT:    beq t1, s4, .LBB12_60
+; RV32I-NEXT:    mv t3, s8
 ; RV32I-NEXT:  .LBB12_42:
-; RV32I-NEXT:    li t6, 0
-; RV32I-NEXT:    bnez t5, .LBB12_61
-; RV32I-NEXT:  .LBB12_43:
-; RV32I-NEXT:    li s4, 3
-; RV32I-NEXT:    bne t1, s4, .LBB12_45
+; RV32I-NEXT:    beq t1, s10, .LBB12_58
+; RV32I-NEXT:  # %bb.43:
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    bnez t5, .LBB12_59
 ; RV32I-NEXT:  .LBB12_44:
-; RV32I-NEXT:    or a4, s10, t6
+; RV32I-NEXT:    beq t1, s11, .LBB12_60
 ; RV32I-NEXT:  .LBB12_45:
-; RV32I-NEXT:    li t6, 0
-; RV32I-NEXT:    li s4, 4
-; RV32I-NEXT:    bnez t5, .LBB12_62
-; RV32I-NEXT:  # %bb.46:
-; RV32I-NEXT:    beq t1, s4, .LBB12_63
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    bnez t5, .LBB12_61
+; RV32I-NEXT:  .LBB12_46:
+; RV32I-NEXT:    bne t1, s9, .LBB12_48
 ; RV32I-NEXT:  .LBB12_47:
-; RV32I-NEXT:    li t6, 0
-; RV32I-NEXT:    bnez t5, .LBB12_64
+; RV32I-NEXT:    or s6, a3, t3
 ; RV32I-NEXT:  .LBB12_48:
-; RV32I-NEXT:    beq t1, s9, .LBB12_65
-; RV32I-NEXT:  .LBB12_49:
-; RV32I-NEXT:    mv t6, s1
-; RV32I-NEXT:    bne t1, s2, .LBB12_66
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    li s9, 4
+; RV32I-NEXT:    bnez t5, .LBB12_62
+; RV32I-NEXT:  # %bb.49:
+; RV32I-NEXT:    beq t1, s9, .LBB12_63
 ; RV32I-NEXT:  .LBB12_50:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    bne t1, s3, .LBB12_67
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    bnez t5, .LBB12_64
 ; RV32I-NEXT:  .LBB12_51:
-; RV32I-NEXT:    beqz a7, .LBB12_53
+; RV32I-NEXT:    beq t1, s1, .LBB12_65
 ; RV32I-NEXT:  .LBB12_52:
-; RV32I-NEXT:    mv a1, a4
+; RV32I-NEXT:    mv t3, s4
+; RV32I-NEXT:    bne t1, s2, .LBB12_66
 ; RV32I-NEXT:  .LBB12_53:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    li t6, 2
-; RV32I-NEXT:    beqz t5, .LBB12_55
-; RV32I-NEXT:  # %bb.54:
-; RV32I-NEXT:    mv a4, t3
+; RV32I-NEXT:    li s6, 0
+; RV32I-NEXT:    bne t1, s3, .LBB12_67
+; RV32I-NEXT:  .LBB12_54:
+; RV32I-NEXT:    bnez a7, .LBB12_68
 ; RV32I-NEXT:  .LBB12_55:
-; RV32I-NEXT:    beqz t1, .LBB12_68
-; RV32I-NEXT:  # %bb.56:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    j .LBB12_69
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    bnez t5, .LBB12_69
+; RV32I-NEXT:  .LBB12_56:
+; RV32I-NEXT:    beqz t1, .LBB12_70
 ; RV32I-NEXT:  .LBB12_57:
-; RV32I-NEXT:    mv t6, t3
-; RV32I-NEXT:    bne t1, s0, .LBB12_40
+; RV32I-NEXT:    li s6, 0
+; RV32I-NEXT:    j .LBB12_71
 ; RV32I-NEXT:  .LBB12_58:
-; RV32I-NEXT:    or a4, t4, t6
-; RV32I-NEXT:    li t6, 0
-; RV32I-NEXT:    beqz t5, .LBB12_41
+; RV32I-NEXT:    or s6, t6, t3
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beqz t5, .LBB12_44
 ; RV32I-NEXT:  .LBB12_59:
-; RV32I-NEXT:    mv t6, s11
-; RV32I-NEXT:    bne t1, s4, .LBB12_42
+; RV32I-NEXT:    mv t3, t4
+; RV32I-NEXT:    bne t1, s11, .LBB12_45
 ; RV32I-NEXT:  .LBB12_60:
-; RV32I-NEXT:    or a4, ra, t6
-; RV32I-NEXT:    li t6, 0
-; RV32I-NEXT:    beqz t5, .LBB12_43
+; RV32I-NEXT:    srl s6, a5, a7
+; RV32I-NEXT:    or s6, s6, t3
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beqz t5, .LBB12_46
 ; RV32I-NEXT:  .LBB12_61:
-; RV32I-NEXT:    lw t6, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    li s4, 3
-; RV32I-NEXT:    beq t1, s4, .LBB12_44
-; RV32I-NEXT:    j .LBB12_45
+; RV32I-NEXT:    mv t3, s0
+; RV32I-NEXT:    beq t1, s9, .LBB12_47
+; RV32I-NEXT:    j .LBB12_48
 ; RV32I-NEXT:  .LBB12_62:
-; RV32I-NEXT:    mv t6, s7
-; RV32I-NEXT:    bne t1, s4, .LBB12_47
+; RV32I-NEXT:    mv t3, s7
+; RV32I-NEXT:    bne t1, s9, .LBB12_50
 ; RV32I-NEXT:  .LBB12_63:
-; RV32I-NEXT:    or a4, s8, t6
-; RV32I-NEXT:    li t6, 0
-; RV32I-NEXT:    beqz t5, .LBB12_48
+; RV32I-NEXT:    or s6, a1, t3
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beqz t5, .LBB12_51
 ; RV32I-NEXT:  .LBB12_64:
-; RV32I-NEXT:    mv t6, s5
-; RV32I-NEXT:    bne t1, s9, .LBB12_49
+; RV32I-NEXT:    mv t3, s5
+; RV32I-NEXT:    bne t1, s1, .LBB12_52
 ; RV32I-NEXT:  .LBB12_65:
-; RV32I-NEXT:    or a4, s6, t6
-; RV32I-NEXT:    mv t6, s1
-; RV32I-NEXT:    beq t1, s2, .LBB12_50
+; RV32I-NEXT:    or s6, ra, t3
+; RV32I-NEXT:    mv t3, s4
+; RV32I-NEXT:    beq t1, s2, .LBB12_53
 ; RV32I-NEXT:  .LBB12_66:
-; RV32I-NEXT:    mv t6, a4
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    beq t1, s3, .LBB12_51
+; RV32I-NEXT:    mv t3, s6
+; RV32I-NEXT:    li s6, 0
+; RV32I-NEXT:    beq t1, s3, .LBB12_54
 ; RV32I-NEXT:  .LBB12_67:
-; RV32I-NEXT:    mv a4, t6
-; RV32I-NEXT:    bnez a7, .LBB12_52
-; RV32I-NEXT:    j .LBB12_53
+; RV32I-NEXT:    mv s6, t3
+; RV32I-NEXT:    beqz a7, .LBB12_55
 ; RV32I-NEXT:  .LBB12_68:
-; RV32I-NEXT:    or a4, t4, a4
-; RV32I-NEXT:  .LBB12_69:
-; RV32I-NEXT:    li t4, 3
+; RV32I-NEXT:    sw s6, 8(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    bnez t5, .LBB12_84
-; RV32I-NEXT:  # %bb.70:
-; RV32I-NEXT:    beq t1, s0, .LBB12_85
+; RV32I-NEXT:    beqz t5, .LBB12_56
+; RV32I-NEXT:  .LBB12_69:
+; RV32I-NEXT:    mv t3, s8
+; RV32I-NEXT:    bnez t1, .LBB12_57
+; RV32I-NEXT:  .LBB12_70:
+; RV32I-NEXT:    or s6, t6, t3
 ; RV32I-NEXT:  .LBB12_71:
+; RV32I-NEXT:    li t6, 3
 ; RV32I-NEXT:    li t3, 0
 ; RV32I-NEXT:    bnez t5, .LBB12_86
-; RV32I-NEXT:  .LBB12_72:
-; RV32I-NEXT:    beq t1, t6, .LBB12_87
+; RV32I-NEXT:  # %bb.72:
+; RV32I-NEXT:    beq t1, s10, .LBB12_87
 ; RV32I-NEXT:  .LBB12_73:
 ; RV32I-NEXT:    li t3, 0
 ; RV32I-NEXT:    bnez t5, .LBB12_88
 ; RV32I-NEXT:  .LBB12_74:
-; RV32I-NEXT:    beq t1, t4, .LBB12_89
+; RV32I-NEXT:    beq t1, s11, .LBB12_89
 ; RV32I-NEXT:  .LBB12_75:
 ; RV32I-NEXT:    li t3, 0
 ; RV32I-NEXT:    bnez t5, .LBB12_90
 ; RV32I-NEXT:  .LBB12_76:
-; RV32I-NEXT:    beq t1, s4, .LBB12_91
+; RV32I-NEXT:    beq t1, t6, .LBB12_91
 ; RV32I-NEXT:  .LBB12_77:
-; RV32I-NEXT:    mv t3, s1
-; RV32I-NEXT:    bne t1, s9, .LBB12_92
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    bnez t5, .LBB12_92
 ; RV32I-NEXT:  .LBB12_78:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    bne t1, s2, .LBB12_93
+; RV32I-NEXT:    beq t1, s9, .LBB12_93
 ; RV32I-NEXT:  .LBB12_79:
-; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    bne t1, s3, .LBB12_94
+; RV32I-NEXT:    mv t3, s4
+; RV32I-NEXT:    bne t1, s1, .LBB12_94
 ; RV32I-NEXT:  .LBB12_80:
-; RV32I-NEXT:    bnez a7, .LBB12_95
+; RV32I-NEXT:    li s6, 0
+; RV32I-NEXT:    bne t1, s2, .LBB12_95
 ; RV32I-NEXT:  .LBB12_81:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    bnez t5, .LBB12_96
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    bne t1, s3, .LBB12_96
 ; RV32I-NEXT:  .LBB12_82:
-; RV32I-NEXT:    beqz t1, .LBB12_97
+; RV32I-NEXT:    bnez a7, .LBB12_97
 ; RV32I-NEXT:  .LBB12_83:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    j .LBB12_98
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    bnez t5, .LBB12_98
 ; RV32I-NEXT:  .LBB12_84:
-; RV32I-NEXT:    mv t3, s11
-; RV32I-NEXT:    bne t1, s0, .LBB12_71
+; RV32I-NEXT:    beqz t1, .LBB12_99
 ; RV32I-NEXT:  .LBB12_85:
-; RV32I-NEXT:    or a4, ra, t3
+; RV32I-NEXT:    li s6, 0
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    beqz t5, .LBB12_72
+; RV32I-NEXT:    bnez t5, .LBB12_100
+; RV32I-NEXT:    j .LBB12_101
 ; RV32I-NEXT:  .LBB12_86:
-; RV32I-NEXT:    lw t3, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    bne t1, t6, .LBB12_73
+; RV32I-NEXT:    mv t3, t4
+; RV32I-NEXT:    bne t1, s10, .LBB12_73
 ; RV32I-NEXT:  .LBB12_87:
-; RV32I-NEXT:    or a4, s10, t3
+; RV32I-NEXT:    srl s6, a5, a7
+; RV32I-NEXT:    or s6, s6, t3
 ; RV32I-NEXT:    li t3, 0
 ; RV32I-NEXT:    beqz t5, .LBB12_74
 ; RV32I-NEXT:  .LBB12_88:
-; RV32I-NEXT:    mv t3, s7
-; RV32I-NEXT:    bne t1, t4, .LBB12_75
+; RV32I-NEXT:    mv t3, s0
+; RV32I-NEXT:    bne t1, s11, .LBB12_75
 ; RV32I-NEXT:  .LBB12_89:
-; RV32I-NEXT:    or a4, s8, t3
+; RV32I-NEXT:    or s6, a3, t3
 ; RV32I-NEXT:    li t3, 0
 ; RV32I-NEXT:    beqz t5, .LBB12_76
 ; RV32I-NEXT:  .LBB12_90:
-; RV32I-NEXT:    mv t3, s5
-; RV32I-NEXT:    bne t1, s4, .LBB12_77
+; RV32I-NEXT:    mv t3, s7
+; RV32I-NEXT:    bne t1, t6, .LBB12_77
 ; RV32I-NEXT:  .LBB12_91:
-; RV32I-NEXT:    or a4, s6, t3
-; RV32I-NEXT:    mv t3, s1
-; RV32I-NEXT:    beq t1, s9, .LBB12_78
+; RV32I-NEXT:    or s6, a1, t3
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beqz t5, .LBB12_78
 ; RV32I-NEXT:  .LBB12_92:
-; RV32I-NEXT:    mv t3, a4
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    beq t1, s2, .LBB12_79
+; RV32I-NEXT:    mv t3, s5
+; RV32I-NEXT:    bne t1, s9, .LBB12_79
 ; RV32I-NEXT:  .LBB12_93:
-; RV32I-NEXT:    mv a4, t3
-; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    beq t1, s3, .LBB12_80
+; RV32I-NEXT:    or s6, ra, t3
+; RV32I-NEXT:    mv t3, s4
+; RV32I-NEXT:    beq t1, s1, .LBB12_80
 ; RV32I-NEXT:  .LBB12_94:
-; RV32I-NEXT:    mv t3, a4
-; RV32I-NEXT:    beqz a7, .LBB12_81
+; RV32I-NEXT:    mv t3, s6
+; RV32I-NEXT:    li s6, 0
+; RV32I-NEXT:    beq t1, s2, .LBB12_81
 ; RV32I-NEXT:  .LBB12_95:
-; RV32I-NEXT:    mv a3, t3
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    beqz t5, .LBB12_82
+; RV32I-NEXT:    mv s6, t3
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beq t1, s3, .LBB12_82
 ; RV32I-NEXT:  .LBB12_96:
-; RV32I-NEXT:    mv a4, s11
-; RV32I-NEXT:    bnez t1, .LBB12_83
+; RV32I-NEXT:    mv t3, s6
+; RV32I-NEXT:    beqz a7, .LBB12_83
 ; RV32I-NEXT:  .LBB12_97:
-; RV32I-NEXT:    or a4, ra, a4
+; RV32I-NEXT:    sw t3, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beqz t5, .LBB12_84
 ; RV32I-NEXT:  .LBB12_98:
-; RV32I-NEXT:    lw ra, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    mv t3, t4
+; RV32I-NEXT:    bnez t1, .LBB12_85
+; RV32I-NEXT:  .LBB12_99:
+; RV32I-NEXT:    srl t4, a5, a7
+; RV32I-NEXT:    or s6, t4, t3
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    bnez t5, .LBB12_112
-; RV32I-NEXT:  # %bb.99:
-; RV32I-NEXT:    beq t1, s0, .LBB12_113
+; RV32I-NEXT:    beqz t5, .LBB12_101
 ; RV32I-NEXT:  .LBB12_100:
-; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    bnez t5, .LBB12_114
+; RV32I-NEXT:    mv t3, s0
 ; RV32I-NEXT:  .LBB12_101:
-; RV32I-NEXT:    beq t1, t6, .LBB12_115
-; RV32I-NEXT:  .LBB12_102:
+; RV32I-NEXT:    beq t1, s10, .LBB12_114
+; RV32I-NEXT:  # %bb.102:
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    bnez t5, .LBB12_116
+; RV32I-NEXT:    bnez t5, .LBB12_115
 ; RV32I-NEXT:  .LBB12_103:
-; RV32I-NEXT:    beq t1, t4, .LBB12_117
+; RV32I-NEXT:    beq t1, s11, .LBB12_116
 ; RV32I-NEXT:  .LBB12_104:
-; RV32I-NEXT:    mv t3, s1
-; RV32I-NEXT:    bne t1, s4, .LBB12_118
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    bnez t5, .LBB12_117
 ; RV32I-NEXT:  .LBB12_105:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    bne t1, s9, .LBB12_119
+; RV32I-NEXT:    beq t1, t6, .LBB12_118
 ; RV32I-NEXT:  .LBB12_106:
-; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    bne t1, s2, .LBB12_120
+; RV32I-NEXT:    mv t3, s4
+; RV32I-NEXT:    bne t1, s9, .LBB12_119
 ; RV32I-NEXT:  .LBB12_107:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    bne t1, s3, .LBB12_121
+; RV32I-NEXT:    li s6, 0
+; RV32I-NEXT:    bne t1, s1, .LBB12_120
 ; RV32I-NEXT:  .LBB12_108:
-; RV32I-NEXT:    bnez a7, .LBB12_122
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    bne t1, s2, .LBB12_121
 ; RV32I-NEXT:  .LBB12_109:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    bnez t5, .LBB12_123
+; RV32I-NEXT:    li s6, 0
+; RV32I-NEXT:    bne t1, s3, .LBB12_122
 ; RV32I-NEXT:  .LBB12_110:
-; RV32I-NEXT:    beqz t1, .LBB12_124
+; RV32I-NEXT:    bnez a7, .LBB12_123
 ; RV32I-NEXT:  .LBB12_111:
-; RV32I-NEXT:    li a4, 0
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    bnez t5, .LBB12_125
-; RV32I-NEXT:    j .LBB12_126
+; RV32I-NEXT:    bnez t5, .LBB12_124
 ; RV32I-NEXT:  .LBB12_112:
-; RV32I-NEXT:    lw t3, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    bne t1, s0, .LBB12_100
+; RV32I-NEXT:    beqz t1, .LBB12_125
 ; RV32I-NEXT:  .LBB12_113:
-; RV32I-NEXT:    or a4, s10, t3
+; RV32I-NEXT:    li s6, 0
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    beqz t5, .LBB12_101
+; RV32I-NEXT:    bnez t5, .LBB12_126
+; RV32I-NEXT:    j .LBB12_127
 ; RV32I-NEXT:  .LBB12_114:
-; RV32I-NEXT:    mv t3, s7
-; RV32I-NEXT:    bne t1, t6, .LBB12_102
-; RV32I-NEXT:  .LBB12_115:
-; RV32I-NEXT:    or a4, s8, t3
+; RV32I-NEXT:    or s6, a3, t3
 ; RV32I-NEXT:    li t3, 0
 ; RV32I-NEXT:    beqz t5, .LBB12_103
+; RV32I-NEXT:  .LBB12_115:
+; RV32I-NEXT:    mv t3, s7
+; RV32I-NEXT:    bne t1, s11, .LBB12_104
 ; RV32I-NEXT:  .LBB12_116:
-; RV32I-NEXT:    mv t3, s5
-; RV32I-NEXT:    bne t1, t4, .LBB12_104
+; RV32I-NEXT:    or s6, a1, t3
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beqz t5, .LBB12_105
 ; RV32I-NEXT:  .LBB12_117:
-; RV32I-NEXT:    or a4, s6, t3
-; RV32I-NEXT:    mv t3, s1
-; RV32I-NEXT:    beq t1, s4, .LBB12_105
+; RV32I-NEXT:    mv t3, s5
+; RV32I-NEXT:    bne t1, t6, .LBB12_106
 ; RV32I-NEXT:  .LBB12_118:
-; RV32I-NEXT:    mv t3, a4
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    beq t1, s9, .LBB12_106
+; RV32I-NEXT:    or s6, ra, t3
+; RV32I-NEXT:    mv t3, s4
+; RV32I-NEXT:    beq t1, s9, .LBB12_107
 ; RV32I-NEXT:  .LBB12_119:
-; RV32I-NEXT:    mv a4, t3
-; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    beq t1, s2, .LBB12_107
+; RV32I-NEXT:    mv t3, s6
+; RV32I-NEXT:    li s6, 0
+; RV32I-NEXT:    beq t1, s1, .LBB12_108
 ; RV32I-NEXT:  .LBB12_120:
-; RV32I-NEXT:    mv t3, a4
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    beq t1, s3, .LBB12_108
+; RV32I-NEXT:    mv s6, t3
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beq t1, s2, .LBB12_109
 ; RV32I-NEXT:  .LBB12_121:
-; RV32I-NEXT:    mv a4, t3
-; RV32I-NEXT:    beqz a7, .LBB12_109
+; RV32I-NEXT:    mv t3, s6
+; RV32I-NEXT:    li s6, 0
+; RV32I-NEXT:    beq t1, s3, .LBB12_110
 ; RV32I-NEXT:  .LBB12_122:
-; RV32I-NEXT:    mv a5, a4
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    beqz t5, .LBB12_110
+; RV32I-NEXT:    mv s6, t3
+; RV32I-NEXT:    beqz a7, .LBB12_111
 ; RV32I-NEXT:  .LBB12_123:
-; RV32I-NEXT:    lw a4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    bnez t1, .LBB12_111
-; RV32I-NEXT:  .LBB12_124:
-; RV32I-NEXT:    or a4, s10, a4
+; RV32I-NEXT:    mv a5, s6
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    beqz t5, .LBB12_126
+; RV32I-NEXT:    beqz t5, .LBB12_112
+; RV32I-NEXT:  .LBB12_124:
+; RV32I-NEXT:    mv t3, s0
+; RV32I-NEXT:    bnez t1, .LBB12_113
 ; RV32I-NEXT:  .LBB12_125:
-; RV32I-NEXT:    mv t3, s7
+; RV32I-NEXT:    or s6, a3, t3
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beqz t5, .LBB12_127
 ; RV32I-NEXT:  .LBB12_126:
-; RV32I-NEXT:    beq t1, s0, .LBB12_138
-; RV32I-NEXT:  # %bb.127:
+; RV32I-NEXT:    mv t3, s7
+; RV32I-NEXT:  .LBB12_127:
+; RV32I-NEXT:    beq t1, s10, .LBB12_139
+; RV32I-NEXT:  # %bb.128:
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    bnez t5, .LBB12_139
-; RV32I-NEXT:  .LBB12_128:
-; RV32I-NEXT:    beq t1, t6, .LBB12_140
+; RV32I-NEXT:    bnez t5, .LBB12_140
 ; RV32I-NEXT:  .LBB12_129:
-; RV32I-NEXT:    mv t3, s1
-; RV32I-NEXT:    bne t1, t4, .LBB12_141
+; RV32I-NEXT:    beq t1, s11, .LBB12_141
 ; RV32I-NEXT:  .LBB12_130:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    bne t1, s4, .LBB12_142
+; RV32I-NEXT:    mv t3, s4
+; RV32I-NEXT:    bne t1, t6, .LBB12_142
 ; RV32I-NEXT:  .LBB12_131:
-; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    li s6, 0
 ; RV32I-NEXT:    bne t1, s9, .LBB12_143
 ; RV32I-NEXT:  .LBB12_132:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    bne t1, s2, .LBB12_144
-; RV32I-NEXT:  .LBB12_133:
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    bne t1, s3, .LBB12_145
+; RV32I-NEXT:    bne t1, s1, .LBB12_144
+; RV32I-NEXT:  .LBB12_133:
+; RV32I-NEXT:    li s6, 0
+; RV32I-NEXT:    bne t1, s2, .LBB12_145
 ; RV32I-NEXT:  .LBB12_134:
-; RV32I-NEXT:    bnez a7, .LBB12_146
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    bne t1, s3, .LBB12_146
 ; RV32I-NEXT:  .LBB12_135:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    bnez t5, .LBB12_147
+; RV32I-NEXT:    bnez a7, .LBB12_147
 ; RV32I-NEXT:  .LBB12_136:
-; RV32I-NEXT:    beqz t1, .LBB12_148
-; RV32I-NEXT:  .LBB12_137:
-; RV32I-NEXT:    li a4, 0
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    bnez t5, .LBB12_149
-; RV32I-NEXT:    j .LBB12_150
+; RV32I-NEXT:    bnez t5, .LBB12_148
+; RV32I-NEXT:  .LBB12_137:
+; RV32I-NEXT:    beqz t1, .LBB12_149
 ; RV32I-NEXT:  .LBB12_138:
-; RV32I-NEXT:    or a4, s8, t3
+; RV32I-NEXT:    li s6, 0
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    beqz t5, .LBB12_128
+; RV32I-NEXT:    bnez t5, .LBB12_150
+; RV32I-NEXT:    j .LBB12_151
 ; RV32I-NEXT:  .LBB12_139:
-; RV32I-NEXT:    mv t3, s5
-; RV32I-NEXT:    bne t1, t6, .LBB12_129
+; RV32I-NEXT:    or s6, a1, t3
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beqz t5, .LBB12_129
 ; RV32I-NEXT:  .LBB12_140:
-; RV32I-NEXT:    or a4, s6, t3
-; RV32I-NEXT:    mv t3, s1
-; RV32I-NEXT:    beq t1, t4, .LBB12_130
+; RV32I-NEXT:    mv t3, s5
+; RV32I-NEXT:    bne t1, s11, .LBB12_130
 ; RV32I-NEXT:  .LBB12_141:
-; RV32I-NEXT:    mv t3, a4
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    beq t1, s4, .LBB12_131
+; RV32I-NEXT:    or s6, ra, t3
+; RV32I-NEXT:    mv t3, s4
+; RV32I-NEXT:    beq t1, t6, .LBB12_131
 ; RV32I-NEXT:  .LBB12_142:
-; RV32I-NEXT:    mv a4, t3
-; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    mv t3, s6
+; RV32I-NEXT:    li s6, 0
 ; RV32I-NEXT:    beq t1, s9, .LBB12_132
 ; RV32I-NEXT:  .LBB12_143:
-; RV32I-NEXT:    mv t3, a4
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    beq t1, s2, .LBB12_133
-; RV32I-NEXT:  .LBB12_144:
-; RV32I-NEXT:    mv a4, t3
+; RV32I-NEXT:    mv s6, t3
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    beq t1, s3, .LBB12_134
+; RV32I-NEXT:    beq t1, s1, .LBB12_133
+; RV32I-NEXT:  .LBB12_144:
+; RV32I-NEXT:    mv t3, s6
+; RV32I-NEXT:    li s6, 0
+; RV32I-NEXT:    beq t1, s2, .LBB12_134
 ; RV32I-NEXT:  .LBB12_145:
-; RV32I-NEXT:    mv t3, a4
-; RV32I-NEXT:    beqz a7, .LBB12_135
+; RV32I-NEXT:    mv s6, t3
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beq t1, s3, .LBB12_135
 ; RV32I-NEXT:  .LBB12_146:
-; RV32I-NEXT:    mv a6, t3
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    beqz t5, .LBB12_136
+; RV32I-NEXT:    mv t3, s6
+; RV32I-NEXT:    beqz a7, .LBB12_136
 ; RV32I-NEXT:  .LBB12_147:
-; RV32I-NEXT:    mv a4, s7
-; RV32I-NEXT:    bnez t1, .LBB12_137
-; RV32I-NEXT:  .LBB12_148:
-; RV32I-NEXT:    or a4, s8, a4
+; RV32I-NEXT:    mv a6, t3
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    beqz t5, .LBB12_150
+; RV32I-NEXT:    beqz t5, .LBB12_137
+; RV32I-NEXT:  .LBB12_148:
+; RV32I-NEXT:    mv t3, s7
+; RV32I-NEXT:    bnez t1, .LBB12_138
 ; RV32I-NEXT:  .LBB12_149:
-; RV32I-NEXT:    mv t3, s5
+; RV32I-NEXT:    or s6, a1, t3
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beqz t5, .LBB12_151
 ; RV32I-NEXT:  .LBB12_150:
-; RV32I-NEXT:    beq t1, s0, .LBB12_161
-; RV32I-NEXT:  # %bb.151:
-; RV32I-NEXT:    mv t3, s1
-; RV32I-NEXT:    bne t1, t6, .LBB12_162
-; RV32I-NEXT:  .LBB12_152:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    bne t1, t4, .LBB12_163
+; RV32I-NEXT:    mv t3, s5
+; RV32I-NEXT:  .LBB12_151:
+; RV32I-NEXT:    beq t1, s10, .LBB12_162
+; RV32I-NEXT:  # %bb.152:
+; RV32I-NEXT:    mv t3, s4
+; RV32I-NEXT:    bne t1, s11, .LBB12_163
 ; RV32I-NEXT:  .LBB12_153:
-; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    bne t1, s4, .LBB12_164
+; RV32I-NEXT:    li s6, 0
+; RV32I-NEXT:    bne t1, t6, .LBB12_164
 ; RV32I-NEXT:  .LBB12_154:
-; RV32I-NEXT:    li a4, 0
+; RV32I-NEXT:    li t3, 0
 ; RV32I-NEXT:    bne t1, s9, .LBB12_165
 ; RV32I-NEXT:  .LBB12_155:
-; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    bne t1, s2, .LBB12_166
+; RV32I-NEXT:    li s6, 0
+; RV32I-NEXT:    bne t1, s1, .LBB12_166
 ; RV32I-NEXT:  .LBB12_156:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    bne t1, s3, .LBB12_167
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    bne t1, s2, .LBB12_167
 ; RV32I-NEXT:  .LBB12_157:
-; RV32I-NEXT:    bnez a7, .LBB12_168
+; RV32I-NEXT:    li s6, 0
+; RV32I-NEXT:    bne t1, s3, .LBB12_168
 ; RV32I-NEXT:  .LBB12_158:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    bnez t5, .LBB12_169
+; RV32I-NEXT:    bnez a7, .LBB12_169
 ; RV32I-NEXT:  .LBB12_159:
-; RV32I-NEXT:    beqz t1, .LBB12_170
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    bnez t5, .LBB12_170
 ; RV32I-NEXT:  .LBB12_160:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    mv t3, s1
-; RV32I-NEXT:    bne t1, s0, .LBB12_171
-; RV32I-NEXT:    j .LBB12_172
+; RV32I-NEXT:    beqz t1, .LBB12_171
 ; RV32I-NEXT:  .LBB12_161:
-; RV32I-NEXT:    or a4, s6, t3
-; RV32I-NEXT:    mv t3, s1
-; RV32I-NEXT:    beq t1, t6, .LBB12_152
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    j .LBB12_172
 ; RV32I-NEXT:  .LBB12_162:
-; RV32I-NEXT:    mv t3, a4
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    beq t1, t4, .LBB12_153
+; RV32I-NEXT:    or s6, ra, t3
+; RV32I-NEXT:    mv t3, s4
+; RV32I-NEXT:    beq t1, s11, .LBB12_153
 ; RV32I-NEXT:  .LBB12_163:
-; RV32I-NEXT:    mv a4, t3
-; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    beq t1, s4, .LBB12_154
+; RV32I-NEXT:    mv t3, s6
+; RV32I-NEXT:    li s6, 0
+; RV32I-NEXT:    beq t1, t6, .LBB12_154
 ; RV32I-NEXT:  .LBB12_164:
-; RV32I-NEXT:    mv t3, a4
-; RV32I-NEXT:    li a4, 0
+; RV32I-NEXT:    mv s6, t3
+; RV32I-NEXT:    li t3, 0
 ; RV32I-NEXT:    beq t1, s9, .LBB12_155
 ; RV32I-NEXT:  .LBB12_165:
-; RV32I-NEXT:    mv a4, t3
-; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    beq t1, s2, .LBB12_156
+; RV32I-NEXT:    mv t3, s6
+; RV32I-NEXT:    li s6, 0
+; RV32I-NEXT:    beq t1, s1, .LBB12_156
 ; RV32I-NEXT:  .LBB12_166:
-; RV32I-NEXT:    mv t3, a4
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    beq t1, s3, .LBB12_157
+; RV32I-NEXT:    mv s6, t3
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beq t1, s2, .LBB12_157
 ; RV32I-NEXT:  .LBB12_167:
-; RV32I-NEXT:    mv a4, t3
-; RV32I-NEXT:    beqz a7, .LBB12_158
+; RV32I-NEXT:    mv t3, s6
+; RV32I-NEXT:    li s6, 0
+; RV32I-NEXT:    beq t1, s3, .LBB12_158
 ; RV32I-NEXT:  .LBB12_168:
-; RV32I-NEXT:    mv t0, a4
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    beqz t5, .LBB12_159
+; RV32I-NEXT:    mv s6, t3
+; RV32I-NEXT:    beqz a7, .LBB12_159
 ; RV32I-NEXT:  .LBB12_169:
-; RV32I-NEXT:    mv a4, s5
-; RV32I-NEXT:    bnez t1, .LBB12_160
+; RV32I-NEXT:    mv t0, s6
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beqz t5, .LBB12_160
 ; RV32I-NEXT:  .LBB12_170:
-; RV32I-NEXT:    or a4, s6, a4
-; RV32I-NEXT:    mv t3, s1
-; RV32I-NEXT:    beq t1, s0, .LBB12_172
+; RV32I-NEXT:    mv t3, s5
+; RV32I-NEXT:    bnez t1, .LBB12_161
 ; RV32I-NEXT:  .LBB12_171:
-; RV32I-NEXT:    mv t3, a4
+; RV32I-NEXT:    or t3, ra, t3
 ; RV32I-NEXT:  .LBB12_172:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    bne t1, t6, .LBB12_190
+; RV32I-NEXT:    lw a1, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    mv t5, s4
+; RV32I-NEXT:    bne t1, s10, .LBB12_191
 ; RV32I-NEXT:  # %bb.173:
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    bne t1, t4, .LBB12_191
+; RV32I-NEXT:    bne t1, s11, .LBB12_192
 ; RV32I-NEXT:  .LBB12_174:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    bne t1, s4, .LBB12_192
+; RV32I-NEXT:    li t5, 0
+; RV32I-NEXT:    bne t1, t6, .LBB12_193
 ; RV32I-NEXT:  .LBB12_175:
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    bne t1, s9, .LBB12_193
+; RV32I-NEXT:    bne t1, s9, .LBB12_194
 ; RV32I-NEXT:  .LBB12_176:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    bne t1, s2, .LBB12_194
+; RV32I-NEXT:    li t5, 0
+; RV32I-NEXT:    bne t1, s1, .LBB12_195
 ; RV32I-NEXT:  .LBB12_177:
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    bne t1, s3, .LBB12_195
+; RV32I-NEXT:    bne t1, s2, .LBB12_196
 ; RV32I-NEXT:  .LBB12_178:
-; RV32I-NEXT:    bnez a7, .LBB12_196
+; RV32I-NEXT:    li t5, 0
+; RV32I-NEXT:    bne t1, s3, .LBB12_197
 ; RV32I-NEXT:  .LBB12_179:
-; RV32I-NEXT:    bnez t1, .LBB12_197
+; RV32I-NEXT:    bnez a7, .LBB12_198
 ; RV32I-NEXT:  .LBB12_180:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    bne t1, s0, .LBB12_198
+; RV32I-NEXT:    bnez t1, .LBB12_199
 ; RV32I-NEXT:  .LBB12_181:
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    bne t1, t6, .LBB12_199
+; RV32I-NEXT:    bne t1, s10, .LBB12_200
 ; RV32I-NEXT:  .LBB12_182:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    bne t1, t4, .LBB12_200
+; RV32I-NEXT:    li t5, 0
+; RV32I-NEXT:    bne t1, s11, .LBB12_201
 ; RV32I-NEXT:  .LBB12_183:
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    bne t1, s4, .LBB12_201
+; RV32I-NEXT:    bne t1, t6, .LBB12_202
 ; RV32I-NEXT:  .LBB12_184:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    bne t1, s9, .LBB12_202
+; RV32I-NEXT:    li t4, 0
+; RV32I-NEXT:    bne t1, s9, .LBB12_203
 ; RV32I-NEXT:  .LBB12_185:
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    bne t1, s2, .LBB12_203
+; RV32I-NEXT:    bne t1, s1, .LBB12_204
 ; RV32I-NEXT:  .LBB12_186:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    bne t1, s3, .LBB12_204
+; RV32I-NEXT:    li t4, 0
+; RV32I-NEXT:    bne t1, s2, .LBB12_205
 ; RV32I-NEXT:  .LBB12_187:
-; RV32I-NEXT:    beqz a7, .LBB12_189
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    bne t1, s3, .LBB12_206
 ; RV32I-NEXT:  .LBB12_188:
-; RV32I-NEXT:    mv t2, a4
+; RV32I-NEXT:    beqz a7, .LBB12_190
 ; RV32I-NEXT:  .LBB12_189:
-; RV32I-NEXT:    srli a4, ra, 16
-; RV32I-NEXT:    lui t4, 16
-; RV32I-NEXT:    srli t3, ra, 24
-; RV32I-NEXT:    srli a7, a1, 16
-; RV32I-NEXT:    srli t6, a1, 24
-; RV32I-NEXT:    srli t1, a3, 16
-; RV32I-NEXT:    srli s2, a3, 24
-; RV32I-NEXT:    srli t5, a5, 16
-; RV32I-NEXT:    srli s3, a5, 24
-; RV32I-NEXT:    srli s1, a6, 16
-; RV32I-NEXT:    srli s6, a6, 24
-; RV32I-NEXT:    srli s0, t0, 16
-; RV32I-NEXT:    srli s5, t0, 24
-; RV32I-NEXT:    srli s4, a0, 16
-; RV32I-NEXT:    srli s7, a0, 24
-; RV32I-NEXT:    srli s8, t2, 16
-; RV32I-NEXT:    srli s9, t2, 24
-; RV32I-NEXT:    addi t4, t4, -1
-; RV32I-NEXT:    and s10, ra, t4
-; RV32I-NEXT:    and s11, a1, t4
-; RV32I-NEXT:    srli s10, s10, 8
-; RV32I-NEXT:    sb ra, 0(a2)
-; RV32I-NEXT:    sb s10, 1(a2)
-; RV32I-NEXT:    sb a4, 2(a2)
-; RV32I-NEXT:    sb t3, 3(a2)
-; RV32I-NEXT:    and a4, a3, t4
-; RV32I-NEXT:    srli t3, s11, 8
+; RV32I-NEXT:    mv t2, t3
+; RV32I-NEXT:  .LBB12_190:
+; RV32I-NEXT:    srli a7, a4, 16
+; RV32I-NEXT:    lui t5, 16
+; RV32I-NEXT:    srli t4, a4, 24
+; RV32I-NEXT:    srli t1, a1, 16
+; RV32I-NEXT:    srli s0, a1, 24
+; RV32I-NEXT:    lw a3, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    srli t3, a3, 16
+; RV32I-NEXT:    srli s3, a3, 24
+; RV32I-NEXT:    srli t6, a5, 16
+; RV32I-NEXT:    srli s4, a5, 24
+; RV32I-NEXT:    srli s2, a6, 16
+; RV32I-NEXT:    srli s7, a6, 24
+; RV32I-NEXT:    srli s1, t0, 16
+; RV32I-NEXT:    srli s6, t0, 24
+; RV32I-NEXT:    srli s5, a0, 16
+; RV32I-NEXT:    srli s8, a0, 24
+; RV32I-NEXT:    srli s9, t2, 16
+; RV32I-NEXT:    srli s10, t2, 24
+; RV32I-NEXT:    addi t5, t5, -1
+; RV32I-NEXT:    and s11, a4, t5
+; RV32I-NEXT:    and ra, a1, t5
+; RV32I-NEXT:    srli s11, s11, 8
+; RV32I-NEXT:    sb a4, 0(a2)
+; RV32I-NEXT:    sb s11, 1(a2)
+; RV32I-NEXT:    sb a7, 2(a2)
+; RV32I-NEXT:    sb t4, 3(a2)
+; RV32I-NEXT:    and a4, a3, t5
+; RV32I-NEXT:    srli a7, ra, 8
 ; RV32I-NEXT:    sb a1, 4(a2)
-; RV32I-NEXT:    sb t3, 5(a2)
-; RV32I-NEXT:    sb a7, 6(a2)
-; RV32I-NEXT:    sb t6, 7(a2)
-; RV32I-NEXT:    and a1, a5, t4
+; RV32I-NEXT:    sb a7, 5(a2)
+; RV32I-NEXT:    sb t1, 6(a2)
+; RV32I-NEXT:    sb s0, 7(a2)
+; RV32I-NEXT:    and a1, a5, t5
 ; RV32I-NEXT:    srli a4, a4, 8
 ; RV32I-NEXT:    sb a3, 8(a2)
 ; RV32I-NEXT:    sb a4, 9(a2)
-; RV32I-NEXT:    sb t1, 10(a2)
-; RV32I-NEXT:    sb s2, 11(a2)
-; RV32I-NEXT:    and a3, a6, t4
+; RV32I-NEXT:    sb t3, 10(a2)
+; RV32I-NEXT:    sb s3, 11(a2)
+; RV32I-NEXT:    and a3, a6, t5
 ; RV32I-NEXT:    srli a1, a1, 8
 ; RV32I-NEXT:    sb a5, 12(a2)
 ; RV32I-NEXT:    sb a1, 13(a2)
-; RV32I-NEXT:    sb t5, 14(a2)
-; RV32I-NEXT:    sb s3, 15(a2)
-; RV32I-NEXT:    and a1, t0, t4
+; RV32I-NEXT:    sb t6, 14(a2)
+; RV32I-NEXT:    sb s4, 15(a2)
+; RV32I-NEXT:    and a1, t0, t5
 ; RV32I-NEXT:    srli a3, a3, 8
 ; RV32I-NEXT:    sb a6, 16(a2)
 ; RV32I-NEXT:    sb a3, 17(a2)
-; RV32I-NEXT:    sb s1, 18(a2)
-; RV32I-NEXT:    sb s6, 19(a2)
-; RV32I-NEXT:    and a3, a0, t4
-; RV32I-NEXT:    and a4, t2, t4
+; RV32I-NEXT:    sb s2, 18(a2)
+; RV32I-NEXT:    sb s7, 19(a2)
+; RV32I-NEXT:    and a3, a0, t5
+; RV32I-NEXT:    and a4, t2, t5
 ; RV32I-NEXT:    srli a1, a1, 8
 ; RV32I-NEXT:    srli a3, a3, 8
 ; RV32I-NEXT:    srli a4, a4, 8
 ; RV32I-NEXT:    sb t0, 20(a2)
 ; RV32I-NEXT:    sb a1, 21(a2)
-; RV32I-NEXT:    sb s0, 22(a2)
-; RV32I-NEXT:    sb s5, 23(a2)
+; RV32I-NEXT:    sb s1, 22(a2)
+; RV32I-NEXT:    sb s6, 23(a2)
 ; RV32I-NEXT:    sb a0, 24(a2)
 ; RV32I-NEXT:    sb a3, 25(a2)
-; RV32I-NEXT:    sb s4, 26(a2)
-; RV32I-NEXT:    sb s7, 27(a2)
+; RV32I-NEXT:    sb s5, 26(a2)
+; RV32I-NEXT:    sb s8, 27(a2)
 ; RV32I-NEXT:    sb t2, 28(a2)
 ; RV32I-NEXT:    sb a4, 29(a2)
-; RV32I-NEXT:    sb s8, 30(a2)
-; RV32I-NEXT:    sb s9, 31(a2)
+; RV32I-NEXT:    sb s9, 30(a2)
+; RV32I-NEXT:    sb s10, 31(a2)
 ; RV32I-NEXT:    lw ra, 60(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 56(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s1, 52(sp) # 4-byte Folded Reload
@@ -3758,64 +3772,68 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
 ; RV32I-NEXT:    lw s11, 12(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 64
 ; RV32I-NEXT:    ret
-; RV32I-NEXT:  .LBB12_190:
-; RV32I-NEXT:    mv a4, t3
-; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    beq t1, t4, .LBB12_174
 ; RV32I-NEXT:  .LBB12_191:
-; RV32I-NEXT:    mv t3, a4
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    beq t1, s4, .LBB12_175
+; RV32I-NEXT:    mv t5, t3
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beq t1, s11, .LBB12_174
 ; RV32I-NEXT:  .LBB12_192:
-; RV32I-NEXT:    mv a4, t3
+; RV32I-NEXT:    mv t3, t5
+; RV32I-NEXT:    li t5, 0
+; RV32I-NEXT:    beq t1, t6, .LBB12_175
+; RV32I-NEXT:  .LBB12_193:
+; RV32I-NEXT:    mv t5, t3
 ; RV32I-NEXT:    li t3, 0
 ; RV32I-NEXT:    beq t1, s9, .LBB12_176
-; RV32I-NEXT:  .LBB12_193:
-; RV32I-NEXT:    mv t3, a4
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    beq t1, s2, .LBB12_177
 ; RV32I-NEXT:  .LBB12_194:
-; RV32I-NEXT:    mv a4, t3
-; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    beq t1, s3, .LBB12_178
+; RV32I-NEXT:    mv t3, t5
+; RV32I-NEXT:    li t5, 0
+; RV32I-NEXT:    beq t1, s1, .LBB12_177
 ; RV32I-NEXT:  .LBB12_195:
-; RV32I-NEXT:    mv t3, a4
-; RV32I-NEXT:    beqz a7, .LBB12_179
+; RV32I-NEXT:    mv t5, t3
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beq t1, s2, .LBB12_178
 ; RV32I-NEXT:  .LBB12_196:
-; RV32I-NEXT:    mv a0, t3
-; RV32I-NEXT:    beqz t1, .LBB12_180
+; RV32I-NEXT:    mv t3, t5
+; RV32I-NEXT:    li t5, 0
+; RV32I-NEXT:    beq t1, s3, .LBB12_179
 ; RV32I-NEXT:  .LBB12_197:
-; RV32I-NEXT:    li s1, 0
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    beq t1, s0, .LBB12_181
+; RV32I-NEXT:    mv t5, t3
+; RV32I-NEXT:    beqz a7, .LBB12_180
 ; RV32I-NEXT:  .LBB12_198:
-; RV32I-NEXT:    mv a4, s1
-; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    beq t1, t6, .LBB12_182
+; RV32I-NEXT:    mv a0, t5
+; RV32I-NEXT:    beqz t1, .LBB12_181
 ; RV32I-NEXT:  .LBB12_199:
-; RV32I-NEXT:    mv t3, a4
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    beq t1, t4, .LBB12_183
-; RV32I-NEXT:  .LBB12_200:
-; RV32I-NEXT:    mv a4, t3
+; RV32I-NEXT:    li s4, 0
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    beq t1, s4, .LBB12_184
+; RV32I-NEXT:    beq t1, s10, .LBB12_182
+; RV32I-NEXT:  .LBB12_200:
+; RV32I-NEXT:    mv t3, s4
+; RV32I-NEXT:    li t5, 0
+; RV32I-NEXT:    beq t1, s11, .LBB12_183
 ; RV32I-NEXT:  .LBB12_201:
-; RV32I-NEXT:    mv t3, a4
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    beq t1, s9, .LBB12_185
-; RV32I-NEXT:  .LBB12_202:
-; RV32I-NEXT:    mv a4, t3
+; RV32I-NEXT:    mv t5, t3
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    beq t1, s2, .LBB12_186
+; RV32I-NEXT:    beq t1, t6, .LBB12_184
+; RV32I-NEXT:  .LBB12_202:
+; RV32I-NEXT:    mv t3, t5
+; RV32I-NEXT:    li t4, 0
+; RV32I-NEXT:    beq t1, s9, .LBB12_185
 ; RV32I-NEXT:  .LBB12_203:
-; RV32I-NEXT:    mv t3, a4
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    beq t1, s3, .LBB12_187
+; RV32I-NEXT:    mv t4, t3
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beq t1, s1, .LBB12_186
 ; RV32I-NEXT:  .LBB12_204:
-; RV32I-NEXT:    mv a4, t3
-; RV32I-NEXT:    bnez a7, .LBB12_188
-; RV32I-NEXT:    j .LBB12_189
+; RV32I-NEXT:    mv t3, t4
+; RV32I-NEXT:    li t4, 0
+; RV32I-NEXT:    beq t1, s2, .LBB12_187
+; RV32I-NEXT:  .LBB12_205:
+; RV32I-NEXT:    mv t4, t3
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beq t1, s3, .LBB12_188
+; RV32I-NEXT:  .LBB12_206:
+; RV32I-NEXT:    mv t3, t4
+; RV32I-NEXT:    bnez a7, .LBB12_189
+; RV32I-NEXT:    j .LBB12_190
   %src = load i256, ptr %src.ptr, align 1
   %byteOff = load i256, ptr %byteOff.ptr, align 1
   %bitOff = shl i256 %byteOff, 3
@@ -4150,9 +4168,9 @@ define void @lshr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun
 ; RV32I-NEXT:    sw s9, 20(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s10, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s11, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    li a4, 0
+; RV32I-NEXT:    li a5, 0
 ; RV32I-NEXT:    lbu a3, 0(a0)
-; RV32I-NEXT:    lbu a5, 1(a0)
+; RV32I-NEXT:    lbu a4, 1(a0)
 ; RV32I-NEXT:    lbu a6, 2(a0)
 ; RV32I-NEXT:    lbu a7, 3(a0)
 ; RV32I-NEXT:    lbu t0, 4(a0)
@@ -4163,736 +4181,750 @@ define void @lshr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun
 ; RV32I-NEXT:    lbu t5, 9(a0)
 ; RV32I-NEXT:    lbu t6, 10(a0)
 ; RV32I-NEXT:    lbu s0, 11(a0)
-; RV32I-NEXT:    slli a5, a5, 8
+; RV32I-NEXT:    slli a4, a4, 8
 ; RV32I-NEXT:    slli a7, a7, 8
 ; RV32I-NEXT:    slli t1, t1, 8
-; RV32I-NEXT:    or a3, a5, a3
-; RV32I-NEXT:    or a7, a7, a6
-; RV32I-NEXT:    or t1, t1, t0
-; RV32I-NEXT:    lbu a6, 13(a0)
-; RV32I-NEXT:    lbu a5, 14(a0)
-; RV32I-NEXT:    lbu s1, 15(a0)
+; RV32I-NEXT:    or a3, a4, a3
+; RV32I-NEXT:    or a4, a7, a6
+; RV32I-NEXT:    or a7, t1, t0
+; RV32I-NEXT:    lbu t0, 13(a0)
+; RV32I-NEXT:    lbu a6, 14(a0)
+; RV32I-NEXT:    lbu t1, 15(a0)
 ; RV32I-NEXT:    slli t3, t3, 8
 ; RV32I-NEXT:    slli t5, t5, 8
 ; RV32I-NEXT:    slli s0, s0, 8
-; RV32I-NEXT:    or t3, t3, t2
-; RV32I-NEXT:    or t0, t5, t4
-; RV32I-NEXT:    or t5, s0, t6
-; RV32I-NEXT:    lbu t2, 1(a1)
-; RV32I-NEXT:    lbu t4, 0(a1)
+; RV32I-NEXT:    or s1, t3, t2
+; RV32I-NEXT:    or t2, t5, t4
+; RV32I-NEXT:    or t4, s0, t6
+; RV32I-NEXT:    lbu t3, 1(a1)
+; RV32I-NEXT:    lbu t5, 0(a1)
 ; RV32I-NEXT:    lbu t6, 2(a1)
 ; RV32I-NEXT:    lbu a1, 3(a1)
-; RV32I-NEXT:    slli t2, t2, 8
-; RV32I-NEXT:    or s0, t2, t4
-; RV32I-NEXT:    slli t2, s1, 8
+; RV32I-NEXT:    slli t3, t3, 8
+; RV32I-NEXT:    or t5, t3, t5
+; RV32I-NEXT:    slli t3, t1, 8
 ; RV32I-NEXT:    slli a1, a1, 8
 ; RV32I-NEXT:    or a1, a1, t6
-; RV32I-NEXT:    slli t4, a7, 16
-; RV32I-NEXT:    slli a7, t3, 16
-; RV32I-NEXT:    slli t3, t5, 16
-; RV32I-NEXT:    slli t5, a1, 16
-; RV32I-NEXT:    or a1, a7, t1
-; RV32I-NEXT:    or a7, t5, s0
+; RV32I-NEXT:    slli a4, a4, 16
+; RV32I-NEXT:    slli s1, s1, 16
+; RV32I-NEXT:    slli t4, t4, 16
+; RV32I-NEXT:    slli t1, a1, 16
+; RV32I-NEXT:    or s5, s1, a7
+; RV32I-NEXT:    or a7, t1, t5
 ; RV32I-NEXT:    slli a7, a7, 5
 ; RV32I-NEXT:    srli t1, a7, 5
 ; RV32I-NEXT:    andi t5, a7, 31
 ; RV32I-NEXT:    neg s3, t5
 ; RV32I-NEXT:    beqz t5, .LBB13_2
 ; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    sll a4, a1, s3
+; RV32I-NEXT:    sll a5, s5, s3
 ; RV32I-NEXT:  .LBB13_2:
-; RV32I-NEXT:    or s7, t4, a3
-; RV32I-NEXT:    lbu t4, 12(a0)
-; RV32I-NEXT:    lbu t6, 19(a0)
-; RV32I-NEXT:    slli s1, a6, 8
-; RV32I-NEXT:    or a5, t2, a5
-; RV32I-NEXT:    or a3, t3, t0
+; RV32I-NEXT:    or a4, a4, a3
+; RV32I-NEXT:    lbu t6, 12(a0)
+; RV32I-NEXT:    lbu s0, 19(a0)
+; RV32I-NEXT:    slli s1, t0, 8
+; RV32I-NEXT:    or t0, t3, a6
+; RV32I-NEXT:    or a1, t4, t2
 ; RV32I-NEXT:    beqz t1, .LBB13_4
 ; RV32I-NEXT:  # %bb.3:
-; RV32I-NEXT:    li s0, 0
+; RV32I-NEXT:    mv s11, a4
+; RV32I-NEXT:    li a4, 0
 ; RV32I-NEXT:    j .LBB13_5
 ; RV32I-NEXT:  .LBB13_4:
-; RV32I-NEXT:    srl s0, s7, a7
-; RV32I-NEXT:    or s0, s0, a4
+; RV32I-NEXT:    mv s11, a4
+; RV32I-NEXT:    srl a6, a4, a7
+; RV32I-NEXT:    or a4, a6, a5
 ; RV32I-NEXT:  .LBB13_5:
 ; RV32I-NEXT:    li a6, 0
-; RV32I-NEXT:    lbu t0, 17(a0)
-; RV32I-NEXT:    lbu a4, 18(a0)
-; RV32I-NEXT:    slli s4, t6, 8
-; RV32I-NEXT:    or s2, s1, t4
-; RV32I-NEXT:    slli a5, a5, 16
-; RV32I-NEXT:    li s5, 1
-; RV32I-NEXT:    sll t6, a3, s3
+; RV32I-NEXT:    lbu s2, 17(a0)
+; RV32I-NEXT:    lbu a5, 18(a0)
+; RV32I-NEXT:    slli s4, s0, 8
+; RV32I-NEXT:    or s1, s1, t6
+; RV32I-NEXT:    slli t0, t0, 16
+; RV32I-NEXT:    li t3, 1
+; RV32I-NEXT:    sll s6, a1, s3
 ; RV32I-NEXT:    beqz t5, .LBB13_7
 ; RV32I-NEXT:  # %bb.6:
-; RV32I-NEXT:    mv a6, t6
+; RV32I-NEXT:    mv a6, s6
 ; RV32I-NEXT:  .LBB13_7:
 ; RV32I-NEXT:    lbu t2, 16(a0)
-; RV32I-NEXT:    lbu t3, 23(a0)
-; RV32I-NEXT:    slli s1, t0, 8
-; RV32I-NEXT:    or t4, s4, a4
-; RV32I-NEXT:    srl a4, a1, a7
-; RV32I-NEXT:    or a5, a5, s2
-; RV32I-NEXT:    bne t1, s5, .LBB13_9
+; RV32I-NEXT:    lbu t4, 23(a0)
+; RV32I-NEXT:    slli s0, s2, 8
+; RV32I-NEXT:    or t6, s4, a5
+; RV32I-NEXT:    srl a3, s5, a7
+; RV32I-NEXT:    or a5, t0, s1
+; RV32I-NEXT:    sw a3, 0(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    bne t1, t3, .LBB13_9
 ; RV32I-NEXT:  # %bb.8:
-; RV32I-NEXT:    or s0, a4, a6
+; RV32I-NEXT:    or a4, a3, a6
 ; RV32I-NEXT:  .LBB13_9:
 ; RV32I-NEXT:    li t0, 0
-; RV32I-NEXT:    lbu s5, 21(a0)
+; RV32I-NEXT:    lbu s2, 21(a0)
 ; RV32I-NEXT:    lbu a6, 22(a0)
-; RV32I-NEXT:    slli s4, t3, 8
-; RV32I-NEXT:    or t2, s1, t2
-; RV32I-NEXT:    slli s6, t4, 16
-; RV32I-NEXT:    li s8, 2
-; RV32I-NEXT:    sll t3, a5, s3
+; RV32I-NEXT:    slli s1, t4, 8
+; RV32I-NEXT:    or t2, s0, t2
+; RV32I-NEXT:    slli s4, t6, 16
+; RV32I-NEXT:    li a3, 2
+; RV32I-NEXT:    sll s8, a5, s3
 ; RV32I-NEXT:    beqz t5, .LBB13_11
 ; RV32I-NEXT:  # %bb.10:
-; RV32I-NEXT:    mv t0, t3
+; RV32I-NEXT:    mv t0, s8
 ; RV32I-NEXT:  .LBB13_11:
-; RV32I-NEXT:    lbu s1, 20(a0)
-; RV32I-NEXT:    lbu s2, 27(a0)
-; RV32I-NEXT:    slli s5, s5, 8
-; RV32I-NEXT:    or s4, s4, a6
-; RV32I-NEXT:    srl t4, a3, a7
-; RV32I-NEXT:    or a6, s6, t2
-; RV32I-NEXT:    bne t1, s8, .LBB13_13
+; RV32I-NEXT:    lbu t6, 20(a0)
+; RV32I-NEXT:    lbu s0, 27(a0)
+; RV32I-NEXT:    slli s2, s2, 8
+; RV32I-NEXT:    or s1, s1, a6
+; RV32I-NEXT:    srl t3, a1, a7
+; RV32I-NEXT:    or a6, s4, t2
+; RV32I-NEXT:    sw s5, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    bne t1, a3, .LBB13_13
 ; RV32I-NEXT:  # %bb.12:
-; RV32I-NEXT:    or s0, t4, t0
+; RV32I-NEXT:    or a4, t3, t0
 ; RV32I-NEXT:  .LBB13_13:
-; RV32I-NEXT:    sw s7, 4(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    li t2, 0
-; RV32I-NEXT:    lbu s6, 25(a0)
+; RV32I-NEXT:    lbu s4, 25(a0)
 ; RV32I-NEXT:    lbu t0, 26(a0)
-; RV32I-NEXT:    slli s8, s2, 8
-; RV32I-NEXT:    or s7, s5, s1
-; RV32I-NEXT:    slli s9, s4, 16
-; RV32I-NEXT:    sll s11, a6, s3
+; RV32I-NEXT:    slli s7, s0, 8
+; RV32I-NEXT:    or s5, s2, t6
+; RV32I-NEXT:    slli s9, s1, 16
+; RV32I-NEXT:    li t6, 3
+; RV32I-NEXT:    sll t4, a6, s3
 ; RV32I-NEXT:    beqz t5, .LBB13_15
 ; RV32I-NEXT:  # %bb.14:
-; RV32I-NEXT:    mv t2, s11
+; RV32I-NEXT:    mv t2, t4
 ; RV32I-NEXT:  .LBB13_15:
-; RV32I-NEXT:    lbu s1, 24(a0)
-; RV32I-NEXT:    lbu s2, 31(a0)
-; RV32I-NEXT:    slli s5, s6, 8
-; RV32I-NEXT:    or s4, s8, t0
-; RV32I-NEXT:    srl ra, a5, a7
-; RV32I-NEXT:    or t0, s9, s7
-; RV32I-NEXT:    li s6, 3
-; RV32I-NEXT:    bne t1, s6, .LBB13_17
+; RV32I-NEXT:    lbu s0, 24(a0)
+; RV32I-NEXT:    lbu s1, 31(a0)
+; RV32I-NEXT:    slli s4, s4, 8
+; RV32I-NEXT:    or s2, s7, t0
+; RV32I-NEXT:    srl a3, a5, a7
+; RV32I-NEXT:    or t0, s9, s5
+; RV32I-NEXT:    li s9, 3
+; RV32I-NEXT:    bne t1, t6, .LBB13_17
 ; RV32I-NEXT:  # %bb.16:
-; RV32I-NEXT:    or s0, ra, t2
+; RV32I-NEXT:    or a4, a3, t2
 ; RV32I-NEXT:  .LBB13_17:
+; RV32I-NEXT:    mv t6, t3
 ; RV32I-NEXT:    li t2, 0
 ; RV32I-NEXT:    lbu s7, 29(a0)
-; RV32I-NEXT:    lbu s6, 30(a0)
-; RV32I-NEXT:    slli s8, s2, 8
-; RV32I-NEXT:    or s2, s5, s1
-; RV32I-NEXT:    slli s5, s4, 16
-; RV32I-NEXT:    li s9, 4
-; RV32I-NEXT:    sll s1, t0, s3
-; RV32I-NEXT:    sw s1, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lbu s5, 30(a0)
+; RV32I-NEXT:    slli s1, s1, 8
+; RV32I-NEXT:    or s10, s4, s0
+; RV32I-NEXT:    slli s2, s2, 16
+; RV32I-NEXT:    li a3, 4
+; RV32I-NEXT:    sll s0, t0, s3
 ; RV32I-NEXT:    beqz t5, .LBB13_19
 ; RV32I-NEXT:  # %bb.18:
-; RV32I-NEXT:    lw t2, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    mv t2, s0
 ; RV32I-NEXT:  .LBB13_19:
-; RV32I-NEXT:    lbu s1, 28(a0)
+; RV32I-NEXT:    lbu t3, 28(a0)
 ; RV32I-NEXT:    slli s7, s7, 8
-; RV32I-NEXT:    or s4, s8, s6
-; RV32I-NEXT:    srl s10, a6, a7
-; RV32I-NEXT:    or a0, s5, s2
-; RV32I-NEXT:    bne t1, s9, .LBB13_21
+; RV32I-NEXT:    or s4, s1, s5
+; RV32I-NEXT:    srl s1, a6, a7
+; RV32I-NEXT:    or a0, s2, s10
+; RV32I-NEXT:    beq t1, a3, .LBB13_21
 ; RV32I-NEXT:  # %bb.20:
-; RV32I-NEXT:    or s0, s10, t2
+; RV32I-NEXT:    mv a3, s1
+; RV32I-NEXT:    j .LBB13_22
 ; RV32I-NEXT:  .LBB13_21:
+; RV32I-NEXT:    mv a3, s1
+; RV32I-NEXT:    or a4, s1, t2
+; RV32I-NEXT:  .LBB13_22:
+; RV32I-NEXT:    li s10, 1
 ; RV32I-NEXT:    li s2, 0
-; RV32I-NEXT:    or t2, s7, s1
+; RV32I-NEXT:    or t2, s7, t3
 ; RV32I-NEXT:    slli s4, s4, 16
-; RV32I-NEXT:    li s9, 5
+; RV32I-NEXT:    li s1, 5
 ; RV32I-NEXT:    sll s7, a0, s3
-; RV32I-NEXT:    beqz t5, .LBB13_23
-; RV32I-NEXT:  # %bb.22:
+; RV32I-NEXT:    beqz t5, .LBB13_24
+; RV32I-NEXT:  # %bb.23:
 ; RV32I-NEXT:    mv s2, s7
-; RV32I-NEXT:  .LBB13_23:
-; RV32I-NEXT:    srl s8, t0, a7
+; RV32I-NEXT:  .LBB13_24:
+; RV32I-NEXT:    sw a1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    srl t3, t0, a7
 ; RV32I-NEXT:    or t2, s4, t2
-; RV32I-NEXT:    bne t1, s9, .LBB13_25
-; RV32I-NEXT:  # %bb.24:
-; RV32I-NEXT:    or s0, s8, s2
-; RV32I-NEXT:  .LBB13_25:
-; RV32I-NEXT:    li s4, 0
+; RV32I-NEXT:    beq t1, s1, .LBB13_26
+; RV32I-NEXT:  # %bb.25:
+; RV32I-NEXT:    mv a1, t3
+; RV32I-NEXT:    j .LBB13_27
+; RV32I-NEXT:  .LBB13_26:
+; RV32I-NEXT:    mv a1, t3
+; RV32I-NEXT:    or a4, t3, s2
+; RV32I-NEXT:  .LBB13_27:
+; RV32I-NEXT:    li t3, 0
 ; RV32I-NEXT:    li s2, 6
 ; RV32I-NEXT:    sll s5, t2, s3
-; RV32I-NEXT:    beqz t5, .LBB13_27
-; RV32I-NEXT:  # %bb.26:
-; RV32I-NEXT:    mv s4, s5
-; RV32I-NEXT:  .LBB13_27:
-; RV32I-NEXT:    srl s6, a0, a7
-; RV32I-NEXT:    bne t1, s2, .LBB13_29
+; RV32I-NEXT:    beqz t5, .LBB13_29
 ; RV32I-NEXT:  # %bb.28:
-; RV32I-NEXT:    or s0, s6, s4
+; RV32I-NEXT:    mv t3, s5
 ; RV32I-NEXT:  .LBB13_29:
-; RV32I-NEXT:    li s3, 7
-; RV32I-NEXT:    srl s1, t2, a7
-; RV32I-NEXT:    mv s4, s1
-; RV32I-NEXT:    bne t1, s3, .LBB13_34
+; RV32I-NEXT:    srl s3, a0, a7
+; RV32I-NEXT:    beq t1, s2, .LBB13_31
 ; RV32I-NEXT:  # %bb.30:
-; RV32I-NEXT:    bnez a7, .LBB13_35
+; RV32I-NEXT:    mv ra, s3
+; RV32I-NEXT:    j .LBB13_32
 ; RV32I-NEXT:  .LBB13_31:
-; RV32I-NEXT:    li s0, 0
-; RV32I-NEXT:    bnez t5, .LBB13_36
+; RV32I-NEXT:    mv ra, s3
+; RV32I-NEXT:    or a4, s3, t3
 ; RV32I-NEXT:  .LBB13_32:
-; RV32I-NEXT:    li s4, 2
-; RV32I-NEXT:    beqz t1, .LBB13_37
-; RV32I-NEXT:  .LBB13_33:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    j .LBB13_38
+; RV32I-NEXT:    li s3, 7
+; RV32I-NEXT:    srl s4, t2, a7
+; RV32I-NEXT:    mv t3, s4
+; RV32I-NEXT:    beq t1, s3, .LBB13_34
+; RV32I-NEXT:  # %bb.33:
+; RV32I-NEXT:    mv t3, a4
 ; RV32I-NEXT:  .LBB13_34:
-; RV32I-NEXT:    mv s4, s0
-; RV32I-NEXT:    beqz a7, .LBB13_31
-; RV32I-NEXT:  .LBB13_35:
-; RV32I-NEXT:    sw s4, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    li s0, 0
-; RV32I-NEXT:    beqz t5, .LBB13_32
+; RV32I-NEXT:    mv a4, s11
+; RV32I-NEXT:    beqz a7, .LBB13_36
+; RV32I-NEXT:  # %bb.35:
+; RV32I-NEXT:    mv a4, t3
 ; RV32I-NEXT:  .LBB13_36:
-; RV32I-NEXT:    mv s0, t6
-; RV32I-NEXT:    li s4, 2
-; RV32I-NEXT:    bnez t1, .LBB13_33
-; RV32I-NEXT:  .LBB13_37:
-; RV32I-NEXT:    or a4, a4, s0
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    li s11, 2
+; RV32I-NEXT:    beqz t5, .LBB13_38
+; RV32I-NEXT:  # %bb.37:
+; RV32I-NEXT:    mv t3, s6
 ; RV32I-NEXT:  .LBB13_38:
-; RV32I-NEXT:    li s0, 1
-; RV32I-NEXT:    li t6, 0
-; RV32I-NEXT:    bnez t5, .LBB13_57
+; RV32I-NEXT:    beqz t1, .LBB13_40
 ; RV32I-NEXT:  # %bb.39:
-; RV32I-NEXT:    beq t1, s0, .LBB13_58
+; RV32I-NEXT:    li s6, 0
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    bnez t5, .LBB13_41
+; RV32I-NEXT:    j .LBB13_42
 ; RV32I-NEXT:  .LBB13_40:
-; RV32I-NEXT:    li t6, 0
-; RV32I-NEXT:    bnez t5, .LBB13_59
+; RV32I-NEXT:    lw s6, 0(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    or s6, s6, t3
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beqz t5, .LBB13_42
 ; RV32I-NEXT:  .LBB13_41:
-; RV32I-NEXT:    beq t1, s4, .LBB13_60
+; RV32I-NEXT:    mv t3, s8
 ; RV32I-NEXT:  .LBB13_42:
-; RV32I-NEXT:    li t6, 0
-; RV32I-NEXT:    bnez t5, .LBB13_61
-; RV32I-NEXT:  .LBB13_43:
-; RV32I-NEXT:    li s4, 3
-; RV32I-NEXT:    bne t1, s4, .LBB13_45
+; RV32I-NEXT:    beq t1, s10, .LBB13_58
+; RV32I-NEXT:  # %bb.43:
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    bnez t5, .LBB13_59
 ; RV32I-NEXT:  .LBB13_44:
-; RV32I-NEXT:    or a4, s10, t6
+; RV32I-NEXT:    beq t1, s11, .LBB13_60
 ; RV32I-NEXT:  .LBB13_45:
-; RV32I-NEXT:    li t6, 0
-; RV32I-NEXT:    li s4, 4
-; RV32I-NEXT:    bnez t5, .LBB13_62
-; RV32I-NEXT:  # %bb.46:
-; RV32I-NEXT:    beq t1, s4, .LBB13_63
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    bnez t5, .LBB13_61
+; RV32I-NEXT:  .LBB13_46:
+; RV32I-NEXT:    bne t1, s9, .LBB13_48
 ; RV32I-NEXT:  .LBB13_47:
-; RV32I-NEXT:    li t6, 0
-; RV32I-NEXT:    bnez t5, .LBB13_64
+; RV32I-NEXT:    or s6, a3, t3
 ; RV32I-NEXT:  .LBB13_48:
-; RV32I-NEXT:    beq t1, s9, .LBB13_65
-; RV32I-NEXT:  .LBB13_49:
-; RV32I-NEXT:    mv t6, s1
-; RV32I-NEXT:    bne t1, s2, .LBB13_66
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    li s9, 4
+; RV32I-NEXT:    bnez t5, .LBB13_62
+; RV32I-NEXT:  # %bb.49:
+; RV32I-NEXT:    beq t1, s9, .LBB13_63
 ; RV32I-NEXT:  .LBB13_50:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    bne t1, s3, .LBB13_67
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    bnez t5, .LBB13_64
 ; RV32I-NEXT:  .LBB13_51:
-; RV32I-NEXT:    beqz a7, .LBB13_53
+; RV32I-NEXT:    beq t1, s1, .LBB13_65
 ; RV32I-NEXT:  .LBB13_52:
-; RV32I-NEXT:    mv a1, a4
+; RV32I-NEXT:    mv t3, s4
+; RV32I-NEXT:    bne t1, s2, .LBB13_66
 ; RV32I-NEXT:  .LBB13_53:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    li t6, 2
-; RV32I-NEXT:    beqz t5, .LBB13_55
-; RV32I-NEXT:  # %bb.54:
-; RV32I-NEXT:    mv a4, t3
+; RV32I-NEXT:    li s6, 0
+; RV32I-NEXT:    bne t1, s3, .LBB13_67
+; RV32I-NEXT:  .LBB13_54:
+; RV32I-NEXT:    bnez a7, .LBB13_68
 ; RV32I-NEXT:  .LBB13_55:
-; RV32I-NEXT:    beqz t1, .LBB13_68
-; RV32I-NEXT:  # %bb.56:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    j .LBB13_69
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    bnez t5, .LBB13_69
+; RV32I-NEXT:  .LBB13_56:
+; RV32I-NEXT:    beqz t1, .LBB13_70
 ; RV32I-NEXT:  .LBB13_57:
-; RV32I-NEXT:    mv t6, t3
-; RV32I-NEXT:    bne t1, s0, .LBB13_40
+; RV32I-NEXT:    li s6, 0
+; RV32I-NEXT:    j .LBB13_71
 ; RV32I-NEXT:  .LBB13_58:
-; RV32I-NEXT:    or a4, t4, t6
-; RV32I-NEXT:    li t6, 0
-; RV32I-NEXT:    beqz t5, .LBB13_41
+; RV32I-NEXT:    or s6, t6, t3
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beqz t5, .LBB13_44
 ; RV32I-NEXT:  .LBB13_59:
-; RV32I-NEXT:    mv t6, s11
-; RV32I-NEXT:    bne t1, s4, .LBB13_42
+; RV32I-NEXT:    mv t3, t4
+; RV32I-NEXT:    bne t1, s11, .LBB13_45
 ; RV32I-NEXT:  .LBB13_60:
-; RV32I-NEXT:    or a4, ra, t6
-; RV32I-NEXT:    li t6, 0
-; RV32I-NEXT:    beqz t5, .LBB13_43
+; RV32I-NEXT:    srl s6, a5, a7
+; RV32I-NEXT:    or s6, s6, t3
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beqz t5, .LBB13_46
 ; RV32I-NEXT:  .LBB13_61:
-; RV32I-NEXT:    lw t6, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    li s4, 3
-; RV32I-NEXT:    beq t1, s4, .LBB13_44
-; RV32I-NEXT:    j .LBB13_45
+; RV32I-NEXT:    mv t3, s0
+; RV32I-NEXT:    beq t1, s9, .LBB13_47
+; RV32I-NEXT:    j .LBB13_48
 ; RV32I-NEXT:  .LBB13_62:
-; RV32I-NEXT:    mv t6, s7
-; RV32I-NEXT:    bne t1, s4, .LBB13_47
+; RV32I-NEXT:    mv t3, s7
+; RV32I-NEXT:    bne t1, s9, .LBB13_50
 ; RV32I-NEXT:  .LBB13_63:
-; RV32I-NEXT:    or a4, s8, t6
-; RV32I-NEXT:    li t6, 0
-; RV32I-NEXT:    beqz t5, .LBB13_48
+; RV32I-NEXT:    or s6, a1, t3
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beqz t5, .LBB13_51
 ; RV32I-NEXT:  .LBB13_64:
-; RV32I-NEXT:    mv t6, s5
-; RV32I-NEXT:    bne t1, s9, .LBB13_49
+; RV32I-NEXT:    mv t3, s5
+; RV32I-NEXT:    bne t1, s1, .LBB13_52
 ; RV32I-NEXT:  .LBB13_65:
-; RV32I-NEXT:    or a4, s6, t6
-; RV32I-NEXT:    mv t6, s1
-; RV32I-NEXT:    beq t1, s2, .LBB13_50
+; RV32I-NEXT:    or s6, ra, t3
+; RV32I-NEXT:    mv t3, s4
+; RV32I-NEXT:    beq t1, s2, .LBB13_53
 ; RV32I-NEXT:  .LBB13_66:
-; RV32I-NEXT:    mv t6, a4
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    beq t1, s3, .LBB13_51
+; RV32I-NEXT:    mv t3, s6
+; RV32I-NEXT:    li s6, 0
+; RV32I-NEXT:    beq t1, s3, .LBB13_54
 ; RV32I-NEXT:  .LBB13_67:
-; RV32I-NEXT:    mv a4, t6
-; RV32I-NEXT:    bnez a7, .LBB13_52
-; RV32I-NEXT:    j .LBB13_53
+; RV32I-NEXT:    mv s6, t3
+; RV32I-NEXT:    beqz a7, .LBB13_55
 ; RV32I-NEXT:  .LBB13_68:
-; RV32I-NEXT:    or a4, t4, a4
-; RV32I-NEXT:  .LBB13_69:
-; RV32I-NEXT:    li t4, 3
+; RV32I-NEXT:    sw s6, 8(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    bnez t5, .LBB13_84
-; RV32I-NEXT:  # %bb.70:
-; RV32I-NEXT:    beq t1, s0, .LBB13_85
+; RV32I-NEXT:    beqz t5, .LBB13_56
+; RV32I-NEXT:  .LBB13_69:
+; RV32I-NEXT:    mv t3, s8
+; RV32I-NEXT:    bnez t1, .LBB13_57
+; RV32I-NEXT:  .LBB13_70:
+; RV32I-NEXT:    or s6, t6, t3
 ; RV32I-NEXT:  .LBB13_71:
+; RV32I-NEXT:    li t6, 3
 ; RV32I-NEXT:    li t3, 0
 ; RV32I-NEXT:    bnez t5, .LBB13_86
-; RV32I-NEXT:  .LBB13_72:
-; RV32I-NEXT:    beq t1, t6, .LBB13_87
+; RV32I-NEXT:  # %bb.72:
+; RV32I-NEXT:    beq t1, s10, .LBB13_87
 ; RV32I-NEXT:  .LBB13_73:
 ; RV32I-NEXT:    li t3, 0
 ; RV32I-NEXT:    bnez t5, .LBB13_88
 ; RV32I-NEXT:  .LBB13_74:
-; RV32I-NEXT:    beq t1, t4, .LBB13_89
+; RV32I-NEXT:    beq t1, s11, .LBB13_89
 ; RV32I-NEXT:  .LBB13_75:
 ; RV32I-NEXT:    li t3, 0
 ; RV32I-NEXT:    bnez t5, .LBB13_90
 ; RV32I-NEXT:  .LBB13_76:
-; RV32I-NEXT:    beq t1, s4, .LBB13_91
+; RV32I-NEXT:    beq t1, t6, .LBB13_91
 ; RV32I-NEXT:  .LBB13_77:
-; RV32I-NEXT:    mv t3, s1
-; RV32I-NEXT:    bne t1, s9, .LBB13_92
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    bnez t5, .LBB13_92
 ; RV32I-NEXT:  .LBB13_78:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    bne t1, s2, .LBB13_93
+; RV32I-NEXT:    beq t1, s9, .LBB13_93
 ; RV32I-NEXT:  .LBB13_79:
-; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    bne t1, s3, .LBB13_94
+; RV32I-NEXT:    mv t3, s4
+; RV32I-NEXT:    bne t1, s1, .LBB13_94
 ; RV32I-NEXT:  .LBB13_80:
-; RV32I-NEXT:    bnez a7, .LBB13_95
+; RV32I-NEXT:    li s6, 0
+; RV32I-NEXT:    bne t1, s2, .LBB13_95
 ; RV32I-NEXT:  .LBB13_81:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    bnez t5, .LBB13_96
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    bne t1, s3, .LBB13_96
 ; RV32I-NEXT:  .LBB13_82:
-; RV32I-NEXT:    beqz t1, .LBB13_97
+; RV32I-NEXT:    bnez a7, .LBB13_97
 ; RV32I-NEXT:  .LBB13_83:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    j .LBB13_98
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    bnez t5, .LBB13_98
 ; RV32I-NEXT:  .LBB13_84:
-; RV32I-NEXT:    mv t3, s11
-; RV32I-NEXT:    bne t1, s0, .LBB13_71
+; RV32I-NEXT:    beqz t1, .LBB13_99
 ; RV32I-NEXT:  .LBB13_85:
-; RV32I-NEXT:    or a4, ra, t3
+; RV32I-NEXT:    li s6, 0
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    beqz t5, .LBB13_72
+; RV32I-NEXT:    bnez t5, .LBB13_100
+; RV32I-NEXT:    j .LBB13_101
 ; RV32I-NEXT:  .LBB13_86:
-; RV32I-NEXT:    lw t3, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    bne t1, t6, .LBB13_73
+; RV32I-NEXT:    mv t3, t4
+; RV32I-NEXT:    bne t1, s10, .LBB13_73
 ; RV32I-NEXT:  .LBB13_87:
-; RV32I-NEXT:    or a4, s10, t3
+; RV32I-NEXT:    srl s6, a5, a7
+; RV32I-NEXT:    or s6, s6, t3
 ; RV32I-NEXT:    li t3, 0
 ; RV32I-NEXT:    beqz t5, .LBB13_74
 ; RV32I-NEXT:  .LBB13_88:
-; RV32I-NEXT:    mv t3, s7
-; RV32I-NEXT:    bne t1, t4, .LBB13_75
+; RV32I-NEXT:    mv t3, s0
+; RV32I-NEXT:    bne t1, s11, .LBB13_75
 ; RV32I-NEXT:  .LBB13_89:
-; RV32I-NEXT:    or a4, s8, t3
+; RV32I-NEXT:    or s6, a3, t3
 ; RV32I-NEXT:    li t3, 0
 ; RV32I-NEXT:    beqz t5, .LBB13_76
 ; RV32I-NEXT:  .LBB13_90:
-; RV32I-NEXT:    mv t3, s5
-; RV32I-NEXT:    bne t1, s4, .LBB13_77
+; RV32I-NEXT:    mv t3, s7
+; RV32I-NEXT:    bne t1, t6, .LBB13_77
 ; RV32I-NEXT:  .LBB13_91:
-; RV32I-NEXT:    or a4, s6, t3
-; RV32I-NEXT:    mv t3, s1
-; RV32I-NEXT:    beq t1, s9, .LBB13_78
+; RV32I-NEXT:    or s6, a1, t3
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beqz t5, .LBB13_78
 ; RV32I-NEXT:  .LBB13_92:
-; RV32I-NEXT:    mv t3, a4
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    beq t1, s2, .LBB13_79
+; RV32I-NEXT:    mv t3, s5
+; RV32I-NEXT:    bne t1, s9, .LBB13_79
 ; RV32I-NEXT:  .LBB13_93:
-; RV32I-NEXT:    mv a4, t3
-; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    beq t1, s3, .LBB13_80
+; RV32I-NEXT:    or s6, ra, t3
+; RV32I-NEXT:    mv t3, s4
+; RV32I-NEXT:    beq t1, s1, .LBB13_80
 ; RV32I-NEXT:  .LBB13_94:
-; RV32I-NEXT:    mv t3, a4
-; RV32I-NEXT:    beqz a7, .LBB13_81
+; RV32I-NEXT:    mv t3, s6
+; RV32I-NEXT:    li s6, 0
+; RV32I-NEXT:    beq t1, s2, .LBB13_81
 ; RV32I-NEXT:  .LBB13_95:
-; RV32I-NEXT:    mv a3, t3
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    beqz t5, .LBB13_82
+; RV32I-NEXT:    mv s6, t3
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beq t1, s3, .LBB13_82
 ; RV32I-NEXT:  .LBB13_96:
-; RV32I-NEXT:    mv a4, s11
-; RV32I-NEXT:    bnez t1, .LBB13_83
+; RV32I-NEXT:    mv t3, s6
+; RV32I-NEXT:    beqz a7, .LBB13_83
 ; RV32I-NEXT:  .LBB13_97:
-; RV32I-NEXT:    or a4, ra, a4
+; RV32I-NEXT:    sw t3, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beqz t5, .LBB13_84
 ; RV32I-NEXT:  .LBB13_98:
-; RV32I-NEXT:    lw ra, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    mv t3, t4
+; RV32I-NEXT:    bnez t1, .LBB13_85
+; RV32I-NEXT:  .LBB13_99:
+; RV32I-NEXT:    srl t4, a5, a7
+; RV32I-NEXT:    or s6, t4, t3
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    bnez t5, .LBB13_112
-; RV32I-NEXT:  # %bb.99:
-; RV32I-NEXT:    beq t1, s0, .LBB13_113
+; RV32I-NEXT:    beqz t5, .LBB13_101
 ; RV32I-NEXT:  .LBB13_100:
-; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    bnez t5, .LBB13_114
+; RV32I-NEXT:    mv t3, s0
 ; RV32I-NEXT:  .LBB13_101:
-; RV32I-NEXT:    beq t1, t6, .LBB13_115
-; RV32I-NEXT:  .LBB13_102:
+; RV32I-NEXT:    beq t1, s10, .LBB13_114
+; RV32I-NEXT:  # %bb.102:
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    bnez t5, .LBB13_116
+; RV32I-NEXT:    bnez t5, .LBB13_115
 ; RV32I-NEXT:  .LBB13_103:
-; RV32I-NEXT:    beq t1, t4, .LBB13_117
+; RV32I-NEXT:    beq t1, s11, .LBB13_116
 ; RV32I-NEXT:  .LBB13_104:
-; RV32I-NEXT:    mv t3, s1
-; RV32I-NEXT:    bne t1, s4, .LBB13_118
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    bnez t5, .LBB13_117
 ; RV32I-NEXT:  .LBB13_105:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    bne t1, s9, .LBB13_119
+; RV32I-NEXT:    beq t1, t6, .LBB13_118
 ; RV32I-NEXT:  .LBB13_106:
-; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    bne t1, s2, .LBB13_120
+; RV32I-NEXT:    mv t3, s4
+; RV32I-NEXT:    bne t1, s9, .LBB13_119
 ; RV32I-NEXT:  .LBB13_107:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    bne t1, s3, .LBB13_121
+; RV32I-NEXT:    li s6, 0
+; RV32I-NEXT:    bne t1, s1, .LBB13_120
 ; RV32I-NEXT:  .LBB13_108:
-; RV32I-NEXT:    bnez a7, .LBB13_122
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    bne t1, s2, .LBB13_121
 ; RV32I-NEXT:  .LBB13_109:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    bnez t5, .LBB13_123
+; RV32I-NEXT:    li s6, 0
+; RV32I-NEXT:    bne t1, s3, .LBB13_122
 ; RV32I-NEXT:  .LBB13_110:
-; RV32I-NEXT:    beqz t1, .LBB13_124
+; RV32I-NEXT:    bnez a7, .LBB13_123
 ; RV32I-NEXT:  .LBB13_111:
-; RV32I-NEXT:    li a4, 0
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    bnez t5, .LBB13_125
-; RV32I-NEXT:    j .LBB13_126
+; RV32I-NEXT:    bnez t5, .LBB13_124
 ; RV32I-NEXT:  .LBB13_112:
-; RV32I-NEXT:    lw t3, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    bne t1, s0, .LBB13_100
+; RV32I-NEXT:    beqz t1, .LBB13_125
 ; RV32I-NEXT:  .LBB13_113:
-; RV32I-NEXT:    or a4, s10, t3
+; RV32I-NEXT:    li s6, 0
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    beqz t5, .LBB13_101
+; RV32I-NEXT:    bnez t5, .LBB13_126
+; RV32I-NEXT:    j .LBB13_127
 ; RV32I-NEXT:  .LBB13_114:
-; RV32I-NEXT:    mv t3, s7
-; RV32I-NEXT:    bne t1, t6, .LBB13_102
-; RV32I-NEXT:  .LBB13_115:
-; RV32I-NEXT:    or a4, s8, t3
+; RV32I-NEXT:    or s6, a3, t3
 ; RV32I-NEXT:    li t3, 0
 ; RV32I-NEXT:    beqz t5, .LBB13_103
+; RV32I-NEXT:  .LBB13_115:
+; RV32I-NEXT:    mv t3, s7
+; RV32I-NEXT:    bne t1, s11, .LBB13_104
 ; RV32I-NEXT:  .LBB13_116:
-; RV32I-NEXT:    mv t3, s5
-; RV32I-NEXT:    bne t1, t4, .LBB13_104
+; RV32I-NEXT:    or s6, a1, t3
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beqz t5, .LBB13_105
 ; RV32I-NEXT:  .LBB13_117:
-; RV32I-NEXT:    or a4, s6, t3
-; RV32I-NEXT:    mv t3, s1
-; RV32I-NEXT:    beq t1, s4, .LBB13_105
+; RV32I-NEXT:    mv t3, s5
+; RV32I-NEXT:    bne t1, t6, .LBB13_106
 ; RV32I-NEXT:  .LBB13_118:
-; RV32I-NEXT:    mv t3, a4
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    beq t1, s9, .LBB13_106
+; RV32I-NEXT:    or s6, ra, t3
+; RV32I-NEXT:    mv t3, s4
+; RV32I-NEXT:    beq t1, s9, .LBB13_107
 ; RV32I-NEXT:  .LBB13_119:
-; RV32I-NEXT:    mv a4, t3
-; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    beq t1, s2, .LBB13_107
+; RV32I-NEXT:    mv t3, s6
+; RV32I-NEXT:    li s6, 0
+; RV32I-NEXT:    beq t1, s1, .LBB13_108
 ; RV32I-NEXT:  .LBB13_120:
-; RV32I-NEXT:    mv t3, a4
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    beq t1, s3, .LBB13_108
+; RV32I-NEXT:    mv s6, t3
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beq t1, s2, .LBB13_109
 ; RV32I-NEXT:  .LBB13_121:
-; RV32I-NEXT:    mv a4, t3
-; RV32I-NEXT:    beqz a7, .LBB13_109
+; RV32I-NEXT:    mv t3, s6
+; RV32I-NEXT:    li s6, 0
+; RV32I-NEXT:    beq t1, s3, .LBB13_110
 ; RV32I-NEXT:  .LBB13_122:
-; RV32I-NEXT:    mv a5, a4
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    beqz t5, .LBB13_110
+; RV32I-NEXT:    mv s6, t3
+; RV32I-NEXT:    beqz a7, .LBB13_111
 ; RV32I-NEXT:  .LBB13_123:
-; RV32I-NEXT:    lw a4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    bnez t1, .LBB13_111
-; RV32I-NEXT:  .LBB13_124:
-; RV32I-NEXT:    or a4, s10, a4
+; RV32I-NEXT:    mv a5, s6
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    beqz t5, .LBB13_126
+; RV32I-NEXT:    beqz t5, .LBB13_112
+; RV32I-NEXT:  .LBB13_124:
+; RV32I-NEXT:    mv t3, s0
+; RV32I-NEXT:    bnez t1, .LBB13_113
 ; RV32I-NEXT:  .LBB13_125:
-; RV32I-NEXT:    mv t3, s7
+; RV32I-NEXT:    or s6, a3, t3
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beqz t5, .LBB13_127
 ; RV32I-NEXT:  .LBB13_126:
-; RV32I-NEXT:    beq t1, s0, .LBB13_138
-; RV32I-NEXT:  # %bb.127:
+; RV32I-NEXT:    mv t3, s7
+; RV32I-NEXT:  .LBB13_127:
+; RV32I-NEXT:    beq t1, s10, .LBB13_139
+; RV32I-NEXT:  # %bb.128:
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    bnez t5, .LBB13_139
-; RV32I-NEXT:  .LBB13_128:
-; RV32I-NEXT:    beq t1, t6, .LBB13_140
+; RV32I-NEXT:    bnez t5, .LBB13_140
 ; RV32I-NEXT:  .LBB13_129:
-; RV32I-NEXT:    mv t3, s1
-; RV32I-NEXT:    bne t1, t4, .LBB13_141
+; RV32I-NEXT:    beq t1, s11, .LBB13_141
 ; RV32I-NEXT:  .LBB13_130:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    bne t1, s4, .LBB13_142
+; RV32I-NEXT:    mv t3, s4
+; RV32I-NEXT:    bne t1, t6, .LBB13_142
 ; RV32I-NEXT:  .LBB13_131:
-; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    li s6, 0
 ; RV32I-NEXT:    bne t1, s9, .LBB13_143
 ; RV32I-NEXT:  .LBB13_132:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    bne t1, s2, .LBB13_144
-; RV32I-NEXT:  .LBB13_133:
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    bne t1, s3, .LBB13_145
+; RV32I-NEXT:    bne t1, s1, .LBB13_144
+; RV32I-NEXT:  .LBB13_133:
+; RV32I-NEXT:    li s6, 0
+; RV32I-NEXT:    bne t1, s2, .LBB13_145
 ; RV32I-NEXT:  .LBB13_134:
-; RV32I-NEXT:    bnez a7, .LBB13_146
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    bne t1, s3, .LBB13_146
 ; RV32I-NEXT:  .LBB13_135:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    bnez t5, .LBB13_147
+; RV32I-NEXT:    bnez a7, .LBB13_147
 ; RV32I-NEXT:  .LBB13_136:
-; RV32I-NEXT:    beqz t1, .LBB13_148
-; RV32I-NEXT:  .LBB13_137:
-; RV32I-NEXT:    li a4, 0
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    bnez t5, .LBB13_149
-; RV32I-NEXT:    j .LBB13_150
+; RV32I-NEXT:    bnez t5, .LBB13_148
+; RV32I-NEXT:  .LBB13_137:
+; RV32I-NEXT:    beqz t1, .LBB13_149
 ; RV32I-NEXT:  .LBB13_138:
-; RV32I-NEXT:    or a4, s8, t3
+; RV32I-NEXT:    li s6, 0
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    beqz t5, .LBB13_128
+; RV32I-NEXT:    bnez t5, .LBB13_150
+; RV32I-NEXT:    j .LBB13_151
 ; RV32I-NEXT:  .LBB13_139:
-; RV32I-NEXT:    mv t3, s5
-; RV32I-NEXT:    bne t1, t6, .LBB13_129
+; RV32I-NEXT:    or s6, a1, t3
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beqz t5, .LBB13_129
 ; RV32I-NEXT:  .LBB13_140:
-; RV32I-NEXT:    or a4, s6, t3
-; RV32I-NEXT:    mv t3, s1
-; RV32I-NEXT:    beq t1, t4, .LBB13_130
+; RV32I-NEXT:    mv t3, s5
+; RV32I-NEXT:    bne t1, s11, .LBB13_130
 ; RV32I-NEXT:  .LBB13_141:
-; RV32I-NEXT:    mv t3, a4
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    beq t1, s4, .LBB13_131
+; RV32I-NEXT:    or s6, ra, t3
+; RV32I-NEXT:    mv t3, s4
+; RV32I-NEXT:    beq t1, t6, .LBB13_131
 ; RV32I-NEXT:  .LBB13_142:
-; RV32I-NEXT:    mv a4, t3
-; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    mv t3, s6
+; RV32I-NEXT:    li s6, 0
 ; RV32I-NEXT:    beq t1, s9, .LBB13_132
 ; RV32I-NEXT:  .LBB13_143:
-; RV32I-NEXT:    mv t3, a4
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    beq t1, s2, .LBB13_133
-; RV32I-NEXT:  .LBB13_144:
-; RV32I-NEXT:    mv a4, t3
+; RV32I-NEXT:    mv s6, t3
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    beq t1, s3, .LBB13_134
+; RV32I-NEXT:    beq t1, s1, .LBB13_133
+; RV32I-NEXT:  .LBB13_144:
+; RV32I-NEXT:    mv t3, s6
+; RV32I-NEXT:    li s6, 0
+; RV32I-NEXT:    beq t1, s2, .LBB13_134
 ; RV32I-NEXT:  .LBB13_145:
-; RV32I-NEXT:    mv t3, a4
-; RV32I-NEXT:    beqz a7, .LBB13_135
+; RV32I-NEXT:    mv s6, t3
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beq t1, s3, .LBB13_135
 ; RV32I-NEXT:  .LBB13_146:
-; RV32I-NEXT:    mv a6, t3
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    beqz t5, .LBB13_136
+; RV32I-NEXT:    mv t3, s6
+; RV32I-NEXT:    beqz a7, .LBB13_136
 ; RV32I-NEXT:  .LBB13_147:
-; RV32I-NEXT:    mv a4, s7
-; RV32I-NEXT:    bnez t1, .LBB13_137
-; RV32I-NEXT:  .LBB13_148:
-; RV32I-NEXT:    or a4, s8, a4
+; RV32I-NEXT:    mv a6, t3
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    beqz t5, .LBB13_150
+; RV32I-NEXT:    beqz t5, .LBB13_137
+; RV32I-NEXT:  .LBB13_148:
+; RV32I-NEXT:    mv t3, s7
+; RV32I-NEXT:    bnez t1, .LBB13_138
 ; RV32I-NEXT:  .LBB13_149:
-; RV32I-NEXT:    mv t3, s5
+; RV32I-NEXT:    or s6, a1, t3
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beqz t5, .LBB13_151
 ; RV32I-NEXT:  .LBB13_150:
-; RV32I-NEXT:    beq t1, s0, .LBB13_161
-; RV32I-NEXT:  # %bb.151:
-; RV32I-NEXT:    mv t3, s1
-; RV32I-NEXT:    bne t1, t6, .LBB13_162
-; RV32I-NEXT:  .LBB13_152:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    bne t1, t4, .LBB13_163
+; RV32I-NEXT:    mv t3, s5
+; RV32I-NEXT:  .LBB13_151:
+; RV32I-NEXT:    beq t1, s10, .LBB13_162
+; RV32I-NEXT:  # %bb.152:
+; RV32I-NEXT:    mv t3, s4
+; RV32I-NEXT:    bne t1, s11, .LBB13_163
 ; RV32I-NEXT:  .LBB13_153:
-; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    bne t1, s4, .LBB13_164
+; RV32I-NEXT:    li s6, 0
+; RV32I-NEXT:    bne t1, t6, .LBB13_164
 ; RV32I-NEXT:  .LBB13_154:
-; RV32I-NEXT:    li a4, 0
+; RV32I-NEXT:    li t3, 0
 ; RV32I-NEXT:    bne t1, s9, .LBB13_165
 ; RV32I-NEXT:  .LBB13_155:
-; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    bne t1, s2, .LBB13_166
+; RV32I-NEXT:    li s6, 0
+; RV32I-NEXT:    bne t1, s1, .LBB13_166
 ; RV32I-NEXT:  .LBB13_156:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    bne t1, s3, .LBB13_167
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    bne t1, s2, .LBB13_167
 ; RV32I-NEXT:  .LBB13_157:
-; RV32I-NEXT:    bnez a7, .LBB13_168
+; RV32I-NEXT:    li s6, 0
+; RV32I-NEXT:    bne t1, s3, .LBB13_168
 ; RV32I-NEXT:  .LBB13_158:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    bnez t5, .LBB13_169
+; RV32I-NEXT:    bnez a7, .LBB13_169
 ; RV32I-NEXT:  .LBB13_159:
-; RV32I-NEXT:    beqz t1, .LBB13_170
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    bnez t5, .LBB13_170
 ; RV32I-NEXT:  .LBB13_160:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    mv t3, s1
-; RV32I-NEXT:    bne t1, s0, .LBB13_171
-; RV32I-NEXT:    j .LBB13_172
+; RV32I-NEXT:    beqz t1, .LBB13_171
 ; RV32I-NEXT:  .LBB13_161:
-; RV32I-NEXT:    or a4, s6, t3
-; RV32I-NEXT:    mv t3, s1
-; RV32I-NEXT:    beq t1, t6, .LBB13_152
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    j .LBB13_172
 ; RV32I-NEXT:  .LBB13_162:
-; RV32I-NEXT:    mv t3, a4
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    beq t1, t4, .LBB13_153
+; RV32I-NEXT:    or s6, ra, t3
+; RV32I-NEXT:    mv t3, s4
+; RV32I-NEXT:    beq t1, s11, .LBB13_153
 ; RV32I-NEXT:  .LBB13_163:
-; RV32I-NEXT:    mv a4, t3
-; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    beq t1, s4, .LBB13_154
+; RV32I-NEXT:    mv t3, s6
+; RV32I-NEXT:    li s6, 0
+; RV32I-NEXT:    beq t1, t6, .LBB13_154
 ; RV32I-NEXT:  .LBB13_164:
-; RV32I-NEXT:    mv t3, a4
-; RV32I-NEXT:    li a4, 0
+; RV32I-NEXT:    mv s6, t3
+; RV32I-NEXT:    li t3, 0
 ; RV32I-NEXT:    beq t1, s9, .LBB13_155
 ; RV32I-NEXT:  .LBB13_165:
-; RV32I-NEXT:    mv a4, t3
-; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    beq t1, s2, .LBB13_156
+; RV32I-NEXT:    mv t3, s6
+; RV32I-NEXT:    li s6, 0
+; RV32I-NEXT:    beq t1, s1, .LBB13_156
 ; RV32I-NEXT:  .LBB13_166:
-; RV32I-NEXT:    mv t3, a4
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    beq t1, s3, .LBB13_157
+; RV32I-NEXT:    mv s6, t3
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beq t1, s2, .LBB13_157
 ; RV32I-NEXT:  .LBB13_167:
-; RV32I-NEXT:    mv a4, t3
-; RV32I-NEXT:    beqz a7, .LBB13_158
+; RV32I-NEXT:    mv t3, s6
+; RV32I-NEXT:    li s6, 0
+; RV32I-NEXT:    beq t1, s3, .LBB13_158
 ; RV32I-NEXT:  .LBB13_168:
-; RV32I-NEXT:    mv t0, a4
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    beqz t5, .LBB13_159
+; RV32I-NEXT:    mv s6, t3
+; RV32I-NEXT:    beqz a7, .LBB13_159
 ; RV32I-NEXT:  .LBB13_169:
-; RV32I-NEXT:    mv a4, s5
-; RV32I-NEXT:    bnez t1, .LBB13_160
+; RV32I-NEXT:    mv t0, s6
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beqz t5, .LBB13_160
 ; RV32I-NEXT:  .LBB13_170:
-; RV32I-NEXT:    or a4, s6, a4
-; RV32I-NEXT:    mv t3, s1
-; RV32I-NEXT:    beq t1, s0, .LBB13_172
+; RV32I-NEXT:    mv t3, s5
+; RV32I-NEXT:    bnez t1, .LBB13_161
 ; RV32I-NEXT:  .LBB13_171:
-; RV32I-NEXT:    mv t3, a4
+; RV32I-NEXT:    or t3, ra, t3
 ; RV32I-NEXT:  .LBB13_172:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    bne t1, t6, .LBB13_190
+; RV32I-NEXT:    lw a1, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    mv t5, s4
+; RV32I-NEXT:    bne t1, s10, .LBB13_191
 ; RV32I-NEXT:  # %bb.173:
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    bne t1, t4, .LBB13_191
+; RV32I-NEXT:    bne t1, s11, .LBB13_192
 ; RV32I-NEXT:  .LBB13_174:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    bne t1, s4, .LBB13_192
+; RV32I-NEXT:    li t5, 0
+; RV32I-NEXT:    bne t1, t6, .LBB13_193
 ; RV32I-NEXT:  .LBB13_175:
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    bne t1, s9, .LBB13_193
+; RV32I-NEXT:    bne t1, s9, .LBB13_194
 ; RV32I-NEXT:  .LBB13_176:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    bne t1, s2, .LBB13_194
+; RV32I-NEXT:    li t5, 0
+; RV32I-NEXT:    bne t1, s1, .LBB13_195
 ; RV32I-NEXT:  .LBB13_177:
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    bne t1, s3, .LBB13_195
+; RV32I-NEXT:    bne t1, s2, .LBB13_196
 ; RV32I-NEXT:  .LBB13_178:
-; RV32I-NEXT:    bnez a7, .LBB13_196
+; RV32I-NEXT:    li t5, 0
+; RV32I-NEXT:    bne t1, s3, .LBB13_197
 ; RV32I-NEXT:  .LBB13_179:
-; RV32I-NEXT:    bnez t1, .LBB13_197
+; RV32I-NEXT:    bnez a7, .LBB13_198
 ; RV32I-NEXT:  .LBB13_180:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    bne t1, s0, .LBB13_198
+; RV32I-NEXT:    bnez t1, .LBB13_199
 ; RV32I-NEXT:  .LBB13_181:
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    bne t1, t6, .LBB13_199
+; RV32I-NEXT:    bne t1, s10, .LBB13_200
 ; RV32I-NEXT:  .LBB13_182:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    bne t1, t4, .LBB13_200
+; RV32I-NEXT:    li t5, 0
+; RV32I-NEXT:    bne t1, s11, .LBB13_201
 ; RV32I-NEXT:  .LBB13_183:
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    bne t1, s4, .LBB13_201
+; RV32I-NEXT:    bne t1, t6, .LBB13_202
 ; RV32I-NEXT:  .LBB13_184:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    bne t1, s9, .LBB13_202
+; RV32I-NEXT:    li t4, 0
+; RV32I-NEXT:    bne t1, s9, .LBB13_203
 ; RV32I-NEXT:  .LBB13_185:
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    bne t1, s2, .LBB13_203
+; RV32I-NEXT:    bne t1, s1, .LBB13_204
 ; RV32I-NEXT:  .LBB13_186:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    bne t1, s3, .LBB13_204
+; RV32I-NEXT:    li t4, 0
+; RV32I-NEXT:    bne t1, s2, .LBB13_205
 ; RV32I-NEXT:  .LBB13_187:
-; RV32I-NEXT:    beqz a7, .LBB13_189
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    bne t1, s3, .LBB13_206
 ; RV32I-NEXT:  .LBB13_188:
-; RV32I-NEXT:    mv t2, a4
+; RV32I-NEXT:    beqz a7, .LBB13_190
 ; RV32I-NEXT:  .LBB13_189:
-; RV32I-NEXT:    srli a4, ra, 16
-; RV32I-NEXT:    lui t4, 16
-; RV32I-NEXT:    srli t3, ra, 24
-; RV32I-NEXT:    srli a7, a1, 16
-; RV32I-NEXT:    srli t6, a1, 24
-; RV32I-NEXT:    srli t1, a3, 16
-; RV32I-NEXT:    srli s2, a3, 24
-; RV32I-NEXT:    srli t5, a5, 16
-; RV32I-NEXT:    srli s3, a5, 24
-; RV32I-NEXT:    srli s1, a6, 16
-; RV32I-NEXT:    srli s6, a6, 24
-; RV32I-NEXT:    srli s0, t0, 16
-; RV32I-NEXT:    srli s5, t0, 24
-; RV32I-NEXT:    srli s4, a0, 16
-; RV32I-NEXT:    srli s7, a0, 24
-; RV32I-NEXT:    srli s8, t2, 16
-; RV32I-NEXT:    srli s9, t2, 24
-; RV32I-NEXT:    addi t4, t4, -1
-; RV32I-NEXT:    and s10, ra, t4
-; RV32I-NEXT:    and s11, a1, t4
-; RV32I-NEXT:    srli s10, s10, 8
-; RV32I-NEXT:    sb ra, 0(a2)
-; RV32I-NEXT:    sb s10, 1(a2)
-; RV32I-NEXT:    sb a4, 2(a2)
-; RV32I-NEXT:    sb t3, 3(a2)
-; RV32I-NEXT:    and a4, a3, t4
-; RV32I-NEXT:    srli t3, s11, 8
+; RV32I-NEXT:    mv t2, t3
+; RV32I-NEXT:  .LBB13_190:
+; RV32I-NEXT:    srli a7, a4, 16
+; RV32I-NEXT:    lui t5, 16
+; RV32I-NEXT:    srli t4, a4, 24
+; RV32I-NEXT:    srli t1, a1, 16
+; RV32I-NEXT:    srli s0, a1, 24
+; RV32I-NEXT:    lw a3, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    srli t3, a3, 16
+; RV32I-NEXT:    srli s3, a3, 24
+; RV32I-NEXT:    srli t6, a5, 16
+; RV32I-NEXT:    srli s4, a5, 24
+; RV32I-NEXT:    srli s2, a6, 16
+; RV32I-NEXT:    srli s7, a6, 24
+; RV32I-NEXT:    srli s1, t0, 16
+; RV32I-NEXT:    srli s6, t0, 24
+; RV32I-NEXT:    srli s5, a0, 16
+; RV32I-NEXT:    srli s8, a0, 24
+; RV32I-NEXT:    srli s9, t2, 16
+; RV32I-NEXT:    srli s10, t2, 24
+; RV32I-NEXT:    addi t5, t5, -1
+; RV32I-NEXT:    and s11, a4, t5
+; RV32I-NEXT:    and ra, a1, t5
+; RV32I-NEXT:    srli s11, s11, 8
+; RV32I-NEXT:    sb a4, 0(a2)
+; RV32I-NEXT:    sb s11, 1(a2)
+; RV32I-NEXT:    sb a7, 2(a2)
+; RV32I-NEXT:    sb t4, 3(a2)
+; RV32I-NEXT:    and a4, a3, t5
+; RV32I-NEXT:    srli a7, ra, 8
 ; RV32I-NEXT:    sb a1, 4(a2)
-; RV32I-NEXT:    sb t3, 5(a2)
-; RV32I-NEXT:    sb a7, 6(a2)
-; RV32I-NEXT:    sb t6, 7(a2)
-; RV32I-NEXT:    and a1, a5, t4
+; RV32I-NEXT:    sb a7, 5(a2)
+; RV32I-NEXT:    sb t1, 6(a2)
+; RV32I-NEXT:    sb s0, 7(a2)
+; RV32I-NEXT:    and a1, a5, t5
 ; RV32I-NEXT:    srli a4, a4, 8
 ; RV32I-NEXT:    sb a3, 8(a2)
 ; RV32I-NEXT:    sb a4, 9(a2)
-; RV32I-NEXT:    sb t1, 10(a2)
-; RV32I-NEXT:    sb s2, 11(a2)
-; RV32I-NEXT:    and a3, a6, t4
+; RV32I-NEXT:    sb t3, 10(a2)
+; RV32I-NEXT:    sb s3, 11(a2)
+; RV32I-NEXT:    and a3, a6, t5
 ; RV32I-NEXT:    srli a1, a1, 8
 ; RV32I-NEXT:    sb a5, 12(a2)
 ; RV32I-NEXT:    sb a1, 13(a2)
-; RV32I-NEXT:    sb t5, 14(a2)
-; RV32I-NEXT:    sb s3, 15(a2)
-; RV32I-NEXT:    and a1, t0, t4
+; RV32I-NEXT:    sb t6, 14(a2)
+; RV32I-NEXT:    sb s4, 15(a2)
+; RV32I-NEXT:    and a1, t0, t5
 ; RV32I-NEXT:    srli a3, a3, 8
 ; RV32I-NEXT:    sb a6, 16(a2)
 ; RV32I-NEXT:    sb a3, 17(a2)
-; RV32I-NEXT:    sb s1, 18(a2)
-; RV32I-NEXT:    sb s6, 19(a2)
-; RV32I-NEXT:    and a3, a0, t4
-; RV32I-NEXT:    and a4, t2, t4
+; RV32I-NEXT:    sb s2, 18(a2)
+; RV32I-NEXT:    sb s7, 19(a2)
+; RV32I-NEXT:    and a3, a0, t5
+; RV32I-NEXT:    and a4, t2, t5
 ; RV32I-NEXT:    srli a1, a1, 8
 ; RV32I-NEXT:    srli a3, a3, 8
 ; RV32I-NEXT:    srli a4, a4, 8
 ; RV32I-NEXT:    sb t0, 20(a2)
 ; RV32I-NEXT:    sb a1, 21(a2)
-; RV32I-NEXT:    sb s0, 22(a2)
-; RV32I-NEXT:    sb s5, 23(a2)
+; RV32I-NEXT:    sb s1, 22(a2)
+; RV32I-NEXT:    sb s6, 23(a2)
 ; RV32I-NEXT:    sb a0, 24(a2)
 ; RV32I-NEXT:    sb a3, 25(a2)
-; RV32I-NEXT:    sb s4, 26(a2)
-; RV32I-NEXT:    sb s7, 27(a2)
+; RV32I-NEXT:    sb s5, 26(a2)
+; RV32I-NEXT:    sb s8, 27(a2)
 ; RV32I-NEXT:    sb t2, 28(a2)
 ; RV32I-NEXT:    sb a4, 29(a2)
-; RV32I-NEXT:    sb s8, 30(a2)
-; RV32I-NEXT:    sb s9, 31(a2)
+; RV32I-NEXT:    sb s9, 30(a2)
+; RV32I-NEXT:    sb s10, 31(a2)
 ; RV32I-NEXT:    lw ra, 60(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 56(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s1, 52(sp) # 4-byte Folded Reload
@@ -4908,64 +4940,68 @@ define void @lshr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun
 ; RV32I-NEXT:    lw s11, 12(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 64
 ; RV32I-NEXT:    ret
-; RV32I-NEXT:  .LBB13_190:
-; RV32I-NEXT:    mv a4, t3
-; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    beq t1, t4, .LBB13_174
 ; RV32I-NEXT:  .LBB13_191:
-; RV32I-NEXT:    mv t3, a4
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    beq t1, s4, .LBB13_175
+; RV32I-NEXT:    mv t5, t3
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beq t1, s11, .LBB13_174
 ; RV32I-NEXT:  .LBB13_192:
-; RV32I-NEXT:    mv a4, t3
+; RV32I-NEXT:    mv t3, t5
+; RV32I-NEXT:    li t5, 0
+; RV32I-NEXT:    beq t1, t6, .LBB13_175
+; RV32I-NEXT:  .LBB13_193:
+; RV32I-NEXT:    mv t5, t3
 ; RV32I-NEXT:    li t3, 0
 ; RV32I-NEXT:    beq t1, s9, .LBB13_176
-; RV32I-NEXT:  .LBB13_193:
-; RV32I-NEXT:    mv t3, a4
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    beq t1, s2, .LBB13_177
 ; RV32I-NEXT:  .LBB13_194:
-; RV32I-NEXT:    mv a4, t3
-; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    beq t1, s3, .LBB13_178
+; RV32I-NEXT:    mv t3, t5
+; RV32I-NEXT:    li t5, 0
+; RV32I-NEXT:    beq t1, s1, .LBB13_177
 ; RV32I-NEXT:  .LBB13_195:
-; RV32I-NEXT:    mv t3, a4
-; RV32I-NEXT:    beqz a7, .LBB13_179
+; RV32I-NEXT:    mv t5, t3
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beq t1, s2, .LBB13_178
 ; RV32I-NEXT:  .LBB13_196:
-; RV32I-NEXT:    mv a0, t3
-; RV32I-NEXT:    beqz t1, .LBB13_180
+; RV32I-NEXT:    mv t3, t5
+; RV32I-NEXT:    li t5, 0
+; RV32I-NEXT:    beq t1, s3, .LBB13_179
 ; RV32I-NEXT:  .LBB13_197:
-; RV32I-NEXT:    li s1, 0
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    beq t1, s0, .LBB13_181
+; RV32I-NEXT:    mv t5, t3
+; RV32I-NEXT:    beqz a7, .LBB13_180
 ; RV32I-NEXT:  .LBB13_198:
-; RV32I-NEXT:    mv a4, s1
-; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    beq t1, t6, .LBB13_182
+; RV32I-NEXT:    mv a0, t5
+; RV32I-NEXT:    beqz t1, .LBB13_181
 ; RV32I-NEXT:  .LBB13_199:
-; RV32I-NEXT:    mv t3, a4
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    beq t1, t4, .LBB13_183
-; RV32I-NEXT:  .LBB13_200:
-; RV32I-NEXT:    mv a4, t3
+; RV32I-NEXT:    li s4, 0
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    beq t1, s4, .LBB13_184
+; RV32I-NEXT:    beq t1, s10, .LBB13_182
+; RV32I-NEXT:  .LBB13_200:
+; RV32I-NEXT:    mv t3, s4
+; RV32I-NEXT:    li t5, 0
+; RV32I-NEXT:    beq t1, s11, .LBB13_183
 ; RV32I-NEXT:  .LBB13_201:
-; RV32I-NEXT:    mv t3, a4
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    beq t1, s9, .LBB13_185
-; RV32I-NEXT:  .LBB13_202:
-; RV32I-NEXT:    mv a4, t3
+; RV32I-NEXT:    mv t5, t3
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    beq t1, s2, .LBB13_186
+; RV32I-NEXT:    beq t1, t6, .LBB13_184
+; RV32I-NEXT:  .LBB13_202:
+; RV32I-NEXT:    mv t3, t5
+; RV32I-NEXT:    li t4, 0
+; RV32I-NEXT:    beq t1, s9, .LBB13_185
 ; RV32I-NEXT:  .LBB13_203:
-; RV32I-NEXT:    mv t3, a4
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    beq t1, s3, .LBB13_187
+; RV32I-NEXT:    mv t4, t3
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beq t1, s1, .LBB13_186
 ; RV32I-NEXT:  .LBB13_204:
-; RV32I-NEXT:    mv a4, t3
-; RV32I-NEXT:    bnez a7, .LBB13_188
-; RV32I-NEXT:    j .LBB13_189
+; RV32I-NEXT:    mv t3, t4
+; RV32I-NEXT:    li t4, 0
+; RV32I-NEXT:    beq t1, s2, .LBB13_187
+; RV32I-NEXT:  .LBB13_205:
+; RV32I-NEXT:    mv t4, t3
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beq t1, s3, .LBB13_188
+; RV32I-NEXT:  .LBB13_206:
+; RV32I-NEXT:    mv t3, t4
+; RV32I-NEXT:    bnez a7, .LBB13_189
+; RV32I-NEXT:    j .LBB13_190
   %src = load i256, ptr %src.ptr, align 1
   %wordOff = load i256, ptr %wordOff.ptr, align 1
   %bitOff = shl i256 %wordOff, 5
@@ -5300,9 +5336,9 @@ define void @lshr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no
 ; RV32I-NEXT:    sw s9, 20(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s10, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s11, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    li a4, 0
+; RV32I-NEXT:    li a5, 0
 ; RV32I-NEXT:    lbu a3, 0(a0)
-; RV32I-NEXT:    lbu a5, 1(a0)
+; RV32I-NEXT:    lbu a4, 1(a0)
 ; RV32I-NEXT:    lbu a6, 2(a0)
 ; RV32I-NEXT:    lbu a7, 3(a0)
 ; RV32I-NEXT:    lbu t0, 4(a0)
@@ -5313,736 +5349,750 @@ define void @lshr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no
 ; RV32I-NEXT:    lbu t5, 9(a0)
 ; RV32I-NEXT:    lbu t6, 10(a0)
 ; RV32I-NEXT:    lbu s0, 11(a0)
-; RV32I-NEXT:    slli a5, a5, 8
+; RV32I-NEXT:    slli a4, a4, 8
 ; RV32I-NEXT:    slli a7, a7, 8
 ; RV32I-NEXT:    slli t1, t1, 8
-; RV32I-NEXT:    or a3, a5, a3
-; RV32I-NEXT:    or a7, a7, a6
-; RV32I-NEXT:    or t1, t1, t0
-; RV32I-NEXT:    lbu a6, 13(a0)
-; RV32I-NEXT:    lbu a5, 14(a0)
-; RV32I-NEXT:    lbu s1, 15(a0)
+; RV32I-NEXT:    or a3, a4, a3
+; RV32I-NEXT:    or a4, a7, a6
+; RV32I-NEXT:    or a7, t1, t0
+; RV32I-NEXT:    lbu t0, 13(a0)
+; RV32I-NEXT:    lbu a6, 14(a0)
+; RV32I-NEXT:    lbu t1, 15(a0)
 ; RV32I-NEXT:    slli t3, t3, 8
 ; RV32I-NEXT:    slli t5, t5, 8
 ; RV32I-NEXT:    slli s0, s0, 8
-; RV32I-NEXT:    or t3, t3, t2
-; RV32I-NEXT:    or t0, t5, t4
-; RV32I-NEXT:    or t5, s0, t6
-; RV32I-NEXT:    lbu t2, 1(a1)
-; RV32I-NEXT:    lbu t4, 0(a1)
+; RV32I-NEXT:    or s1, t3, t2
+; RV32I-NEXT:    or t2, t5, t4
+; RV32I-NEXT:    or t4, s0, t6
+; RV32I-NEXT:    lbu t3, 1(a1)
+; RV32I-NEXT:    lbu t5, 0(a1)
 ; RV32I-NEXT:    lbu t6, 2(a1)
 ; RV32I-NEXT:    lbu a1, 3(a1)
-; RV32I-NEXT:    slli t2, t2, 8
-; RV32I-NEXT:    or s0, t2, t4
-; RV32I-NEXT:    slli t2, s1, 8
+; RV32I-NEXT:    slli t3, t3, 8
+; RV32I-NEXT:    or t5, t3, t5
+; RV32I-NEXT:    slli t3, t1, 8
 ; RV32I-NEXT:    slli a1, a1, 8
 ; RV32I-NEXT:    or a1, a1, t6
-; RV32I-NEXT:    slli t4, a7, 16
-; RV32I-NEXT:    slli a7, t3, 16
-; RV32I-NEXT:    slli t3, t5, 16
-; RV32I-NEXT:    slli t5, a1, 16
-; RV32I-NEXT:    or a1, a7, t1
-; RV32I-NEXT:    or a7, t5, s0
+; RV32I-NEXT:    slli a4, a4, 16
+; RV32I-NEXT:    slli s1, s1, 16
+; RV32I-NEXT:    slli t4, t4, 16
+; RV32I-NEXT:    slli t1, a1, 16
+; RV32I-NEXT:    or s5, s1, a7
+; RV32I-NEXT:    or a7, t1, t5
 ; RV32I-NEXT:    slli a7, a7, 6
 ; RV32I-NEXT:    srli t1, a7, 5
 ; RV32I-NEXT:    andi t5, a7, 31
 ; RV32I-NEXT:    neg s3, t5
 ; RV32I-NEXT:    beqz t5, .LBB14_2
 ; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    sll a4, a1, s3
+; RV32I-NEXT:    sll a5, s5, s3
 ; RV32I-NEXT:  .LBB14_2:
-; RV32I-NEXT:    or s7, t4, a3
-; RV32I-NEXT:    lbu t4, 12(a0)
-; RV32I-NEXT:    lbu t6, 19(a0)
-; RV32I-NEXT:    slli s1, a6, 8
-; RV32I-NEXT:    or a5, t2, a5
-; RV32I-NEXT:    or a3, t3, t0
+; RV32I-NEXT:    or a4, a4, a3
+; RV32I-NEXT:    lbu t6, 12(a0)
+; RV32I-NEXT:    lbu s0, 19(a0)
+; RV32I-NEXT:    slli s1, t0, 8
+; RV32I-NEXT:    or t0, t3, a6
+; RV32I-NEXT:    or a1, t4, t2
 ; RV32I-NEXT:    beqz t1, .LBB14_4
 ; RV32I-NEXT:  # %bb.3:
-; RV32I-NEXT:    li s0, 0
+; RV32I-NEXT:    mv s11, a4
+; RV32I-NEXT:    li a4, 0
 ; RV32I-NEXT:    j .LBB14_5
 ; RV32I-NEXT:  .LBB14_4:
-; RV32I-NEXT:    srl s0, s7, a7
-; RV32I-NEXT:    or s0, s0, a4
+; RV32I-NEXT:    mv s11, a4
+; RV32I-NEXT:    srl a6, a4, a7
+; RV32I-NEXT:    or a4, a6, a5
 ; RV32I-NEXT:  .LBB14_5:
 ; RV32I-NEXT:    li a6, 0
-; RV32I-NEXT:    lbu t0, 17(a0)
-; RV32I-NEXT:    lbu a4, 18(a0)
-; RV32I-NEXT:    slli s4, t6, 8
-; RV32I-NEXT:    or s2, s1, t4
-; RV32I-NEXT:    slli a5, a5, 16
-; RV32I-NEXT:    li s5, 1
-; RV32I-NEXT:    sll t6, a3, s3
+; RV32I-NEXT:    lbu s2, 17(a0)
+; RV32I-NEXT:    lbu a5, 18(a0)
+; RV32I-NEXT:    slli s4, s0, 8
+; RV32I-NEXT:    or s1, s1, t6
+; RV32I-NEXT:    slli t0, t0, 16
+; RV32I-NEXT:    li t3, 1
+; RV32I-NEXT:    sll s6, a1, s3
 ; RV32I-NEXT:    beqz t5, .LBB14_7
 ; RV32I-NEXT:  # %bb.6:
-; RV32I-NEXT:    mv a6, t6
+; RV32I-NEXT:    mv a6, s6
 ; RV32I-NEXT:  .LBB14_7:
 ; RV32I-NEXT:    lbu t2, 16(a0)
-; RV32I-NEXT:    lbu t3, 23(a0)
-; RV32I-NEXT:    slli s1, t0, 8
-; RV32I-NEXT:    or t4, s4, a4
-; RV32I-NEXT:    srl a4, a1, a7
-; RV32I-NEXT:    or a5, a5, s2
-; RV32I-NEXT:    bne t1, s5, .LBB14_9
+; RV32I-NEXT:    lbu t4, 23(a0)
+; RV32I-NEXT:    slli s0, s2, 8
+; RV32I-NEXT:    or t6, s4, a5
+; RV32I-NEXT:    srl a3, s5, a7
+; RV32I-NEXT:    or a5, t0, s1
+; RV32I-NEXT:    sw a3, 0(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    bne t1, t3, .LBB14_9
 ; RV32I-NEXT:  # %bb.8:
-; RV32I-NEXT:    or s0, a4, a6
+; RV32I-NEXT:    or a4, a3, a6
 ; RV32I-NEXT:  .LBB14_9:
 ; RV32I-NEXT:    li t0, 0
-; RV32I-NEXT:    lbu s5, 21(a0)
+; RV32I-NEXT:    lbu s2, 21(a0)
 ; RV32I-NEXT:    lbu a6, 22(a0)
-; RV32I-NEXT:    slli s4, t3, 8
-; RV32I-NEXT:    or t2, s1, t2
-; RV32I-NEXT:    slli s6, t4, 16
-; RV32I-NEXT:    li s8, 2
-; RV32I-NEXT:    sll t3, a5, s3
+; RV32I-NEXT:    slli s1, t4, 8
+; RV32I-NEXT:    or t2, s0, t2
+; RV32I-NEXT:    slli s4, t6, 16
+; RV32I-NEXT:    li a3, 2
+; RV32I-NEXT:    sll s8, a5, s3
 ; RV32I-NEXT:    beqz t5, .LBB14_11
 ; RV32I-NEXT:  # %bb.10:
-; RV32I-NEXT:    mv t0, t3
+; RV32I-NEXT:    mv t0, s8
 ; RV32I-NEXT:  .LBB14_11:
-; RV32I-NEXT:    lbu s1, 20(a0)
-; RV32I-NEXT:    lbu s2, 27(a0)
-; RV32I-NEXT:    slli s5, s5, 8
-; RV32I-NEXT:    or s4, s4, a6
-; RV32I-NEXT:    srl t4, a3, a7
-; RV32I-NEXT:    or a6, s6, t2
-; RV32I-NEXT:    bne t1, s8, .LBB14_13
+; RV32I-NEXT:    lbu t6, 20(a0)
+; RV32I-NEXT:    lbu s0, 27(a0)
+; RV32I-NEXT:    slli s2, s2, 8
+; RV32I-NEXT:    or s1, s1, a6
+; RV32I-NEXT:    srl t3, a1, a7
+; RV32I-NEXT:    or a6, s4, t2
+; RV32I-NEXT:    sw s5, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    bne t1, a3, .LBB14_13
 ; RV32I-NEXT:  # %bb.12:
-; RV32I-NEXT:    or s0, t4, t0
+; RV32I-NEXT:    or a4, t3, t0
 ; RV32I-NEXT:  .LBB14_13:
-; RV32I-NEXT:    sw s7, 4(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    li t2, 0
-; RV32I-NEXT:    lbu s6, 25(a0)
+; RV32I-NEXT:    lbu s4, 25(a0)
 ; RV32I-NEXT:    lbu t0, 26(a0)
-; RV32I-NEXT:    slli s8, s2, 8
-; RV32I-NEXT:    or s7, s5, s1
-; RV32I-NEXT:    slli s9, s4, 16
-; RV32I-NEXT:    sll s11, a6, s3
+; RV32I-NEXT:    slli s7, s0, 8
+; RV32I-NEXT:    or s5, s2, t6
+; RV32I-NEXT:    slli s9, s1, 16
+; RV32I-NEXT:    li t6, 3
+; RV32I-NEXT:    sll t4, a6, s3
 ; RV32I-NEXT:    beqz t5, .LBB14_15
 ; RV32I-NEXT:  # %bb.14:
-; RV32I-NEXT:    mv t2, s11
+; RV32I-NEXT:    mv t2, t4
 ; RV32I-NEXT:  .LBB14_15:
-; RV32I-NEXT:    lbu s1, 24(a0)
-; RV32I-NEXT:    lbu s2, 31(a0)
-; RV32I-NEXT:    slli s5, s6, 8
-; RV32I-NEXT:    or s4, s8, t0
-; RV32I-NEXT:    srl ra, a5, a7
-; RV32I-NEXT:    or t0, s9, s7
-; RV32I-NEXT:    li s6, 3
-; RV32I-NEXT:    bne t1, s6, .LBB14_17
+; RV32I-NEXT:    lbu s0, 24(a0)
+; RV32I-NEXT:    lbu s1, 31(a0)
+; RV32I-NEXT:    slli s4, s4, 8
+; RV32I-NEXT:    or s2, s7, t0
+; RV32I-NEXT:    srl a3, a5, a7
+; RV32I-NEXT:    or t0, s9, s5
+; RV32I-NEXT:    li s9, 3
+; RV32I-NEXT:    bne t1, t6, .LBB14_17
 ; RV32I-NEXT:  # %bb.16:
-; RV32I-NEXT:    or s0, ra, t2
+; RV32I-NEXT:    or a4, a3, t2
 ; RV32I-NEXT:  .LBB14_17:
+; RV32I-NEXT:    mv t6, t3
 ; RV32I-NEXT:    li t2, 0
 ; RV32I-NEXT:    lbu s7, 29(a0)
-; RV32I-NEXT:    lbu s6, 30(a0)
-; RV32I-NEXT:    slli s8, s2, 8
-; RV32I-NEXT:    or s2, s5, s1
-; RV32I-NEXT:    slli s5, s4, 16
-; RV32I-NEXT:    li s9, 4
-; RV32I-NEXT:    sll s1, t0, s3
-; RV32I-NEXT:    sw s1, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lbu s5, 30(a0)
+; RV32I-NEXT:    slli s1, s1, 8
+; RV32I-NEXT:    or s10, s4, s0
+; RV32I-NEXT:    slli s2, s2, 16
+; RV32I-NEXT:    li a3, 4
+; RV32I-NEXT:    sll s0, t0, s3
 ; RV32I-NEXT:    beqz t5, .LBB14_19
 ; RV32I-NEXT:  # %bb.18:
-; RV32I-NEXT:    lw t2, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    mv t2, s0
 ; RV32I-NEXT:  .LBB14_19:
-; RV32I-NEXT:    lbu s1, 28(a0)
+; RV32I-NEXT:    lbu t3, 28(a0)
 ; RV32I-NEXT:    slli s7, s7, 8
-; RV32I-NEXT:    or s4, s8, s6
-; RV32I-NEXT:    srl s10, a6, a7
-; RV32I-NEXT:    or a0, s5, s2
-; RV32I-NEXT:    bne t1, s9, .LBB14_21
+; RV32I-NEXT:    or s4, s1, s5
+; RV32I-NEXT:    srl s1, a6, a7
+; RV32I-NEXT:    or a0, s2, s10
+; RV32I-NEXT:    beq t1, a3, .LBB14_21
 ; RV32I-NEXT:  # %bb.20:
-; RV32I-NEXT:    or s0, s10, t2
+; RV32I-NEXT:    mv a3, s1
+; RV32I-NEXT:    j .LBB14_22
 ; RV32I-NEXT:  .LBB14_21:
+; RV32I-NEXT:    mv a3, s1
+; RV32I-NEXT:    or a4, s1, t2
+; RV32I-NEXT:  .LBB14_22:
+; RV32I-NEXT:    li s10, 1
 ; RV32I-NEXT:    li s2, 0
-; RV32I-NEXT:    or t2, s7, s1
+; RV32I-NEXT:    or t2, s7, t3
 ; RV32I-NEXT:    slli s4, s4, 16
-; RV32I-NEXT:    li s9, 5
+; RV32I-NEXT:    li s1, 5
 ; RV32I-NEXT:    sll s7, a0, s3
-; RV32I-NEXT:    beqz t5, .LBB14_23
-; RV32I-NEXT:  # %bb.22:
+; RV32I-NEXT:    beqz t5, .LBB14_24
+; RV32I-NEXT:  # %bb.23:
 ; RV32I-NEXT:    mv s2, s7
-; RV32I-NEXT:  .LBB14_23:
-; RV32I-NEXT:    srl s8, t0, a7
+; RV32I-NEXT:  .LBB14_24:
+; RV32I-NEXT:    sw a1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    srl t3, t0, a7
 ; RV32I-NEXT:    or t2, s4, t2
-; RV32I-NEXT:    bne t1, s9, .LBB14_25
-; RV32I-NEXT:  # %bb.24:
-; RV32I-NEXT:    or s0, s8, s2
-; RV32I-NEXT:  .LBB14_25:
-; RV32I-NEXT:    li s4, 0
+; RV32I-NEXT:    beq t1, s1, .LBB14_26
+; RV32I-NEXT:  # %bb.25:
+; RV32I-NEXT:    mv a1, t3
+; RV32I-NEXT:    j .LBB14_27
+; RV32I-NEXT:  .LBB14_26:
+; RV32I-NEXT:    mv a1, t3
+; RV32I-NEXT:    or a4, t3, s2
+; RV32I-NEXT:  .LBB14_27:
+; RV32I-NEXT:    li t3, 0
 ; RV32I-NEXT:    li s2, 6
 ; RV32I-NEXT:    sll s5, t2, s3
-; RV32I-NEXT:    beqz t5, .LBB14_27
-; RV32I-NEXT:  # %bb.26:
-; RV32I-NEXT:    mv s4, s5
-; RV32I-NEXT:  .LBB14_27:
-; RV32I-NEXT:    srl s6, a0, a7
-; RV32I-NEXT:    bne t1, s2, .LBB14_29
+; RV32I-NEXT:    beqz t5, .LBB14_29
 ; RV32I-NEXT:  # %bb.28:
-; RV32I-NEXT:    or s0, s6, s4
+; RV32I-NEXT:    mv t3, s5
 ; RV32I-NEXT:  .LBB14_29:
-; RV32I-NEXT:    li s3, 7
-; RV32I-NEXT:    srl s1, t2, a7
-; RV32I-NEXT:    mv s4, s1
-; RV32I-NEXT:    bne t1, s3, .LBB14_34
+; RV32I-NEXT:    srl s3, a0, a7
+; RV32I-NEXT:    beq t1, s2, .LBB14_31
 ; RV32I-NEXT:  # %bb.30:
-; RV32I-NEXT:    bnez a7, .LBB14_35
+; RV32I-NEXT:    mv ra, s3
+; RV32I-NEXT:    j .LBB14_32
 ; RV32I-NEXT:  .LBB14_31:
-; RV32I-NEXT:    li s0, 0
-; RV32I-NEXT:    bnez t5, .LBB14_36
+; RV32I-NEXT:    mv ra, s3
+; RV32I-NEXT:    or a4, s3, t3
 ; RV32I-NEXT:  .LBB14_32:
-; RV32I-NEXT:    li s4, 2
-; RV32I-NEXT:    beqz t1, .LBB14_37
-; RV32I-NEXT:  .LBB14_33:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    j .LBB14_38
+; RV32I-NEXT:    li s3, 7
+; RV32I-NEXT:    srl s4, t2, a7
+; RV32I-NEXT:    mv t3, s4
+; RV32I-NEXT:    beq t1, s3, .LBB14_34
+; RV32I-NEXT:  # %bb.33:
+; RV32I-NEXT:    mv t3, a4
 ; RV32I-NEXT:  .LBB14_34:
-; RV32I-NEXT:    mv s4, s0
-; RV32I-NEXT:    beqz a7, .LBB14_31
-; RV32I-NEXT:  .LBB14_35:
-; RV32I-NEXT:    sw s4, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    li s0, 0
-; RV32I-NEXT:    beqz t5, .LBB14_32
+; RV32I-NEXT:    mv a4, s11
+; RV32I-NEXT:    beqz a7, .LBB14_36
+; RV32I-NEXT:  # %bb.35:
+; RV32I-NEXT:    mv a4, t3
 ; RV32I-NEXT:  .LBB14_36:
-; RV32I-NEXT:    mv s0, t6
-; RV32I-NEXT:    li s4, 2
-; RV32I-NEXT:    bnez t1, .LBB14_33
-; RV32I-NEXT:  .LBB14_37:
-; RV32I-NEXT:    or a4, a4, s0
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    li s11, 2
+; RV32I-NEXT:    beqz t5, .LBB14_38
+; RV32I-NEXT:  # %bb.37:
+; RV32I-NEXT:    mv t3, s6
 ; RV32I-NEXT:  .LBB14_38:
-; RV32I-NEXT:    li s0, 1
-; RV32I-NEXT:    li t6, 0
-; RV32I-NEXT:    bnez t5, .LBB14_57
+; RV32I-NEXT:    beqz t1, .LBB14_40
 ; RV32I-NEXT:  # %bb.39:
-; RV32I-NEXT:    beq t1, s0, .LBB14_58
+; RV32I-NEXT:    li s6, 0
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    bnez t5, .LBB14_41
+; RV32I-NEXT:    j .LBB14_42
 ; RV32I-NEXT:  .LBB14_40:
-; RV32I-NEXT:    li t6, 0
-; RV32I-NEXT:    bnez t5, .LBB14_59
+; RV32I-NEXT:    lw s6, 0(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    or s6, s6, t3
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beqz t5, .LBB14_42
 ; RV32I-NEXT:  .LBB14_41:
-; RV32I-NEXT:    beq t1, s4, .LBB14_60
+; RV32I-NEXT:    mv t3, s8
 ; RV32I-NEXT:  .LBB14_42:
-; RV32I-NEXT:    li t6, 0
-; RV32I-NEXT:    bnez t5, .LBB14_61
-; RV32I-NEXT:  .LBB14_43:
-; RV32I-NEXT:    li s4, 3
-; RV32I-NEXT:    bne t1, s4, .LBB14_45
+; RV32I-NEXT:    beq t1, s10, .LBB14_58
+; RV32I-NEXT:  # %bb.43:
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    bnez t5, .LBB14_59
 ; RV32I-NEXT:  .LBB14_44:
-; RV32I-NEXT:    or a4, s10, t6
+; RV32I-NEXT:    beq t1, s11, .LBB14_60
 ; RV32I-NEXT:  .LBB14_45:
-; RV32I-NEXT:    li t6, 0
-; RV32I-NEXT:    li s4, 4
-; RV32I-NEXT:    bnez t5, .LBB14_62
-; RV32I-NEXT:  # %bb.46:
-; RV32I-NEXT:    beq t1, s4, .LBB14_63
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    bnez t5, .LBB14_61
+; RV32I-NEXT:  .LBB14_46:
+; RV32I-NEXT:    bne t1, s9, .LBB14_48
 ; RV32I-NEXT:  .LBB14_47:
-; RV32I-NEXT:    li t6, 0
-; RV32I-NEXT:    bnez t5, .LBB14_64
+; RV32I-NEXT:    or s6, a3, t3
 ; RV32I-NEXT:  .LBB14_48:
-; RV32I-NEXT:    beq t1, s9, .LBB14_65
-; RV32I-NEXT:  .LBB14_49:
-; RV32I-NEXT:    mv t6, s1
-; RV32I-NEXT:    bne t1, s2, .LBB14_66
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    li s9, 4
+; RV32I-NEXT:    bnez t5, .LBB14_62
+; RV32I-NEXT:  # %bb.49:
+; RV32I-NEXT:    beq t1, s9, .LBB14_63
 ; RV32I-NEXT:  .LBB14_50:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    bne t1, s3, .LBB14_67
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    bnez t5, .LBB14_64
 ; RV32I-NEXT:  .LBB14_51:
-; RV32I-NEXT:    beqz a7, .LBB14_53
+; RV32I-NEXT:    beq t1, s1, .LBB14_65
 ; RV32I-NEXT:  .LBB14_52:
-; RV32I-NEXT:    mv a1, a4
+; RV32I-NEXT:    mv t3, s4
+; RV32I-NEXT:    bne t1, s2, .LBB14_66
 ; RV32I-NEXT:  .LBB14_53:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    li t6, 2
-; RV32I-NEXT:    beqz t5, .LBB14_55
-; RV32I-NEXT:  # %bb.54:
-; RV32I-NEXT:    mv a4, t3
+; RV32I-NEXT:    li s6, 0
+; RV32I-NEXT:    bne t1, s3, .LBB14_67
+; RV32I-NEXT:  .LBB14_54:
+; RV32I-NEXT:    bnez a7, .LBB14_68
 ; RV32I-NEXT:  .LBB14_55:
-; RV32I-NEXT:    beqz t1, .LBB14_68
-; RV32I-NEXT:  # %bb.56:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    j .LBB14_69
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    bnez t5, .LBB14_69
+; RV32I-NEXT:  .LBB14_56:
+; RV32I-NEXT:    beqz t1, .LBB14_70
 ; RV32I-NEXT:  .LBB14_57:
-; RV32I-NEXT:    mv t6, t3
-; RV32I-NEXT:    bne t1, s0, .LBB14_40
+; RV32I-NEXT:    li s6, 0
+; RV32I-NEXT:    j .LBB14_71
 ; RV32I-NEXT:  .LBB14_58:
-; RV32I-NEXT:    or a4, t4, t6
-; RV32I-NEXT:    li t6, 0
-; RV32I-NEXT:    beqz t5, .LBB14_41
+; RV32I-NEXT:    or s6, t6, t3
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beqz t5, .LBB14_44
 ; RV32I-NEXT:  .LBB14_59:
-; RV32I-NEXT:    mv t6, s11
-; RV32I-NEXT:    bne t1, s4, .LBB14_42
+; RV32I-NEXT:    mv t3, t4
+; RV32I-NEXT:    bne t1, s11, .LBB14_45
 ; RV32I-NEXT:  .LBB14_60:
-; RV32I-NEXT:    or a4, ra, t6
-; RV32I-NEXT:    li t6, 0
-; RV32I-NEXT:    beqz t5, .LBB14_43
+; RV32I-NEXT:    srl s6, a5, a7
+; RV32I-NEXT:    or s6, s6, t3
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beqz t5, .LBB14_46
 ; RV32I-NEXT:  .LBB14_61:
-; RV32I-NEXT:    lw t6, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    li s4, 3
-; RV32I-NEXT:    beq t1, s4, .LBB14_44
-; RV32I-NEXT:    j .LBB14_45
+; RV32I-NEXT:    mv t3, s0
+; RV32I-NEXT:    beq t1, s9, .LBB14_47
+; RV32I-NEXT:    j .LBB14_48
 ; RV32I-NEXT:  .LBB14_62:
-; RV32I-NEXT:    mv t6, s7
-; RV32I-NEXT:    bne t1, s4, .LBB14_47
+; RV32I-NEXT:    mv t3, s7
+; RV32I-NEXT:    bne t1, s9, .LBB14_50
 ; RV32I-NEXT:  .LBB14_63:
-; RV32I-NEXT:    or a4, s8, t6
-; RV32I-NEXT:    li t6, 0
-; RV32I-NEXT:    beqz t5, .LBB14_48
+; RV32I-NEXT:    or s6, a1, t3
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beqz t5, .LBB14_51
 ; RV32I-NEXT:  .LBB14_64:
-; RV32I-NEXT:    mv t6, s5
-; RV32I-NEXT:    bne t1, s9, .LBB14_49
+; RV32I-NEXT:    mv t3, s5
+; RV32I-NEXT:    bne t1, s1, .LBB14_52
 ; RV32I-NEXT:  .LBB14_65:
-; RV32I-NEXT:    or a4, s6, t6
-; RV32I-NEXT:    mv t6, s1
-; RV32I-NEXT:    beq t1, s2, .LBB14_50
+; RV32I-NEXT:    or s6, ra, t3
+; RV32I-NEXT:    mv t3, s4
+; RV32I-NEXT:    beq t1, s2, .LBB14_53
 ; RV32I-NEXT:  .LBB14_66:
-; RV32I-NEXT:    mv t6, a4
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    beq t1, s3, .LBB14_51
+; RV32I-NEXT:    mv t3, s6
+; RV32I-NEXT:    li s6, 0
+; RV32I-NEXT:    beq t1, s3, .LBB14_54
 ; RV32I-NEXT:  .LBB14_67:
-; RV32I-NEXT:    mv a4, t6
-; RV32I-NEXT:    bnez a7, .LBB14_52
-; RV32I-NEXT:    j .LBB14_53
+; RV32I-NEXT:    mv s6, t3
+; RV32I-NEXT:    beqz a7, .LBB14_55
 ; RV32I-NEXT:  .LBB14_68:
-; RV32I-NEXT:    or a4, t4, a4
-; RV32I-NEXT:  .LBB14_69:
-; RV32I-NEXT:    li t4, 3
+; RV32I-NEXT:    sw s6, 8(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    bnez t5, .LBB14_84
-; RV32I-NEXT:  # %bb.70:
-; RV32I-NEXT:    beq t1, s0, .LBB14_85
+; RV32I-NEXT:    beqz t5, .LBB14_56
+; RV32I-NEXT:  .LBB14_69:
+; RV32I-NEXT:    mv t3, s8
+; RV32I-NEXT:    bnez t1, .LBB14_57
+; RV32I-NEXT:  .LBB14_70:
+; RV32I-NEXT:    or s6, t6, t3
 ; RV32I-NEXT:  .LBB14_71:
+; RV32I-NEXT:    li t6, 3
 ; RV32I-NEXT:    li t3, 0
 ; RV32I-NEXT:    bnez t5, .LBB14_86
-; RV32I-NEXT:  .LBB14_72:
-; RV32I-NEXT:    beq t1, t6, .LBB14_87
+; RV32I-NEXT:  # %bb.72:
+; RV32I-NEXT:    beq t1, s10, .LBB14_87
 ; RV32I-NEXT:  .LBB14_73:
 ; RV32I-NEXT:    li t3, 0
 ; RV32I-NEXT:    bnez t5, .LBB14_88
 ; RV32I-NEXT:  .LBB14_74:
-; RV32I-NEXT:    beq t1, t4, .LBB14_89
+; RV32I-NEXT:    beq t1, s11, .LBB14_89
 ; RV32I-NEXT:  .LBB14_75:
 ; RV32I-NEXT:    li t3, 0
 ; RV32I-NEXT:    bnez t5, .LBB14_90
 ; RV32I-NEXT:  .LBB14_76:
-; RV32I-NEXT:    beq t1, s4, .LBB14_91
+; RV32I-NEXT:    beq t1, t6, .LBB14_91
 ; RV32I-NEXT:  .LBB14_77:
-; RV32I-NEXT:    mv t3, s1
-; RV32I-NEXT:    bne t1, s9, .LBB14_92
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    bnez t5, .LBB14_92
 ; RV32I-NEXT:  .LBB14_78:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    bne t1, s2, .LBB14_93
+; RV32I-NEXT:    beq t1, s9, .LBB14_93
 ; RV32I-NEXT:  .LBB14_79:
-; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    bne t1, s3, .LBB14_94
+; RV32I-NEXT:    mv t3, s4
+; RV32I-NEXT:    bne t1, s1, .LBB14_94
 ; RV32I-NEXT:  .LBB14_80:
-; RV32I-NEXT:    bnez a7, .LBB14_95
+; RV32I-NEXT:    li s6, 0
+; RV32I-NEXT:    bne t1, s2, .LBB14_95
 ; RV32I-NEXT:  .LBB14_81:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    bnez t5, .LBB14_96
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    bne t1, s3, .LBB14_96
 ; RV32I-NEXT:  .LBB14_82:
-; RV32I-NEXT:    beqz t1, .LBB14_97
+; RV32I-NEXT:    bnez a7, .LBB14_97
 ; RV32I-NEXT:  .LBB14_83:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    j .LBB14_98
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    bnez t5, .LBB14_98
 ; RV32I-NEXT:  .LBB14_84:
-; RV32I-NEXT:    mv t3, s11
-; RV32I-NEXT:    bne t1, s0, .LBB14_71
+; RV32I-NEXT:    beqz t1, .LBB14_99
 ; RV32I-NEXT:  .LBB14_85:
-; RV32I-NEXT:    or a4, ra, t3
+; RV32I-NEXT:    li s6, 0
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    beqz t5, .LBB14_72
+; RV32I-NEXT:    bnez t5, .LBB14_100
+; RV32I-NEXT:    j .LBB14_101
 ; RV32I-NEXT:  .LBB14_86:
-; RV32I-NEXT:    lw t3, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    bne t1, t6, .LBB14_73
+; RV32I-NEXT:    mv t3, t4
+; RV32I-NEXT:    bne t1, s10, .LBB14_73
 ; RV32I-NEXT:  .LBB14_87:
-; RV32I-NEXT:    or a4, s10, t3
+; RV32I-NEXT:    srl s6, a5, a7
+; RV32I-NEXT:    or s6, s6, t3
 ; RV32I-NEXT:    li t3, 0
 ; RV32I-NEXT:    beqz t5, .LBB14_74
 ; RV32I-NEXT:  .LBB14_88:
-; RV32I-NEXT:    mv t3, s7
-; RV32I-NEXT:    bne t1, t4, .LBB14_75
+; RV32I-NEXT:    mv t3, s0
+; RV32I-NEXT:    bne t1, s11, .LBB14_75
 ; RV32I-NEXT:  .LBB14_89:
-; RV32I-NEXT:    or a4, s8, t3
+; RV32I-NEXT:    or s6, a3, t3
 ; RV32I-NEXT:    li t3, 0
 ; RV32I-NEXT:    beqz t5, .LBB14_76
 ; RV32I-NEXT:  .LBB14_90:
-; RV32I-NEXT:    mv t3, s5
-; RV32I-NEXT:    bne t1, s4, .LBB14_77
+; RV32I-NEXT:    mv t3, s7
+; RV32I-NEXT:    bne t1, t6, .LBB14_77
 ; RV32I-NEXT:  .LBB14_91:
-; RV32I-NEXT:    or a4, s6, t3
-; RV32I-NEXT:    mv t3, s1
-; RV32I-NEXT:    beq t1, s9, .LBB14_78
+; RV32I-NEXT:    or s6, a1, t3
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beqz t5, .LBB14_78
 ; RV32I-NEXT:  .LBB14_92:
-; RV32I-NEXT:    mv t3, a4
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    beq t1, s2, .LBB14_79
+; RV32I-NEXT:    mv t3, s5
+; RV32I-NEXT:    bne t1, s9, .LBB14_79
 ; RV32I-NEXT:  .LBB14_93:
-; RV32I-NEXT:    mv a4, t3
-; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    beq t1, s3, .LBB14_80
+; RV32I-NEXT:    or s6, ra, t3
+; RV32I-NEXT:    mv t3, s4
+; RV32I-NEXT:    beq t1, s1, .LBB14_80
 ; RV32I-NEXT:  .LBB14_94:
-; RV32I-NEXT:    mv t3, a4
-; RV32I-NEXT:    beqz a7, .LBB14_81
+; RV32I-NEXT:    mv t3, s6
+; RV32I-NEXT:    li s6, 0
+; RV32I-NEXT:    beq t1, s2, .LBB14_81
 ; RV32I-NEXT:  .LBB14_95:
-; RV32I-NEXT:    mv a3, t3
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    beqz t5, .LBB14_82
+; RV32I-NEXT:    mv s6, t3
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beq t1, s3, .LBB14_82
 ; RV32I-NEXT:  .LBB14_96:
-; RV32I-NEXT:    mv a4, s11
-; RV32I-NEXT:    bnez t1, .LBB14_83
+; RV32I-NEXT:    mv t3, s6
+; RV32I-NEXT:    beqz a7, .LBB14_83
 ; RV32I-NEXT:  .LBB14_97:
-; RV32I-NEXT:    or a4, ra, a4
+; RV32I-NEXT:    sw t3, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beqz t5, .LBB14_84
 ; RV32I-NEXT:  .LBB14_98:
-; RV32I-NEXT:    lw ra, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    mv t3, t4
+; RV32I-NEXT:    bnez t1, .LBB14_85
+; RV32I-NEXT:  .LBB14_99:
+; RV32I-NEXT:    srl t4, a5, a7
+; RV32I-NEXT:    or s6, t4, t3
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    bnez t5, .LBB14_112
-; RV32I-NEXT:  # %bb.99:
-; RV32I-NEXT:    beq t1, s0, .LBB14_113
+; RV32I-NEXT:    beqz t5, .LBB14_101
 ; RV32I-NEXT:  .LBB14_100:
-; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    bnez t5, .LBB14_114
+; RV32I-NEXT:    mv t3, s0
 ; RV32I-NEXT:  .LBB14_101:
-; RV32I-NEXT:    beq t1, t6, .LBB14_115
-; RV32I-NEXT:  .LBB14_102:
+; RV32I-NEXT:    beq t1, s10, .LBB14_114
+; RV32I-NEXT:  # %bb.102:
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    bnez t5, .LBB14_116
+; RV32I-NEXT:    bnez t5, .LBB14_115
 ; RV32I-NEXT:  .LBB14_103:
-; RV32I-NEXT:    beq t1, t4, .LBB14_117
+; RV32I-NEXT:    beq t1, s11, .LBB14_116
 ; RV32I-NEXT:  .LBB14_104:
-; RV32I-NEXT:    mv t3, s1
-; RV32I-NEXT:    bne t1, s4, .LBB14_118
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    bnez t5, .LBB14_117
 ; RV32I-NEXT:  .LBB14_105:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    bne t1, s9, .LBB14_119
+; RV32I-NEXT:    beq t1, t6, .LBB14_118
 ; RV32I-NEXT:  .LBB14_106:
-; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    bne t1, s2, .LBB14_120
+; RV32I-NEXT:    mv t3, s4
+; RV32I-NEXT:    bne t1, s9, .LBB14_119
 ; RV32I-NEXT:  .LBB14_107:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    bne t1, s3, .LBB14_121
+; RV32I-NEXT:    li s6, 0
+; RV32I-NEXT:    bne t1, s1, .LBB14_120
 ; RV32I-NEXT:  .LBB14_108:
-; RV32I-NEXT:    bnez a7, .LBB14_122
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    bne t1, s2, .LBB14_121
 ; RV32I-NEXT:  .LBB14_109:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    bnez t5, .LBB14_123
+; RV32I-NEXT:    li s6, 0
+; RV32I-NEXT:    bne t1, s3, .LBB14_122
 ; RV32I-NEXT:  .LBB14_110:
-; RV32I-NEXT:    beqz t1, .LBB14_124
+; RV32I-NEXT:    bnez a7, .LBB14_123
 ; RV32I-NEXT:  .LBB14_111:
-; RV32I-NEXT:    li a4, 0
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    bnez t5, .LBB14_125
-; RV32I-NEXT:    j .LBB14_126
+; RV32I-NEXT:    bnez t5, .LBB14_124
 ; RV32I-NEXT:  .LBB14_112:
-; RV32I-NEXT:    lw t3, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    bne t1, s0, .LBB14_100
+; RV32I-NEXT:    beqz t1, .LBB14_125
 ; RV32I-NEXT:  .LBB14_113:
-; RV32I-NEXT:    or a4, s10, t3
+; RV32I-NEXT:    li s6, 0
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    beqz t5, .LBB14_101
+; RV32I-NEXT:    bnez t5, .LBB14_126
+; RV32I-NEXT:    j .LBB14_127
 ; RV32I-NEXT:  .LBB14_114:
-; RV32I-NEXT:    mv t3, s7
-; RV32I-NEXT:    bne t1, t6, .LBB14_102
-; RV32I-NEXT:  .LBB14_115:
-; RV32I-NEXT:    or a4, s8, t3
+; RV32I-NEXT:    or s6, a3, t3
 ; RV32I-NEXT:    li t3, 0
 ; RV32I-NEXT:    beqz t5, .LBB14_103
+; RV32I-NEXT:  .LBB14_115:
+; RV32I-NEXT:    mv t3, s7
+; RV32I-NEXT:    bne t1, s11, .LBB14_104
 ; RV32I-NEXT:  .LBB14_116:
-; RV32I-NEXT:    mv t3, s5
-; RV32I-NEXT:    bne t1, t4, .LBB14_104
+; RV32I-NEXT:    or s6, a1, t3
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beqz t5, .LBB14_105
 ; RV32I-NEXT:  .LBB14_117:
-; RV32I-NEXT:    or a4, s6, t3
-; RV32I-NEXT:    mv t3, s1
-; RV32I-NEXT:    beq t1, s4, .LBB14_105
+; RV32I-NEXT:    mv t3, s5
+; RV32I-NEXT:    bne t1, t6, .LBB14_106
 ; RV32I-NEXT:  .LBB14_118:
-; RV32I-NEXT:    mv t3, a4
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    beq t1, s9, .LBB14_106
+; RV32I-NEXT:    or s6, ra, t3
+; RV32I-NEXT:    mv t3, s4
+; RV32I-NEXT:    beq t1, s9, .LBB14_107
 ; RV32I-NEXT:  .LBB14_119:
-; RV32I-NEXT:    mv a4, t3
-; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    beq t1, s2, .LBB14_107
+; RV32I-NEXT:    mv t3, s6
+; RV32I-NEXT:    li s6, 0
+; RV32I-NEXT:    beq t1, s1, .LBB14_108
 ; RV32I-NEXT:  .LBB14_120:
-; RV32I-NEXT:    mv t3, a4
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    beq t1, s3, .LBB14_108
+; RV32I-NEXT:    mv s6, t3
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beq t1, s2, .LBB14_109
 ; RV32I-NEXT:  .LBB14_121:
-; RV32I-NEXT:    mv a4, t3
-; RV32I-NEXT:    beqz a7, .LBB14_109
+; RV32I-NEXT:    mv t3, s6
+; RV32I-NEXT:    li s6, 0
+; RV32I-NEXT:    beq t1, s3, .LBB14_110
 ; RV32I-NEXT:  .LBB14_122:
-; RV32I-NEXT:    mv a5, a4
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    beqz t5, .LBB14_110
+; RV32I-NEXT:    mv s6, t3
+; RV32I-NEXT:    beqz a7, .LBB14_111
 ; RV32I-NEXT:  .LBB14_123:
-; RV32I-NEXT:    lw a4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    bnez t1, .LBB14_111
-; RV32I-NEXT:  .LBB14_124:
-; RV32I-NEXT:    or a4, s10, a4
+; RV32I-NEXT:    mv a5, s6
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    beqz t5, .LBB14_126
+; RV32I-NEXT:    beqz t5, .LBB14_112
+; RV32I-NEXT:  .LBB14_124:
+; RV32I-NEXT:    mv t3, s0
+; RV32I-NEXT:    bnez t1, .LBB14_113
 ; RV32I-NEXT:  .LBB14_125:
-; RV32I-NEXT:    mv t3, s7
+; RV32I-NEXT:    or s6, a3, t3
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beqz t5, .LBB14_127
 ; RV32I-NEXT:  .LBB14_126:
-; RV32I-NEXT:    beq t1, s0, .LBB14_138
-; RV32I-NEXT:  # %bb.127:
+; RV32I-NEXT:    mv t3, s7
+; RV32I-NEXT:  .LBB14_127:
+; RV32I-NEXT:    beq t1, s10, .LBB14_139
+; RV32I-NEXT:  # %bb.128:
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    bnez t5, .LBB14_139
-; RV32I-NEXT:  .LBB14_128:
-; RV32I-NEXT:    beq t1, t6, .LBB14_140
+; RV32I-NEXT:    bnez t5, .LBB14_140
 ; RV32I-NEXT:  .LBB14_129:
-; RV32I-NEXT:    mv t3, s1
-; RV32I-NEXT:    bne t1, t4, .LBB14_141
+; RV32I-NEXT:    beq t1, s11, .LBB14_141
 ; RV32I-NEXT:  .LBB14_130:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    bne t1, s4, .LBB14_142
+; RV32I-NEXT:    mv t3, s4
+; RV32I-NEXT:    bne t1, t6, .LBB14_142
 ; RV32I-NEXT:  .LBB14_131:
-; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    li s6, 0
 ; RV32I-NEXT:    bne t1, s9, .LBB14_143
 ; RV32I-NEXT:  .LBB14_132:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    bne t1, s2, .LBB14_144
-; RV32I-NEXT:  .LBB14_133:
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    bne t1, s3, .LBB14_145
+; RV32I-NEXT:    bne t1, s1, .LBB14_144
+; RV32I-NEXT:  .LBB14_133:
+; RV32I-NEXT:    li s6, 0
+; RV32I-NEXT:    bne t1, s2, .LBB14_145
 ; RV32I-NEXT:  .LBB14_134:
-; RV32I-NEXT:    bnez a7, .LBB14_146
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    bne t1, s3, .LBB14_146
 ; RV32I-NEXT:  .LBB14_135:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    bnez t5, .LBB14_147
+; RV32I-NEXT:    bnez a7, .LBB14_147
 ; RV32I-NEXT:  .LBB14_136:
-; RV32I-NEXT:    beqz t1, .LBB14_148
-; RV32I-NEXT:  .LBB14_137:
-; RV32I-NEXT:    li a4, 0
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    bnez t5, .LBB14_149
-; RV32I-NEXT:    j .LBB14_150
+; RV32I-NEXT:    bnez t5, .LBB14_148
+; RV32I-NEXT:  .LBB14_137:
+; RV32I-NEXT:    beqz t1, .LBB14_149
 ; RV32I-NEXT:  .LBB14_138:
-; RV32I-NEXT:    or a4, s8, t3
+; RV32I-NEXT:    li s6, 0
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    beqz t5, .LBB14_128
+; RV32I-NEXT:    bnez t5, .LBB14_150
+; RV32I-NEXT:    j .LBB14_151
 ; RV32I-NEXT:  .LBB14_139:
-; RV32I-NEXT:    mv t3, s5
-; RV32I-NEXT:    bne t1, t6, .LBB14_129
+; RV32I-NEXT:    or s6, a1, t3
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beqz t5, .LBB14_129
 ; RV32I-NEXT:  .LBB14_140:
-; RV32I-NEXT:    or a4, s6, t3
-; RV32I-NEXT:    mv t3, s1
-; RV32I-NEXT:    beq t1, t4, .LBB14_130
+; RV32I-NEXT:    mv t3, s5
+; RV32I-NEXT:    bne t1, s11, .LBB14_130
 ; RV32I-NEXT:  .LBB14_141:
-; RV32I-NEXT:    mv t3, a4
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    beq t1, s4, .LBB14_131
+; RV32I-NEXT:    or s6, ra, t3
+; RV32I-NEXT:    mv t3, s4
+; RV32I-NEXT:    beq t1, t6, .LBB14_131
 ; RV32I-NEXT:  .LBB14_142:
-; RV32I-NEXT:    mv a4, t3
-; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    mv t3, s6
+; RV32I-NEXT:    li s6, 0
 ; RV32I-NEXT:    beq t1, s9, .LBB14_132
 ; RV32I-NEXT:  .LBB14_143:
-; RV32I-NEXT:    mv t3, a4
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    beq t1, s2, .LBB14_133
-; RV32I-NEXT:  .LBB14_144:
-; RV32I-NEXT:    mv a4, t3
+; RV32I-NEXT:    mv s6, t3
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    beq t1, s3, .LBB14_134
+; RV32I-NEXT:    beq t1, s1, .LBB14_133
+; RV32I-NEXT:  .LBB14_144:
+; RV32I-NEXT:    mv t3, s6
+; RV32I-NEXT:    li s6, 0
+; RV32I-NEXT:    beq t1, s2, .LBB14_134
 ; RV32I-NEXT:  .LBB14_145:
-; RV32I-NEXT:    mv t3, a4
-; RV32I-NEXT:    beqz a7, .LBB14_135
+; RV32I-NEXT:    mv s6, t3
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beq t1, s3, .LBB14_135
 ; RV32I-NEXT:  .LBB14_146:
-; RV32I-NEXT:    mv a6, t3
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    beqz t5, .LBB14_136
+; RV32I-NEXT:    mv t3, s6
+; RV32I-NEXT:    beqz a7, .LBB14_136
 ; RV32I-NEXT:  .LBB14_147:
-; RV32I-NEXT:    mv a4, s7
-; RV32I-NEXT:    bnez t1, .LBB14_137
-; RV32I-NEXT:  .LBB14_148:
-; RV32I-NEXT:    or a4, s8, a4
+; RV32I-NEXT:    mv a6, t3
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    beqz t5, .LBB14_150
+; RV32I-NEXT:    beqz t5, .LBB14_137
+; RV32I-NEXT:  .LBB14_148:
+; RV32I-NEXT:    mv t3, s7
+; RV32I-NEXT:    bnez t1, .LBB14_138
 ; RV32I-NEXT:  .LBB14_149:
-; RV32I-NEXT:    mv t3, s5
+; RV32I-NEXT:    or s6, a1, t3
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beqz t5, .LBB14_151
 ; RV32I-NEXT:  .LBB14_150:
-; RV32I-NEXT:    beq t1, s0, .LBB14_161
-; RV32I-NEXT:  # %bb.151:
-; RV32I-NEXT:    mv t3, s1
-; RV32I-NEXT:    bne t1, t6, .LBB14_162
-; RV32I-NEXT:  .LBB14_152:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    bne t1, t4, .LBB14_163
+; RV32I-NEXT:    mv t3, s5
+; RV32I-NEXT:  .LBB14_151:
+; RV32I-NEXT:    beq t1, s10, .LBB14_162
+; RV32I-NEXT:  # %bb.152:
+; RV32I-NEXT:    mv t3, s4
+; RV32I-NEXT:    bne t1, s11, .LBB14_163
 ; RV32I-NEXT:  .LBB14_153:
-; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    bne t1, s4, .LBB14_164
+; RV32I-NEXT:    li s6, 0
+; RV32I-NEXT:    bne t1, t6, .LBB14_164
 ; RV32I-NEXT:  .LBB14_154:
-; RV32I-NEXT:    li a4, 0
+; RV32I-NEXT:    li t3, 0
 ; RV32I-NEXT:    bne t1, s9, .LBB14_165
 ; RV32I-NEXT:  .LBB14_155:
-; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    bne t1, s2, .LBB14_166
+; RV32I-NEXT:    li s6, 0
+; RV32I-NEXT:    bne t1, s1, .LBB14_166
 ; RV32I-NEXT:  .LBB14_156:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    bne t1, s3, .LBB14_167
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    bne t1, s2, .LBB14_167
 ; RV32I-NEXT:  .LBB14_157:
-; RV32I-NEXT:    bnez a7, .LBB14_168
+; RV32I-NEXT:    li s6, 0
+; RV32I-NEXT:    bne t1, s3, .LBB14_168
 ; RV32I-NEXT:  .LBB14_158:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    bnez t5, .LBB14_169
+; RV32I-NEXT:    bnez a7, .LBB14_169
 ; RV32I-NEXT:  .LBB14_159:
-; RV32I-NEXT:    beqz t1, .LBB14_170
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    bnez t5, .LBB14_170
 ; RV32I-NEXT:  .LBB14_160:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    mv t3, s1
-; RV32I-NEXT:    bne t1, s0, .LBB14_171
-; RV32I-NEXT:    j .LBB14_172
+; RV32I-NEXT:    beqz t1, .LBB14_171
 ; RV32I-NEXT:  .LBB14_161:
-; RV32I-NEXT:    or a4, s6, t3
-; RV32I-NEXT:    mv t3, s1
-; RV32I-NEXT:    beq t1, t6, .LBB14_152
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    j .LBB14_172
 ; RV32I-NEXT:  .LBB14_162:
-; RV32I-NEXT:    mv t3, a4
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    beq t1, t4, .LBB14_153
+; RV32I-NEXT:    or s6, ra, t3
+; RV32I-NEXT:    mv t3, s4
+; RV32I-NEXT:    beq t1, s11, .LBB14_153
 ; RV32I-NEXT:  .LBB14_163:
-; RV32I-NEXT:    mv a4, t3
-; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    beq t1, s4, .LBB14_154
+; RV32I-NEXT:    mv t3, s6
+; RV32I-NEXT:    li s6, 0
+; RV32I-NEXT:    beq t1, t6, .LBB14_154
 ; RV32I-NEXT:  .LBB14_164:
-; RV32I-NEXT:    mv t3, a4
-; RV32I-NEXT:    li a4, 0
+; RV32I-NEXT:    mv s6, t3
+; RV32I-NEXT:    li t3, 0
 ; RV32I-NEXT:    beq t1, s9, .LBB14_155
 ; RV32I-NEXT:  .LBB14_165:
-; RV32I-NEXT:    mv a4, t3
-; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    beq t1, s2, .LBB14_156
+; RV32I-NEXT:    mv t3, s6
+; RV32I-NEXT:    li s6, 0
+; RV32I-NEXT:    beq t1, s1, .LBB14_156
 ; RV32I-NEXT:  .LBB14_166:
-; RV32I-NEXT:    mv t3, a4
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    beq t1, s3, .LBB14_157
+; RV32I-NEXT:    mv s6, t3
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beq t1, s2, .LBB14_157
 ; RV32I-NEXT:  .LBB14_167:
-; RV32I-NEXT:    mv a4, t3
-; RV32I-NEXT:    beqz a7, .LBB14_158
+; RV32I-NEXT:    mv t3, s6
+; RV32I-NEXT:    li s6, 0
+; RV32I-NEXT:    beq t1, s3, .LBB14_158
 ; RV32I-NEXT:  .LBB14_168:
-; RV32I-NEXT:    mv t0, a4
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    beqz t5, .LBB14_159
+; RV32I-NEXT:    mv s6, t3
+; RV32I-NEXT:    beqz a7, .LBB14_159
 ; RV32I-NEXT:  .LBB14_169:
-; RV32I-NEXT:    mv a4, s5
-; RV32I-NEXT:    bnez t1, .LBB14_160
+; RV32I-NEXT:    mv t0, s6
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beqz t5, .LBB14_160
 ; RV32I-NEXT:  .LBB14_170:
-; RV32I-NEXT:    or a4, s6, a4
-; RV32I-NEXT:    mv t3, s1
-; RV32I-NEXT:    beq t1, s0, .LBB14_172
+; RV32I-NEXT:    mv t3, s5
+; RV32I-NEXT:    bnez t1, .LBB14_161
 ; RV32I-NEXT:  .LBB14_171:
-; RV32I-NEXT:    mv t3, a4
+; RV32I-NEXT:    or t3, ra, t3
 ; RV32I-NEXT:  .LBB14_172:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    bne t1, t6, .LBB14_190
+; RV32I-NEXT:    lw a1, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    mv t5, s4
+; RV32I-NEXT:    bne t1, s10, .LBB14_191
 ; RV32I-NEXT:  # %bb.173:
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    bne t1, t4, .LBB14_191
+; RV32I-NEXT:    bne t1, s11, .LBB14_192
 ; RV32I-NEXT:  .LBB14_174:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    bne t1, s4, .LBB14_192
+; RV32I-NEXT:    li t5, 0
+; RV32I-NEXT:    bne t1, t6, .LBB14_193
 ; RV32I-NEXT:  .LBB14_175:
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    bne t1, s9, .LBB14_193
+; RV32I-NEXT:    bne t1, s9, .LBB14_194
 ; RV32I-NEXT:  .LBB14_176:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    bne t1, s2, .LBB14_194
+; RV32I-NEXT:    li t5, 0
+; RV32I-NEXT:    bne t1, s1, .LBB14_195
 ; RV32I-NEXT:  .LBB14_177:
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    bne t1, s3, .LBB14_195
+; RV32I-NEXT:    bne t1, s2, .LBB14_196
 ; RV32I-NEXT:  .LBB14_178:
-; RV32I-NEXT:    bnez a7, .LBB14_196
+; RV32I-NEXT:    li t5, 0
+; RV32I-NEXT:    bne t1, s3, .LBB14_197
 ; RV32I-NEXT:  .LBB14_179:
-; RV32I-NEXT:    bnez t1, .LBB14_197
+; RV32I-NEXT:    bnez a7, .LBB14_198
 ; RV32I-NEXT:  .LBB14_180:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    bne t1, s0, .LBB14_198
+; RV32I-NEXT:    bnez t1, .LBB14_199
 ; RV32I-NEXT:  .LBB14_181:
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    bne t1, t6, .LBB14_199
+; RV32I-NEXT:    bne t1, s10, .LBB14_200
 ; RV32I-NEXT:  .LBB14_182:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    bne t1, t4, .LBB14_200
+; RV32I-NEXT:    li t5, 0
+; RV32I-NEXT:    bne t1, s11, .LBB14_201
 ; RV32I-NEXT:  .LBB14_183:
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    bne t1, s4, .LBB14_201
+; RV32I-NEXT:    bne t1, t6, .LBB14_202
 ; RV32I-NEXT:  .LBB14_184:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    bne t1, s9, .LBB14_202
+; RV32I-NEXT:    li t4, 0
+; RV32I-NEXT:    bne t1, s9, .LBB14_203
 ; RV32I-NEXT:  .LBB14_185:
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    bne t1, s2, .LBB14_203
+; RV32I-NEXT:    bne t1, s1, .LBB14_204
 ; RV32I-NEXT:  .LBB14_186:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    bne t1, s3, .LBB14_204
+; RV32I-NEXT:    li t4, 0
+; RV32I-NEXT:    bne t1, s2, .LBB14_205
 ; RV32I-NEXT:  .LBB14_187:
-; RV32I-NEXT:    beqz a7, .LBB14_189
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    bne t1, s3, .LBB14_206
 ; RV32I-NEXT:  .LBB14_188:
-; RV32I-NEXT:    mv t2, a4
+; RV32I-NEXT:    beqz a7, .LBB14_190
 ; RV32I-NEXT:  .LBB14_189:
-; RV32I-NEXT:    srli a4, ra, 16
-; RV32I-NEXT:    lui t4, 16
-; RV32I-NEXT:    srli t3, ra, 24
-; RV32I-NEXT:    srli a7, a1, 16
-; RV32I-NEXT:    srli t6, a1, 24
-; RV32I-NEXT:    srli t1, a3, 16
-; RV32I-NEXT:    srli s2, a3, 24
-; RV32I-NEXT:    srli t5, a5, 16
-; RV32I-NEXT:    srli s3, a5, 24
-; RV32I-NEXT:    srli s1, a6, 16
-; RV32I-NEXT:    srli s6, a6, 24
-; RV32I-NEXT:    srli s0, t0, 16
-; RV32I-NEXT:    srli s5, t0, 24
-; RV32I-NEXT:    srli s4, a0, 16
-; RV32I-NEXT:    srli s7, a0, 24
-; RV32I-NEXT:    srli s8, t2, 16
-; RV32I-NEXT:    srli s9, t2, 24
-; RV32I-NEXT:    addi t4, t4, -1
-; RV32I-NEXT:    and s10, ra, t4
-; RV32I-NEXT:    and s11, a1, t4
-; RV32I-NEXT:    srli s10, s10, 8
-; RV32I-NEXT:    sb ra, 0(a2)
-; RV32I-NEXT:    sb s10, 1(a2)
-; RV32I-NEXT:    sb a4, 2(a2)
-; RV32I-NEXT:    sb t3, 3(a2)
-; RV32I-NEXT:    and a4, a3, t4
-; RV32I-NEXT:    srli t3, s11, 8
+; RV32I-NEXT:    mv t2, t3
+; RV32I-NEXT:  .LBB14_190:
+; RV32I-NEXT:    srli a7, a4, 16
+; RV32I-NEXT:    lui t5, 16
+; RV32I-NEXT:    srli t4, a4, 24
+; RV32I-NEXT:    srli t1, a1, 16
+; RV32I-NEXT:    srli s0, a1, 24
+; RV32I-NEXT:    lw a3, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    srli t3, a3, 16
+; RV32I-NEXT:    srli s3, a3, 24
+; RV32I-NEXT:    srli t6, a5, 16
+; RV32I-NEXT:    srli s4, a5, 24
+; RV32I-NEXT:    srli s2, a6, 16
+; RV32I-NEXT:    srli s7, a6, 24
+; RV32I-NEXT:    srli s1, t0, 16
+; RV32I-NEXT:    srli s6, t0, 24
+; RV32I-NEXT:    srli s5, a0, 16
+; RV32I-NEXT:    srli s8, a0, 24
+; RV32I-NEXT:    srli s9, t2, 16
+; RV32I-NEXT:    srli s10, t2, 24
+; RV32I-NEXT:    addi t5, t5, -1
+; RV32I-NEXT:    and s11, a4, t5
+; RV32I-NEXT:    and ra, a1, t5
+; RV32I-NEXT:    srli s11, s11, 8
+; RV32I-NEXT:    sb a4, 0(a2)
+; RV32I-NEXT:    sb s11, 1(a2)
+; RV32I-NEXT:    sb a7, 2(a2)
+; RV32I-NEXT:    sb t4, 3(a2)
+; RV32I-NEXT:    and a4, a3, t5
+; RV32I-NEXT:    srli a7, ra, 8
 ; RV32I-NEXT:    sb a1, 4(a2)
-; RV32I-NEXT:    sb t3, 5(a2)
-; RV32I-NEXT:    sb a7, 6(a2)
-; RV32I-NEXT:    sb t6, 7(a2)
-; RV32I-NEXT:    and a1, a5, t4
+; RV32I-NEXT:    sb a7, 5(a2)
+; RV32I-NEXT:    sb t1, 6(a2)
+; RV32I-NEXT:    sb s0, 7(a2)
+; RV32I-NEXT:    and a1, a5, t5
 ; RV32I-NEXT:    srli a4, a4, 8
 ; RV32I-NEXT:    sb a3, 8(a2)
 ; RV32I-NEXT:    sb a4, 9(a2)
-; RV32I-NEXT:    sb t1, 10(a2)
-; RV32I-NEXT:    sb s2, 11(a2)
-; RV32I-NEXT:    and a3, a6, t4
+; RV32I-NEXT:    sb t3, 10(a2)
+; RV32I-NEXT:    sb s3, 11(a2)
+; RV32I-NEXT:    and a3, a6, t5
 ; RV32I-NEXT:    srli a1, a1, 8
 ; RV32I-NEXT:    sb a5, 12(a2)
 ; RV32I-NEXT:    sb a1, 13(a2)
-; RV32I-NEXT:    sb t5, 14(a2)
-; RV32I-NEXT:    sb s3, 15(a2)
-; RV32I-NEXT:    and a1, t0, t4
+; RV32I-NEXT:    sb t6, 14(a2)
+; RV32I-NEXT:    sb s4, 15(a2)
+; RV32I-NEXT:    and a1, t0, t5
 ; RV32I-NEXT:    srli a3, a3, 8
 ; RV32I-NEXT:    sb a6, 16(a2)
 ; RV32I-NEXT:    sb a3, 17(a2)
-; RV32I-NEXT:    sb s1, 18(a2)
-; RV32I-NEXT:    sb s6, 19(a2)
-; RV32I-NEXT:    and a3, a0, t4
-; RV32I-NEXT:    and a4, t2, t4
+; RV32I-NEXT:    sb s2, 18(a2)
+; RV32I-NEXT:    sb s7, 19(a2)
+; RV32I-NEXT:    and a3, a0, t5
+; RV32I-NEXT:    and a4, t2, t5
 ; RV32I-NEXT:    srli a1, a1, 8
 ; RV32I-NEXT:    srli a3, a3, 8
 ; RV32I-NEXT:    srli a4, a4, 8
 ; RV32I-NEXT:    sb t0, 20(a2)
 ; RV32I-NEXT:    sb a1, 21(a2)
-; RV32I-NEXT:    sb s0, 22(a2)
-; RV32I-NEXT:    sb s5, 23(a2)
+; RV32I-NEXT:    sb s1, 22(a2)
+; RV32I-NEXT:    sb s6, 23(a2)
 ; RV32I-NEXT:    sb a0, 24(a2)
 ; RV32I-NEXT:    sb a3, 25(a2)
-; RV32I-NEXT:    sb s4, 26(a2)
-; RV32I-NEXT:    sb s7, 27(a2)
+; RV32I-NEXT:    sb s5, 26(a2)
+; RV32I-NEXT:    sb s8, 27(a2)
 ; RV32I-NEXT:    sb t2, 28(a2)
 ; RV32I-NEXT:    sb a4, 29(a2)
-; RV32I-NEXT:    sb s8, 30(a2)
-; RV32I-NEXT:    sb s9, 31(a2)
+; RV32I-NEXT:    sb s9, 30(a2)
+; RV32I-NEXT:    sb s10, 31(a2)
 ; RV32I-NEXT:    lw ra, 60(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 56(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s1, 52(sp) # 4-byte Folded Reload
@@ -6058,64 +6108,68 @@ define void @lshr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no
 ; RV32I-NEXT:    lw s11, 12(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 64
 ; RV32I-NEXT:    ret
-; RV32I-NEXT:  .LBB14_190:
-; RV32I-NEXT:    mv a4, t3
-; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    beq t1, t4, .LBB14_174
 ; RV32I-NEXT:  .LBB14_191:
-; RV32I-NEXT:    mv t3, a4
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    beq t1, s4, .LBB14_175
+; RV32I-NEXT:    mv t5, t3
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beq t1, s11, .LBB14_174
 ; RV32I-NEXT:  .LBB14_192:
-; RV32I-NEXT:    mv a4, t3
+; RV32I-NEXT:    mv t3, t5
+; RV32I-NEXT:    li t5, 0
+; RV32I-NEXT:    beq t1, t6, .LBB14_175
+; RV32I-NEXT:  .LBB14_193:
+; RV32I-NEXT:    mv t5, t3
 ; RV32I-NEXT:    li t3, 0
 ; RV32I-NEXT:    beq t1, s9, .LBB14_176
-; RV32I-NEXT:  .LBB14_193:
-; RV32I-NEXT:    mv t3, a4
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    beq t1, s2, .LBB14_177
 ; RV32I-NEXT:  .LBB14_194:
-; RV32I-NEXT:    mv a4, t3
-; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    beq t1, s3, .LBB14_178
+; RV32I-NEXT:    mv t3, t5
+; RV32I-NEXT:    li t5, 0
+; RV32I-NEXT:    beq t1, s1, .LBB14_177
 ; RV32I-NEXT:  .LBB14_195:
-; RV32I-NEXT:    mv t3, a4
-; RV32I-NEXT:    beqz a7, .LBB14_179
+; RV32I-NEXT:    mv t5, t3
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beq t1, s2, .LBB14_178
 ; RV32I-NEXT:  .LBB14_196:
-; RV32I-NEXT:    mv a0, t3
-; RV32I-NEXT:    beqz t1, .LBB14_180
+; RV32I-NEXT:    mv t3, t5
+; RV32I-NEXT:    li t5, 0
+; RV32I-NEXT:    beq t1, s3, .LBB14_179
 ; RV32I-NEXT:  .LBB14_197:
-; RV32I-NEXT:    li s1, 0
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    beq t1, s0, .LBB14_181
+; RV32I-NEXT:    mv t5, t3
+; RV32I-NEXT:    beqz a7, .LBB14_180
 ; RV32I-NEXT:  .LBB14_198:
-; RV32I-NEXT:    mv a4, s1
-; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    beq t1, t6, .LBB14_182
+; RV32I-NEXT:    mv a0, t5
+; RV32I-NEXT:    beqz t1, .LBB14_181
 ; RV32I-NEXT:  .LBB14_199:
-; RV32I-NEXT:    mv t3, a4
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    beq t1, t4, .LBB14_183
-; RV32I-NEXT:  .LBB14_200:
-; RV32I-NEXT:    mv a4, t3
+; RV32I-NEXT:    li s4, 0
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    beq t1, s4, .LBB14_184
+; RV32I-NEXT:    beq t1, s10, .LBB14_182
+; RV32I-NEXT:  .LBB14_200:
+; RV32I-NEXT:    mv t3, s4
+; RV32I-NEXT:    li t5, 0
+; RV32I-NEXT:    beq t1, s11, .LBB14_183
 ; RV32I-NEXT:  .LBB14_201:
-; RV32I-NEXT:    mv t3, a4
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    beq t1, s9, .LBB14_185
-; RV32I-NEXT:  .LBB14_202:
-; RV32I-NEXT:    mv a4, t3
+; RV32I-NEXT:    mv t5, t3
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    beq t1, s2, .LBB14_186
+; RV32I-NEXT:    beq t1, t6, .LBB14_184
+; RV32I-NEXT:  .LBB14_202:
+; RV32I-NEXT:    mv t3, t5
+; RV32I-NEXT:    li t4, 0
+; RV32I-NEXT:    beq t1, s9, .LBB14_185
 ; RV32I-NEXT:  .LBB14_203:
-; RV32I-NEXT:    mv t3, a4
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    beq t1, s3, .LBB14_187
+; RV32I-NEXT:    mv t4, t3
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beq t1, s1, .LBB14_186
 ; RV32I-NEXT:  .LBB14_204:
-; RV32I-NEXT:    mv a4, t3
-; RV32I-NEXT:    bnez a7, .LBB14_188
-; RV32I-NEXT:    j .LBB14_189
+; RV32I-NEXT:    mv t3, t4
+; RV32I-NEXT:    li t4, 0
+; RV32I-NEXT:    beq t1, s2, .LBB14_187
+; RV32I-NEXT:  .LBB14_205:
+; RV32I-NEXT:    mv t4, t3
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beq t1, s3, .LBB14_188
+; RV32I-NEXT:  .LBB14_206:
+; RV32I-NEXT:    mv t3, t4
+; RV32I-NEXT:    bnez a7, .LBB14_189
+; RV32I-NEXT:    j .LBB14_190
   %src = load i256, ptr %src.ptr, align 1
   %dwordOff = load i256, ptr %dwordOff.ptr, align 1
   %bitOff = shl i256 %dwordOff, 6
@@ -6431,20 +6485,20 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
 ;
 ; RV32I-LABEL: shl_32bytes:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    addi sp, sp, -80
-; RV32I-NEXT:    sw ra, 76(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    sw s0, 72(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    sw s1, 68(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    sw s2, 64(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    sw s3, 60(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    sw s4, 56(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    sw s5, 52(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    sw s6, 48(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    sw s7, 44(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    sw s8, 40(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    sw s9, 36(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    sw s10, 32(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    sw s11, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    addi sp, sp, -64
+; RV32I-NEXT:    sw ra, 60(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s0, 56(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s1, 52(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s2, 48(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s3, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s4, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s5, 36(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s6, 32(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s7, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s8, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s9, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s10, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s11, 12(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    lbu a3, 1(a0)
 ; RV32I-NEXT:    lbu a4, 0(a0)
 ; RV32I-NEXT:    lbu a5, 2(a0)
@@ -6463,657 +6517,663 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
 ; RV32I-NEXT:    or a1, a1, t0
 ; RV32I-NEXT:    slli a5, a5, 16
 ; RV32I-NEXT:    slli a1, a1, 16
-; RV32I-NEXT:    or a6, a5, a3
+; RV32I-NEXT:    or a3, a5, a3
 ; RV32I-NEXT:    or a1, a1, a4
 ; RV32I-NEXT:    slli a1, a1, 3
 ; RV32I-NEXT:    srli a5, a1, 5
-; RV32I-NEXT:    sll t5, a6, a1
-; RV32I-NEXT:    li s7, 1
-; RV32I-NEXT:    mv a3, t5
+; RV32I-NEXT:    sll t4, a3, a1
+; RV32I-NEXT:    li s6, 1
+; RV32I-NEXT:    mv a6, t4
 ; RV32I-NEXT:    beqz a5, .LBB15_2
 ; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    li a6, 0
 ; RV32I-NEXT:  .LBB15_2:
 ; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    li s8, 2
-; RV32I-NEXT:    beq a5, s7, .LBB15_4
+; RV32I-NEXT:    li s9, 2
+; RV32I-NEXT:    beq a5, s6, .LBB15_4
 ; RV32I-NEXT:  # %bb.3:
-; RV32I-NEXT:    mv a4, a3
+; RV32I-NEXT:    mv a4, a6
 ; RV32I-NEXT:  .LBB15_4:
-; RV32I-NEXT:    li a7, 0
-; RV32I-NEXT:    li s0, 3
-; RV32I-NEXT:    beq a5, s8, .LBB15_6
+; RV32I-NEXT:    li a6, 0
+; RV32I-NEXT:    li s3, 3
+; RV32I-NEXT:    beq a5, s9, .LBB15_6
 ; RV32I-NEXT:  # %bb.5:
-; RV32I-NEXT:    mv a7, a4
+; RV32I-NEXT:    mv a6, a4
 ; RV32I-NEXT:  .LBB15_6:
 ; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    li s4, 4
-; RV32I-NEXT:    beq a5, s0, .LBB15_8
+; RV32I-NEXT:    li s5, 4
+; RV32I-NEXT:    beq a5, s3, .LBB15_8
 ; RV32I-NEXT:  # %bb.7:
-; RV32I-NEXT:    mv a4, a7
+; RV32I-NEXT:    mv a4, a6
 ; RV32I-NEXT:  .LBB15_8:
-; RV32I-NEXT:    li t0, 0
-; RV32I-NEXT:    li s5, 5
-; RV32I-NEXT:    beq a5, s4, .LBB15_10
+; RV32I-NEXT:    li a7, 0
+; RV32I-NEXT:    li s1, 5
+; RV32I-NEXT:    beq a5, s5, .LBB15_10
 ; RV32I-NEXT:  # %bb.9:
-; RV32I-NEXT:    mv t0, a4
+; RV32I-NEXT:    mv a7, a4
 ; RV32I-NEXT:  .LBB15_10:
-; RV32I-NEXT:    lbu t2, 7(a0)
-; RV32I-NEXT:    li a7, 0
-; RV32I-NEXT:    beq a5, s5, .LBB15_12
+; RV32I-NEXT:    lbu t1, 7(a0)
+; RV32I-NEXT:    li a6, 0
+; RV32I-NEXT:    beq a5, s1, .LBB15_12
 ; RV32I-NEXT:  # %bb.11:
-; RV32I-NEXT:    mv a7, t0
+; RV32I-NEXT:    mv a6, a7
 ; RV32I-NEXT:  .LBB15_12:
 ; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    lbu t0, 5(a0)
-; RV32I-NEXT:    lbu t1, 6(a0)
-; RV32I-NEXT:    li s1, 6
-; RV32I-NEXT:    slli t2, t2, 8
-; RV32I-NEXT:    beq a5, s1, .LBB15_14
+; RV32I-NEXT:    lbu a7, 5(a0)
+; RV32I-NEXT:    lbu t0, 6(a0)
+; RV32I-NEXT:    li s0, 6
+; RV32I-NEXT:    slli t2, t1, 8
+; RV32I-NEXT:    beq a5, s0, .LBB15_14
 ; RV32I-NEXT:  # %bb.13:
-; RV32I-NEXT:    mv a4, a7
+; RV32I-NEXT:    mv a4, a6
 ; RV32I-NEXT:  .LBB15_14:
-; RV32I-NEXT:    li a7, 0
-; RV32I-NEXT:    lbu a3, 4(a0)
-; RV32I-NEXT:    or t1, t2, t1
+; RV32I-NEXT:    li a6, 0
+; RV32I-NEXT:    lbu t1, 4(a0)
+; RV32I-NEXT:    or t2, t2, t0
 ; RV32I-NEXT:    li ra, 7
-; RV32I-NEXT:    slli t0, t0, 8
+; RV32I-NEXT:    slli a7, a7, 8
 ; RV32I-NEXT:    beq a5, ra, .LBB15_16
 ; RV32I-NEXT:  # %bb.15:
-; RV32I-NEXT:    mv a7, a4
+; RV32I-NEXT:    mv a6, a4
 ; RV32I-NEXT:  .LBB15_16:
-; RV32I-NEXT:    or a3, t0, a3
-; RV32I-NEXT:    slli t1, t1, 16
+; RV32I-NEXT:    or t0, a7, t1
+; RV32I-NEXT:    slli t2, t2, 16
 ; RV32I-NEXT:    andi t6, a1, 31
-; RV32I-NEXT:    mv a4, a6
+; RV32I-NEXT:    mv a4, a3
 ; RV32I-NEXT:    beqz a1, .LBB15_18
 ; RV32I-NEXT:  # %bb.17:
-; RV32I-NEXT:    mv a4, a7
+; RV32I-NEXT:    mv a4, a6
 ; RV32I-NEXT:  .LBB15_18:
-; RV32I-NEXT:    li t0, 0
-; RV32I-NEXT:    or a7, t1, a3
-; RV32I-NEXT:    neg s3, t6
-; RV32I-NEXT:    srl s11, a6, s3
+; RV32I-NEXT:    li a7, 0
+; RV32I-NEXT:    or a6, t2, t0
+; RV32I-NEXT:    neg s10, t6
+; RV32I-NEXT:    srl a3, a3, s10
+; RV32I-NEXT:    sw a3, 8(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    beqz t6, .LBB15_20
 ; RV32I-NEXT:  # %bb.19:
-; RV32I-NEXT:    mv t0, s11
+; RV32I-NEXT:    lw a7, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:  .LBB15_20:
-; RV32I-NEXT:    sll s10, a7, a1
+; RV32I-NEXT:    sll s4, a6, a1
 ; RV32I-NEXT:    beqz a5, .LBB15_22
 ; RV32I-NEXT:  # %bb.21:
 ; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    mv a6, t5
-; RV32I-NEXT:    bne a5, s7, .LBB15_23
+; RV32I-NEXT:    mv a7, t4
+; RV32I-NEXT:    bne a5, s6, .LBB15_23
 ; RV32I-NEXT:    j .LBB15_24
 ; RV32I-NEXT:  .LBB15_22:
-; RV32I-NEXT:    or a3, s10, t0
-; RV32I-NEXT:    mv a6, t5
-; RV32I-NEXT:    beq a5, s7, .LBB15_24
+; RV32I-NEXT:    or a3, s4, a7
+; RV32I-NEXT:    mv a7, t4
+; RV32I-NEXT:    beq a5, s6, .LBB15_24
 ; RV32I-NEXT:  .LBB15_23:
-; RV32I-NEXT:    mv a6, a3
+; RV32I-NEXT:    mv a7, a3
 ; RV32I-NEXT:  .LBB15_24:
 ; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bne a5, s8, .LBB15_40
+; RV32I-NEXT:    bne a5, s9, .LBB15_40
 ; RV32I-NEXT:  # %bb.25:
-; RV32I-NEXT:    li a6, 0
-; RV32I-NEXT:    bne a5, s0, .LBB15_41
+; RV32I-NEXT:    li a7, 0
+; RV32I-NEXT:    bne a5, s3, .LBB15_41
 ; RV32I-NEXT:  .LBB15_26:
 ; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beq a5, s4, .LBB15_28
+; RV32I-NEXT:    beq a5, s5, .LBB15_28
 ; RV32I-NEXT:  .LBB15_27:
-; RV32I-NEXT:    mv a3, a6
+; RV32I-NEXT:    mv a3, a7
 ; RV32I-NEXT:  .LBB15_28:
 ; RV32I-NEXT:    lbu t2, 11(a0)
-; RV32I-NEXT:    li t0, 0
-; RV32I-NEXT:    beq a5, s5, .LBB15_30
+; RV32I-NEXT:    li a7, 0
+; RV32I-NEXT:    beq a5, s1, .LBB15_30
 ; RV32I-NEXT:  # %bb.29:
-; RV32I-NEXT:    mv t0, a3
+; RV32I-NEXT:    mv a7, a3
 ; RV32I-NEXT:  .LBB15_30:
-; RV32I-NEXT:    li a6, 0
-; RV32I-NEXT:    lbu t1, 9(a0)
-; RV32I-NEXT:    lbu a3, 10(a0)
+; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    lbu t0, 9(a0)
+; RV32I-NEXT:    lbu t1, 10(a0)
 ; RV32I-NEXT:    slli t2, t2, 8
-; RV32I-NEXT:    beq a5, s1, .LBB15_32
+; RV32I-NEXT:    beq a5, s0, .LBB15_32
 ; RV32I-NEXT:  # %bb.31:
-; RV32I-NEXT:    mv a6, t0
+; RV32I-NEXT:    mv a3, a7
 ; RV32I-NEXT:  .LBB15_32:
-; RV32I-NEXT:    li t0, 0
+; RV32I-NEXT:    li a7, 0
 ; RV32I-NEXT:    lbu t3, 8(a0)
-; RV32I-NEXT:    or t2, t2, a3
-; RV32I-NEXT:    slli t1, t1, 8
+; RV32I-NEXT:    or t1, t2, t1
+; RV32I-NEXT:    slli t0, t0, 8
 ; RV32I-NEXT:    beq a5, ra, .LBB15_34
 ; RV32I-NEXT:  # %bb.33:
-; RV32I-NEXT:    mv t0, a6
+; RV32I-NEXT:    mv a7, a3
 ; RV32I-NEXT:  .LBB15_34:
-; RV32I-NEXT:    or a3, t1, t3
-; RV32I-NEXT:    slli a6, t2, 16
-; RV32I-NEXT:    mv t2, a7
+; RV32I-NEXT:    or a3, t0, t3
+; RV32I-NEXT:    slli t1, t1, 16
+; RV32I-NEXT:    mv t2, a6
 ; RV32I-NEXT:    beqz a1, .LBB15_36
 ; RV32I-NEXT:  # %bb.35:
-; RV32I-NEXT:    mv t2, t0
+; RV32I-NEXT:    mv t2, a7
 ; RV32I-NEXT:  .LBB15_36:
-; RV32I-NEXT:    li t0, 0
-; RV32I-NEXT:    or a6, a6, a3
-; RV32I-NEXT:    srl s2, a7, s3
+; RV32I-NEXT:    li a7, 0
+; RV32I-NEXT:    or a3, t1, a3
+; RV32I-NEXT:    srl a6, a6, s10
+; RV32I-NEXT:    sw a6, 4(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    beqz t6, .LBB15_38
 ; RV32I-NEXT:  # %bb.37:
-; RV32I-NEXT:    mv t0, s2
+; RV32I-NEXT:    lw a7, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:  .LBB15_38:
-; RV32I-NEXT:    sll s9, a6, a1
+; RV32I-NEXT:    sll s8, a3, a1
 ; RV32I-NEXT:    beqz a5, .LBB15_42
 ; RV32I-NEXT:  # %bb.39:
+; RV32I-NEXT:    li a6, 0
 ; RV32I-NEXT:    li a7, 0
-; RV32I-NEXT:    li a3, 0
 ; RV32I-NEXT:    bnez t6, .LBB15_43
 ; RV32I-NEXT:    j .LBB15_44
 ; RV32I-NEXT:  .LBB15_40:
-; RV32I-NEXT:    mv a3, a6
-; RV32I-NEXT:    li a6, 0
-; RV32I-NEXT:    beq a5, s0, .LBB15_26
+; RV32I-NEXT:    mv a3, a7
+; RV32I-NEXT:    li a7, 0
+; RV32I-NEXT:    beq a5, s3, .LBB15_26
 ; RV32I-NEXT:  .LBB15_41:
-; RV32I-NEXT:    mv a6, a3
+; RV32I-NEXT:    mv a7, a3
 ; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bne a5, s4, .LBB15_27
+; RV32I-NEXT:    bne a5, s5, .LBB15_27
 ; RV32I-NEXT:    j .LBB15_28
 ; RV32I-NEXT:  .LBB15_42:
-; RV32I-NEXT:    or a7, s9, t0
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    or a6, s8, a7
+; RV32I-NEXT:    li a7, 0
 ; RV32I-NEXT:    beqz t6, .LBB15_44
 ; RV32I-NEXT:  .LBB15_43:
-; RV32I-NEXT:    mv a3, s11
+; RV32I-NEXT:    lw a7, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:  .LBB15_44:
-; RV32I-NEXT:    beq a5, s7, .LBB15_61
+; RV32I-NEXT:    beq a5, s6, .LBB15_61
 ; RV32I-NEXT:  # %bb.45:
-; RV32I-NEXT:    mv a3, t5
-; RV32I-NEXT:    bne a5, s8, .LBB15_62
+; RV32I-NEXT:    mv a7, t4
+; RV32I-NEXT:    bne a5, s9, .LBB15_62
 ; RV32I-NEXT:  .LBB15_46:
-; RV32I-NEXT:    li a7, 0
-; RV32I-NEXT:    bne a5, s0, .LBB15_63
+; RV32I-NEXT:    li t0, 0
+; RV32I-NEXT:    bne a5, s3, .LBB15_63
 ; RV32I-NEXT:  .LBB15_47:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beq a5, s4, .LBB15_49
+; RV32I-NEXT:    li a6, 0
+; RV32I-NEXT:    beq a5, s5, .LBB15_49
 ; RV32I-NEXT:  .LBB15_48:
-; RV32I-NEXT:    mv a3, a7
+; RV32I-NEXT:    mv a6, t0
 ; RV32I-NEXT:  .LBB15_49:
 ; RV32I-NEXT:    lbu t3, 15(a0)
-; RV32I-NEXT:    li t0, 0
-; RV32I-NEXT:    beq a5, s5, .LBB15_51
+; RV32I-NEXT:    li a7, 0
+; RV32I-NEXT:    beq a5, s1, .LBB15_51
 ; RV32I-NEXT:  # %bb.50:
-; RV32I-NEXT:    mv t0, a3
+; RV32I-NEXT:    mv a7, a6
 ; RV32I-NEXT:  .LBB15_51:
-; RV32I-NEXT:    li a7, 0
-; RV32I-NEXT:    lbu t1, 13(a0)
-; RV32I-NEXT:    lbu a3, 14(a0)
+; RV32I-NEXT:    li a6, 0
+; RV32I-NEXT:    lbu t0, 13(a0)
+; RV32I-NEXT:    lbu t1, 14(a0)
 ; RV32I-NEXT:    slli t3, t3, 8
-; RV32I-NEXT:    beq a5, s1, .LBB15_53
+; RV32I-NEXT:    beq a5, s0, .LBB15_53
 ; RV32I-NEXT:  # %bb.52:
-; RV32I-NEXT:    mv a7, t0
+; RV32I-NEXT:    mv a6, a7
 ; RV32I-NEXT:  .LBB15_53:
-; RV32I-NEXT:    li t0, 0
-; RV32I-NEXT:    lbu t4, 12(a0)
-; RV32I-NEXT:    or t3, t3, a3
-; RV32I-NEXT:    slli t1, t1, 8
+; RV32I-NEXT:    li a7, 0
+; RV32I-NEXT:    lbu t5, 12(a0)
+; RV32I-NEXT:    or t1, t3, t1
+; RV32I-NEXT:    slli t0, t0, 8
 ; RV32I-NEXT:    beq a5, ra, .LBB15_55
 ; RV32I-NEXT:  # %bb.54:
-; RV32I-NEXT:    mv t0, a7
-; RV32I-NEXT:  .LBB15_55:
-; RV32I-NEXT:    or a3, t1, t4
-; RV32I-NEXT:    slli t3, t3, 16
 ; RV32I-NEXT:    mv a7, a6
+; RV32I-NEXT:  .LBB15_55:
+; RV32I-NEXT:    or t0, t0, t5
+; RV32I-NEXT:    slli t1, t1, 16
+; RV32I-NEXT:    mv s2, a3
 ; RV32I-NEXT:    beqz a1, .LBB15_57
 ; RV32I-NEXT:  # %bb.56:
-; RV32I-NEXT:    mv a7, t0
+; RV32I-NEXT:    mv s2, a7
 ; RV32I-NEXT:  .LBB15_57:
-; RV32I-NEXT:    sw a7, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    li t0, 0
-; RV32I-NEXT:    or a7, t3, a3
-; RV32I-NEXT:    srl a3, a6, s3
-; RV32I-NEXT:    sw a3, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    li a6, 0
+; RV32I-NEXT:    or a7, t1, t0
+; RV32I-NEXT:    srl s7, a3, s10
 ; RV32I-NEXT:    beqz t6, .LBB15_59
 ; RV32I-NEXT:  # %bb.58:
-; RV32I-NEXT:    lw t0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    mv a6, s7
 ; RV32I-NEXT:  .LBB15_59:
 ; RV32I-NEXT:    sll a3, a7, a1
-; RV32I-NEXT:    sw a3, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw a3, 0(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    beqz a5, .LBB15_64
 ; RV32I-NEXT:  # %bb.60:
-; RV32I-NEXT:    li a6, 0
 ; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    li a6, 0
 ; RV32I-NEXT:    bnez t6, .LBB15_65
 ; RV32I-NEXT:    j .LBB15_66
 ; RV32I-NEXT:  .LBB15_61:
-; RV32I-NEXT:    or a7, s10, a3
-; RV32I-NEXT:    mv a3, t5
-; RV32I-NEXT:    beq a5, s8, .LBB15_46
+; RV32I-NEXT:    or a6, s4, a7
+; RV32I-NEXT:    mv a7, t4
+; RV32I-NEXT:    beq a5, s9, .LBB15_46
 ; RV32I-NEXT:  .LBB15_62:
-; RV32I-NEXT:    mv a3, a7
-; RV32I-NEXT:    li a7, 0
-; RV32I-NEXT:    beq a5, s0, .LBB15_47
+; RV32I-NEXT:    mv a7, a6
+; RV32I-NEXT:    li t0, 0
+; RV32I-NEXT:    beq a5, s3, .LBB15_47
 ; RV32I-NEXT:  .LBB15_63:
-; RV32I-NEXT:    mv a7, a3
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bne a5, s4, .LBB15_48
+; RV32I-NEXT:    mv t0, a7
+; RV32I-NEXT:    li a6, 0
+; RV32I-NEXT:    bne a5, s5, .LBB15_48
 ; RV32I-NEXT:    j .LBB15_49
 ; RV32I-NEXT:  .LBB15_64:
-; RV32I-NEXT:    or a6, a3, t0
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    or a3, a3, a6
+; RV32I-NEXT:    li a6, 0
 ; RV32I-NEXT:    beqz t6, .LBB15_66
 ; RV32I-NEXT:  .LBB15_65:
-; RV32I-NEXT:    mv a3, s2
+; RV32I-NEXT:    lw a6, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:  .LBB15_66:
-; RV32I-NEXT:    beq a5, s7, .LBB15_84
+; RV32I-NEXT:    beq a5, s6, .LBB15_84
 ; RV32I-NEXT:  # %bb.67:
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    li a6, 0
 ; RV32I-NEXT:    bnez t6, .LBB15_85
 ; RV32I-NEXT:  .LBB15_68:
-; RV32I-NEXT:    beq a5, s8, .LBB15_86
+; RV32I-NEXT:    beq a5, s9, .LBB15_86
 ; RV32I-NEXT:  .LBB15_69:
-; RV32I-NEXT:    mv t0, t5
-; RV32I-NEXT:    bne a5, s0, .LBB15_87
+; RV32I-NEXT:    mv a6, t4
+; RV32I-NEXT:    bne a5, s3, .LBB15_87
 ; RV32I-NEXT:  .LBB15_70:
 ; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beq a5, s4, .LBB15_72
+; RV32I-NEXT:    beq a5, s5, .LBB15_72
 ; RV32I-NEXT:  .LBB15_71:
-; RV32I-NEXT:    mv a3, t0
+; RV32I-NEXT:    mv a3, a6
 ; RV32I-NEXT:  .LBB15_72:
 ; RV32I-NEXT:    lbu t3, 19(a0)
-; RV32I-NEXT:    li t0, 0
-; RV32I-NEXT:    beq a5, s5, .LBB15_74
+; RV32I-NEXT:    li a6, 0
+; RV32I-NEXT:    beq a5, s1, .LBB15_74
 ; RV32I-NEXT:  # %bb.73:
-; RV32I-NEXT:    mv t0, a3
+; RV32I-NEXT:    mv a6, a3
 ; RV32I-NEXT:  .LBB15_74:
-; RV32I-NEXT:    li a6, 0
-; RV32I-NEXT:    lbu t1, 17(a0)
-; RV32I-NEXT:    lbu a3, 18(a0)
+; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    lbu t0, 17(a0)
+; RV32I-NEXT:    lbu t1, 18(a0)
 ; RV32I-NEXT:    slli t3, t3, 8
-; RV32I-NEXT:    beq a5, s1, .LBB15_76
+; RV32I-NEXT:    beq a5, s0, .LBB15_76
 ; RV32I-NEXT:  # %bb.75:
-; RV32I-NEXT:    mv a6, t0
+; RV32I-NEXT:    mv a3, a6
 ; RV32I-NEXT:  .LBB15_76:
-; RV32I-NEXT:    li t0, 0
-; RV32I-NEXT:    lbu t4, 16(a0)
-; RV32I-NEXT:    or t3, t3, a3
-; RV32I-NEXT:    slli t1, t1, 8
+; RV32I-NEXT:    li a6, 0
+; RV32I-NEXT:    lbu t5, 16(a0)
+; RV32I-NEXT:    or t1, t3, t1
+; RV32I-NEXT:    slli t0, t0, 8
 ; RV32I-NEXT:    beq a5, ra, .LBB15_78
 ; RV32I-NEXT:  # %bb.77:
-; RV32I-NEXT:    mv t0, a6
+; RV32I-NEXT:    mv a6, a3
 ; RV32I-NEXT:  .LBB15_78:
-; RV32I-NEXT:    or a3, t1, t4
-; RV32I-NEXT:    slli t3, t3, 16
+; RV32I-NEXT:    or t0, t0, t5
+; RV32I-NEXT:    slli t1, t1, 16
 ; RV32I-NEXT:    mv s6, a7
 ; RV32I-NEXT:    beqz a1, .LBB15_80
 ; RV32I-NEXT:  # %bb.79:
-; RV32I-NEXT:    mv s6, t0
+; RV32I-NEXT:    mv s6, a6
 ; RV32I-NEXT:  .LBB15_80:
-; RV32I-NEXT:    li t0, 0
-; RV32I-NEXT:    or a6, t3, a3
-; RV32I-NEXT:    srl a3, a7, s3
-; RV32I-NEXT:    sw a3, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    or a6, t1, t0
+; RV32I-NEXT:    srl s10, a7, s10
 ; RV32I-NEXT:    beqz t6, .LBB15_82
 ; RV32I-NEXT:  # %bb.81:
-; RV32I-NEXT:    lw t0, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    mv a3, s10
 ; RV32I-NEXT:  .LBB15_82:
-; RV32I-NEXT:    sll a3, a6, a1
-; RV32I-NEXT:    sw a3, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sll s11, a6, a1
 ; RV32I-NEXT:    beqz a5, .LBB15_88
 ; RV32I-NEXT:  # %bb.83:
-; RV32I-NEXT:    li a7, 0
 ; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    li a7, 0
 ; RV32I-NEXT:    bnez t6, .LBB15_89
 ; RV32I-NEXT:    j .LBB15_90
 ; RV32I-NEXT:  .LBB15_84:
-; RV32I-NEXT:    or a6, s9, a3
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    or a3, s8, a6
+; RV32I-NEXT:    li a6, 0
 ; RV32I-NEXT:    beqz t6, .LBB15_68
 ; RV32I-NEXT:  .LBB15_85:
-; RV32I-NEXT:    mv a3, s11
-; RV32I-NEXT:    bne a5, s8, .LBB15_69
+; RV32I-NEXT:    lw a6, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    bne a5, s9, .LBB15_69
 ; RV32I-NEXT:  .LBB15_86:
-; RV32I-NEXT:    or a6, s10, a3
-; RV32I-NEXT:    mv t0, t5
-; RV32I-NEXT:    beq a5, s0, .LBB15_70
+; RV32I-NEXT:    or a3, s4, a6
+; RV32I-NEXT:    mv a6, t4
+; RV32I-NEXT:    beq a5, s3, .LBB15_70
 ; RV32I-NEXT:  .LBB15_87:
-; RV32I-NEXT:    mv t0, a6
+; RV32I-NEXT:    mv a6, a3
 ; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bne a5, s4, .LBB15_71
+; RV32I-NEXT:    bne a5, s5, .LBB15_71
 ; RV32I-NEXT:    j .LBB15_72
 ; RV32I-NEXT:  .LBB15_88:
-; RV32I-NEXT:    or a7, a3, t0
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    or a3, s11, a3
+; RV32I-NEXT:    li a7, 0
 ; RV32I-NEXT:    beqz t6, .LBB15_90
 ; RV32I-NEXT:  .LBB15_89:
-; RV32I-NEXT:    lw a3, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    mv a7, s7
 ; RV32I-NEXT:  .LBB15_90:
-; RV32I-NEXT:    beq a5, s7, .LBB15_109
+; RV32I-NEXT:    li t0, 1
+; RV32I-NEXT:    beq a5, t0, .LBB15_109
 ; RV32I-NEXT:  # %bb.91:
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    li a7, 0
 ; RV32I-NEXT:    bnez t6, .LBB15_110
 ; RV32I-NEXT:  .LBB15_92:
-; RV32I-NEXT:    beq a5, s8, .LBB15_111
+; RV32I-NEXT:    beq a5, s9, .LBB15_111
 ; RV32I-NEXT:  .LBB15_93:
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    li a7, 0
 ; RV32I-NEXT:    bnez t6, .LBB15_112
 ; RV32I-NEXT:  .LBB15_94:
-; RV32I-NEXT:    beq a5, s0, .LBB15_113
+; RV32I-NEXT:    beq a5, s3, .LBB15_113
 ; RV32I-NEXT:  .LBB15_95:
-; RV32I-NEXT:    mv a3, t5
-; RV32I-NEXT:    beq a5, s4, .LBB15_97
+; RV32I-NEXT:    mv t0, t4
+; RV32I-NEXT:    beq a5, s5, .LBB15_97
 ; RV32I-NEXT:  .LBB15_96:
-; RV32I-NEXT:    mv a3, a7
+; RV32I-NEXT:    mv t0, a3
 ; RV32I-NEXT:  .LBB15_97:
 ; RV32I-NEXT:    lbu t3, 23(a0)
-; RV32I-NEXT:    li t0, 0
-; RV32I-NEXT:    beq a5, s5, .LBB15_99
+; RV32I-NEXT:    li a7, 0
+; RV32I-NEXT:    beq a5, s1, .LBB15_99
 ; RV32I-NEXT:  # %bb.98:
-; RV32I-NEXT:    mv t0, a3
+; RV32I-NEXT:    mv a7, t0
 ; RV32I-NEXT:  .LBB15_99:
-; RV32I-NEXT:    li a7, 0
-; RV32I-NEXT:    lbu t1, 21(a0)
-; RV32I-NEXT:    lbu a3, 22(a0)
+; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    lbu t0, 21(a0)
+; RV32I-NEXT:    lbu t1, 22(a0)
 ; RV32I-NEXT:    slli t3, t3, 8
-; RV32I-NEXT:    beq a5, s1, .LBB15_101
+; RV32I-NEXT:    beq a5, s0, .LBB15_101
 ; RV32I-NEXT:  # %bb.100:
-; RV32I-NEXT:    mv a7, t0
+; RV32I-NEXT:    mv a3, a7
 ; RV32I-NEXT:  .LBB15_101:
-; RV32I-NEXT:    sw s9, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    li t0, 0
-; RV32I-NEXT:    lbu t4, 20(a0)
-; RV32I-NEXT:    or t3, t3, a3
-; RV32I-NEXT:    slli t1, t1, 8
+; RV32I-NEXT:    li a7, 0
+; RV32I-NEXT:    lbu t5, 20(a0)
+; RV32I-NEXT:    or t1, t3, t1
+; RV32I-NEXT:    slli t0, t0, 8
 ; RV32I-NEXT:    beq a5, ra, .LBB15_103
 ; RV32I-NEXT:  # %bb.102:
-; RV32I-NEXT:    mv t0, a7
+; RV32I-NEXT:    mv a7, a3
 ; RV32I-NEXT:  .LBB15_103:
-; RV32I-NEXT:    or a3, t1, t4
-; RV32I-NEXT:    slli t3, t3, 16
+; RV32I-NEXT:    or t0, t0, t5
+; RV32I-NEXT:    slli t1, t1, 16
 ; RV32I-NEXT:    mv s9, a6
 ; RV32I-NEXT:    beqz a1, .LBB15_105
 ; RV32I-NEXT:  # %bb.104:
-; RV32I-NEXT:    mv s9, t0
+; RV32I-NEXT:    mv s9, a7
 ; RV32I-NEXT:  .LBB15_105:
-; RV32I-NEXT:    li t1, 0
-; RV32I-NEXT:    or t0, t3, a3
-; RV32I-NEXT:    srl a6, a6, s3
+; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    or t0, t1, t0
+; RV32I-NEXT:    neg a7, t6
+; RV32I-NEXT:    srl a6, a6, a7
 ; RV32I-NEXT:    beqz t6, .LBB15_107
 ; RV32I-NEXT:  # %bb.106:
-; RV32I-NEXT:    mv t1, a6
+; RV32I-NEXT:    mv a3, a6
 ; RV32I-NEXT:  .LBB15_107:
-; RV32I-NEXT:    sll a3, t0, a1
-; RV32I-NEXT:    sw a3, 0(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sll a7, t0, a1
 ; RV32I-NEXT:    beqz a5, .LBB15_114
 ; RV32I-NEXT:  # %bb.108:
-; RV32I-NEXT:    li t1, 0
 ; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    li t1, 0
 ; RV32I-NEXT:    bnez t6, .LBB15_115
 ; RV32I-NEXT:    j .LBB15_116
 ; RV32I-NEXT:  .LBB15_109:
-; RV32I-NEXT:    lw a7, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    or a7, a7, a3
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    lw a3, 0(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    or a3, a3, a7
+; RV32I-NEXT:    li a7, 0
 ; RV32I-NEXT:    beqz t6, .LBB15_92
 ; RV32I-NEXT:  .LBB15_110:
-; RV32I-NEXT:    mv a3, s2
-; RV32I-NEXT:    bne a5, s8, .LBB15_93
+; RV32I-NEXT:    lw a7, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    bne a5, s9, .LBB15_93
 ; RV32I-NEXT:  .LBB15_111:
-; RV32I-NEXT:    or a7, s9, a3
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    or a3, s8, a7
+; RV32I-NEXT:    li a7, 0
 ; RV32I-NEXT:    beqz t6, .LBB15_94
 ; RV32I-NEXT:  .LBB15_112:
-; RV32I-NEXT:    mv a3, s11
-; RV32I-NEXT:    bne a5, s0, .LBB15_95
+; RV32I-NEXT:    lw a7, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    bne a5, s3, .LBB15_95
 ; RV32I-NEXT:  .LBB15_113:
-; RV32I-NEXT:    or a7, s10, a3
-; RV32I-NEXT:    mv a3, t5
-; RV32I-NEXT:    bne a5, s4, .LBB15_96
+; RV32I-NEXT:    or a3, s4, a7
+; RV32I-NEXT:    mv t0, t4
+; RV32I-NEXT:    bne a5, s5, .LBB15_96
 ; RV32I-NEXT:    j .LBB15_97
 ; RV32I-NEXT:  .LBB15_114:
-; RV32I-NEXT:    or t1, a3, t1
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    or a3, a7, a3
+; RV32I-NEXT:    li t1, 0
 ; RV32I-NEXT:    beqz t6, .LBB15_116
 ; RV32I-NEXT:  .LBB15_115:
-; RV32I-NEXT:    lw a3, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    mv t1, s10
 ; RV32I-NEXT:  .LBB15_116:
-; RV32I-NEXT:    beq a5, s7, .LBB15_136
+; RV32I-NEXT:    li t3, 1
+; RV32I-NEXT:    beq a5, t3, .LBB15_136
 ; RV32I-NEXT:  # %bb.117:
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    li t1, 0
 ; RV32I-NEXT:    bnez t6, .LBB15_137
 ; RV32I-NEXT:  .LBB15_118:
-; RV32I-NEXT:    beq a5, s8, .LBB15_138
+; RV32I-NEXT:    li t3, 2
+; RV32I-NEXT:    beq a5, t3, .LBB15_138
 ; RV32I-NEXT:  .LBB15_119:
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    li t1, 0
 ; RV32I-NEXT:    bnez t6, .LBB15_139
 ; RV32I-NEXT:  .LBB15_120:
-; RV32I-NEXT:    beq a5, s0, .LBB15_140
+; RV32I-NEXT:    beq a5, s3, .LBB15_140
 ; RV32I-NEXT:  .LBB15_121:
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    li t1, 0
 ; RV32I-NEXT:    bnez t6, .LBB15_141
 ; RV32I-NEXT:  .LBB15_122:
-; RV32I-NEXT:    bne a5, s4, .LBB15_124
+; RV32I-NEXT:    bne a5, s5, .LBB15_124
 ; RV32I-NEXT:  .LBB15_123:
-; RV32I-NEXT:    or t1, s10, a3
+; RV32I-NEXT:    or a3, s4, t1
 ; RV32I-NEXT:  .LBB15_124:
-; RV32I-NEXT:    lbu s0, 27(a0)
-; RV32I-NEXT:    mv t3, t5
-; RV32I-NEXT:    beq a5, s5, .LBB15_126
+; RV32I-NEXT:    lbu t5, 27(a0)
+; RV32I-NEXT:    mv t1, t4
+; RV32I-NEXT:    beq a5, s1, .LBB15_126
 ; RV32I-NEXT:  # %bb.125:
-; RV32I-NEXT:    mv t3, t1
+; RV32I-NEXT:    mv t1, a3
 ; RV32I-NEXT:  .LBB15_126:
-; RV32I-NEXT:    li t1, 0
-; RV32I-NEXT:    lbu t4, 25(a0)
-; RV32I-NEXT:    lbu a3, 26(a0)
-; RV32I-NEXT:    slli s0, s0, 8
-; RV32I-NEXT:    beq a5, s1, .LBB15_128
+; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    lbu s3, 25(a0)
+; RV32I-NEXT:    lbu t3, 26(a0)
+; RV32I-NEXT:    slli t5, t5, 8
+; RV32I-NEXT:    beq a5, s0, .LBB15_128
 ; RV32I-NEXT:  # %bb.127:
-; RV32I-NEXT:    mv t1, t3
+; RV32I-NEXT:    mv a3, t1
 ; RV32I-NEXT:  .LBB15_128:
-; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    lbu s1, 24(a0)
-; RV32I-NEXT:    or s0, s0, a3
-; RV32I-NEXT:    slli a3, t4, 8
+; RV32I-NEXT:    li t1, 0
+; RV32I-NEXT:    lbu s0, 24(a0)
+; RV32I-NEXT:    or t5, t5, t3
+; RV32I-NEXT:    slli s3, s3, 8
 ; RV32I-NEXT:    beq a5, ra, .LBB15_130
 ; RV32I-NEXT:  # %bb.129:
-; RV32I-NEXT:    mv t3, t1
+; RV32I-NEXT:    mv t1, a3
 ; RV32I-NEXT:  .LBB15_130:
-; RV32I-NEXT:    or a3, a3, s1
-; RV32I-NEXT:    slli s0, s0, 16
+; RV32I-NEXT:    or t3, s3, s0
+; RV32I-NEXT:    slli t5, t5, 16
 ; RV32I-NEXT:    mv ra, t0
 ; RV32I-NEXT:    beqz a1, .LBB15_132
 ; RV32I-NEXT:  # %bb.131:
-; RV32I-NEXT:    mv ra, t3
+; RV32I-NEXT:    mv ra, t1
 ; RV32I-NEXT:  .LBB15_132:
-; RV32I-NEXT:    li t4, 0
-; RV32I-NEXT:    or t3, s0, a3
-; RV32I-NEXT:    srl t0, t0, s3
-; RV32I-NEXT:    li s1, 6
+; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    or s3, t5, t3
+; RV32I-NEXT:    neg t1, t6
+; RV32I-NEXT:    srl t0, t0, t1
+; RV32I-NEXT:    li t5, 6
 ; RV32I-NEXT:    beqz t6, .LBB15_134
 ; RV32I-NEXT:  # %bb.133:
-; RV32I-NEXT:    mv t4, t0
+; RV32I-NEXT:    mv a3, t0
 ; RV32I-NEXT:  .LBB15_134:
-; RV32I-NEXT:    sll t1, t3, a1
-; RV32I-NEXT:    li s0, 3
+; RV32I-NEXT:    sll t1, s3, a1
 ; RV32I-NEXT:    beqz a5, .LBB15_142
 ; RV32I-NEXT:  # %bb.135:
-; RV32I-NEXT:    li t4, 0
 ; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    li t3, 0
 ; RV32I-NEXT:    bnez t6, .LBB15_143
 ; RV32I-NEXT:    j .LBB15_144
 ; RV32I-NEXT:  .LBB15_136:
-; RV32I-NEXT:    lw a7, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    or t1, a7, a3
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    or a3, s11, t1
+; RV32I-NEXT:    li t1, 0
 ; RV32I-NEXT:    beqz t6, .LBB15_118
 ; RV32I-NEXT:  .LBB15_137:
-; RV32I-NEXT:    lw a3, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    bne a5, s8, .LBB15_119
+; RV32I-NEXT:    mv t1, s7
+; RV32I-NEXT:    li t3, 2
+; RV32I-NEXT:    bne a5, t3, .LBB15_119
 ; RV32I-NEXT:  .LBB15_138:
-; RV32I-NEXT:    lw a7, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    or t1, a7, a3
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    lw a3, 0(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    or a3, a3, t1
+; RV32I-NEXT:    li t1, 0
 ; RV32I-NEXT:    beqz t6, .LBB15_120
 ; RV32I-NEXT:  .LBB15_139:
-; RV32I-NEXT:    mv a3, s2
-; RV32I-NEXT:    bne a5, s0, .LBB15_121
+; RV32I-NEXT:    lw t1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    bne a5, s3, .LBB15_121
 ; RV32I-NEXT:  .LBB15_140:
-; RV32I-NEXT:    lw a7, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    or t1, a7, a3
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    or a3, s8, t1
+; RV32I-NEXT:    li t1, 0
 ; RV32I-NEXT:    beqz t6, .LBB15_122
 ; RV32I-NEXT:  .LBB15_141:
-; RV32I-NEXT:    mv a3, s11
-; RV32I-NEXT:    beq a5, s4, .LBB15_123
+; RV32I-NEXT:    lw t1, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    beq a5, s5, .LBB15_123
 ; RV32I-NEXT:    j .LBB15_124
 ; RV32I-NEXT:  .LBB15_142:
-; RV32I-NEXT:    or t4, t1, t4
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    or a3, t1, a3
+; RV32I-NEXT:    li t3, 0
 ; RV32I-NEXT:    beqz t6, .LBB15_144
 ; RV32I-NEXT:  .LBB15_143:
-; RV32I-NEXT:    mv a3, a6
+; RV32I-NEXT:    mv t3, a6
 ; RV32I-NEXT:  .LBB15_144:
-; RV32I-NEXT:    beq a5, s7, .LBB15_166
+; RV32I-NEXT:    li s0, 1
+; RV32I-NEXT:    beq a5, s0, .LBB15_164
 ; RV32I-NEXT:  # %bb.145:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t6, .LBB15_167
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    bnez t6, .LBB15_165
 ; RV32I-NEXT:  .LBB15_146:
-; RV32I-NEXT:    beq a5, s8, .LBB15_168
+; RV32I-NEXT:    li s0, 2
+; RV32I-NEXT:    beq a5, s0, .LBB15_166
 ; RV32I-NEXT:  .LBB15_147:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t6, .LBB15_169
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    bnez t6, .LBB15_167
 ; RV32I-NEXT:  .LBB15_148:
-; RV32I-NEXT:    beq a5, s0, .LBB15_170
+; RV32I-NEXT:    li s0, 3
+; RV32I-NEXT:    beq a5, s0, .LBB15_168
 ; RV32I-NEXT:  .LBB15_149:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t6, .LBB15_171
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    bnez t6, .LBB15_169
 ; RV32I-NEXT:  .LBB15_150:
-; RV32I-NEXT:    bne a5, s4, .LBB15_152
+; RV32I-NEXT:    beq a5, s5, .LBB15_170
 ; RV32I-NEXT:  .LBB15_151:
-; RV32I-NEXT:    lw a7, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    or t4, a7, a3
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    bnez t6, .LBB15_171
 ; RV32I-NEXT:  .LBB15_152:
-; RV32I-NEXT:    li a7, 1
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t6, .LBB15_154
-; RV32I-NEXT:  # %bb.153:
-; RV32I-NEXT:    mv a3, s11
+; RV32I-NEXT:    bne a5, s1, .LBB15_154
+; RV32I-NEXT:  .LBB15_153:
+; RV32I-NEXT:    or a3, s4, t3
 ; RV32I-NEXT:  .LBB15_154:
-; RV32I-NEXT:    li s7, 2
-; RV32I-NEXT:    li s8, 3
-; RV32I-NEXT:    bne a5, s5, .LBB15_156
+; RV32I-NEXT:    lbu s1, 31(a0)
+; RV32I-NEXT:    mv t3, t4
+; RV32I-NEXT:    beq a5, t5, .LBB15_156
 ; RV32I-NEXT:  # %bb.155:
-; RV32I-NEXT:    or t4, s10, a3
+; RV32I-NEXT:    mv t3, a3
 ; RV32I-NEXT:  .LBB15_156:
-; RV32I-NEXT:    lbu s0, 31(a0)
-; RV32I-NEXT:    mv a3, t5
-; RV32I-NEXT:    beq a5, s1, .LBB15_158
+; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    lbu s0, 29(a0)
+; RV32I-NEXT:    lbu t5, 30(a0)
+; RV32I-NEXT:    slli s1, s1, 8
+; RV32I-NEXT:    li s5, 7
+; RV32I-NEXT:    beq a5, s5, .LBB15_158
 ; RV32I-NEXT:  # %bb.157:
-; RV32I-NEXT:    mv a3, t4
+; RV32I-NEXT:    mv a3, t3
 ; RV32I-NEXT:  .LBB15_158:
-; RV32I-NEXT:    li t4, 0
-; RV32I-NEXT:    lbu s5, 29(a0)
-; RV32I-NEXT:    lbu s1, 30(a0)
+; RV32I-NEXT:    lbu t3, 28(a0)
 ; RV32I-NEXT:    slli s0, s0, 8
-; RV32I-NEXT:    li s4, 7
-; RV32I-NEXT:    beq a5, s4, .LBB15_160
+; RV32I-NEXT:    or t5, s1, t5
+; RV32I-NEXT:    mv a0, s3
+; RV32I-NEXT:    beqz a1, .LBB15_160
 ; RV32I-NEXT:  # %bb.159:
-; RV32I-NEXT:    mv t4, a3
+; RV32I-NEXT:    mv a0, a3
 ; RV32I-NEXT:  .LBB15_160:
-; RV32I-NEXT:    lbu a3, 28(a0)
-; RV32I-NEXT:    slli s5, s5, 8
-; RV32I-NEXT:    or s0, s0, s1
-; RV32I-NEXT:    mv a0, t3
-; RV32I-NEXT:    beqz a1, .LBB15_162
+; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    or t3, s0, t3
+; RV32I-NEXT:    slli t5, t5, 16
+; RV32I-NEXT:    li s0, 5
+; RV32I-NEXT:    li s1, 4
+; RV32I-NEXT:    beqz t6, .LBB15_162
 ; RV32I-NEXT:  # %bb.161:
-; RV32I-NEXT:    mv a0, t4
+; RV32I-NEXT:    neg a3, t6
+; RV32I-NEXT:    srl a3, s3, a3
 ; RV32I-NEXT:  .LBB15_162:
-; RV32I-NEXT:    li t4, 0
-; RV32I-NEXT:    or a3, s5, a3
-; RV32I-NEXT:    slli s0, s0, 16
-; RV32I-NEXT:    li s1, 6
-; RV32I-NEXT:    li s4, 4
-; RV32I-NEXT:    beqz t6, .LBB15_164
-; RV32I-NEXT:  # %bb.163:
-; RV32I-NEXT:    srl t4, t3, s3
-; RV32I-NEXT:  .LBB15_164:
-; RV32I-NEXT:    or s3, s0, a3
-; RV32I-NEXT:    li s0, 5
+; RV32I-NEXT:    or s3, t5, t3
 ; RV32I-NEXT:    beqz a5, .LBB15_172
-; RV32I-NEXT:  # %bb.165:
-; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:  # %bb.163:
 ; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    li t3, 0
 ; RV32I-NEXT:    bnez t6, .LBB15_173
 ; RV32I-NEXT:    j .LBB15_174
-; RV32I-NEXT:  .LBB15_166:
-; RV32I-NEXT:    lw a7, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    or t4, a7, a3
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:  .LBB15_164:
+; RV32I-NEXT:    or a3, a7, t3
+; RV32I-NEXT:    li t3, 0
 ; RV32I-NEXT:    beqz t6, .LBB15_146
+; RV32I-NEXT:  .LBB15_165:
+; RV32I-NEXT:    mv t3, s10
+; RV32I-NEXT:    li s0, 2
+; RV32I-NEXT:    bne a5, s0, .LBB15_147
+; RV32I-NEXT:  .LBB15_166:
+; RV32I-NEXT:    or a3, s11, t3
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beqz t6, .LBB15_148
 ; RV32I-NEXT:  .LBB15_167:
-; RV32I-NEXT:    lw a3, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    bne a5, s8, .LBB15_147
+; RV32I-NEXT:    mv t3, s7
+; RV32I-NEXT:    li s0, 3
+; RV32I-NEXT:    bne a5, s0, .LBB15_149
 ; RV32I-NEXT:  .LBB15_168:
-; RV32I-NEXT:    lw a7, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    or t4, a7, a3
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t6, .LBB15_148
+; RV32I-NEXT:    lw a3, 0(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    or a3, a3, t3
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beqz t6, .LBB15_150
 ; RV32I-NEXT:  .LBB15_169:
-; RV32I-NEXT:    lw a3, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    bne a5, s0, .LBB15_149
+; RV32I-NEXT:    lw t3, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    bne a5, s5, .LBB15_151
 ; RV32I-NEXT:  .LBB15_170:
-; RV32I-NEXT:    lw a7, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    or t4, a7, a3
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t6, .LBB15_150
+; RV32I-NEXT:    or a3, s8, t3
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beqz t6, .LBB15_152
 ; RV32I-NEXT:  .LBB15_171:
-; RV32I-NEXT:    mv a3, s2
-; RV32I-NEXT:    beq a5, s4, .LBB15_151
-; RV32I-NEXT:    j .LBB15_152
+; RV32I-NEXT:    lw t3, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    beq a5, s1, .LBB15_153
+; RV32I-NEXT:    j .LBB15_154
 ; RV32I-NEXT:  .LBB15_172:
-; RV32I-NEXT:    sll a3, s3, a1
-; RV32I-NEXT:    or t3, a3, t4
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    sll t3, s3, a1
+; RV32I-NEXT:    or a3, t3, a3
+; RV32I-NEXT:    li t3, 0
 ; RV32I-NEXT:    beqz t6, .LBB15_174
 ; RV32I-NEXT:  .LBB15_173:
-; RV32I-NEXT:    mv a3, t0
+; RV32I-NEXT:    mv t3, t0
 ; RV32I-NEXT:  .LBB15_174:
-; RV32I-NEXT:    beq a5, a7, .LBB15_189
+; RV32I-NEXT:    li t0, 1
+; RV32I-NEXT:    beq a5, t0, .LBB15_190
 ; RV32I-NEXT:  # %bb.175:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t6, .LBB15_190
+; RV32I-NEXT:    li t0, 0
+; RV32I-NEXT:    bnez t6, .LBB15_191
 ; RV32I-NEXT:  .LBB15_176:
-; RV32I-NEXT:    beq a5, s7, .LBB15_191
+; RV32I-NEXT:    li a6, 2
+; RV32I-NEXT:    beq a5, a6, .LBB15_192
 ; RV32I-NEXT:  .LBB15_177:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t6, .LBB15_192
+; RV32I-NEXT:    li a6, 0
+; RV32I-NEXT:    bnez t6, .LBB15_193
 ; RV32I-NEXT:  .LBB15_178:
-; RV32I-NEXT:    beq a5, s8, .LBB15_193
+; RV32I-NEXT:    li a7, 3
+; RV32I-NEXT:    beq a5, a7, .LBB15_194
 ; RV32I-NEXT:  .LBB15_179:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t6, .LBB15_194
+; RV32I-NEXT:    li a6, 0
+; RV32I-NEXT:    bnez t6, .LBB15_195
 ; RV32I-NEXT:  .LBB15_180:
-; RV32I-NEXT:    beq a5, s4, .LBB15_195
+; RV32I-NEXT:    beq a5, s1, .LBB15_196
 ; RV32I-NEXT:  .LBB15_181:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t6, .LBB15_196
+; RV32I-NEXT:    li a6, 0
+; RV32I-NEXT:    bnez t6, .LBB15_197
 ; RV32I-NEXT:  .LBB15_182:
-; RV32I-NEXT:    beq a5, s0, .LBB15_197
+; RV32I-NEXT:    bne a5, s0, .LBB15_184
 ; RV32I-NEXT:  .LBB15_183:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t6, .LBB15_198
+; RV32I-NEXT:    or a3, s8, a6
 ; RV32I-NEXT:  .LBB15_184:
-; RV32I-NEXT:    beq a5, s1, .LBB15_199
-; RV32I-NEXT:  .LBB15_185:
-; RV32I-NEXT:    li a3, 7
-; RV32I-NEXT:    bne a5, a3, .LBB15_200
+; RV32I-NEXT:    li a6, 0
+; RV32I-NEXT:    li a7, 6
+; RV32I-NEXT:    bnez t6, .LBB15_198
+; RV32I-NEXT:  # %bb.185:
+; RV32I-NEXT:    beq a5, a7, .LBB15_199
 ; RV32I-NEXT:  .LBB15_186:
-; RV32I-NEXT:    beqz a1, .LBB15_188
+; RV32I-NEXT:    li a6, 7
+; RV32I-NEXT:    bne a5, a6, .LBB15_200
 ; RV32I-NEXT:  .LBB15_187:
-; RV32I-NEXT:    mv s3, t5
+; RV32I-NEXT:    beqz a1, .LBB15_189
 ; RV32I-NEXT:  .LBB15_188:
+; RV32I-NEXT:    mv s3, t4
+; RV32I-NEXT:  .LBB15_189:
 ; RV32I-NEXT:    srli a1, a4, 16
 ; RV32I-NEXT:    lui a7, 16
 ; RV32I-NEXT:    srli a6, a4, 24
 ; RV32I-NEXT:    srli a3, t2, 16
 ; RV32I-NEXT:    srli t1, t2, 24
-; RV32I-NEXT:    lw s2, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    srli a5, s2, 16
 ; RV32I-NEXT:    srli t5, s2, 24
 ; RV32I-NEXT:    srli t0, s6, 16
@@ -7175,68 +7235,64 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
 ; RV32I-NEXT:    sb a4, 29(a2)
 ; RV32I-NEXT:    sb s7, 30(a2)
 ; RV32I-NEXT:    sb s8, 31(a2)
-; RV32I-NEXT:    lw ra, 76(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 72(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 68(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 64(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s3, 60(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s4, 56(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s5, 52(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s6, 48(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s7, 44(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s8, 40(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s9, 36(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s10, 32(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s11, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    addi sp, sp, 80
+; RV32I-NEXT:    lw ra, 60(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 56(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 52(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 48(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s4, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s5, 36(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s6, 32(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s7, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s8, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s9, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s10, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s11, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 64
 ; RV32I-NEXT:    ret
-; RV32I-NEXT:  .LBB15_189:
-; RV32I-NEXT:    or t3, t1, a3
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t6, .LBB15_176
 ; RV32I-NEXT:  .LBB15_190:
-; RV32I-NEXT:    mv a3, a6
-; RV32I-NEXT:    bne a5, s7, .LBB15_177
+; RV32I-NEXT:    or a3, t1, t3
+; RV32I-NEXT:    li t0, 0
+; RV32I-NEXT:    beqz t6, .LBB15_176
 ; RV32I-NEXT:  .LBB15_191:
-; RV32I-NEXT:    lw a6, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    or t3, a6, a3
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t6, .LBB15_178
+; RV32I-NEXT:    mv t0, a6
+; RV32I-NEXT:    li a6, 2
+; RV32I-NEXT:    bne a5, a6, .LBB15_177
 ; RV32I-NEXT:  .LBB15_192:
-; RV32I-NEXT:    lw a3, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    bne a5, s8, .LBB15_179
+; RV32I-NEXT:    or a3, a7, t0
+; RV32I-NEXT:    li a6, 0
+; RV32I-NEXT:    beqz t6, .LBB15_178
 ; RV32I-NEXT:  .LBB15_193:
-; RV32I-NEXT:    lw a6, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    or t3, a6, a3
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t6, .LBB15_180
+; RV32I-NEXT:    mv a6, s10
+; RV32I-NEXT:    li a7, 3
+; RV32I-NEXT:    bne a5, a7, .LBB15_179
 ; RV32I-NEXT:  .LBB15_194:
-; RV32I-NEXT:    lw a3, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    bne a5, s4, .LBB15_181
+; RV32I-NEXT:    or a3, s11, a6
+; RV32I-NEXT:    li a6, 0
+; RV32I-NEXT:    beqz t6, .LBB15_180
 ; RV32I-NEXT:  .LBB15_195:
-; RV32I-NEXT:    lw a6, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    or t3, a6, a3
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t6, .LBB15_182
+; RV32I-NEXT:    mv a6, s7
+; RV32I-NEXT:    bne a5, s1, .LBB15_181
 ; RV32I-NEXT:  .LBB15_196:
-; RV32I-NEXT:    mv a3, s2
-; RV32I-NEXT:    bne a5, s0, .LBB15_183
+; RV32I-NEXT:    lw a3, 0(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    or a3, a3, a6
+; RV32I-NEXT:    li a6, 0
+; RV32I-NEXT:    beqz t6, .LBB15_182
 ; RV32I-NEXT:  .LBB15_197:
-; RV32I-NEXT:    lw a6, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    or t3, a6, a3
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t6, .LBB15_184
+; RV32I-NEXT:    lw a6, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    beq a5, s0, .LBB15_183
+; RV32I-NEXT:    j .LBB15_184
 ; RV32I-NEXT:  .LBB15_198:
-; RV32I-NEXT:    mv a3, s11
-; RV32I-NEXT:    bne a5, s1, .LBB15_185
+; RV32I-NEXT:    lw a6, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    bne a5, a7, .LBB15_186
 ; RV32I-NEXT:  .LBB15_199:
-; RV32I-NEXT:    or t3, s10, a3
-; RV32I-NEXT:    li a3, 7
-; RV32I-NEXT:    beq a5, a3, .LBB15_186
+; RV32I-NEXT:    or a3, s4, a6
+; RV32I-NEXT:    li a6, 7
+; RV32I-NEXT:    beq a5, a6, .LBB15_187
 ; RV32I-NEXT:  .LBB15_200:
-; RV32I-NEXT:    mv t5, t3
-; RV32I-NEXT:    bnez a1, .LBB15_187
-; RV32I-NEXT:    j .LBB15_188
+; RV32I-NEXT:    mv t4, a3
+; RV32I-NEXT:    bnez a1, .LBB15_188
+; RV32I-NEXT:    j .LBB15_189
   %src = load i256, ptr %src.ptr, align 1
   %byteOff = load i256, ptr %byteOff.ptr, align 1
   %bitOff = shl i256 %byteOff, 3
@@ -7584,675 +7640,662 @@ define void @shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw
 ; RV32I-NEXT:    or a1, a1, t0
 ; RV32I-NEXT:    slli a5, a5, 16
 ; RV32I-NEXT:    slli a1, a1, 16
-; RV32I-NEXT:    or a6, a5, a3
+; RV32I-NEXT:    or a3, a5, a3
 ; RV32I-NEXT:    or a1, a1, a4
 ; RV32I-NEXT:    slli a1, a1, 5
 ; RV32I-NEXT:    srli a5, a1, 5
-; RV32I-NEXT:    sll t5, a6, a1
-; RV32I-NEXT:    li s9, 1
-; RV32I-NEXT:    mv a4, t5
+; RV32I-NEXT:    sll t4, a3, a1
+; RV32I-NEXT:    li s1, 1
+; RV32I-NEXT:    mv a6, t4
 ; RV32I-NEXT:    beqz a5, .LBB16_2
 ; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    li a4, 0
+; RV32I-NEXT:    li a6, 0
 ; RV32I-NEXT:  .LBB16_2:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    li s0, 2
-; RV32I-NEXT:    beq a5, s9, .LBB16_4
+; RV32I-NEXT:    li a4, 0
+; RV32I-NEXT:    li s3, 2
+; RV32I-NEXT:    beq a5, s1, .LBB16_4
 ; RV32I-NEXT:  # %bb.3:
-; RV32I-NEXT:    mv a3, a4
+; RV32I-NEXT:    mv a4, a6
 ; RV32I-NEXT:  .LBB16_4:
-; RV32I-NEXT:    li a7, 0
-; RV32I-NEXT:    beq a5, s0, .LBB16_6
+; RV32I-NEXT:    li a6, 0
+; RV32I-NEXT:    beq a5, s3, .LBB16_6
 ; RV32I-NEXT:  # %bb.5:
-; RV32I-NEXT:    mv a7, a3
+; RV32I-NEXT:    mv a6, a4
 ; RV32I-NEXT:  .LBB16_6:
 ; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    li s6, 3
-; RV32I-NEXT:    li s4, 4
-; RV32I-NEXT:    beq a5, s6, .LBB16_8
+; RV32I-NEXT:    li s4, 3
+; RV32I-NEXT:    li s5, 4
+; RV32I-NEXT:    beq a5, s4, .LBB16_8
 ; RV32I-NEXT:  # %bb.7:
-; RV32I-NEXT:    mv a4, a7
+; RV32I-NEXT:    mv a4, a6
 ; RV32I-NEXT:  .LBB16_8:
-; RV32I-NEXT:    li t0, 0
-; RV32I-NEXT:    li s11, 5
-; RV32I-NEXT:    beq a5, s4, .LBB16_10
+; RV32I-NEXT:    li a7, 0
+; RV32I-NEXT:    li s9, 5
+; RV32I-NEXT:    beq a5, s5, .LBB16_10
 ; RV32I-NEXT:  # %bb.9:
-; RV32I-NEXT:    mv t0, a4
+; RV32I-NEXT:    mv a7, a4
 ; RV32I-NEXT:  .LBB16_10:
-; RV32I-NEXT:    lbu t2, 7(a0)
-; RV32I-NEXT:    li a7, 0
-; RV32I-NEXT:    beq a5, s11, .LBB16_12
+; RV32I-NEXT:    lbu t1, 7(a0)
+; RV32I-NEXT:    li a6, 0
+; RV32I-NEXT:    beq a5, s9, .LBB16_12
 ; RV32I-NEXT:  # %bb.11:
-; RV32I-NEXT:    mv a7, t0
+; RV32I-NEXT:    mv a6, a7
 ; RV32I-NEXT:  .LBB16_12:
 ; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    lbu t0, 5(a0)
-; RV32I-NEXT:    lbu t1, 6(a0)
-; RV32I-NEXT:    li s1, 6
-; RV32I-NEXT:    slli t2, t2, 8
-; RV32I-NEXT:    beq a5, s1, .LBB16_14
+; RV32I-NEXT:    lbu a7, 5(a0)
+; RV32I-NEXT:    lbu t0, 6(a0)
+; RV32I-NEXT:    li s0, 6
+; RV32I-NEXT:    slli t2, t1, 8
+; RV32I-NEXT:    beq a5, s0, .LBB16_14
 ; RV32I-NEXT:  # %bb.13:
-; RV32I-NEXT:    mv a4, a7
+; RV32I-NEXT:    mv a4, a6
 ; RV32I-NEXT:  .LBB16_14:
-; RV32I-NEXT:    li a7, 0
-; RV32I-NEXT:    lbu a3, 4(a0)
-; RV32I-NEXT:    or t1, t2, t1
+; RV32I-NEXT:    li a6, 0
+; RV32I-NEXT:    lbu t1, 4(a0)
+; RV32I-NEXT:    or t2, t2, t0
 ; RV32I-NEXT:    li ra, 7
-; RV32I-NEXT:    slli t0, t0, 8
+; RV32I-NEXT:    slli a7, a7, 8
 ; RV32I-NEXT:    beq a5, ra, .LBB16_16
 ; RV32I-NEXT:  # %bb.15:
-; RV32I-NEXT:    mv a7, a4
+; RV32I-NEXT:    mv a6, a4
 ; RV32I-NEXT:  .LBB16_16:
-; RV32I-NEXT:    or a3, t0, a3
-; RV32I-NEXT:    slli t1, t1, 16
+; RV32I-NEXT:    or t0, a7, t1
+; RV32I-NEXT:    slli t2, t2, 16
 ; RV32I-NEXT:    andi t6, a1, 31
-; RV32I-NEXT:    mv a4, a6
+; RV32I-NEXT:    mv a4, a3
 ; RV32I-NEXT:    beqz a1, .LBB16_18
 ; RV32I-NEXT:  # %bb.17:
-; RV32I-NEXT:    mv a4, a7
+; RV32I-NEXT:    mv a4, a6
 ; RV32I-NEXT:  .LBB16_18:
-; RV32I-NEXT:    li t0, 0
-; RV32I-NEXT:    or a7, t1, a3
-; RV32I-NEXT:    neg s3, t6
-; RV32I-NEXT:    srl s5, a6, s3
+; RV32I-NEXT:    li a7, 0
+; RV32I-NEXT:    or a6, t2, t0
+; RV32I-NEXT:    neg s10, t6
+; RV32I-NEXT:    srl s8, a3, s10
 ; RV32I-NEXT:    beqz t6, .LBB16_20
 ; RV32I-NEXT:  # %bb.19:
-; RV32I-NEXT:    mv t0, s5
+; RV32I-NEXT:    mv a7, s8
 ; RV32I-NEXT:  .LBB16_20:
-; RV32I-NEXT:    sll s7, a7, a1
+; RV32I-NEXT:    sll s7, a6, a1
 ; RV32I-NEXT:    beqz a5, .LBB16_22
 ; RV32I-NEXT:  # %bb.21:
 ; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    mv a6, t5
-; RV32I-NEXT:    bne a5, s9, .LBB16_23
+; RV32I-NEXT:    mv a7, t4
+; RV32I-NEXT:    bne a5, s1, .LBB16_23
 ; RV32I-NEXT:    j .LBB16_24
 ; RV32I-NEXT:  .LBB16_22:
-; RV32I-NEXT:    or a3, s7, t0
-; RV32I-NEXT:    mv a6, t5
-; RV32I-NEXT:    beq a5, s9, .LBB16_24
+; RV32I-NEXT:    or a3, s7, a7
+; RV32I-NEXT:    mv a7, t4
+; RV32I-NEXT:    beq a5, s1, .LBB16_24
 ; RV32I-NEXT:  .LBB16_23:
-; RV32I-NEXT:    mv a6, a3
+; RV32I-NEXT:    mv a7, a3
 ; RV32I-NEXT:  .LBB16_24:
 ; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bne a5, s0, .LBB16_40
+; RV32I-NEXT:    bne a5, s3, .LBB16_40
 ; RV32I-NEXT:  # %bb.25:
-; RV32I-NEXT:    li a6, 0
-; RV32I-NEXT:    bne a5, s6, .LBB16_41
+; RV32I-NEXT:    li a7, 0
+; RV32I-NEXT:    bne a5, s4, .LBB16_41
 ; RV32I-NEXT:  .LBB16_26:
 ; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beq a5, s4, .LBB16_28
+; RV32I-NEXT:    beq a5, s5, .LBB16_28
 ; RV32I-NEXT:  .LBB16_27:
-; RV32I-NEXT:    mv a3, a6
+; RV32I-NEXT:    mv a3, a7
 ; RV32I-NEXT:  .LBB16_28:
 ; RV32I-NEXT:    lbu t2, 11(a0)
-; RV32I-NEXT:    li t0, 0
-; RV32I-NEXT:    beq a5, s11, .LBB16_30
+; RV32I-NEXT:    li a7, 0
+; RV32I-NEXT:    beq a5, s9, .LBB16_30
 ; RV32I-NEXT:  # %bb.29:
-; RV32I-NEXT:    mv t0, a3
+; RV32I-NEXT:    mv a7, a3
 ; RV32I-NEXT:  .LBB16_30:
-; RV32I-NEXT:    li a6, 0
-; RV32I-NEXT:    lbu t1, 9(a0)
-; RV32I-NEXT:    lbu a3, 10(a0)
+; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    lbu t0, 9(a0)
+; RV32I-NEXT:    lbu t1, 10(a0)
 ; RV32I-NEXT:    slli t2, t2, 8
-; RV32I-NEXT:    beq a5, s1, .LBB16_32
+; RV32I-NEXT:    beq a5, s0, .LBB16_32
 ; RV32I-NEXT:  # %bb.31:
-; RV32I-NEXT:    mv a6, t0
+; RV32I-NEXT:    mv a3, a7
 ; RV32I-NEXT:  .LBB16_32:
-; RV32I-NEXT:    li t0, 0
+; RV32I-NEXT:    li a7, 0
 ; RV32I-NEXT:    lbu t3, 8(a0)
-; RV32I-NEXT:    or t2, t2, a3
-; RV32I-NEXT:    slli t1, t1, 8
+; RV32I-NEXT:    or t1, t2, t1
+; RV32I-NEXT:    slli t0, t0, 8
 ; RV32I-NEXT:    beq a5, ra, .LBB16_34
 ; RV32I-NEXT:  # %bb.33:
-; RV32I-NEXT:    mv t0, a6
+; RV32I-NEXT:    mv a7, a3
 ; RV32I-NEXT:  .LBB16_34:
-; RV32I-NEXT:    or a3, t1, t3
-; RV32I-NEXT:    slli a6, t2, 16
-; RV32I-NEXT:    mv t2, a7
+; RV32I-NEXT:    or a3, t0, t3
+; RV32I-NEXT:    slli t1, t1, 16
+; RV32I-NEXT:    mv t2, a6
 ; RV32I-NEXT:    beqz a1, .LBB16_36
 ; RV32I-NEXT:  # %bb.35:
-; RV32I-NEXT:    mv t2, t0
+; RV32I-NEXT:    mv t2, a7
 ; RV32I-NEXT:  .LBB16_36:
-; RV32I-NEXT:    li t0, 0
-; RV32I-NEXT:    or a6, a6, a3
-; RV32I-NEXT:    srl s8, a7, s3
+; RV32I-NEXT:    li a7, 0
+; RV32I-NEXT:    or a3, t1, a3
+; RV32I-NEXT:    srl a6, a6, s10
+; RV32I-NEXT:    sw a6, 24(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    beqz t6, .LBB16_38
 ; RV32I-NEXT:  # %bb.37:
-; RV32I-NEXT:    mv t0, s8
+; RV32I-NEXT:    lw a7, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:  .LBB16_38:
-; RV32I-NEXT:    sll s10, a6, a1
+; RV32I-NEXT:    sll s6, a3, a1
 ; RV32I-NEXT:    beqz a5, .LBB16_42
 ; RV32I-NEXT:  # %bb.39:
+; RV32I-NEXT:    li a6, 0
 ; RV32I-NEXT:    li a7, 0
-; RV32I-NEXT:    li a3, 0
 ; RV32I-NEXT:    bnez t6, .LBB16_43
 ; RV32I-NEXT:    j .LBB16_44
 ; RV32I-NEXT:  .LBB16_40:
-; RV32I-NEXT:    mv a3, a6
-; RV32I-NEXT:    li a6, 0
-; RV32I-NEXT:    beq a5, s6, .LBB16_26
+; RV32I-NEXT:    mv a3, a7
+; RV32I-NEXT:    li a7, 0
+; RV32I-NEXT:    beq a5, s4, .LBB16_26
 ; RV32I-NEXT:  .LBB16_41:
-; RV32I-NEXT:    mv a6, a3
+; RV32I-NEXT:    mv a7, a3
 ; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bne a5, s4, .LBB16_27
+; RV32I-NEXT:    bne a5, s5, .LBB16_27
 ; RV32I-NEXT:    j .LBB16_28
 ; RV32I-NEXT:  .LBB16_42:
-; RV32I-NEXT:    or a7, s10, t0
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    or a6, s6, a7
+; RV32I-NEXT:    li a7, 0
 ; RV32I-NEXT:    beqz t6, .LBB16_44
 ; RV32I-NEXT:  .LBB16_43:
-; RV32I-NEXT:    mv a3, s5
+; RV32I-NEXT:    mv a7, s8
 ; RV32I-NEXT:  .LBB16_44:
-; RV32I-NEXT:    beq a5, s9, .LBB16_61
+; RV32I-NEXT:    beq a5, s1, .LBB16_61
 ; RV32I-NEXT:  # %bb.45:
-; RV32I-NEXT:    mv a3, t5
-; RV32I-NEXT:    bne a5, s0, .LBB16_62
+; RV32I-NEXT:    mv a7, t4
+; RV32I-NEXT:    bne a5, s3, .LBB16_62
 ; RV32I-NEXT:  .LBB16_46:
-; RV32I-NEXT:    li a7, 0
-; RV32I-NEXT:    bne a5, s6, .LBB16_63
+; RV32I-NEXT:    li t0, 0
+; RV32I-NEXT:    bne a5, s4, .LBB16_63
 ; RV32I-NEXT:  .LBB16_47:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beq a5, s4, .LBB16_49
+; RV32I-NEXT:    li a6, 0
+; RV32I-NEXT:    beq a5, s5, .LBB16_49
 ; RV32I-NEXT:  .LBB16_48:
-; RV32I-NEXT:    mv a3, a7
+; RV32I-NEXT:    mv a6, t0
 ; RV32I-NEXT:  .LBB16_49:
 ; RV32I-NEXT:    lbu t3, 15(a0)
-; RV32I-NEXT:    li t0, 0
-; RV32I-NEXT:    beq a5, s11, .LBB16_51
+; RV32I-NEXT:    li a7, 0
+; RV32I-NEXT:    beq a5, s9, .LBB16_51
 ; RV32I-NEXT:  # %bb.50:
-; RV32I-NEXT:    mv t0, a3
+; RV32I-NEXT:    mv a7, a6
 ; RV32I-NEXT:  .LBB16_51:
-; RV32I-NEXT:    li a7, 0
-; RV32I-NEXT:    lbu t1, 13(a0)
-; RV32I-NEXT:    lbu a3, 14(a0)
+; RV32I-NEXT:    li a6, 0
+; RV32I-NEXT:    lbu t0, 13(a0)
+; RV32I-NEXT:    lbu t1, 14(a0)
 ; RV32I-NEXT:    slli t3, t3, 8
-; RV32I-NEXT:    beq a5, s1, .LBB16_53
+; RV32I-NEXT:    beq a5, s0, .LBB16_53
 ; RV32I-NEXT:  # %bb.52:
-; RV32I-NEXT:    mv a7, t0
+; RV32I-NEXT:    mv a6, a7
 ; RV32I-NEXT:  .LBB16_53:
-; RV32I-NEXT:    li t0, 0
-; RV32I-NEXT:    lbu t4, 12(a0)
-; RV32I-NEXT:    or t3, t3, a3
-; RV32I-NEXT:    slli t1, t1, 8
+; RV32I-NEXT:    li a7, 0
+; RV32I-NEXT:    lbu t5, 12(a0)
+; RV32I-NEXT:    or t1, t3, t1
+; RV32I-NEXT:    slli t0, t0, 8
 ; RV32I-NEXT:    beq a5, ra, .LBB16_55
 ; RV32I-NEXT:  # %bb.54:
-; RV32I-NEXT:    mv t0, a7
+; RV32I-NEXT:    mv a7, a6
 ; RV32I-NEXT:  .LBB16_55:
-; RV32I-NEXT:    or a3, t1, t4
-; RV32I-NEXT:    slli t3, t3, 16
-; RV32I-NEXT:    mv s2, a6
+; RV32I-NEXT:    or t0, t0, t5
+; RV32I-NEXT:    slli t1, t1, 16
+; RV32I-NEXT:    mv s2, a3
 ; RV32I-NEXT:    beqz a1, .LBB16_57
 ; RV32I-NEXT:  # %bb.56:
-; RV32I-NEXT:    mv s2, t0
+; RV32I-NEXT:    mv s2, a7
 ; RV32I-NEXT:  .LBB16_57:
-; RV32I-NEXT:    li t0, 0
-; RV32I-NEXT:    or a7, t3, a3
-; RV32I-NEXT:    srl a3, a6, s3
-; RV32I-NEXT:    sw a3, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    li a6, 0
+; RV32I-NEXT:    or a7, t1, t0
+; RV32I-NEXT:    srl a3, a3, s10
+; RV32I-NEXT:    sw a3, 20(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    beqz t6, .LBB16_59
 ; RV32I-NEXT:  # %bb.58:
-; RV32I-NEXT:    lw t0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw a6, 20(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:  .LBB16_59:
 ; RV32I-NEXT:    sll a3, a7, a1
-; RV32I-NEXT:    sw a3, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw a3, 12(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    beqz a5, .LBB16_64
 ; RV32I-NEXT:  # %bb.60:
-; RV32I-NEXT:    li a6, 0
 ; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    li a6, 0
 ; RV32I-NEXT:    bnez t6, .LBB16_65
 ; RV32I-NEXT:    j .LBB16_66
 ; RV32I-NEXT:  .LBB16_61:
-; RV32I-NEXT:    or a7, s7, a3
-; RV32I-NEXT:    mv a3, t5
-; RV32I-NEXT:    beq a5, s0, .LBB16_46
+; RV32I-NEXT:    or a6, s7, a7
+; RV32I-NEXT:    mv a7, t4
+; RV32I-NEXT:    beq a5, s3, .LBB16_46
 ; RV32I-NEXT:  .LBB16_62:
-; RV32I-NEXT:    mv a3, a7
-; RV32I-NEXT:    li a7, 0
-; RV32I-NEXT:    beq a5, s6, .LBB16_47
+; RV32I-NEXT:    mv a7, a6
+; RV32I-NEXT:    li t0, 0
+; RV32I-NEXT:    beq a5, s4, .LBB16_47
 ; RV32I-NEXT:  .LBB16_63:
-; RV32I-NEXT:    mv a7, a3
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bne a5, s4, .LBB16_48
+; RV32I-NEXT:    mv t0, a7
+; RV32I-NEXT:    li a6, 0
+; RV32I-NEXT:    bne a5, s5, .LBB16_48
 ; RV32I-NEXT:    j .LBB16_49
 ; RV32I-NEXT:  .LBB16_64:
-; RV32I-NEXT:    or a6, a3, t0
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    or a3, a3, a6
+; RV32I-NEXT:    li a6, 0
 ; RV32I-NEXT:    beqz t6, .LBB16_66
 ; RV32I-NEXT:  .LBB16_65:
-; RV32I-NEXT:    mv a3, s8
+; RV32I-NEXT:    lw a6, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:  .LBB16_66:
-; RV32I-NEXT:    beq a5, s9, .LBB16_84
+; RV32I-NEXT:    beq a5, s1, .LBB16_84
 ; RV32I-NEXT:  # %bb.67:
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    li a6, 0
 ; RV32I-NEXT:    bnez t6, .LBB16_85
 ; RV32I-NEXT:  .LBB16_68:
-; RV32I-NEXT:    beq a5, s0, .LBB16_86
+; RV32I-NEXT:    beq a5, s3, .LBB16_86
 ; RV32I-NEXT:  .LBB16_69:
-; RV32I-NEXT:    mv t0, t5
-; RV32I-NEXT:    bne a5, s6, .LBB16_87
+; RV32I-NEXT:    mv a6, t4
+; RV32I-NEXT:    bne a5, s4, .LBB16_87
 ; RV32I-NEXT:  .LBB16_70:
 ; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beq a5, s4, .LBB16_72
+; RV32I-NEXT:    beq a5, s5, .LBB16_72
 ; RV32I-NEXT:  .LBB16_71:
-; RV32I-NEXT:    mv a3, t0
+; RV32I-NEXT:    mv a3, a6
 ; RV32I-NEXT:  .LBB16_72:
 ; RV32I-NEXT:    lbu t3, 19(a0)
-; RV32I-NEXT:    li t0, 0
-; RV32I-NEXT:    beq a5, s11, .LBB16_74
+; RV32I-NEXT:    li a6, 0
+; RV32I-NEXT:    beq a5, s9, .LBB16_74
 ; RV32I-NEXT:  # %bb.73:
-; RV32I-NEXT:    mv t0, a3
+; RV32I-NEXT:    mv a6, a3
 ; RV32I-NEXT:  .LBB16_74:
-; RV32I-NEXT:    li a6, 0
-; RV32I-NEXT:    lbu t1, 17(a0)
-; RV32I-NEXT:    lbu a3, 18(a0)
+; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    lbu t0, 17(a0)
+; RV32I-NEXT:    lbu t1, 18(a0)
 ; RV32I-NEXT:    slli t3, t3, 8
-; RV32I-NEXT:    beq a5, s1, .LBB16_76
+; RV32I-NEXT:    beq a5, s0, .LBB16_76
 ; RV32I-NEXT:  # %bb.75:
-; RV32I-NEXT:    mv a6, t0
+; RV32I-NEXT:    mv a3, a6
 ; RV32I-NEXT:  .LBB16_76:
-; RV32I-NEXT:    li t0, 0
-; RV32I-NEXT:    lbu t4, 16(a0)
-; RV32I-NEXT:    or t3, t3, a3
-; RV32I-NEXT:    slli t1, t1, 8
+; RV32I-NEXT:    sw s6, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    li a6, 0
+; RV32I-NEXT:    lbu t5, 16(a0)
+; RV32I-NEXT:    or t1, t3, t1
+; RV32I-NEXT:    slli t0, t0, 8
 ; RV32I-NEXT:    beq a5, ra, .LBB16_78
 ; RV32I-NEXT:  # %bb.77:
-; RV32I-NEXT:    mv t0, a6
+; RV32I-NEXT:    mv a6, a3
 ; RV32I-NEXT:  .LBB16_78:
-; RV32I-NEXT:    or a3, t1, t4
-; RV32I-NEXT:    slli t3, t3, 16
+; RV32I-NEXT:    or t0, t0, t5
+; RV32I-NEXT:    slli t1, t1, 16
 ; RV32I-NEXT:    mv s6, a7
 ; RV32I-NEXT:    beqz a1, .LBB16_80
 ; RV32I-NEXT:  # %bb.79:
-; RV32I-NEXT:    mv s6, t0
+; RV32I-NEXT:    mv s6, a6
 ; RV32I-NEXT:  .LBB16_80:
-; RV32I-NEXT:    li t0, 0
-; RV32I-NEXT:    or a6, t3, a3
-; RV32I-NEXT:    srl a3, a7, s3
-; RV32I-NEXT:    sw a3, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    or a6, t1, t0
+; RV32I-NEXT:    srl s10, a7, s10
 ; RV32I-NEXT:    beqz t6, .LBB16_82
 ; RV32I-NEXT:  # %bb.81:
-; RV32I-NEXT:    lw t0, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    mv a3, s10
 ; RV32I-NEXT:  .LBB16_82:
-; RV32I-NEXT:    sll a3, a6, a1
-; RV32I-NEXT:    sw a3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sll s11, a6, a1
 ; RV32I-NEXT:    beqz a5, .LBB16_88
 ; RV32I-NEXT:  # %bb.83:
-; RV32I-NEXT:    li a7, 0
 ; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    li a7, 0
 ; RV32I-NEXT:    bnez t6, .LBB16_89
 ; RV32I-NEXT:    j .LBB16_90
 ; RV32I-NEXT:  .LBB16_84:
-; RV32I-NEXT:    or a6, s10, a3
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    or a3, s6, a6
+; RV32I-NEXT:    li a6, 0
 ; RV32I-NEXT:    beqz t6, .LBB16_68
 ; RV32I-NEXT:  .LBB16_85:
-; RV32I-NEXT:    mv a3, s5
-; RV32I-NEXT:    bne a5, s0, .LBB16_69
+; RV32I-NEXT:    mv a6, s8
+; RV32I-NEXT:    bne a5, s3, .LBB16_69
 ; RV32I-NEXT:  .LBB16_86:
-; RV32I-NEXT:    or a6, s7, a3
-; RV32I-NEXT:    mv t0, t5
-; RV32I-NEXT:    beq a5, s6, .LBB16_70
+; RV32I-NEXT:    or a3, s7, a6
+; RV32I-NEXT:    mv a6, t4
+; RV32I-NEXT:    beq a5, s4, .LBB16_70
 ; RV32I-NEXT:  .LBB16_87:
-; RV32I-NEXT:    mv t0, a6
+; RV32I-NEXT:    mv a6, a3
 ; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bne a5, s4, .LBB16_71
+; RV32I-NEXT:    bne a5, s5, .LBB16_71
 ; RV32I-NEXT:    j .LBB16_72
 ; RV32I-NEXT:  .LBB16_88:
-; RV32I-NEXT:    or a7, a3, t0
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    or a3, s11, a3
+; RV32I-NEXT:    li a7, 0
 ; RV32I-NEXT:    beqz t6, .LBB16_90
 ; RV32I-NEXT:  .LBB16_89:
-; RV32I-NEXT:    lw a3, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw a7, 20(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:  .LBB16_90:
-; RV32I-NEXT:    beq a5, s9, .LBB16_109
+; RV32I-NEXT:    beq a5, s1, .LBB16_109
 ; RV32I-NEXT:  # %bb.91:
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    li a7, 0
 ; RV32I-NEXT:    bnez t6, .LBB16_110
 ; RV32I-NEXT:  .LBB16_92:
-; RV32I-NEXT:    beq a5, s0, .LBB16_111
+; RV32I-NEXT:    beq a5, s3, .LBB16_111
 ; RV32I-NEXT:  .LBB16_93:
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    li a7, 0
 ; RV32I-NEXT:    bnez t6, .LBB16_112
 ; RV32I-NEXT:  .LBB16_94:
-; RV32I-NEXT:    li t0, 3
-; RV32I-NEXT:    beq a5, t0, .LBB16_113
+; RV32I-NEXT:    beq a5, s4, .LBB16_113
 ; RV32I-NEXT:  .LBB16_95:
-; RV32I-NEXT:    mv a3, t5
-; RV32I-NEXT:    beq a5, s4, .LBB16_97
+; RV32I-NEXT:    mv t0, t4
+; RV32I-NEXT:    beq a5, s5, .LBB16_97
 ; RV32I-NEXT:  .LBB16_96:
-; RV32I-NEXT:    mv a3, a7
+; RV32I-NEXT:    mv t0, a3
 ; RV32I-NEXT:  .LBB16_97:
 ; RV32I-NEXT:    lbu t3, 23(a0)
-; RV32I-NEXT:    li t0, 0
-; RV32I-NEXT:    beq a5, s11, .LBB16_99
+; RV32I-NEXT:    li a7, 0
+; RV32I-NEXT:    beq a5, s9, .LBB16_99
 ; RV32I-NEXT:  # %bb.98:
-; RV32I-NEXT:    mv t0, a3
+; RV32I-NEXT:    mv a7, t0
 ; RV32I-NEXT:  .LBB16_99:
-; RV32I-NEXT:    li a7, 0
-; RV32I-NEXT:    lbu t1, 21(a0)
-; RV32I-NEXT:    lbu a3, 22(a0)
+; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    lbu t0, 21(a0)
+; RV32I-NEXT:    lbu t1, 22(a0)
 ; RV32I-NEXT:    slli t3, t3, 8
-; RV32I-NEXT:    beq a5, s1, .LBB16_101
+; RV32I-NEXT:    beq a5, s0, .LBB16_101
 ; RV32I-NEXT:  # %bb.100:
-; RV32I-NEXT:    mv a7, t0
+; RV32I-NEXT:    mv a3, a7
 ; RV32I-NEXT:  .LBB16_101:
-; RV32I-NEXT:    li t0, 0
-; RV32I-NEXT:    lbu t4, 20(a0)
-; RV32I-NEXT:    or t3, t3, a3
-; RV32I-NEXT:    slli t1, t1, 8
+; RV32I-NEXT:    sw s7, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    li a7, 0
+; RV32I-NEXT:    lbu t5, 20(a0)
+; RV32I-NEXT:    or t1, t3, t1
+; RV32I-NEXT:    slli t0, t0, 8
 ; RV32I-NEXT:    beq a5, ra, .LBB16_103
 ; RV32I-NEXT:  # %bb.102:
-; RV32I-NEXT:    mv t0, a7
+; RV32I-NEXT:    mv a7, a3
 ; RV32I-NEXT:  .LBB16_103:
-; RV32I-NEXT:    or a3, t1, t4
-; RV32I-NEXT:    slli t3, t3, 16
+; RV32I-NEXT:    or t0, t0, t5
+; RV32I-NEXT:    slli t1, t1, 16
 ; RV32I-NEXT:    mv s9, a6
 ; RV32I-NEXT:    beqz a1, .LBB16_105
 ; RV32I-NEXT:  # %bb.104:
-; RV32I-NEXT:    mv s9, t0
+; RV32I-NEXT:    mv s9, a7
 ; RV32I-NEXT:  .LBB16_105:
-; RV32I-NEXT:    li t1, 0
-; RV32I-NEXT:    or t0, t3, a3
-; RV32I-NEXT:    srl a6, a6, s3
+; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    or t0, t1, t0
+; RV32I-NEXT:    neg a7, t6
+; RV32I-NEXT:    srl a6, a6, a7
 ; RV32I-NEXT:    beqz t6, .LBB16_107
 ; RV32I-NEXT:  # %bb.106:
-; RV32I-NEXT:    mv t1, a6
+; RV32I-NEXT:    mv a3, a6
 ; RV32I-NEXT:  .LBB16_107:
 ; RV32I-NEXT:    sll a7, t0, a1
 ; RV32I-NEXT:    beqz a5, .LBB16_114
 ; RV32I-NEXT:  # %bb.108:
-; RV32I-NEXT:    li t1, 0
 ; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    li t1, 0
 ; RV32I-NEXT:    bnez t6, .LBB16_115
 ; RV32I-NEXT:    j .LBB16_116
 ; RV32I-NEXT:  .LBB16_109:
-; RV32I-NEXT:    lw a7, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    or a7, a7, a3
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    lw a3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    or a3, a3, a7
+; RV32I-NEXT:    li a7, 0
 ; RV32I-NEXT:    beqz t6, .LBB16_92
 ; RV32I-NEXT:  .LBB16_110:
-; RV32I-NEXT:    mv a3, s8
-; RV32I-NEXT:    bne a5, s0, .LBB16_93
+; RV32I-NEXT:    lw a7, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    bne a5, s3, .LBB16_93
 ; RV32I-NEXT:  .LBB16_111:
-; RV32I-NEXT:    or a7, s10, a3
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    lw a3, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    or a3, a3, a7
+; RV32I-NEXT:    li a7, 0
 ; RV32I-NEXT:    beqz t6, .LBB16_94
 ; RV32I-NEXT:  .LBB16_112:
-; RV32I-NEXT:    mv a3, s5
-; RV32I-NEXT:    li t0, 3
-; RV32I-NEXT:    bne a5, t0, .LBB16_95
+; RV32I-NEXT:    mv a7, s8
+; RV32I-NEXT:    bne a5, s4, .LBB16_95
 ; RV32I-NEXT:  .LBB16_113:
-; RV32I-NEXT:    or a7, s7, a3
-; RV32I-NEXT:    mv a3, t5
-; RV32I-NEXT:    bne a5, s4, .LBB16_96
+; RV32I-NEXT:    or a3, s7, a7
+; RV32I-NEXT:    mv t0, t4
+; RV32I-NEXT:    bne a5, s5, .LBB16_96
 ; RV32I-NEXT:    j .LBB16_97
 ; RV32I-NEXT:  .LBB16_114:
-; RV32I-NEXT:    or t1, a7, t1
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    or a3, a7, a3
+; RV32I-NEXT:    li t1, 0
 ; RV32I-NEXT:    beqz t6, .LBB16_116
 ; RV32I-NEXT:  .LBB16_115:
-; RV32I-NEXT:    lw a3, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    mv t1, s10
 ; RV32I-NEXT:  .LBB16_116:
-; RV32I-NEXT:    li t3, 1
-; RV32I-NEXT:    beq a5, t3, .LBB16_136
+; RV32I-NEXT:    beq a5, s1, .LBB16_137
 ; RV32I-NEXT:  # %bb.117:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t6, .LBB16_137
+; RV32I-NEXT:    li t1, 0
+; RV32I-NEXT:    bnez t6, .LBB16_138
 ; RV32I-NEXT:  .LBB16_118:
-; RV32I-NEXT:    beq a5, s0, .LBB16_138
+; RV32I-NEXT:    beq a5, s3, .LBB16_139
 ; RV32I-NEXT:  .LBB16_119:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t6, .LBB16_139
+; RV32I-NEXT:    li t1, 0
+; RV32I-NEXT:    bnez t6, .LBB16_140
 ; RV32I-NEXT:  .LBB16_120:
-; RV32I-NEXT:    li t3, 3
-; RV32I-NEXT:    beq a5, t3, .LBB16_140
+; RV32I-NEXT:    beq a5, s4, .LBB16_141
 ; RV32I-NEXT:  .LBB16_121:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t6, .LBB16_141
+; RV32I-NEXT:    li t1, 0
+; RV32I-NEXT:    beqz t6, .LBB16_123
 ; RV32I-NEXT:  .LBB16_122:
-; RV32I-NEXT:    bne a5, s4, .LBB16_124
+; RV32I-NEXT:    mv t1, s8
 ; RV32I-NEXT:  .LBB16_123:
-; RV32I-NEXT:    or t1, s7, a3
-; RV32I-NEXT:  .LBB16_124:
-; RV32I-NEXT:    sw t2, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lbu s0, 27(a0)
-; RV32I-NEXT:    mv t3, t5
-; RV32I-NEXT:    beq a5, s11, .LBB16_126
-; RV32I-NEXT:  # %bb.125:
-; RV32I-NEXT:    mv t3, t1
-; RV32I-NEXT:  .LBB16_126:
-; RV32I-NEXT:    mv t2, s10
+; RV32I-NEXT:    li s7, 1
+; RV32I-NEXT:    li s1, 3
+; RV32I-NEXT:    bne a5, s5, .LBB16_125
+; RV32I-NEXT:  # %bb.124:
+; RV32I-NEXT:    lw a3, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    or a3, a3, t1
+; RV32I-NEXT:  .LBB16_125:
+; RV32I-NEXT:    li s4, 2
+; RV32I-NEXT:    lbu t5, 27(a0)
+; RV32I-NEXT:    mv t1, t4
+; RV32I-NEXT:    li t3, 5
+; RV32I-NEXT:    beq a5, t3, .LBB16_127
+; RV32I-NEXT:  # %bb.126:
+; RV32I-NEXT:    mv t1, a3
+; RV32I-NEXT:  .LBB16_127:
+; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    lbu s3, 25(a0)
+; RV32I-NEXT:    lbu t3, 26(a0)
+; RV32I-NEXT:    slli t5, t5, 8
+; RV32I-NEXT:    beq a5, s0, .LBB16_129
+; RV32I-NEXT:  # %bb.128:
+; RV32I-NEXT:    mv a3, t1
+; RV32I-NEXT:  .LBB16_129:
 ; RV32I-NEXT:    li t1, 0
-; RV32I-NEXT:    lbu t4, 25(a0)
-; RV32I-NEXT:    lbu a3, 26(a0)
-; RV32I-NEXT:    slli s0, s0, 8
-; RV32I-NEXT:    beq a5, s1, .LBB16_128
-; RV32I-NEXT:  # %bb.127:
-; RV32I-NEXT:    mv t1, t3
-; RV32I-NEXT:  .LBB16_128:
-; RV32I-NEXT:    mv s10, s8
-; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    lbu s1, 24(a0)
-; RV32I-NEXT:    or s0, s0, a3
-; RV32I-NEXT:    slli a3, t4, 8
-; RV32I-NEXT:    beq a5, ra, .LBB16_130
-; RV32I-NEXT:  # %bb.129:
-; RV32I-NEXT:    mv t3, t1
-; RV32I-NEXT:  .LBB16_130:
-; RV32I-NEXT:    li s8, 4
-; RV32I-NEXT:    or a3, a3, s1
-; RV32I-NEXT:    slli s0, s0, 16
+; RV32I-NEXT:    lbu s0, 24(a0)
+; RV32I-NEXT:    or t5, t5, t3
+; RV32I-NEXT:    slli s3, s3, 8
+; RV32I-NEXT:    beq a5, ra, .LBB16_131
+; RV32I-NEXT:  # %bb.130:
+; RV32I-NEXT:    mv t1, a3
+; RV32I-NEXT:  .LBB16_131:
+; RV32I-NEXT:    or t3, s3, s0
+; RV32I-NEXT:    slli t5, t5, 16
 ; RV32I-NEXT:    mv ra, t0
-; RV32I-NEXT:    beqz a1, .LBB16_132
-; RV32I-NEXT:  # %bb.131:
-; RV32I-NEXT:    mv ra, t3
-; RV32I-NEXT:  .LBB16_132:
-; RV32I-NEXT:    li s4, 5
-; RV32I-NEXT:    li t4, 0
-; RV32I-NEXT:    or t3, s0, a3
-; RV32I-NEXT:    srl t0, t0, s3
-; RV32I-NEXT:    li s1, 6
-; RV32I-NEXT:    beqz t6, .LBB16_134
-; RV32I-NEXT:  # %bb.133:
-; RV32I-NEXT:    mv t4, t0
-; RV32I-NEXT:  .LBB16_134:
-; RV32I-NEXT:    mv s11, a4
-; RV32I-NEXT:    sll t1, t3, a1
-; RV32I-NEXT:    li s0, 2
-; RV32I-NEXT:    mv a4, s7
+; RV32I-NEXT:    beqz a1, .LBB16_133
+; RV32I-NEXT:  # %bb.132:
+; RV32I-NEXT:    mv ra, t1
+; RV32I-NEXT:  .LBB16_133:
+; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    or s3, t5, t3
+; RV32I-NEXT:    neg t1, t6
+; RV32I-NEXT:    srl t0, t0, t1
+; RV32I-NEXT:    li t5, 6
+; RV32I-NEXT:    beqz t6, .LBB16_135
+; RV32I-NEXT:  # %bb.134:
+; RV32I-NEXT:    mv a3, t0
+; RV32I-NEXT:  .LBB16_135:
+; RV32I-NEXT:    sll t1, s3, a1
 ; RV32I-NEXT:    beqz a5, .LBB16_142
-; RV32I-NEXT:  # %bb.135:
-; RV32I-NEXT:    li t4, 0
-; RV32I-NEXT:    j .LBB16_143
-; RV32I-NEXT:  .LBB16_136:
-; RV32I-NEXT:    lw t1, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    or t1, t1, a3
+; RV32I-NEXT:  # %bb.136:
 ; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t6, .LBB16_118
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    bnez t6, .LBB16_143
+; RV32I-NEXT:    j .LBB16_144
 ; RV32I-NEXT:  .LBB16_137:
-; RV32I-NEXT:    lw a3, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    bne a5, s0, .LBB16_119
+; RV32I-NEXT:    or a3, s11, t1
+; RV32I-NEXT:    li t1, 0
+; RV32I-NEXT:    beqz t6, .LBB16_118
 ; RV32I-NEXT:  .LBB16_138:
 ; RV32I-NEXT:    lw t1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    or t1, t1, a3
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t6, .LBB16_120
+; RV32I-NEXT:    bne a5, s3, .LBB16_119
 ; RV32I-NEXT:  .LBB16_139:
-; RV32I-NEXT:    mv a3, s8
-; RV32I-NEXT:    li t3, 3
-; RV32I-NEXT:    bne a5, t3, .LBB16_121
+; RV32I-NEXT:    lw a3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    or a3, a3, t1
+; RV32I-NEXT:    li t1, 0
+; RV32I-NEXT:    beqz t6, .LBB16_120
 ; RV32I-NEXT:  .LBB16_140:
-; RV32I-NEXT:    or t1, s10, a3
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t6, .LBB16_122
+; RV32I-NEXT:    lw t1, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    bne a5, s4, .LBB16_121
 ; RV32I-NEXT:  .LBB16_141:
-; RV32I-NEXT:    mv a3, s5
-; RV32I-NEXT:    beq a5, s4, .LBB16_123
-; RV32I-NEXT:    j .LBB16_124
+; RV32I-NEXT:    lw a3, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    or a3, a3, t1
+; RV32I-NEXT:    li t1, 0
+; RV32I-NEXT:    bnez t6, .LBB16_122
+; RV32I-NEXT:    j .LBB16_123
 ; RV32I-NEXT:  .LBB16_142:
-; RV32I-NEXT:    or t4, t1, t4
+; RV32I-NEXT:    or a3, t1, a3
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beqz t6, .LBB16_144
 ; RV32I-NEXT:  .LBB16_143:
-; RV32I-NEXT:    mv s7, s5
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t6, .LBB16_145
-; RV32I-NEXT:  # %bb.144:
-; RV32I-NEXT:    mv a3, a6
-; RV32I-NEXT:  .LBB16_145:
-; RV32I-NEXT:    li s5, 1
-; RV32I-NEXT:    bne a5, s5, .LBB16_147
-; RV32I-NEXT:  # %bb.146:
-; RV32I-NEXT:    or t4, a7, a3
+; RV32I-NEXT:    mv t3, a6
+; RV32I-NEXT:  .LBB16_144:
+; RV32I-NEXT:    beq a5, s7, .LBB16_164
+; RV32I-NEXT:  # %bb.145:
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    bnez t6, .LBB16_165
+; RV32I-NEXT:  .LBB16_146:
+; RV32I-NEXT:    beq a5, s4, .LBB16_166
 ; RV32I-NEXT:  .LBB16_147:
-; RV32I-NEXT:    mv s5, s7
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t6, .LBB16_166
-; RV32I-NEXT:  # %bb.148:
-; RV32I-NEXT:    beq a5, s0, .LBB16_167
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    bnez t6, .LBB16_167
+; RV32I-NEXT:  .LBB16_148:
+; RV32I-NEXT:    beq a5, s1, .LBB16_168
 ; RV32I-NEXT:  .LBB16_149:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t6, .LBB16_168
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    bnez t6, .LBB16_169
 ; RV32I-NEXT:  .LBB16_150:
-; RV32I-NEXT:    li s0, 3
-; RV32I-NEXT:    beq a5, s0, .LBB16_169
+; RV32I-NEXT:    beq a5, s5, .LBB16_170
 ; RV32I-NEXT:  .LBB16_151:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t6, .LBB16_170
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    bnez t6, .LBB16_171
 ; RV32I-NEXT:  .LBB16_152:
-; RV32I-NEXT:    beq a5, s8, .LBB16_171
+; RV32I-NEXT:    li s0, 5
+; RV32I-NEXT:    bne a5, s0, .LBB16_154
 ; RV32I-NEXT:  .LBB16_153:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t6, .LBB16_172
+; RV32I-NEXT:    lw a3, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    or a3, a3, t3
 ; RV32I-NEXT:  .LBB16_154:
-; RV32I-NEXT:    li s8, 2
-; RV32I-NEXT:    bne a5, s4, .LBB16_156
-; RV32I-NEXT:  .LBB16_155:
-; RV32I-NEXT:    or t4, a4, a3
+; RV32I-NEXT:    lbu s1, 31(a0)
+; RV32I-NEXT:    mv t3, t4
+; RV32I-NEXT:    beq a5, t5, .LBB16_156
+; RV32I-NEXT:  # %bb.155:
+; RV32I-NEXT:    mv t3, a3
 ; RV32I-NEXT:  .LBB16_156:
-; RV32I-NEXT:    lbu s0, 31(a0)
-; RV32I-NEXT:    mv a3, t5
-; RV32I-NEXT:    beq a5, s1, .LBB16_158
+; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    lbu s0, 29(a0)
+; RV32I-NEXT:    lbu t5, 30(a0)
+; RV32I-NEXT:    slli s1, s1, 8
+; RV32I-NEXT:    li s5, 7
+; RV32I-NEXT:    beq a5, s5, .LBB16_158
 ; RV32I-NEXT:  # %bb.157:
-; RV32I-NEXT:    mv a3, t4
+; RV32I-NEXT:    mv a3, t3
 ; RV32I-NEXT:  .LBB16_158:
-; RV32I-NEXT:    li t4, 0
-; RV32I-NEXT:    lbu s5, 29(a0)
-; RV32I-NEXT:    lbu s1, 30(a0)
+; RV32I-NEXT:    lbu t3, 28(a0)
 ; RV32I-NEXT:    slli s0, s0, 8
-; RV32I-NEXT:    li s4, 7
-; RV32I-NEXT:    beq a5, s4, .LBB16_160
+; RV32I-NEXT:    or t5, s1, t5
+; RV32I-NEXT:    mv a0, s3
+; RV32I-NEXT:    beqz a1, .LBB16_160
 ; RV32I-NEXT:  # %bb.159:
-; RV32I-NEXT:    mv t4, a3
+; RV32I-NEXT:    mv a0, a3
 ; RV32I-NEXT:  .LBB16_160:
-; RV32I-NEXT:    lbu a3, 28(a0)
-; RV32I-NEXT:    slli s5, s5, 8
-; RV32I-NEXT:    or s0, s0, s1
-; RV32I-NEXT:    mv a0, t3
-; RV32I-NEXT:    beqz a1, .LBB16_162
+; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    or t3, s0, t3
+; RV32I-NEXT:    slli t5, t5, 16
+; RV32I-NEXT:    li s0, 5
+; RV32I-NEXT:    li s1, 4
+; RV32I-NEXT:    beqz t6, .LBB16_162
 ; RV32I-NEXT:  # %bb.161:
-; RV32I-NEXT:    mv a0, t4
+; RV32I-NEXT:    neg a3, t6
+; RV32I-NEXT:    srl a3, s3, a3
 ; RV32I-NEXT:  .LBB16_162:
-; RV32I-NEXT:    li t4, 0
-; RV32I-NEXT:    or a3, s5, a3
-; RV32I-NEXT:    slli s0, s0, 16
-; RV32I-NEXT:    li s1, 6
-; RV32I-NEXT:    li s4, 4
-; RV32I-NEXT:    beqz t6, .LBB16_164
+; RV32I-NEXT:    or s3, t5, t3
+; RV32I-NEXT:    beqz a5, .LBB16_172
 ; RV32I-NEXT:  # %bb.163:
-; RV32I-NEXT:    srl t4, t3, s3
+; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    bnez t6, .LBB16_173
+; RV32I-NEXT:    j .LBB16_174
 ; RV32I-NEXT:  .LBB16_164:
-; RV32I-NEXT:    or s3, s0, a3
-; RV32I-NEXT:    li s0, 5
-; RV32I-NEXT:    beqz a5, .LBB16_173
-; RV32I-NEXT:  # %bb.165:
+; RV32I-NEXT:    or a3, a7, t3
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t6, .LBB16_174
-; RV32I-NEXT:    j .LBB16_175
+; RV32I-NEXT:    beqz t6, .LBB16_146
+; RV32I-NEXT:  .LBB16_165:
+; RV32I-NEXT:    mv t3, s10
+; RV32I-NEXT:    bne a5, s4, .LBB16_147
 ; RV32I-NEXT:  .LBB16_166:
-; RV32I-NEXT:    lw a3, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    bne a5, s0, .LBB16_149
+; RV32I-NEXT:    or a3, s11, t3
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beqz t6, .LBB16_148
 ; RV32I-NEXT:  .LBB16_167:
-; RV32I-NEXT:    lw t4, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    or t4, t4, a3
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t6, .LBB16_150
+; RV32I-NEXT:    lw t3, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    bne a5, s1, .LBB16_149
 ; RV32I-NEXT:  .LBB16_168:
-; RV32I-NEXT:    lw a3, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    li s0, 3
-; RV32I-NEXT:    bne a5, s0, .LBB16_151
+; RV32I-NEXT:    lw a3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    or a3, a3, t3
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beqz t6, .LBB16_150
 ; RV32I-NEXT:  .LBB16_169:
-; RV32I-NEXT:    lw t4, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    or t4, t4, a3
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t6, .LBB16_152
+; RV32I-NEXT:    lw t3, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    bne a5, s5, .LBB16_151
 ; RV32I-NEXT:  .LBB16_170:
-; RV32I-NEXT:    mv a3, s10
-; RV32I-NEXT:    bne a5, s8, .LBB16_153
+; RV32I-NEXT:    lw a3, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    or a3, a3, t3
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beqz t6, .LBB16_152
 ; RV32I-NEXT:  .LBB16_171:
-; RV32I-NEXT:    or t4, t2, a3
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t6, .LBB16_154
+; RV32I-NEXT:    mv t3, s8
+; RV32I-NEXT:    li s0, 5
+; RV32I-NEXT:    beq a5, s0, .LBB16_153
+; RV32I-NEXT:    j .LBB16_154
 ; RV32I-NEXT:  .LBB16_172:
-; RV32I-NEXT:    mv a3, s5
-; RV32I-NEXT:    li s8, 2
-; RV32I-NEXT:    beq a5, s4, .LBB16_155
-; RV32I-NEXT:    j .LBB16_156
+; RV32I-NEXT:    sll t3, s3, a1
+; RV32I-NEXT:    or a3, t3, a3
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    beqz t6, .LBB16_174
 ; RV32I-NEXT:  .LBB16_173:
-; RV32I-NEXT:    sll a3, s3, a1
-; RV32I-NEXT:    or t3, a3, t4
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t6, .LBB16_175
+; RV32I-NEXT:    mv t3, t0
 ; RV32I-NEXT:  .LBB16_174:
-; RV32I-NEXT:    mv a3, t0
-; RV32I-NEXT:  .LBB16_175:
-; RV32I-NEXT:    li t0, 1
-; RV32I-NEXT:    beq a5, t0, .LBB16_195
-; RV32I-NEXT:  # %bb.176:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t6, .LBB16_196
+; RV32I-NEXT:    beq a5, s7, .LBB16_191
+; RV32I-NEXT:  # %bb.175:
+; RV32I-NEXT:    li t0, 0
+; RV32I-NEXT:    bnez t6, .LBB16_192
+; RV32I-NEXT:  .LBB16_176:
+; RV32I-NEXT:    bne a5, s4, .LBB16_178
 ; RV32I-NEXT:  .LBB16_177:
-; RV32I-NEXT:    bne a5, s8, .LBB16_179
+; RV32I-NEXT:    or a3, a7, t0
 ; RV32I-NEXT:  .LBB16_178:
-; RV32I-NEXT:    or t3, a7, a3
-; RV32I-NEXT:  .LBB16_179:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    li a6, 3
-; RV32I-NEXT:    bnez t6, .LBB16_197
-; RV32I-NEXT:  # %bb.180:
-; RV32I-NEXT:    beq a5, a6, .LBB16_198
+; RV32I-NEXT:    li a6, 0
+; RV32I-NEXT:    li a7, 3
+; RV32I-NEXT:    bnez t6, .LBB16_193
+; RV32I-NEXT:  # %bb.179:
+; RV32I-NEXT:    beq a5, a7, .LBB16_194
+; RV32I-NEXT:  .LBB16_180:
+; RV32I-NEXT:    li a6, 0
+; RV32I-NEXT:    bnez t6, .LBB16_195
 ; RV32I-NEXT:  .LBB16_181:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t6, .LBB16_199
+; RV32I-NEXT:    beq a5, s1, .LBB16_196
 ; RV32I-NEXT:  .LBB16_182:
-; RV32I-NEXT:    beq a5, s4, .LBB16_200
+; RV32I-NEXT:    li a6, 0
+; RV32I-NEXT:    bnez t6, .LBB16_197
 ; RV32I-NEXT:  .LBB16_183:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t6, .LBB16_201
+; RV32I-NEXT:    bne a5, s0, .LBB16_185
 ; RV32I-NEXT:  .LBB16_184:
-; RV32I-NEXT:    bne a5, s0, .LBB16_186
+; RV32I-NEXT:    lw a3, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    or a3, a3, a6
 ; RV32I-NEXT:  .LBB16_185:
-; RV32I-NEXT:    or t3, t2, a3
-; RV32I-NEXT:  .LBB16_186:
-; RV32I-NEXT:    lw t2, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t6, .LBB16_188
-; RV32I-NEXT:  # %bb.187:
-; RV32I-NEXT:    mv a3, s7
+; RV32I-NEXT:    li a6, 0
+; RV32I-NEXT:    li a7, 6
+; RV32I-NEXT:    bnez t6, .LBB16_198
+; RV32I-NEXT:  # %bb.186:
+; RV32I-NEXT:    beq a5, a7, .LBB16_199
+; RV32I-NEXT:  .LBB16_187:
+; RV32I-NEXT:    li a6, 7
+; RV32I-NEXT:    bne a5, a6, .LBB16_200
 ; RV32I-NEXT:  .LBB16_188:
-; RV32I-NEXT:    bne a5, s1, .LBB16_190
-; RV32I-NEXT:  # %bb.189:
-; RV32I-NEXT:    or t3, a4, a3
+; RV32I-NEXT:    beqz a1, .LBB16_190
+; RV32I-NEXT:  .LBB16_189:
+; RV32I-NEXT:    mv s3, t4
 ; RV32I-NEXT:  .LBB16_190:
-; RV32I-NEXT:    mv a4, s11
-; RV32I-NEXT:    li a3, 7
-; RV32I-NEXT:    beq a5, a3, .LBB16_192
-; RV32I-NEXT:  # %bb.191:
-; RV32I-NEXT:    mv t5, t3
-; RV32I-NEXT:  .LBB16_192:
-; RV32I-NEXT:    beqz a1, .LBB16_194
-; RV32I-NEXT:  # %bb.193:
-; RV32I-NEXT:    mv s3, t5
-; RV32I-NEXT:  .LBB16_194:
 ; RV32I-NEXT:    srli a1, a4, 16
 ; RV32I-NEXT:    lui a7, 16
 ; RV32I-NEXT:    srli a6, a4, 24
@@ -8334,34 +8377,45 @@ define void @shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw
 ; RV32I-NEXT:    lw s11, 28(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 80
 ; RV32I-NEXT:    ret
+; RV32I-NEXT:  .LBB16_191:
+; RV32I-NEXT:    or a3, t1, t3
+; RV32I-NEXT:    li t0, 0
+; RV32I-NEXT:    beqz t6, .LBB16_176
+; RV32I-NEXT:  .LBB16_192:
+; RV32I-NEXT:    mv t0, a6
+; RV32I-NEXT:    beq a5, s4, .LBB16_177
+; RV32I-NEXT:    j .LBB16_178
+; RV32I-NEXT:  .LBB16_193:
+; RV32I-NEXT:    mv a6, s10
+; RV32I-NEXT:    bne a5, a7, .LBB16_180
+; RV32I-NEXT:  .LBB16_194:
+; RV32I-NEXT:    or a3, s11, a6
+; RV32I-NEXT:    li a6, 0
+; RV32I-NEXT:    beqz t6, .LBB16_181
 ; RV32I-NEXT:  .LBB16_195:
-; RV32I-NEXT:    or t3, t1, a3
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t6, .LBB16_177
+; RV32I-NEXT:    lw a6, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    bne a5, s1, .LBB16_182
 ; RV32I-NEXT:  .LBB16_196:
-; RV32I-NEXT:    mv a3, a6
-; RV32I-NEXT:    beq a5, s8, .LBB16_178
-; RV32I-NEXT:    j .LBB16_179
+; RV32I-NEXT:    lw a3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    or a3, a3, a6
+; RV32I-NEXT:    li a6, 0
+; RV32I-NEXT:    beqz t6, .LBB16_183
 ; RV32I-NEXT:  .LBB16_197:
-; RV32I-NEXT:    lw a3, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    bne a5, a6, .LBB16_181
+; RV32I-NEXT:    lw a6, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    beq a5, s0, .LBB16_184
+; RV32I-NEXT:    j .LBB16_185
 ; RV32I-NEXT:  .LBB16_198:
-; RV32I-NEXT:    lw a6, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    or t3, a6, a3
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t6, .LBB16_182
+; RV32I-NEXT:    mv a6, s8
+; RV32I-NEXT:    bne a5, a7, .LBB16_187
 ; RV32I-NEXT:  .LBB16_199:
-; RV32I-NEXT:    lw a3, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    bne a5, s4, .LBB16_183
+; RV32I-NEXT:    lw a3, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    or a3, a3, a6
+; RV32I-NEXT:    li a6, 7
+; RV32I-NEXT:    beq a5, a6, .LBB16_188
 ; RV32I-NEXT:  .LBB16_200:
-; RV32I-NEXT:    lw a6, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    or t3, a6, a3
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t6, .LBB16_184
-; RV32I-NEXT:  .LBB16_201:
-; RV32I-NEXT:    mv a3, s10
-; RV32I-NEXT:    beq a5, s0, .LBB16_185
-; RV32I-NEXT:    j .LBB16_186
+; RV32I-NEXT:    mv t4, a3
+; RV32I-NEXT:    bnez a1, .LBB16_189
+; RV32I-NEXT:    j .LBB16_190
   %src = load i256, ptr %src.ptr, align 1
   %wordOff = load i256, ptr %wordOff.ptr, align 1
   %bitOff = shl i256 %wordOff, 5
@@ -8709,653 +8763,658 @@ define void @shl_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nou
 ; RV32I-NEXT:    or a1, a1, t0
 ; RV32I-NEXT:    slli a5, a5, 16
 ; RV32I-NEXT:    slli a1, a1, 16
-; RV32I-NEXT:    or a6, a5, a3
+; RV32I-NEXT:    or a3, a5, a3
 ; RV32I-NEXT:    or a1, a1, a4
 ; RV32I-NEXT:    slli a1, a1, 6
 ; RV32I-NEXT:    srli a5, a1, 5
-; RV32I-NEXT:    sll t5, a6, a1
-; RV32I-NEXT:    li s5, 1
-; RV32I-NEXT:    mv a4, t5
+; RV32I-NEXT:    sll t4, a3, a1
+; RV32I-NEXT:    li s1, 1
+; RV32I-NEXT:    mv a6, t4
 ; RV32I-NEXT:    beqz a5, .LBB17_2
 ; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    li a4, 0
+; RV32I-NEXT:    li a6, 0
 ; RV32I-NEXT:  .LBB17_2:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    li s0, 2
-; RV32I-NEXT:    beq a5, s5, .LBB17_4
+; RV32I-NEXT:    li a4, 0
+; RV32I-NEXT:    li s3, 2
+; RV32I-NEXT:    beq a5, s1, .LBB17_4
 ; RV32I-NEXT:  # %bb.3:
-; RV32I-NEXT:    mv a3, a4
+; RV32I-NEXT:    mv a4, a6
 ; RV32I-NEXT:  .LBB17_4:
-; RV32I-NEXT:    li a7, 0
-; RV32I-NEXT:    beq a5, s0, .LBB17_6
+; RV32I-NEXT:    li a6, 0
+; RV32I-NEXT:    beq a5, s3, .LBB17_6
 ; RV32I-NEXT:  # %bb.5:
-; RV32I-NEXT:    mv a7, a3
+; RV32I-NEXT:    mv a6, a4
 ; RV32I-NEXT:  .LBB17_6:
 ; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    li s8, 3
-; RV32I-NEXT:    li s10, 4
-; RV32I-NEXT:    beq a5, s8, .LBB17_8
+; RV32I-NEXT:    li s4, 3
+; RV32I-NEXT:    li s5, 4
+; RV32I-NEXT:    beq a5, s4, .LBB17_8
 ; RV32I-NEXT:  # %bb.7:
-; RV32I-NEXT:    mv a4, a7
+; RV32I-NEXT:    mv a4, a6
 ; RV32I-NEXT:  .LBB17_8:
-; RV32I-NEXT:    li t0, 0
-; RV32I-NEXT:    li s1, 5
-; RV32I-NEXT:    beq a5, s10, .LBB17_10
+; RV32I-NEXT:    li a7, 0
+; RV32I-NEXT:    li s9, 5
+; RV32I-NEXT:    beq a5, s5, .LBB17_10
 ; RV32I-NEXT:  # %bb.9:
-; RV32I-NEXT:    mv t0, a4
+; RV32I-NEXT:    mv a7, a4
 ; RV32I-NEXT:  .LBB17_10:
-; RV32I-NEXT:    li a7, 0
+; RV32I-NEXT:    li a6, 0
 ; RV32I-NEXT:    lbu t1, 7(a0)
-; RV32I-NEXT:    li s6, 6
-; RV32I-NEXT:    beq a5, s1, .LBB17_12
+; RV32I-NEXT:    li s0, 6
+; RV32I-NEXT:    beq a5, s9, .LBB17_12
 ; RV32I-NEXT:  # %bb.11:
-; RV32I-NEXT:    mv a7, t0
+; RV32I-NEXT:    mv a6, a7
 ; RV32I-NEXT:  .LBB17_12:
 ; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    lbu t0, 5(a0)
-; RV32I-NEXT:    lbu a3, 6(a0)
+; RV32I-NEXT:    lbu a7, 5(a0)
+; RV32I-NEXT:    lbu t0, 6(a0)
 ; RV32I-NEXT:    slli t2, t1, 8
-; RV32I-NEXT:    beq a5, s6, .LBB17_14
+; RV32I-NEXT:    beq a5, s0, .LBB17_14
 ; RV32I-NEXT:  # %bb.13:
-; RV32I-NEXT:    mv a4, a7
+; RV32I-NEXT:    mv a4, a6
 ; RV32I-NEXT:  .LBB17_14:
-; RV32I-NEXT:    li a7, 0
+; RV32I-NEXT:    li a6, 0
 ; RV32I-NEXT:    lbu t1, 4(a0)
-; RV32I-NEXT:    or t2, t2, a3
+; RV32I-NEXT:    or t2, t2, t0
 ; RV32I-NEXT:    li ra, 7
-; RV32I-NEXT:    slli t0, t0, 8
+; RV32I-NEXT:    slli a7, a7, 8
 ; RV32I-NEXT:    beq a5, ra, .LBB17_16
 ; RV32I-NEXT:  # %bb.15:
-; RV32I-NEXT:    mv a7, a4
+; RV32I-NEXT:    mv a6, a4
 ; RV32I-NEXT:  .LBB17_16:
-; RV32I-NEXT:    or a3, t0, t1
+; RV32I-NEXT:    or t0, a7, t1
 ; RV32I-NEXT:    slli t2, t2, 16
 ; RV32I-NEXT:    andi t6, a1, 31
-; RV32I-NEXT:    mv a4, a6
+; RV32I-NEXT:    mv a4, a3
 ; RV32I-NEXT:    beqz a1, .LBB17_18
 ; RV32I-NEXT:  # %bb.17:
-; RV32I-NEXT:    mv a4, a7
+; RV32I-NEXT:    mv a4, a6
 ; RV32I-NEXT:  .LBB17_18:
-; RV32I-NEXT:    li t0, 0
-; RV32I-NEXT:    or a7, t2, a3
-; RV32I-NEXT:    neg s3, t6
-; RV32I-NEXT:    srl s4, a6, s3
+; RV32I-NEXT:    li a7, 0
+; RV32I-NEXT:    or a6, t2, t0
+; RV32I-NEXT:    neg s10, t6
+; RV32I-NEXT:    srl s8, a3, s10
 ; RV32I-NEXT:    beqz t6, .LBB17_20
 ; RV32I-NEXT:  # %bb.19:
-; RV32I-NEXT:    mv t0, s4
+; RV32I-NEXT:    mv a7, s8
 ; RV32I-NEXT:  .LBB17_20:
-; RV32I-NEXT:    sll s9, a7, a1
+; RV32I-NEXT:    sll s7, a6, a1
 ; RV32I-NEXT:    beqz a5, .LBB17_22
 ; RV32I-NEXT:  # %bb.21:
 ; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    mv a6, t5
-; RV32I-NEXT:    bne a5, s5, .LBB17_23
+; RV32I-NEXT:    mv a7, t4
+; RV32I-NEXT:    bne a5, s1, .LBB17_23
 ; RV32I-NEXT:    j .LBB17_24
 ; RV32I-NEXT:  .LBB17_22:
-; RV32I-NEXT:    or a3, s9, t0
-; RV32I-NEXT:    mv a6, t5
-; RV32I-NEXT:    beq a5, s5, .LBB17_24
+; RV32I-NEXT:    or a3, s7, a7
+; RV32I-NEXT:    mv a7, t4
+; RV32I-NEXT:    beq a5, s1, .LBB17_24
 ; RV32I-NEXT:  .LBB17_23:
-; RV32I-NEXT:    mv a6, a3
+; RV32I-NEXT:    mv a7, a3
 ; RV32I-NEXT:  .LBB17_24:
 ; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bne a5, s0, .LBB17_40
+; RV32I-NEXT:    bne a5, s3, .LBB17_40
 ; RV32I-NEXT:  # %bb.25:
-; RV32I-NEXT:    li a6, 0
-; RV32I-NEXT:    bne a5, s8, .LBB17_41
+; RV32I-NEXT:    li a7, 0
+; RV32I-NEXT:    bne a5, s4, .LBB17_41
 ; RV32I-NEXT:  .LBB17_26:
 ; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beq a5, s10, .LBB17_28
+; RV32I-NEXT:    beq a5, s5, .LBB17_28
 ; RV32I-NEXT:  .LBB17_27:
-; RV32I-NEXT:    mv a3, a6
+; RV32I-NEXT:    mv a3, a7
 ; RV32I-NEXT:  .LBB17_28:
 ; RV32I-NEXT:    lbu t2, 11(a0)
-; RV32I-NEXT:    li t0, 0
-; RV32I-NEXT:    beq a5, s1, .LBB17_30
+; RV32I-NEXT:    li a7, 0
+; RV32I-NEXT:    beq a5, s9, .LBB17_30
 ; RV32I-NEXT:  # %bb.29:
-; RV32I-NEXT:    mv t0, a3
+; RV32I-NEXT:    mv a7, a3
 ; RV32I-NEXT:  .LBB17_30:
-; RV32I-NEXT:    li a6, 0
-; RV32I-NEXT:    lbu t1, 9(a0)
-; RV32I-NEXT:    lbu a3, 10(a0)
+; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    lbu t0, 9(a0)
+; RV32I-NEXT:    lbu t1, 10(a0)
 ; RV32I-NEXT:    slli t2, t2, 8
-; RV32I-NEXT:    beq a5, s6, .LBB17_32
+; RV32I-NEXT:    beq a5, s0, .LBB17_32
 ; RV32I-NEXT:  # %bb.31:
-; RV32I-NEXT:    mv a6, t0
+; RV32I-NEXT:    mv a3, a7
 ; RV32I-NEXT:  .LBB17_32:
-; RV32I-NEXT:    li t0, 0
+; RV32I-NEXT:    li a7, 0
 ; RV32I-NEXT:    lbu t3, 8(a0)
-; RV32I-NEXT:    or t2, t2, a3
-; RV32I-NEXT:    slli t1, t1, 8
+; RV32I-NEXT:    or t1, t2, t1
+; RV32I-NEXT:    slli t0, t0, 8
 ; RV32I-NEXT:    beq a5, ra, .LBB17_34
 ; RV32I-NEXT:  # %bb.33:
-; RV32I-NEXT:    mv t0, a6
+; RV32I-NEXT:    mv a7, a3
 ; RV32I-NEXT:  .LBB17_34:
-; RV32I-NEXT:    or a3, t1, t3
-; RV32I-NEXT:    slli a6, t2, 16
-; RV32I-NEXT:    mv t2, a7
+; RV32I-NEXT:    or a3, t0, t3
+; RV32I-NEXT:    slli t1, t1, 16
+; RV32I-NEXT:    mv t2, a6
 ; RV32I-NEXT:    beqz a1, .LBB17_36
 ; RV32I-NEXT:  # %bb.35:
-; RV32I-NEXT:    mv t2, t0
+; RV32I-NEXT:    mv t2, a7
 ; RV32I-NEXT:  .LBB17_36:
-; RV32I-NEXT:    li t0, 0
-; RV32I-NEXT:    or a6, a6, a3
-; RV32I-NEXT:    srl a3, a7, s3
-; RV32I-NEXT:    sw a3, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    li a7, 0
+; RV32I-NEXT:    or a3, t1, a3
+; RV32I-NEXT:    srl a6, a6, s10
+; RV32I-NEXT:    sw a6, 24(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    beqz t6, .LBB17_38
 ; RV32I-NEXT:  # %bb.37:
-; RV32I-NEXT:    lw t0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw a7, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:  .LBB17_38:
-; RV32I-NEXT:    sll s7, a6, a1
+; RV32I-NEXT:    sll s6, a3, a1
 ; RV32I-NEXT:    beqz a5, .LBB17_42
 ; RV32I-NEXT:  # %bb.39:
+; RV32I-NEXT:    li a6, 0
 ; RV32I-NEXT:    li a7, 0
-; RV32I-NEXT:    li a3, 0
 ; RV32I-NEXT:    bnez t6, .LBB17_43
 ; RV32I-NEXT:    j .LBB17_44
 ; RV32I-NEXT:  .LBB17_40:
-; RV32I-NEXT:    mv a3, a6
-; RV32I-NEXT:    li a6, 0
-; RV32I-NEXT:    beq a5, s8, .LBB17_26
+; RV32I-NEXT:    mv a3, a7
+; RV32I-NEXT:    li a7, 0
+; RV32I-NEXT:    beq a5, s4, .LBB17_26
 ; RV32I-NEXT:  .LBB17_41:
-; RV32I-NEXT:    mv a6, a3
+; RV32I-NEXT:    mv a7, a3
 ; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bne a5, s10, .LBB17_27
+; RV32I-NEXT:    bne a5, s5, .LBB17_27
 ; RV32I-NEXT:    j .LBB17_28
 ; RV32I-NEXT:  .LBB17_42:
-; RV32I-NEXT:    or a7, s7, t0
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    or a6, s6, a7
+; RV32I-NEXT:    li a7, 0
 ; RV32I-NEXT:    beqz t6, .LBB17_44
 ; RV32I-NEXT:  .LBB17_43:
-; RV32I-NEXT:    mv a3, s4
+; RV32I-NEXT:    mv a7, s8
 ; RV32I-NEXT:  .LBB17_44:
-; RV32I-NEXT:    beq a5, s5, .LBB17_61
+; RV32I-NEXT:    beq a5, s1, .LBB17_61
 ; RV32I-NEXT:  # %bb.45:
-; RV32I-NEXT:    mv a3, t5
-; RV32I-NEXT:    bne a5, s0, .LBB17_62
+; RV32I-NEXT:    mv a7, t4
+; RV32I-NEXT:    bne a5, s3, .LBB17_62
 ; RV32I-NEXT:  .LBB17_46:
-; RV32I-NEXT:    li a7, 0
-; RV32I-NEXT:    bne a5, s8, .LBB17_63
+; RV32I-NEXT:    li t0, 0
+; RV32I-NEXT:    bne a5, s4, .LBB17_63
 ; RV32I-NEXT:  .LBB17_47:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beq a5, s10, .LBB17_49
+; RV32I-NEXT:    li a6, 0
+; RV32I-NEXT:    beq a5, s5, .LBB17_49
 ; RV32I-NEXT:  .LBB17_48:
-; RV32I-NEXT:    mv a3, a7
+; RV32I-NEXT:    mv a6, t0
 ; RV32I-NEXT:  .LBB17_49:
 ; RV32I-NEXT:    lbu t3, 15(a0)
-; RV32I-NEXT:    li t0, 0
-; RV32I-NEXT:    beq a5, s1, .LBB17_51
+; RV32I-NEXT:    li a7, 0
+; RV32I-NEXT:    beq a5, s9, .LBB17_51
 ; RV32I-NEXT:  # %bb.50:
-; RV32I-NEXT:    mv t0, a3
+; RV32I-NEXT:    mv a7, a6
 ; RV32I-NEXT:  .LBB17_51:
-; RV32I-NEXT:    li a7, 0
-; RV32I-NEXT:    lbu t1, 13(a0)
-; RV32I-NEXT:    lbu a3, 14(a0)
+; RV32I-NEXT:    li a6, 0
+; RV32I-NEXT:    lbu t0, 13(a0)
+; RV32I-NEXT:    lbu t1, 14(a0)
 ; RV32I-NEXT:    slli t3, t3, 8
-; RV32I-NEXT:    beq a5, s6, .LBB17_53
+; RV32I-NEXT:    beq a5, s0, .LBB17_53
 ; RV32I-NEXT:  # %bb.52:
-; RV32I-NEXT:    mv a7, t0
+; RV32I-NEXT:    mv a6, a7
 ; RV32I-NEXT:  .LBB17_53:
-; RV32I-NEXT:    li t0, 0
-; RV32I-NEXT:    lbu t4, 12(a0)
-; RV32I-NEXT:    or t3, t3, a3
-; RV32I-NEXT:    slli t1, t1, 8
+; RV32I-NEXT:    li a7, 0
+; RV32I-NEXT:    lbu t5, 12(a0)
+; RV32I-NEXT:    or t1, t3, t1
+; RV32I-NEXT:    slli t0, t0, 8
 ; RV32I-NEXT:    beq a5, ra, .LBB17_55
 ; RV32I-NEXT:  # %bb.54:
-; RV32I-NEXT:    mv t0, a7
+; RV32I-NEXT:    mv a7, a6
 ; RV32I-NEXT:  .LBB17_55:
-; RV32I-NEXT:    or a3, t1, t4
-; RV32I-NEXT:    slli t3, t3, 16
-; RV32I-NEXT:    mv s2, a6
+; RV32I-NEXT:    or t0, t0, t5
+; RV32I-NEXT:    slli t1, t1, 16
+; RV32I-NEXT:    mv s2, a3
 ; RV32I-NEXT:    beqz a1, .LBB17_57
 ; RV32I-NEXT:  # %bb.56:
-; RV32I-NEXT:    mv s2, t0
+; RV32I-NEXT:    mv s2, a7
 ; RV32I-NEXT:  .LBB17_57:
-; RV32I-NEXT:    li t0, 0
-; RV32I-NEXT:    or a7, t3, a3
-; RV32I-NEXT:    srl a3, a6, s3
+; RV32I-NEXT:    li a6, 0
+; RV32I-NEXT:    or a7, t1, t0
+; RV32I-NEXT:    srl a3, a3, s10
 ; RV32I-NEXT:    sw a3, 20(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    beqz t6, .LBB17_59
 ; RV32I-NEXT:  # %bb.58:
-; RV32I-NEXT:    lw t0, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw a6, 20(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:  .LBB17_59:
 ; RV32I-NEXT:    sll a3, a7, a1
-; RV32I-NEXT:    sw a3, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw a3, 12(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    beqz a5, .LBB17_64
 ; RV32I-NEXT:  # %bb.60:
-; RV32I-NEXT:    li a6, 0
 ; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    li a6, 0
 ; RV32I-NEXT:    bnez t6, .LBB17_65
 ; RV32I-NEXT:    j .LBB17_66
 ; RV32I-NEXT:  .LBB17_61:
-; RV32I-NEXT:    or a7, s9, a3
-; RV32I-NEXT:    mv a3, t5
-; RV32I-NEXT:    beq a5, s0, .LBB17_46
+; RV32I-NEXT:    or a6, s7, a7
+; RV32I-NEXT:    mv a7, t4
+; RV32I-NEXT:    beq a5, s3, .LBB17_46
 ; RV32I-NEXT:  .LBB17_62:
-; RV32I-NEXT:    mv a3, a7
-; RV32I-NEXT:    li a7, 0
-; RV32I-NEXT:    beq a5, s8, .LBB17_47
+; RV32I-NEXT:    mv a7, a6
+; RV32I-NEXT:    li t0, 0
+; RV32I-NEXT:    beq a5, s4, .LBB17_47
 ; RV32I-NEXT:  .LBB17_63:
-; RV32I-NEXT:    mv a7, a3
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bne a5, s10, .LBB17_48
+; RV32I-NEXT:    mv t0, a7
+; RV32I-NEXT:    li a6, 0
+; RV32I-NEXT:    bne a5, s5, .LBB17_48
 ; RV32I-NEXT:    j .LBB17_49
 ; RV32I-NEXT:  .LBB17_64:
-; RV32I-NEXT:    or a6, a3, t0
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    or a3, a3, a6
+; RV32I-NEXT:    li a6, 0
 ; RV32I-NEXT:    beqz t6, .LBB17_66
 ; RV32I-NEXT:  .LBB17_65:
-; RV32I-NEXT:    lw a3, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw a6, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:  .LBB17_66:
-; RV32I-NEXT:    beq a5, s5, .LBB17_84
+; RV32I-NEXT:    beq a5, s1, .LBB17_84
 ; RV32I-NEXT:  # %bb.67:
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    li a6, 0
 ; RV32I-NEXT:    bnez t6, .LBB17_85
 ; RV32I-NEXT:  .LBB17_68:
-; RV32I-NEXT:    beq a5, s0, .LBB17_86
+; RV32I-NEXT:    beq a5, s3, .LBB17_86
 ; RV32I-NEXT:  .LBB17_69:
-; RV32I-NEXT:    mv t0, t5
-; RV32I-NEXT:    bne a5, s8, .LBB17_87
+; RV32I-NEXT:    mv a6, t4
+; RV32I-NEXT:    bne a5, s4, .LBB17_87
 ; RV32I-NEXT:  .LBB17_70:
 ; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beq a5, s10, .LBB17_72
+; RV32I-NEXT:    beq a5, s5, .LBB17_72
 ; RV32I-NEXT:  .LBB17_71:
-; RV32I-NEXT:    mv a3, t0
+; RV32I-NEXT:    mv a3, a6
 ; RV32I-NEXT:  .LBB17_72:
 ; RV32I-NEXT:    lbu t3, 19(a0)
-; RV32I-NEXT:    li t0, 0
-; RV32I-NEXT:    beq a5, s1, .LBB17_74
+; RV32I-NEXT:    li a6, 0
+; RV32I-NEXT:    beq a5, s9, .LBB17_74
 ; RV32I-NEXT:  # %bb.73:
-; RV32I-NEXT:    mv t0, a3
+; RV32I-NEXT:    mv a6, a3
 ; RV32I-NEXT:  .LBB17_74:
-; RV32I-NEXT:    li a6, 0
-; RV32I-NEXT:    lbu t1, 17(a0)
-; RV32I-NEXT:    lbu a3, 18(a0)
+; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    lbu t0, 17(a0)
+; RV32I-NEXT:    lbu t1, 18(a0)
 ; RV32I-NEXT:    slli t3, t3, 8
-; RV32I-NEXT:    beq a5, s6, .LBB17_76
+; RV32I-NEXT:    beq a5, s0, .LBB17_76
 ; RV32I-NEXT:  # %bb.75:
-; RV32I-NEXT:    mv a6, t0
+; RV32I-NEXT:    mv a3, a6
 ; RV32I-NEXT:  .LBB17_76:
-; RV32I-NEXT:    li t0, 0
-; RV32I-NEXT:    lbu t4, 16(a0)
-; RV32I-NEXT:    or t3, t3, a3
-; RV32I-NEXT:    slli t1, t1, 8
+; RV32I-NEXT:    sw s6, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    li a6, 0
+; RV32I-NEXT:    lbu t5, 16(a0)
+; RV32I-NEXT:    or t1, t3, t1
+; RV32I-NEXT:    slli t0, t0, 8
 ; RV32I-NEXT:    beq a5, ra, .LBB17_78
 ; RV32I-NEXT:  # %bb.77:
-; RV32I-NEXT:    mv t0, a6
+; RV32I-NEXT:    mv a6, a3
 ; RV32I-NEXT:  .LBB17_78:
-; RV32I-NEXT:    or a3, t1, t4
-; RV32I-NEXT:    slli t3, t3, 16
+; RV32I-NEXT:    or t0, t0, t5
+; RV32I-NEXT:    slli t1, t1, 16
 ; RV32I-NEXT:    mv s6, a7
 ; RV32I-NEXT:    beqz a1, .LBB17_80
 ; RV32I-NEXT:  # %bb.79:
-; RV32I-NEXT:    mv s6, t0
+; RV32I-NEXT:    mv s6, a6
 ; RV32I-NEXT:  .LBB17_80:
-; RV32I-NEXT:    li t0, 0
-; RV32I-NEXT:    or a6, t3, a3
-; RV32I-NEXT:    srl s10, a7, s3
+; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    or a6, t1, t0
+; RV32I-NEXT:    srl s10, a7, s10
 ; RV32I-NEXT:    beqz t6, .LBB17_82
 ; RV32I-NEXT:  # %bb.81:
-; RV32I-NEXT:    mv t0, s10
+; RV32I-NEXT:    mv a3, s10
 ; RV32I-NEXT:  .LBB17_82:
 ; RV32I-NEXT:    sll s11, a6, a1
 ; RV32I-NEXT:    beqz a5, .LBB17_88
 ; RV32I-NEXT:  # %bb.83:
-; RV32I-NEXT:    li a7, 0
 ; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    li a7, 0
 ; RV32I-NEXT:    bnez t6, .LBB17_89
 ; RV32I-NEXT:    j .LBB17_90
 ; RV32I-NEXT:  .LBB17_84:
-; RV32I-NEXT:    or a6, s7, a3
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    or a3, s6, a6
+; RV32I-NEXT:    li a6, 0
 ; RV32I-NEXT:    beqz t6, .LBB17_68
 ; RV32I-NEXT:  .LBB17_85:
-; RV32I-NEXT:    mv a3, s4
-; RV32I-NEXT:    bne a5, s0, .LBB17_69
+; RV32I-NEXT:    mv a6, s8
+; RV32I-NEXT:    bne a5, s3, .LBB17_69
 ; RV32I-NEXT:  .LBB17_86:
-; RV32I-NEXT:    or a6, s9, a3
-; RV32I-NEXT:    mv t0, t5
-; RV32I-NEXT:    beq a5, s8, .LBB17_70
+; RV32I-NEXT:    or a3, s7, a6
+; RV32I-NEXT:    mv a6, t4
+; RV32I-NEXT:    beq a5, s4, .LBB17_70
 ; RV32I-NEXT:  .LBB17_87:
-; RV32I-NEXT:    mv t0, a6
+; RV32I-NEXT:    mv a6, a3
 ; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bne a5, s10, .LBB17_71
+; RV32I-NEXT:    bne a5, s5, .LBB17_71
 ; RV32I-NEXT:    j .LBB17_72
 ; RV32I-NEXT:  .LBB17_88:
-; RV32I-NEXT:    or a7, s11, t0
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    or a3, s11, a3
+; RV32I-NEXT:    li a7, 0
 ; RV32I-NEXT:    beqz t6, .LBB17_90
 ; RV32I-NEXT:  .LBB17_89:
-; RV32I-NEXT:    lw a3, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw a7, 20(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:  .LBB17_90:
-; RV32I-NEXT:    beq a5, s5, .LBB17_110
+; RV32I-NEXT:    beq a5, s1, .LBB17_109
 ; RV32I-NEXT:  # %bb.91:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t6, .LBB17_111
+; RV32I-NEXT:    li a7, 0
+; RV32I-NEXT:    bnez t6, .LBB17_110
 ; RV32I-NEXT:  .LBB17_92:
-; RV32I-NEXT:    beq a5, s0, .LBB17_112
+; RV32I-NEXT:    beq a5, s3, .LBB17_111
 ; RV32I-NEXT:  .LBB17_93:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t6, .LBB17_113
+; RV32I-NEXT:    li a7, 0
+; RV32I-NEXT:    bnez t6, .LBB17_112
 ; RV32I-NEXT:  .LBB17_94:
-; RV32I-NEXT:    bne a5, s8, .LBB17_96
+; RV32I-NEXT:    beq a5, s4, .LBB17_113
 ; RV32I-NEXT:  .LBB17_95:
-; RV32I-NEXT:    or a7, s9, a3
+; RV32I-NEXT:    mv t0, t4
+; RV32I-NEXT:    beq a5, s5, .LBB17_97
 ; RV32I-NEXT:  .LBB17_96:
-; RV32I-NEXT:    mv a3, t5
-; RV32I-NEXT:    li t0, 4
-; RV32I-NEXT:    beq a5, t0, .LBB17_98
-; RV32I-NEXT:  # %bb.97:
-; RV32I-NEXT:    mv a3, a7
-; RV32I-NEXT:  .LBB17_98:
-; RV32I-NEXT:    lbu t3, 23(a0)
-; RV32I-NEXT:    li t0, 0
-; RV32I-NEXT:    beq a5, s1, .LBB17_100
-; RV32I-NEXT:  # %bb.99:
 ; RV32I-NEXT:    mv t0, a3
-; RV32I-NEXT:  .LBB17_100:
+; RV32I-NEXT:  .LBB17_97:
+; RV32I-NEXT:    lbu t3, 23(a0)
 ; RV32I-NEXT:    li a7, 0
-; RV32I-NEXT:    lbu t1, 21(a0)
-; RV32I-NEXT:    lbu a3, 22(a0)
-; RV32I-NEXT:    slli t3, t3, 8
-; RV32I-NEXT:    li t4, 6
-; RV32I-NEXT:    beq a5, t4, .LBB17_102
-; RV32I-NEXT:  # %bb.101:
+; RV32I-NEXT:    beq a5, s9, .LBB17_99
+; RV32I-NEXT:  # %bb.98:
 ; RV32I-NEXT:    mv a7, t0
-; RV32I-NEXT:  .LBB17_102:
-; RV32I-NEXT:    sw s9, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    sw s4, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    li t0, 0
-; RV32I-NEXT:    lbu t4, 20(a0)
-; RV32I-NEXT:    or t3, t3, a3
-; RV32I-NEXT:    slli t1, t1, 8
-; RV32I-NEXT:    beq a5, ra, .LBB17_104
-; RV32I-NEXT:  # %bb.103:
-; RV32I-NEXT:    mv t0, a7
-; RV32I-NEXT:  .LBB17_104:
-; RV32I-NEXT:    or a3, t1, t4
-; RV32I-NEXT:    slli t3, t3, 16
+; RV32I-NEXT:  .LBB17_99:
+; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    lbu t0, 21(a0)
+; RV32I-NEXT:    lbu t1, 22(a0)
+; RV32I-NEXT:    slli t3, t3, 8
+; RV32I-NEXT:    beq a5, s0, .LBB17_101
+; RV32I-NEXT:  # %bb.100:
+; RV32I-NEXT:    mv a3, a7
+; RV32I-NEXT:  .LBB17_101:
+; RV32I-NEXT:    sw s7, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    li a7, 0
+; RV32I-NEXT:    lbu t5, 20(a0)
+; RV32I-NEXT:    or t1, t3, t1
+; RV32I-NEXT:    slli t0, t0, 8
+; RV32I-NEXT:    beq a5, ra, .LBB17_103
+; RV32I-NEXT:  # %bb.102:
+; RV32I-NEXT:    mv a7, a3
+; RV32I-NEXT:  .LBB17_103:
+; RV32I-NEXT:    or t0, t0, t5
+; RV32I-NEXT:    slli t1, t1, 16
 ; RV32I-NEXT:    mv s9, a6
-; RV32I-NEXT:    beqz a1, .LBB17_106
-; RV32I-NEXT:  # %bb.105:
-; RV32I-NEXT:    mv s9, t0
-; RV32I-NEXT:  .LBB17_106:
-; RV32I-NEXT:    li t1, 0
-; RV32I-NEXT:    or t0, t3, a3
-; RV32I-NEXT:    srl a6, a6, s3
-; RV32I-NEXT:    beqz t6, .LBB17_108
-; RV32I-NEXT:  # %bb.107:
-; RV32I-NEXT:    mv t1, a6
-; RV32I-NEXT:  .LBB17_108:
+; RV32I-NEXT:    beqz a1, .LBB17_105
+; RV32I-NEXT:  # %bb.104:
+; RV32I-NEXT:    mv s9, a7
+; RV32I-NEXT:  .LBB17_105:
+; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    or t0, t1, t0
+; RV32I-NEXT:    neg a7, t6
+; RV32I-NEXT:    srl a6, a6, a7
+; RV32I-NEXT:    beqz t6, .LBB17_107
+; RV32I-NEXT:  # %bb.106:
+; RV32I-NEXT:    mv a3, a6
+; RV32I-NEXT:  .LBB17_107:
 ; RV32I-NEXT:    sll a7, t0, a1
 ; RV32I-NEXT:    beqz a5, .LBB17_114
-; RV32I-NEXT:  # %bb.109:
-; RV32I-NEXT:    li t1, 0
+; RV32I-NEXT:  # %bb.108:
 ; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    li t1, 0
 ; RV32I-NEXT:    bnez t6, .LBB17_115
 ; RV32I-NEXT:    j .LBB17_116
-; RV32I-NEXT:  .LBB17_110:
-; RV32I-NEXT:    lw a7, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    or a7, a7, a3
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:  .LBB17_109:
+; RV32I-NEXT:    lw a3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    or a3, a3, a7
+; RV32I-NEXT:    li a7, 0
 ; RV32I-NEXT:    beqz t6, .LBB17_92
+; RV32I-NEXT:  .LBB17_110:
+; RV32I-NEXT:    lw a7, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    bne a5, s3, .LBB17_93
 ; RV32I-NEXT:  .LBB17_111:
-; RV32I-NEXT:    lw a3, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    bne a5, s0, .LBB17_93
-; RV32I-NEXT:  .LBB17_112:
-; RV32I-NEXT:    or a7, s7, a3
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    lw a3, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    or a3, a3, a7
+; RV32I-NEXT:    li a7, 0
 ; RV32I-NEXT:    beqz t6, .LBB17_94
+; RV32I-NEXT:  .LBB17_112:
+; RV32I-NEXT:    mv a7, s8
+; RV32I-NEXT:    bne a5, s4, .LBB17_95
 ; RV32I-NEXT:  .LBB17_113:
-; RV32I-NEXT:    mv a3, s4
-; RV32I-NEXT:    beq a5, s8, .LBB17_95
-; RV32I-NEXT:    j .LBB17_96
+; RV32I-NEXT:    or a3, s7, a7
+; RV32I-NEXT:    mv t0, t4
+; RV32I-NEXT:    bne a5, s5, .LBB17_96
+; RV32I-NEXT:    j .LBB17_97
 ; RV32I-NEXT:  .LBB17_114:
-; RV32I-NEXT:    or t1, a7, t1
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    or a3, a7, a3
+; RV32I-NEXT:    li t1, 0
 ; RV32I-NEXT:    beqz t6, .LBB17_116
 ; RV32I-NEXT:  .LBB17_115:
-; RV32I-NEXT:    mv a3, s10
+; RV32I-NEXT:    mv t1, s10
 ; RV32I-NEXT:  .LBB17_116:
-; RV32I-NEXT:    beq a5, s5, .LBB17_138
+; RV32I-NEXT:    beq a5, s1, .LBB17_137
 ; RV32I-NEXT:  # %bb.117:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t6, .LBB17_139
+; RV32I-NEXT:    li t1, 0
+; RV32I-NEXT:    bnez t6, .LBB17_138
 ; RV32I-NEXT:  .LBB17_118:
-; RV32I-NEXT:    beq a5, s0, .LBB17_140
+; RV32I-NEXT:    beq a5, s3, .LBB17_139
 ; RV32I-NEXT:  .LBB17_119:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t6, .LBB17_141
+; RV32I-NEXT:    li t1, 0
+; RV32I-NEXT:    bnez t6, .LBB17_140
 ; RV32I-NEXT:  .LBB17_120:
-; RV32I-NEXT:    bne a5, s8, .LBB17_122
+; RV32I-NEXT:    beq a5, s4, .LBB17_141
 ; RV32I-NEXT:  .LBB17_121:
-; RV32I-NEXT:    or t1, s7, a3
+; RV32I-NEXT:    li t1, 0
+; RV32I-NEXT:    beqz t6, .LBB17_123
 ; RV32I-NEXT:  .LBB17_122:
-; RV32I-NEXT:    li s4, 1
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t6, .LBB17_124
-; RV32I-NEXT:  # %bb.123:
-; RV32I-NEXT:    lw a3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:  .LBB17_124:
-; RV32I-NEXT:    li s5, 3
-; RV32I-NEXT:    li s8, 2
-; RV32I-NEXT:    li t3, 4
-; RV32I-NEXT:    bne a5, t3, .LBB17_126
-; RV32I-NEXT:  # %bb.125:
-; RV32I-NEXT:    lw t1, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    or t1, t1, a3
-; RV32I-NEXT:  .LBB17_126:
-; RV32I-NEXT:    lbu s0, 27(a0)
-; RV32I-NEXT:    mv t3, t5
-; RV32I-NEXT:    beq a5, s1, .LBB17_128
-; RV32I-NEXT:  # %bb.127:
-; RV32I-NEXT:    mv t3, t1
-; RV32I-NEXT:  .LBB17_128:
+; RV32I-NEXT:    mv t1, s8
+; RV32I-NEXT:  .LBB17_123:
+; RV32I-NEXT:    li s7, 1
+; RV32I-NEXT:    li s1, 3
+; RV32I-NEXT:    bne a5, s5, .LBB17_125
+; RV32I-NEXT:  # %bb.124:
+; RV32I-NEXT:    lw a3, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    or a3, a3, t1
+; RV32I-NEXT:  .LBB17_125:
+; RV32I-NEXT:    li s4, 2
+; RV32I-NEXT:    lbu t5, 27(a0)
+; RV32I-NEXT:    mv t1, t4
+; RV32I-NEXT:    li t3, 5
+; RV32I-NEXT:    beq a5, t3, .LBB17_127
+; RV32I-NEXT:  # %bb.126:
+; RV32I-NEXT:    mv t1, a3
+; RV32I-NEXT:  .LBB17_127:
+; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    lbu s3, 25(a0)
+; RV32I-NEXT:    lbu t3, 26(a0)
+; RV32I-NEXT:    slli t5, t5, 8
+; RV32I-NEXT:    beq a5, s0, .LBB17_129
+; RV32I-NEXT:  # %bb.128:
+; RV32I-NEXT:    mv a3, t1
+; RV32I-NEXT:  .LBB17_129:
 ; RV32I-NEXT:    li t1, 0
-; RV32I-NEXT:    lbu t4, 25(a0)
-; RV32I-NEXT:    lbu a3, 26(a0)
-; RV32I-NEXT:    slli s0, s0, 8
-; RV32I-NEXT:    li s1, 6
-; RV32I-NEXT:    beq a5, s1, .LBB17_130
-; RV32I-NEXT:  # %bb.129:
-; RV32I-NEXT:    mv t1, t3
-; RV32I-NEXT:  .LBB17_130:
-; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    lbu s1, 24(a0)
-; RV32I-NEXT:    or s0, s0, a3
-; RV32I-NEXT:    slli a3, t4, 8
-; RV32I-NEXT:    beq a5, ra, .LBB17_132
-; RV32I-NEXT:  # %bb.131:
-; RV32I-NEXT:    mv t3, t1
-; RV32I-NEXT:  .LBB17_132:
-; RV32I-NEXT:    or a3, a3, s1
-; RV32I-NEXT:    slli s0, s0, 16
+; RV32I-NEXT:    lbu s0, 24(a0)
+; RV32I-NEXT:    or t5, t5, t3
+; RV32I-NEXT:    slli s3, s3, 8
+; RV32I-NEXT:    beq a5, ra, .LBB17_131
+; RV32I-NEXT:  # %bb.130:
+; RV32I-NEXT:    mv t1, a3
+; RV32I-NEXT:  .LBB17_131:
+; RV32I-NEXT:    or t3, s3, s0
+; RV32I-NEXT:    slli t5, t5, 16
 ; RV32I-NEXT:    mv ra, t0
-; RV32I-NEXT:    beqz a1, .LBB17_134
-; RV32I-NEXT:  # %bb.133:
-; RV32I-NEXT:    mv ra, t3
-; RV32I-NEXT:  .LBB17_134:
-; RV32I-NEXT:    li t4, 0
-; RV32I-NEXT:    or t3, s0, a3
-; RV32I-NEXT:    srl t0, t0, s3
-; RV32I-NEXT:    li s0, 5
-; RV32I-NEXT:    beqz t6, .LBB17_136
-; RV32I-NEXT:  # %bb.135:
-; RV32I-NEXT:    mv t4, t0
-; RV32I-NEXT:  .LBB17_136:
-; RV32I-NEXT:    sll t1, t3, a1
+; RV32I-NEXT:    beqz a1, .LBB17_133
+; RV32I-NEXT:  # %bb.132:
+; RV32I-NEXT:    mv ra, t1
+; RV32I-NEXT:  .LBB17_133:
+; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    or s3, t5, t3
+; RV32I-NEXT:    neg t1, t6
+; RV32I-NEXT:    srl t0, t0, t1
+; RV32I-NEXT:    li t5, 6
+; RV32I-NEXT:    beqz t6, .LBB17_135
+; RV32I-NEXT:  # %bb.134:
+; RV32I-NEXT:    mv a3, t0
+; RV32I-NEXT:  .LBB17_135:
+; RV32I-NEXT:    sll t1, s3, a1
 ; RV32I-NEXT:    beqz a5, .LBB17_142
-; RV32I-NEXT:  # %bb.137:
-; RV32I-NEXT:    li t4, 0
+; RV32I-NEXT:  # %bb.136:
 ; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    li t3, 0
 ; RV32I-NEXT:    bnez t6, .LBB17_143
 ; RV32I-NEXT:    j .LBB17_144
-; RV32I-NEXT:  .LBB17_138:
-; RV32I-NEXT:    or t1, s11, a3
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:  .LBB17_137:
+; RV32I-NEXT:    or a3, s11, t1
+; RV32I-NEXT:    li t1, 0
 ; RV32I-NEXT:    beqz t6, .LBB17_118
+; RV32I-NEXT:  .LBB17_138:
+; RV32I-NEXT:    lw t1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    bne a5, s3, .LBB17_119
 ; RV32I-NEXT:  .LBB17_139:
-; RV32I-NEXT:    lw a3, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    bne a5, s0, .LBB17_119
-; RV32I-NEXT:  .LBB17_140:
-; RV32I-NEXT:    lw t1, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    or t1, t1, a3
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    lw a3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    or a3, a3, t1
+; RV32I-NEXT:    li t1, 0
 ; RV32I-NEXT:    beqz t6, .LBB17_120
+; RV32I-NEXT:  .LBB17_140:
+; RV32I-NEXT:    lw t1, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    bne a5, s4, .LBB17_121
 ; RV32I-NEXT:  .LBB17_141:
-; RV32I-NEXT:    lw a3, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    beq a5, s8, .LBB17_121
-; RV32I-NEXT:    j .LBB17_122
+; RV32I-NEXT:    lw a3, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    or a3, a3, t1
+; RV32I-NEXT:    li t1, 0
+; RV32I-NEXT:    bnez t6, .LBB17_122
+; RV32I-NEXT:    j .LBB17_123
 ; RV32I-NEXT:  .LBB17_142:
-; RV32I-NEXT:    or t4, t1, t4
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    or a3, t1, a3
+; RV32I-NEXT:    li t3, 0
 ; RV32I-NEXT:    beqz t6, .LBB17_144
 ; RV32I-NEXT:  .LBB17_143:
-; RV32I-NEXT:    mv a3, a6
+; RV32I-NEXT:    mv t3, a6
 ; RV32I-NEXT:  .LBB17_144:
-; RV32I-NEXT:    beq a5, s4, .LBB17_164
+; RV32I-NEXT:    beq a5, s7, .LBB17_164
 ; RV32I-NEXT:  # %bb.145:
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    li t3, 0
 ; RV32I-NEXT:    bnez t6, .LBB17_165
 ; RV32I-NEXT:  .LBB17_146:
-; RV32I-NEXT:    beq a5, s8, .LBB17_166
+; RV32I-NEXT:    beq a5, s4, .LBB17_166
 ; RV32I-NEXT:  .LBB17_147:
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    li t3, 0
 ; RV32I-NEXT:    bnez t6, .LBB17_167
 ; RV32I-NEXT:  .LBB17_148:
-; RV32I-NEXT:    beq a5, s5, .LBB17_168
+; RV32I-NEXT:    beq a5, s1, .LBB17_168
 ; RV32I-NEXT:  .LBB17_149:
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    li t3, 0
 ; RV32I-NEXT:    bnez t6, .LBB17_169
 ; RV32I-NEXT:  .LBB17_150:
-; RV32I-NEXT:    li s1, 4
-; RV32I-NEXT:    beq a5, s1, .LBB17_170
+; RV32I-NEXT:    beq a5, s5, .LBB17_170
 ; RV32I-NEXT:  .LBB17_151:
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    li t3, 0
 ; RV32I-NEXT:    bnez t6, .LBB17_171
 ; RV32I-NEXT:  .LBB17_152:
+; RV32I-NEXT:    li s0, 5
 ; RV32I-NEXT:    bne a5, s0, .LBB17_154
 ; RV32I-NEXT:  .LBB17_153:
-; RV32I-NEXT:    lw t4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    or t4, t4, a3
+; RV32I-NEXT:    lw a3, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    or a3, a3, t3
 ; RV32I-NEXT:  .LBB17_154:
-; RV32I-NEXT:    lbu s0, 31(a0)
-; RV32I-NEXT:    mv a3, t5
-; RV32I-NEXT:    li s1, 6
-; RV32I-NEXT:    beq a5, s1, .LBB17_156
+; RV32I-NEXT:    lbu s1, 31(a0)
+; RV32I-NEXT:    mv t3, t4
+; RV32I-NEXT:    beq a5, t5, .LBB17_156
 ; RV32I-NEXT:  # %bb.155:
-; RV32I-NEXT:    mv a3, t4
+; RV32I-NEXT:    mv t3, a3
 ; RV32I-NEXT:  .LBB17_156:
-; RV32I-NEXT:    li t4, 0
-; RV32I-NEXT:    lbu s5, 29(a0)
-; RV32I-NEXT:    lbu s1, 30(a0)
-; RV32I-NEXT:    slli s0, s0, 8
-; RV32I-NEXT:    li s4, 7
-; RV32I-NEXT:    beq a5, s4, .LBB17_158
+; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    lbu s0, 29(a0)
+; RV32I-NEXT:    lbu t5, 30(a0)
+; RV32I-NEXT:    slli s1, s1, 8
+; RV32I-NEXT:    li s5, 7
+; RV32I-NEXT:    beq a5, s5, .LBB17_158
 ; RV32I-NEXT:  # %bb.157:
-; RV32I-NEXT:    mv t4, a3
+; RV32I-NEXT:    mv a3, t3
 ; RV32I-NEXT:  .LBB17_158:
-; RV32I-NEXT:    lbu a3, 28(a0)
-; RV32I-NEXT:    slli s5, s5, 8
-; RV32I-NEXT:    or s0, s0, s1
-; RV32I-NEXT:    mv a0, t3
+; RV32I-NEXT:    lbu t3, 28(a0)
+; RV32I-NEXT:    slli s0, s0, 8
+; RV32I-NEXT:    or t5, s1, t5
+; RV32I-NEXT:    mv a0, s3
 ; RV32I-NEXT:    beqz a1, .LBB17_160
 ; RV32I-NEXT:  # %bb.159:
-; RV32I-NEXT:    mv a0, t4
+; RV32I-NEXT:    mv a0, a3
 ; RV32I-NEXT:  .LBB17_160:
-; RV32I-NEXT:    li t4, 0
-; RV32I-NEXT:    or a3, s5, a3
-; RV32I-NEXT:    slli s0, s0, 16
-; RV32I-NEXT:    li s1, 5
-; RV32I-NEXT:    li s4, 4
+; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    or t3, s0, t3
+; RV32I-NEXT:    slli t5, t5, 16
+; RV32I-NEXT:    li s0, 5
+; RV32I-NEXT:    li s1, 4
 ; RV32I-NEXT:    beqz t6, .LBB17_162
 ; RV32I-NEXT:  # %bb.161:
-; RV32I-NEXT:    srl t4, t3, s3
+; RV32I-NEXT:    neg a3, t6
+; RV32I-NEXT:    srl a3, s3, a3
 ; RV32I-NEXT:  .LBB17_162:
-; RV32I-NEXT:    or s3, s0, a3
-; RV32I-NEXT:    li s0, 6
-; RV32I-NEXT:    li s5, 1
+; RV32I-NEXT:    or s3, t5, t3
+; RV32I-NEXT:    li t5, 6
 ; RV32I-NEXT:    beqz a5, .LBB17_172
 ; RV32I-NEXT:  # %bb.163:
-; RV32I-NEXT:    li t3, 0
 ; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    li t3, 0
 ; RV32I-NEXT:    bnez t6, .LBB17_173
 ; RV32I-NEXT:    j .LBB17_174
 ; RV32I-NEXT:  .LBB17_164:
-; RV32I-NEXT:    or t4, a7, a3
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    or a3, a7, t3
+; RV32I-NEXT:    li t3, 0
 ; RV32I-NEXT:    beqz t6, .LBB17_146
 ; RV32I-NEXT:  .LBB17_165:
-; RV32I-NEXT:    mv a3, s10
-; RV32I-NEXT:    bne a5, s8, .LBB17_147
+; RV32I-NEXT:    mv t3, s10
+; RV32I-NEXT:    bne a5, s4, .LBB17_147
 ; RV32I-NEXT:  .LBB17_166:
-; RV32I-NEXT:    or t4, s11, a3
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    or a3, s11, t3
+; RV32I-NEXT:    li t3, 0
 ; RV32I-NEXT:    beqz t6, .LBB17_148
 ; RV32I-NEXT:  .LBB17_167:
-; RV32I-NEXT:    lw a3, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    bne a5, s5, .LBB17_149
+; RV32I-NEXT:    lw t3, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    bne a5, s1, .LBB17_149
 ; RV32I-NEXT:  .LBB17_168:
-; RV32I-NEXT:    lw t4, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    or t4, t4, a3
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    lw a3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    or a3, a3, t3
+; RV32I-NEXT:    li t3, 0
 ; RV32I-NEXT:    beqz t6, .LBB17_150
 ; RV32I-NEXT:  .LBB17_169:
-; RV32I-NEXT:    lw a3, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    li s1, 4
-; RV32I-NEXT:    bne a5, s1, .LBB17_151
+; RV32I-NEXT:    lw t3, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    bne a5, s5, .LBB17_151
 ; RV32I-NEXT:  .LBB17_170:
-; RV32I-NEXT:    or t4, s7, a3
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    lw a3, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    or a3, a3, t3
+; RV32I-NEXT:    li t3, 0
 ; RV32I-NEXT:    beqz t6, .LBB17_152
 ; RV32I-NEXT:  .LBB17_171:
-; RV32I-NEXT:    lw a3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    mv t3, s8
+; RV32I-NEXT:    li s0, 5
 ; RV32I-NEXT:    beq a5, s0, .LBB17_153
 ; RV32I-NEXT:    j .LBB17_154
 ; RV32I-NEXT:  .LBB17_172:
-; RV32I-NEXT:    sll a3, s3, a1
-; RV32I-NEXT:    or t3, a3, t4
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    sll t3, s3, a1
+; RV32I-NEXT:    or a3, t3, a3
+; RV32I-NEXT:    li t3, 0
 ; RV32I-NEXT:    beqz t6, .LBB17_174
 ; RV32I-NEXT:  .LBB17_173:
-; RV32I-NEXT:    mv a3, t0
+; RV32I-NEXT:    mv t3, t0
 ; RV32I-NEXT:  .LBB17_174:
-; RV32I-NEXT:    beq a5, s5, .LBB17_190
+; RV32I-NEXT:    beq a5, s7, .LBB17_190
 ; RV32I-NEXT:  # %bb.175:
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    li t0, 0
 ; RV32I-NEXT:    bnez t6, .LBB17_191
 ; RV32I-NEXT:  .LBB17_176:
-; RV32I-NEXT:    bne a5, s8, .LBB17_178
+; RV32I-NEXT:    bne a5, s4, .LBB17_178
 ; RV32I-NEXT:  .LBB17_177:
-; RV32I-NEXT:    or t3, a7, a3
+; RV32I-NEXT:    or a3, a7, t0
 ; RV32I-NEXT:  .LBB17_178:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    li a6, 3
+; RV32I-NEXT:    li a6, 0
+; RV32I-NEXT:    li a7, 3
 ; RV32I-NEXT:    bnez t6, .LBB17_192
 ; RV32I-NEXT:  # %bb.179:
-; RV32I-NEXT:    beq a5, a6, .LBB17_193
+; RV32I-NEXT:    beq a5, a7, .LBB17_193
 ; RV32I-NEXT:  .LBB17_180:
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    li a6, 0
 ; RV32I-NEXT:    bnez t6, .LBB17_194
 ; RV32I-NEXT:  .LBB17_181:
-; RV32I-NEXT:    beq a5, s4, .LBB17_195
+; RV32I-NEXT:    beq a5, s1, .LBB17_195
 ; RV32I-NEXT:  .LBB17_182:
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    li a6, 0
 ; RV32I-NEXT:    bnez t6, .LBB17_196
 ; RV32I-NEXT:  .LBB17_183:
-; RV32I-NEXT:    beq a5, s1, .LBB17_197
+; RV32I-NEXT:    beq a5, s0, .LBB17_197
 ; RV32I-NEXT:  .LBB17_184:
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    li a6, 0
 ; RV32I-NEXT:    bnez t6, .LBB17_198
 ; RV32I-NEXT:  .LBB17_185:
-; RV32I-NEXT:    beq a5, s0, .LBB17_199
+; RV32I-NEXT:    beq a5, t5, .LBB17_199
 ; RV32I-NEXT:  .LBB17_186:
-; RV32I-NEXT:    li a3, 7
-; RV32I-NEXT:    bne a5, a3, .LBB17_200
+; RV32I-NEXT:    li a6, 7
+; RV32I-NEXT:    bne a5, a6, .LBB17_200
 ; RV32I-NEXT:  .LBB17_187:
 ; RV32I-NEXT:    beqz a1, .LBB17_189
 ; RV32I-NEXT:  .LBB17_188:
-; RV32I-NEXT:    mv s3, t5
+; RV32I-NEXT:    mv s3, t4
 ; RV32I-NEXT:  .LBB17_189:
 ; RV32I-NEXT:    srli a1, a4, 16
 ; RV32I-NEXT:    lui a7, 16
@@ -9439,45 +9498,46 @@ define void @shl_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nou
 ; RV32I-NEXT:    addi sp, sp, 80
 ; RV32I-NEXT:    ret
 ; RV32I-NEXT:  .LBB17_190:
-; RV32I-NEXT:    or t3, t1, a3
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    or a3, t1, t3
+; RV32I-NEXT:    li t0, 0
 ; RV32I-NEXT:    beqz t6, .LBB17_176
 ; RV32I-NEXT:  .LBB17_191:
-; RV32I-NEXT:    mv a3, a6
-; RV32I-NEXT:    beq a5, s8, .LBB17_177
+; RV32I-NEXT:    mv t0, a6
+; RV32I-NEXT:    beq a5, s4, .LBB17_177
 ; RV32I-NEXT:    j .LBB17_178
 ; RV32I-NEXT:  .LBB17_192:
-; RV32I-NEXT:    mv a3, s10
-; RV32I-NEXT:    bne a5, a6, .LBB17_180
+; RV32I-NEXT:    mv a6, s10
+; RV32I-NEXT:    bne a5, a7, .LBB17_180
 ; RV32I-NEXT:  .LBB17_193:
-; RV32I-NEXT:    or t3, s11, a3
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    or a3, s11, a6
+; RV32I-NEXT:    li a6, 0
 ; RV32I-NEXT:    beqz t6, .LBB17_181
 ; RV32I-NEXT:  .LBB17_194:
-; RV32I-NEXT:    lw a3, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    bne a5, s4, .LBB17_182
+; RV32I-NEXT:    lw a6, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    bne a5, s1, .LBB17_182
 ; RV32I-NEXT:  .LBB17_195:
-; RV32I-NEXT:    lw a6, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    or t3, a6, a3
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    lw a3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    or a3, a3, a6
+; RV32I-NEXT:    li a6, 0
 ; RV32I-NEXT:    beqz t6, .LBB17_183
 ; RV32I-NEXT:  .LBB17_196:
-; RV32I-NEXT:    lw a3, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    bne a5, s1, .LBB17_184
+; RV32I-NEXT:    lw a6, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    bne a5, s0, .LBB17_184
 ; RV32I-NEXT:  .LBB17_197:
-; RV32I-NEXT:    or t3, s7, a3
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    lw a3, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    or a3, a3, a6
+; RV32I-NEXT:    li a6, 0
 ; RV32I-NEXT:    beqz t6, .LBB17_185
 ; RV32I-NEXT:  .LBB17_198:
-; RV32I-NEXT:    lw a3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    bne a5, s0, .LBB17_186
+; RV32I-NEXT:    mv a6, s8
+; RV32I-NEXT:    bne a5, t5, .LBB17_186
 ; RV32I-NEXT:  .LBB17_199:
-; RV32I-NEXT:    lw a6, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    or t3, a6, a3
-; RV32I-NEXT:    li a3, 7
-; RV32I-NEXT:    beq a5, a3, .LBB17_187
+; RV32I-NEXT:    lw a3, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    or a3, a3, a6
+; RV32I-NEXT:    li a6, 7
+; RV32I-NEXT:    beq a5, a6, .LBB17_187
 ; RV32I-NEXT:  .LBB17_200:
-; RV32I-NEXT:    mv t5, t3
+; RV32I-NEXT:    mv t4, a3
 ; RV32I-NEXT:    bnez a1, .LBB17_188
 ; RV32I-NEXT:    j .LBB17_189
   %src = load i256, ptr %src.ptr, align 1
@@ -9863,223 +9923,231 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
 ; RV32I-NEXT:    slli t4, t4, 16
 ; RV32I-NEXT:    slli t5, s0, 16
 ; RV32I-NEXT:    slli s4, s4, 16
-; RV32I-NEXT:    slli a3, a1, 16
+; RV32I-NEXT:    slli t0, a1, 16
 ; RV32I-NEXT:    or s5, t4, t1
 ; RV32I-NEXT:    or a1, s4, s1
-; RV32I-NEXT:    or t0, a3, s2
+; RV32I-NEXT:    or t0, t0, s2
 ; RV32I-NEXT:    slli t0, t0, 3
 ; RV32I-NEXT:    srli t1, t0, 5
 ; RV32I-NEXT:    andi t4, t0, 31
-; RV32I-NEXT:    neg a3, t4
+; RV32I-NEXT:    neg ra, t4
 ; RV32I-NEXT:    beqz t4, .LBB18_2
 ; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    sll a5, s5, a3
+; RV32I-NEXT:    sll a5, s5, ra
 ; RV32I-NEXT:  .LBB18_2:
-; RV32I-NEXT:    or s10, t6, a4
+; RV32I-NEXT:    or s2, t6, a4
 ; RV32I-NEXT:    lbu t6, 12(a0)
 ; RV32I-NEXT:    lbu s0, 19(a0)
 ; RV32I-NEXT:    slli s1, a7, 8
 ; RV32I-NEXT:    or a6, t3, a6
-; RV32I-NEXT:    or a4, t5, t2
+; RV32I-NEXT:    or a3, t5, t2
 ; RV32I-NEXT:    srai t2, a1, 31
 ; RV32I-NEXT:    beqz t1, .LBB18_4
 ; RV32I-NEXT:  # %bb.3:
+; RV32I-NEXT:    mv a4, s2
 ; RV32I-NEXT:    mv a5, t2
 ; RV32I-NEXT:    j .LBB18_5
 ; RV32I-NEXT:  .LBB18_4:
-; RV32I-NEXT:    srl a7, s10, t0
+; RV32I-NEXT:    mv a4, s2
+; RV32I-NEXT:    srl a7, s2, t0
 ; RV32I-NEXT:    or a5, a7, a5
 ; RV32I-NEXT:  .LBB18_5:
 ; RV32I-NEXT:    li a7, 0
-; RV32I-NEXT:    lbu s3, 17(a0)
+; RV32I-NEXT:    lbu s2, 17(a0)
 ; RV32I-NEXT:    lbu t3, 18(a0)
-; RV32I-NEXT:    slli s0, s0, 8
-; RV32I-NEXT:    or s4, s1, t6
+; RV32I-NEXT:    slli s4, s0, 8
+; RV32I-NEXT:    or s3, s1, t6
 ; RV32I-NEXT:    slli a6, a6, 16
-; RV32I-NEXT:    li s6, 1
-; RV32I-NEXT:    sll s2, a4, a3
+; RV32I-NEXT:    li t5, 1
+; RV32I-NEXT:    sll s8, a3, ra
 ; RV32I-NEXT:    beqz t4, .LBB18_7
 ; RV32I-NEXT:  # %bb.6:
-; RV32I-NEXT:    mv a7, s2
+; RV32I-NEXT:    mv a7, s8
 ; RV32I-NEXT:  .LBB18_7:
-; RV32I-NEXT:    lbu t5, 16(a0)
-; RV32I-NEXT:    lbu t6, 23(a0)
-; RV32I-NEXT:    slli s1, s3, 8
-; RV32I-NEXT:    or s0, s0, t3
-; RV32I-NEXT:    srl s3, s5, t0
-; RV32I-NEXT:    or a6, a6, s4
-; RV32I-NEXT:    bne t1, s6, .LBB18_9
+; RV32I-NEXT:    lbu t6, 16(a0)
+; RV32I-NEXT:    lbu s0, 23(a0)
+; RV32I-NEXT:    slli s2, s2, 8
+; RV32I-NEXT:    or s1, s4, t3
+; RV32I-NEXT:    srl t3, s5, t0
+; RV32I-NEXT:    or a6, a6, s3
+; RV32I-NEXT:    sw t3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    beq t1, t5, .LBB18_9
 ; RV32I-NEXT:  # %bb.8:
-; RV32I-NEXT:    or a5, s3, a7
+; RV32I-NEXT:    li t5, 1
+; RV32I-NEXT:    j .LBB18_10
 ; RV32I-NEXT:  .LBB18_9:
+; RV32I-NEXT:    li t5, 1
+; RV32I-NEXT:    or a5, t3, a7
+; RV32I-NEXT:  .LBB18_10:
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    lbu s6, 21(a0)
+; RV32I-NEXT:    lbu s3, 21(a0)
 ; RV32I-NEXT:    lbu a7, 22(a0)
-; RV32I-NEXT:    slli s4, t6, 8
-; RV32I-NEXT:    or s7, s1, t5
-; RV32I-NEXT:    slli s8, s0, 16
-; RV32I-NEXT:    li s9, 2
-; RV32I-NEXT:    sll s0, a6, a3
-; RV32I-NEXT:    beqz t4, .LBB18_11
-; RV32I-NEXT:  # %bb.10:
-; RV32I-NEXT:    mv t3, s0
-; RV32I-NEXT:  .LBB18_11:
-; RV32I-NEXT:    lbu t5, 20(a0)
-; RV32I-NEXT:    lbu t6, 27(a0)
-; RV32I-NEXT:    slli s6, s6, 8
-; RV32I-NEXT:    or s4, s4, a7
-; RV32I-NEXT:    srl s1, a4, t0
-; RV32I-NEXT:    or a7, s8, s7
-; RV32I-NEXT:    bne t1, s9, .LBB18_13
-; RV32I-NEXT:  # %bb.12:
-; RV32I-NEXT:    or a5, s1, t3
-; RV32I-NEXT:  .LBB18_13:
-; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    lbu s8, 25(a0)
-; RV32I-NEXT:    lbu s7, 26(a0)
-; RV32I-NEXT:    slli t6, t6, 8
-; RV32I-NEXT:    or s6, s6, t5
-; RV32I-NEXT:    slli s9, s4, 16
-; RV32I-NEXT:    li s11, 3
-; RV32I-NEXT:    sll t5, a7, a3
-; RV32I-NEXT:    beqz t4, .LBB18_15
-; RV32I-NEXT:  # %bb.14:
-; RV32I-NEXT:    mv t3, t5
-; RV32I-NEXT:  .LBB18_15:
-; RV32I-NEXT:    lbu s4, 24(a0)
-; RV32I-NEXT:    slli s8, s8, 8
-; RV32I-NEXT:    or s7, t6, s7
+; RV32I-NEXT:    slli s7, s0, 8
+; RV32I-NEXT:    or s4, s2, t6
+; RV32I-NEXT:    slli s9, s1, 16
+; RV32I-NEXT:    li t6, 2
+; RV32I-NEXT:    sll s6, a6, ra
+; RV32I-NEXT:    beqz t4, .LBB18_12
+; RV32I-NEXT:  # %bb.11:
+; RV32I-NEXT:    mv t3, s6
+; RV32I-NEXT:  .LBB18_12:
+; RV32I-NEXT:    lbu s0, 20(a0)
+; RV32I-NEXT:    lbu s1, 27(a0)
+; RV32I-NEXT:    slli s3, s3, 8
+; RV32I-NEXT:    or s2, s7, a7
+; RV32I-NEXT:    srl s7, a3, t0
+; RV32I-NEXT:    or a7, s9, s4
+; RV32I-NEXT:    sw a3, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s7, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    bne t1, t6, .LBB18_14
+; RV32I-NEXT:  # %bb.13:
+; RV32I-NEXT:    or a5, s7, t3
+; RV32I-NEXT:  .LBB18_14:
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    lbu s7, 25(a0)
+; RV32I-NEXT:    lbu s4, 26(a0)
+; RV32I-NEXT:    slli s11, s1, 8
+; RV32I-NEXT:    or s9, s3, s0
+; RV32I-NEXT:    slli s2, s2, 16
+; RV32I-NEXT:    li a3, 3
+; RV32I-NEXT:    sll s10, a7, ra
+; RV32I-NEXT:    beqz t4, .LBB18_16
+; RV32I-NEXT:  # %bb.15:
+; RV32I-NEXT:    mv t3, s10
+; RV32I-NEXT:  .LBB18_16:
+; RV32I-NEXT:    lbu s1, 24(a0)
+; RV32I-NEXT:    slli s7, s7, 8
+; RV32I-NEXT:    or s3, s11, s4
 ; RV32I-NEXT:    srl t6, a6, t0
-; RV32I-NEXT:    or a0, s9, s6
-; RV32I-NEXT:    sw s5, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    sw s10, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    bne t1, s11, .LBB18_17
-; RV32I-NEXT:  # %bb.16:
+; RV32I-NEXT:    or a0, s2, s9
+; RV32I-NEXT:    bne t1, a3, .LBB18_18
+; RV32I-NEXT:  # %bb.17:
 ; RV32I-NEXT:    or a5, t6, t3
-; RV32I-NEXT:  .LBB18_17:
-; RV32I-NEXT:    li s6, 0
-; RV32I-NEXT:    or t3, s8, s4
-; RV32I-NEXT:    slli s7, s7, 16
-; RV32I-NEXT:    li s10, 4
-; RV32I-NEXT:    sll s11, a0, a3
-; RV32I-NEXT:    beqz t4, .LBB18_19
-; RV32I-NEXT:  # %bb.18:
-; RV32I-NEXT:    mv s6, s11
-; RV32I-NEXT:  .LBB18_19:
-; RV32I-NEXT:    srl s4, a7, t0
-; RV32I-NEXT:    or t3, s7, t3
-; RV32I-NEXT:    sw s4, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    bne t1, s10, .LBB18_21
-; RV32I-NEXT:  # %bb.20:
-; RV32I-NEXT:    or a5, s4, s6
-; RV32I-NEXT:  .LBB18_21:
-; RV32I-NEXT:    li s4, 0
-; RV32I-NEXT:    li s5, 5
-; RV32I-NEXT:    sll s6, t3, a3
-; RV32I-NEXT:    sw s6, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    beqz t4, .LBB18_23
-; RV32I-NEXT:  # %bb.22:
-; RV32I-NEXT:    lw s4, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT:  .LBB18_23:
-; RV32I-NEXT:    srl s6, a0, t0
-; RV32I-NEXT:    beq t1, s5, .LBB18_25
-; RV32I-NEXT:  # %bb.24:
-; RV32I-NEXT:    mv ra, s6
-; RV32I-NEXT:    j .LBB18_26
-; RV32I-NEXT:  .LBB18_25:
-; RV32I-NEXT:    mv ra, s6
-; RV32I-NEXT:    or a5, s6, s4
+; RV32I-NEXT:  .LBB18_18:
+; RV32I-NEXT:    li s2, 0
+; RV32I-NEXT:    or t3, s7, s1
+; RV32I-NEXT:    slli s3, s3, 16
+; RV32I-NEXT:    sll s11, a0, ra
+; RV32I-NEXT:    beqz t4, .LBB18_20
+; RV32I-NEXT:  # %bb.19:
+; RV32I-NEXT:    mv s2, s11
+; RV32I-NEXT:  .LBB18_20:
+; RV32I-NEXT:    srl t6, a7, t0
+; RV32I-NEXT:    or t3, s3, t3
+; RV32I-NEXT:    li a3, 4
+; RV32I-NEXT:    bne t1, a3, .LBB18_22
+; RV32I-NEXT:  # %bb.21:
+; RV32I-NEXT:    or a5, t6, s2
+; RV32I-NEXT:  .LBB18_22:
+; RV32I-NEXT:    li s3, 0
+; RV32I-NEXT:    li s0, 5
+; RV32I-NEXT:    sll s9, t3, ra
+; RV32I-NEXT:    beqz t4, .LBB18_24
+; RV32I-NEXT:  # %bb.23:
+; RV32I-NEXT:    mv s3, s9
+; RV32I-NEXT:  .LBB18_24:
+; RV32I-NEXT:    srl a3, a0, t0
+; RV32I-NEXT:    beq t1, s0, .LBB18_26
+; RV32I-NEXT:  # %bb.25:
+; RV32I-NEXT:    mv s1, a3
+; RV32I-NEXT:    j .LBB18_27
 ; RV32I-NEXT:  .LBB18_26:
+; RV32I-NEXT:    mv s1, a3
+; RV32I-NEXT:    or a5, a3, s3
+; RV32I-NEXT:  .LBB18_27:
 ; RV32I-NEXT:    li s4, 0
-; RV32I-NEXT:    li s8, 6
-; RV32I-NEXT:    sll s7, a1, a3
-; RV32I-NEXT:    beqz t4, .LBB18_28
-; RV32I-NEXT:  # %bb.27:
+; RV32I-NEXT:    li s3, 6
+; RV32I-NEXT:    sll s7, a1, ra
+; RV32I-NEXT:    beqz t4, .LBB18_29
+; RV32I-NEXT:  # %bb.28:
 ; RV32I-NEXT:    mv s4, s7
-; RV32I-NEXT:  .LBB18_28:
-; RV32I-NEXT:    srl s5, t3, t0
-; RV32I-NEXT:    beq t1, s8, .LBB18_30
-; RV32I-NEXT:  # %bb.29:
-; RV32I-NEXT:    mv s9, s5
-; RV32I-NEXT:    j .LBB18_31
-; RV32I-NEXT:  .LBB18_30:
-; RV32I-NEXT:    mv s9, s5
-; RV32I-NEXT:    or a5, s5, s4
+; RV32I-NEXT:  .LBB18_29:
+; RV32I-NEXT:    srl s0, t3, t0
+; RV32I-NEXT:    sw s5, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    mv s2, t6
+; RV32I-NEXT:    bne t1, s3, .LBB18_31
+; RV32I-NEXT:  # %bb.30:
+; RV32I-NEXT:    or a5, s0, s4
 ; RV32I-NEXT:  .LBB18_31:
 ; RV32I-NEXT:    li s5, 0
-; RV32I-NEXT:    li s6, 7
-; RV32I-NEXT:    sll s4, t2, a3
+; RV32I-NEXT:    li s4, 7
+; RV32I-NEXT:    sll t6, t2, ra
 ; RV32I-NEXT:    beqz t4, .LBB18_33
 ; RV32I-NEXT:  # %bb.32:
-; RV32I-NEXT:    mv s5, s4
+; RV32I-NEXT:    mv s5, t6
 ; RV32I-NEXT:  .LBB18_33:
 ; RV32I-NEXT:    srl a3, a1, t0
-; RV32I-NEXT:    bne t1, s6, .LBB18_35
+; RV32I-NEXT:    mv ra, a4
+; RV32I-NEXT:    beq t1, s4, .LBB18_35
 ; RV32I-NEXT:  # %bb.34:
-; RV32I-NEXT:    or a5, a3, s5
+; RV32I-NEXT:    mv a4, a3
+; RV32I-NEXT:    mv a3, s0
+; RV32I-NEXT:    bnez t0, .LBB18_36
+; RV32I-NEXT:    j .LBB18_37
 ; RV32I-NEXT:  .LBB18_35:
-; RV32I-NEXT:    li s5, 3
-; RV32I-NEXT:    mv s6, a3
-; RV32I-NEXT:    bnez t0, .LBB18_39
-; RV32I-NEXT:  # %bb.36:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t4, .LBB18_40
+; RV32I-NEXT:    mv a4, a3
+; RV32I-NEXT:    or a5, a3, s5
+; RV32I-NEXT:    mv a3, s0
+; RV32I-NEXT:    beqz t0, .LBB18_37
+; RV32I-NEXT:  .LBB18_36:
+; RV32I-NEXT:    mv ra, a5
 ; RV32I-NEXT:  .LBB18_37:
+; RV32I-NEXT:    li a5, 0
+; RV32I-NEXT:    li s0, 3
+; RV32I-NEXT:    beqz t4, .LBB18_39
+; RV32I-NEXT:  # %bb.38:
+; RV32I-NEXT:    mv a5, s8
+; RV32I-NEXT:  .LBB18_39:
 ; RV32I-NEXT:    beqz t1, .LBB18_41
-; RV32I-NEXT:  .LBB18_38:
+; RV32I-NEXT:  # %bb.40:
 ; RV32I-NEXT:    mv a5, t2
 ; RV32I-NEXT:    j .LBB18_42
-; RV32I-NEXT:  .LBB18_39:
-; RV32I-NEXT:    sw a5, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t4, .LBB18_37
-; RV32I-NEXT:  .LBB18_40:
-; RV32I-NEXT:    mv a3, s2
-; RV32I-NEXT:    bnez t1, .LBB18_38
 ; RV32I-NEXT:  .LBB18_41:
-; RV32I-NEXT:    or a5, s3, a3
+; RV32I-NEXT:    lw s5, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    or a5, s5, a5
 ; RV32I-NEXT:  .LBB18_42:
-; RV32I-NEXT:    li s2, 1
-; RV32I-NEXT:    li s3, 2
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t4, .LBB18_61
+; RV32I-NEXT:    mv s8, a4
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    beqz t4, .LBB18_44
 ; RV32I-NEXT:  # %bb.43:
-; RV32I-NEXT:    beq t1, s2, .LBB18_62
+; RV32I-NEXT:    mv s5, s6
 ; RV32I-NEXT:  .LBB18_44:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t4, .LBB18_63
-; RV32I-NEXT:  .LBB18_45:
-; RV32I-NEXT:    beq t1, s3, .LBB18_64
+; RV32I-NEXT:    bne t1, t5, .LBB18_46
+; RV32I-NEXT:  # %bb.45:
+; RV32I-NEXT:    lw a4, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    or a5, a4, s5
 ; RV32I-NEXT:  .LBB18_46:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t4, .LBB18_65
-; RV32I-NEXT:  .LBB18_47:
-; RV32I-NEXT:    beq t1, s5, .LBB18_66
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    li t5, 2
+; RV32I-NEXT:    bnez t4, .LBB18_61
+; RV32I-NEXT:  # %bb.47:
+; RV32I-NEXT:    beq t1, t5, .LBB18_62
 ; RV32I-NEXT:  .LBB18_48:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t4, .LBB18_67
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    bnez t4, .LBB18_63
 ; RV32I-NEXT:  .LBB18_49:
-; RV32I-NEXT:    bne t1, s10, .LBB18_51
+; RV32I-NEXT:    beq t1, s0, .LBB18_64
 ; RV32I-NEXT:  .LBB18_50:
-; RV32I-NEXT:    or a5, ra, a3
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    bnez t4, .LBB18_65
 ; RV32I-NEXT:  .LBB18_51:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    li s10, 5
-; RV32I-NEXT:    bnez t4, .LBB18_68
-; RV32I-NEXT:  # %bb.52:
-; RV32I-NEXT:    beq t1, s10, .LBB18_69
+; RV32I-NEXT:    li a4, 4
+; RV32I-NEXT:    beq t1, a4, .LBB18_66
+; RV32I-NEXT:  .LBB18_52:
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    bnez t4, .LBB18_67
 ; RV32I-NEXT:  .LBB18_53:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t4, .LBB18_70
+; RV32I-NEXT:    li a4, 5
+; RV32I-NEXT:    beq t1, a4, .LBB18_68
 ; RV32I-NEXT:  .LBB18_54:
-; RV32I-NEXT:    bne t1, s8, .LBB18_56
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    bnez t4, .LBB18_69
 ; RV32I-NEXT:  .LBB18_55:
-; RV32I-NEXT:    or a5, s6, a3
+; RV32I-NEXT:    beq t1, s3, .LBB18_70
 ; RV32I-NEXT:  .LBB18_56:
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    li s8, 7
-; RV32I-NEXT:    bne t1, s8, .LBB18_71
-; RV32I-NEXT:  # %bb.57:
+; RV32I-NEXT:    mv s5, t2
+; RV32I-NEXT:    bne t1, s4, .LBB18_71
+; RV32I-NEXT:  .LBB18_57:
 ; RV32I-NEXT:    bnez t0, .LBB18_72
 ; RV32I-NEXT:  .LBB18_58:
 ; RV32I-NEXT:    li a5, 0
@@ -10090,565 +10158,570 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
 ; RV32I-NEXT:    mv a5, t2
 ; RV32I-NEXT:    j .LBB18_75
 ; RV32I-NEXT:  .LBB18_61:
-; RV32I-NEXT:    mv a3, s0
-; RV32I-NEXT:    bne t1, s2, .LBB18_44
+; RV32I-NEXT:    mv s5, s10
+; RV32I-NEXT:    bne t1, t5, .LBB18_48
 ; RV32I-NEXT:  .LBB18_62:
-; RV32I-NEXT:    or a5, s1, a3
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t4, .LBB18_45
+; RV32I-NEXT:    srl a4, a6, t0
+; RV32I-NEXT:    or a5, a4, s5
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    beqz t4, .LBB18_49
 ; RV32I-NEXT:  .LBB18_63:
-; RV32I-NEXT:    mv a3, t5
-; RV32I-NEXT:    bne t1, s3, .LBB18_46
+; RV32I-NEXT:    mv s5, s11
+; RV32I-NEXT:    bne t1, s0, .LBB18_50
 ; RV32I-NEXT:  .LBB18_64:
-; RV32I-NEXT:    or a5, t6, a3
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t4, .LBB18_47
+; RV32I-NEXT:    or a5, s2, s5
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    beqz t4, .LBB18_51
 ; RV32I-NEXT:  .LBB18_65:
-; RV32I-NEXT:    mv a3, s11
-; RV32I-NEXT:    bne t1, s5, .LBB18_48
+; RV32I-NEXT:    mv s5, s9
+; RV32I-NEXT:    li a4, 4
+; RV32I-NEXT:    bne t1, a4, .LBB18_52
 ; RV32I-NEXT:  .LBB18_66:
-; RV32I-NEXT:    lw a5, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    or a5, a5, a3
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t4, .LBB18_49
+; RV32I-NEXT:    or a5, s1, s5
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    beqz t4, .LBB18_53
 ; RV32I-NEXT:  .LBB18_67:
-; RV32I-NEXT:    lw a3, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    beq t1, s10, .LBB18_50
-; RV32I-NEXT:    j .LBB18_51
+; RV32I-NEXT:    mv s5, s7
+; RV32I-NEXT:    li a4, 5
+; RV32I-NEXT:    bne t1, a4, .LBB18_54
 ; RV32I-NEXT:  .LBB18_68:
-; RV32I-NEXT:    mv a3, s7
-; RV32I-NEXT:    bne t1, s10, .LBB18_53
+; RV32I-NEXT:    or a5, a3, s5
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    beqz t4, .LBB18_55
 ; RV32I-NEXT:  .LBB18_69:
-; RV32I-NEXT:    or a5, s9, a3
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t4, .LBB18_54
+; RV32I-NEXT:    mv s5, t6
+; RV32I-NEXT:    bne t1, s3, .LBB18_56
 ; RV32I-NEXT:  .LBB18_70:
-; RV32I-NEXT:    mv a3, s4
-; RV32I-NEXT:    beq t1, s8, .LBB18_55
-; RV32I-NEXT:    j .LBB18_56
+; RV32I-NEXT:    or a5, s8, s5
+; RV32I-NEXT:    mv s5, t2
+; RV32I-NEXT:    beq t1, s4, .LBB18_57
 ; RV32I-NEXT:  .LBB18_71:
-; RV32I-NEXT:    mv a3, a5
+; RV32I-NEXT:    mv s5, a5
 ; RV32I-NEXT:    beqz t0, .LBB18_58
 ; RV32I-NEXT:  .LBB18_72:
-; RV32I-NEXT:    sw a3, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s5, 24(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    li a5, 0
 ; RV32I-NEXT:    beqz t4, .LBB18_59
 ; RV32I-NEXT:  .LBB18_73:
-; RV32I-NEXT:    mv a5, s0
+; RV32I-NEXT:    mv a5, s6
 ; RV32I-NEXT:    bnez t1, .LBB18_60
 ; RV32I-NEXT:  .LBB18_74:
-; RV32I-NEXT:    or a5, s1, a5
+; RV32I-NEXT:    lw a4, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    or a5, a4, a5
 ; RV32I-NEXT:  .LBB18_75:
-; RV32I-NEXT:    li s0, 4
-; RV32I-NEXT:    li s1, 6
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    li s6, 1
+; RV32I-NEXT:    li s5, 0
 ; RV32I-NEXT:    bnez t4, .LBB18_91
 ; RV32I-NEXT:  # %bb.76:
-; RV32I-NEXT:    beq t1, s2, .LBB18_92
+; RV32I-NEXT:    beq t1, s6, .LBB18_92
 ; RV32I-NEXT:  .LBB18_77:
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    li s5, 0
 ; RV32I-NEXT:    bnez t4, .LBB18_93
 ; RV32I-NEXT:  .LBB18_78:
-; RV32I-NEXT:    beq t1, s3, .LBB18_94
+; RV32I-NEXT:    beq t1, t5, .LBB18_94
 ; RV32I-NEXT:  .LBB18_79:
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    li s5, 0
 ; RV32I-NEXT:    bnez t4, .LBB18_95
 ; RV32I-NEXT:  .LBB18_80:
-; RV32I-NEXT:    beq t1, s5, .LBB18_96
+; RV32I-NEXT:    beq t1, s0, .LBB18_96
 ; RV32I-NEXT:  .LBB18_81:
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    li s5, 0
 ; RV32I-NEXT:    bnez t4, .LBB18_97
 ; RV32I-NEXT:  .LBB18_82:
-; RV32I-NEXT:    beq t1, s0, .LBB18_98
+; RV32I-NEXT:    li a4, 4
+; RV32I-NEXT:    beq t1, a4, .LBB18_98
 ; RV32I-NEXT:  .LBB18_83:
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    li s5, 0
 ; RV32I-NEXT:    bnez t4, .LBB18_99
 ; RV32I-NEXT:  .LBB18_84:
-; RV32I-NEXT:    beq t1, s10, .LBB18_100
+; RV32I-NEXT:    li a4, 5
+; RV32I-NEXT:    beq t1, a4, .LBB18_100
 ; RV32I-NEXT:  .LBB18_85:
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    bne t1, s1, .LBB18_101
+; RV32I-NEXT:    mv s5, t2
+; RV32I-NEXT:    bne t1, s3, .LBB18_101
 ; RV32I-NEXT:  .LBB18_86:
 ; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    bne t1, s8, .LBB18_102
+; RV32I-NEXT:    bne t1, s4, .LBB18_102
 ; RV32I-NEXT:  .LBB18_87:
 ; RV32I-NEXT:    bnez t0, .LBB18_103
 ; RV32I-NEXT:  .LBB18_88:
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    li a5, 0
 ; RV32I-NEXT:    bnez t4, .LBB18_104
 ; RV32I-NEXT:  .LBB18_89:
 ; RV32I-NEXT:    beqz t1, .LBB18_105
 ; RV32I-NEXT:  .LBB18_90:
 ; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t4, .LBB18_106
-; RV32I-NEXT:    j .LBB18_107
+; RV32I-NEXT:    j .LBB18_106
 ; RV32I-NEXT:  .LBB18_91:
-; RV32I-NEXT:    mv a3, t5
-; RV32I-NEXT:    bne t1, s2, .LBB18_77
+; RV32I-NEXT:    mv s5, s10
+; RV32I-NEXT:    bne t1, s6, .LBB18_77
 ; RV32I-NEXT:  .LBB18_92:
-; RV32I-NEXT:    or a5, t6, a3
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    srl a4, a6, t0
+; RV32I-NEXT:    or a5, a4, s5
+; RV32I-NEXT:    li s5, 0
 ; RV32I-NEXT:    beqz t4, .LBB18_78
 ; RV32I-NEXT:  .LBB18_93:
-; RV32I-NEXT:    mv a3, s11
-; RV32I-NEXT:    bne t1, s3, .LBB18_79
+; RV32I-NEXT:    mv s5, s11
+; RV32I-NEXT:    bne t1, t5, .LBB18_79
 ; RV32I-NEXT:  .LBB18_94:
-; RV32I-NEXT:    lw a5, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    or a5, a5, a3
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    or a5, s2, s5
+; RV32I-NEXT:    li s5, 0
 ; RV32I-NEXT:    beqz t4, .LBB18_80
 ; RV32I-NEXT:  .LBB18_95:
-; RV32I-NEXT:    lw a3, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    bne t1, s5, .LBB18_81
+; RV32I-NEXT:    mv s5, s9
+; RV32I-NEXT:    bne t1, s0, .LBB18_81
 ; RV32I-NEXT:  .LBB18_96:
-; RV32I-NEXT:    or a5, ra, a3
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    or a5, s1, s5
+; RV32I-NEXT:    li s5, 0
 ; RV32I-NEXT:    beqz t4, .LBB18_82
 ; RV32I-NEXT:  .LBB18_97:
-; RV32I-NEXT:    mv a3, s7
-; RV32I-NEXT:    bne t1, s0, .LBB18_83
+; RV32I-NEXT:    mv s5, s7
+; RV32I-NEXT:    li a4, 4
+; RV32I-NEXT:    bne t1, a4, .LBB18_83
 ; RV32I-NEXT:  .LBB18_98:
-; RV32I-NEXT:    or a5, s9, a3
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    or a5, a3, s5
+; RV32I-NEXT:    li s5, 0
 ; RV32I-NEXT:    beqz t4, .LBB18_84
 ; RV32I-NEXT:  .LBB18_99:
-; RV32I-NEXT:    mv a3, s4
-; RV32I-NEXT:    bne t1, s10, .LBB18_85
+; RV32I-NEXT:    mv s5, t6
+; RV32I-NEXT:    li a4, 5
+; RV32I-NEXT:    bne t1, a4, .LBB18_85
 ; RV32I-NEXT:  .LBB18_100:
-; RV32I-NEXT:    or a5, s6, a3
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    beq t1, s1, .LBB18_86
+; RV32I-NEXT:    or a5, s8, s5
+; RV32I-NEXT:    mv s5, t2
+; RV32I-NEXT:    beq t1, s3, .LBB18_86
 ; RV32I-NEXT:  .LBB18_101:
-; RV32I-NEXT:    mv a3, a5
+; RV32I-NEXT:    mv s5, a5
 ; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    beq t1, s8, .LBB18_87
+; RV32I-NEXT:    beq t1, s4, .LBB18_87
 ; RV32I-NEXT:  .LBB18_102:
-; RV32I-NEXT:    mv a5, a3
+; RV32I-NEXT:    mv a5, s5
 ; RV32I-NEXT:    beqz t0, .LBB18_88
 ; RV32I-NEXT:  .LBB18_103:
-; RV32I-NEXT:    mv a4, a5
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    sw a5, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    li a5, 0
 ; RV32I-NEXT:    beqz t4, .LBB18_89
 ; RV32I-NEXT:  .LBB18_104:
-; RV32I-NEXT:    mv a3, t5
+; RV32I-NEXT:    mv a5, s10
 ; RV32I-NEXT:    bnez t1, .LBB18_90
 ; RV32I-NEXT:  .LBB18_105:
-; RV32I-NEXT:    or a5, t6, a3
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t4, .LBB18_107
+; RV32I-NEXT:    srl a4, a6, t0
+; RV32I-NEXT:    or a5, a4, a5
 ; RV32I-NEXT:  .LBB18_106:
-; RV32I-NEXT:    mv a3, s11
-; RV32I-NEXT:  .LBB18_107:
-; RV32I-NEXT:    beq t1, s2, .LBB18_121
-; RV32I-NEXT:  # %bb.108:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t4, .LBB18_122
+; RV32I-NEXT:    lw a4, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    li s10, 4
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    bnez t4, .LBB18_124
+; RV32I-NEXT:  # %bb.107:
+; RV32I-NEXT:    beq t1, s6, .LBB18_125
+; RV32I-NEXT:  .LBB18_108:
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    bnez t4, .LBB18_126
 ; RV32I-NEXT:  .LBB18_109:
-; RV32I-NEXT:    beq t1, s3, .LBB18_123
+; RV32I-NEXT:    beq t1, t5, .LBB18_127
 ; RV32I-NEXT:  .LBB18_110:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t4, .LBB18_124
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    bnez t4, .LBB18_128
 ; RV32I-NEXT:  .LBB18_111:
-; RV32I-NEXT:    beq t1, s5, .LBB18_125
+; RV32I-NEXT:    beq t1, s0, .LBB18_129
 ; RV32I-NEXT:  .LBB18_112:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t4, .LBB18_126
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    bnez t4, .LBB18_130
 ; RV32I-NEXT:  .LBB18_113:
-; RV32I-NEXT:    beq t1, s0, .LBB18_127
+; RV32I-NEXT:    mv s0, a3
+; RV32I-NEXT:    bne t1, s10, .LBB18_115
 ; RV32I-NEXT:  .LBB18_114:
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    bne t1, s10, .LBB18_128
+; RV32I-NEXT:    or a5, s8, s5
 ; RV32I-NEXT:  .LBB18_115:
-; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    bne t1, s1, .LBB18_129
-; RV32I-NEXT:  .LBB18_116:
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    bne t1, s8, .LBB18_130
+; RV32I-NEXT:    mv s5, t2
+; RV32I-NEXT:    li a3, 5
+; RV32I-NEXT:    beq t1, a3, .LBB18_117
+; RV32I-NEXT:  # %bb.116:
+; RV32I-NEXT:    mv s5, a5
 ; RV32I-NEXT:  .LBB18_117:
-; RV32I-NEXT:    bnez t0, .LBB18_131
-; RV32I-NEXT:  .LBB18_118:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t4, .LBB18_132
-; RV32I-NEXT:  .LBB18_119:
-; RV32I-NEXT:    beqz t1, .LBB18_133
-; RV32I-NEXT:  .LBB18_120:
 ; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    j .LBB18_134
+; RV32I-NEXT:    beq t1, s3, .LBB18_119
+; RV32I-NEXT:  # %bb.118:
+; RV32I-NEXT:    mv a5, s5
+; RV32I-NEXT:  .LBB18_119:
+; RV32I-NEXT:    mv s5, t2
+; RV32I-NEXT:    mv a3, s0
+; RV32I-NEXT:    bne t1, s4, .LBB18_131
+; RV32I-NEXT:  # %bb.120:
+; RV32I-NEXT:    li s0, 3
+; RV32I-NEXT:    bnez t0, .LBB18_132
 ; RV32I-NEXT:  .LBB18_121:
-; RV32I-NEXT:    lw a5, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    or a5, a5, a3
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t4, .LBB18_109
+; RV32I-NEXT:    li a5, 0
+; RV32I-NEXT:    bnez t4, .LBB18_133
 ; RV32I-NEXT:  .LBB18_122:
-; RV32I-NEXT:    lw a3, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    bne t1, s3, .LBB18_110
+; RV32I-NEXT:    beqz t1, .LBB18_134
 ; RV32I-NEXT:  .LBB18_123:
-; RV32I-NEXT:    or a5, ra, a3
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t4, .LBB18_111
+; RV32I-NEXT:    mv a5, t2
+; RV32I-NEXT:    j .LBB18_135
 ; RV32I-NEXT:  .LBB18_124:
-; RV32I-NEXT:    mv a3, s7
-; RV32I-NEXT:    bne t1, s5, .LBB18_112
+; RV32I-NEXT:    mv s5, s11
+; RV32I-NEXT:    bne t1, s6, .LBB18_108
 ; RV32I-NEXT:  .LBB18_125:
-; RV32I-NEXT:    or a5, s9, a3
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t4, .LBB18_113
+; RV32I-NEXT:    or a5, s2, s5
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    beqz t4, .LBB18_109
 ; RV32I-NEXT:  .LBB18_126:
-; RV32I-NEXT:    mv a3, s4
-; RV32I-NEXT:    bne t1, s0, .LBB18_114
+; RV32I-NEXT:    mv s5, s9
+; RV32I-NEXT:    bne t1, t5, .LBB18_110
 ; RV32I-NEXT:  .LBB18_127:
-; RV32I-NEXT:    or a5, s6, a3
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    beq t1, s10, .LBB18_115
+; RV32I-NEXT:    or a5, s1, s5
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    beqz t4, .LBB18_111
 ; RV32I-NEXT:  .LBB18_128:
-; RV32I-NEXT:    mv a3, a5
-; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    beq t1, s1, .LBB18_116
+; RV32I-NEXT:    mv s5, s7
+; RV32I-NEXT:    bne t1, s0, .LBB18_112
 ; RV32I-NEXT:  .LBB18_129:
-; RV32I-NEXT:    mv a5, a3
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    beq t1, s8, .LBB18_117
+; RV32I-NEXT:    or a5, a3, s5
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    beqz t4, .LBB18_113
 ; RV32I-NEXT:  .LBB18_130:
-; RV32I-NEXT:    mv a3, a5
-; RV32I-NEXT:    beqz t0, .LBB18_118
+; RV32I-NEXT:    mv s5, t6
+; RV32I-NEXT:    mv s0, a3
+; RV32I-NEXT:    beq t1, s10, .LBB18_114
+; RV32I-NEXT:    j .LBB18_115
 ; RV32I-NEXT:  .LBB18_131:
-; RV32I-NEXT:    mv a6, a3
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t4, .LBB18_119
+; RV32I-NEXT:    mv s5, a5
+; RV32I-NEXT:    li s0, 3
+; RV32I-NEXT:    beqz t0, .LBB18_121
 ; RV32I-NEXT:  .LBB18_132:
-; RV32I-NEXT:    mv a3, s11
-; RV32I-NEXT:    bnez t1, .LBB18_120
+; RV32I-NEXT:    mv a6, s5
+; RV32I-NEXT:    li a5, 0
+; RV32I-NEXT:    beqz t4, .LBB18_122
 ; RV32I-NEXT:  .LBB18_133:
-; RV32I-NEXT:    lw a5, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    or a5, a5, a3
+; RV32I-NEXT:    mv a5, s11
+; RV32I-NEXT:    bnez t1, .LBB18_123
 ; RV32I-NEXT:  .LBB18_134:
-; RV32I-NEXT:    lw s11, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t4, .LBB18_148
-; RV32I-NEXT:  # %bb.135:
-; RV32I-NEXT:    beq t1, s2, .LBB18_149
-; RV32I-NEXT:  .LBB18_136:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t4, .LBB18_150
+; RV32I-NEXT:    or a5, s2, a5
+; RV32I-NEXT:  .LBB18_135:
+; RV32I-NEXT:    li s2, 5
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    bnez t4, .LBB18_149
+; RV32I-NEXT:  # %bb.136:
+; RV32I-NEXT:    beq t1, s6, .LBB18_150
 ; RV32I-NEXT:  .LBB18_137:
-; RV32I-NEXT:    beq t1, s3, .LBB18_151
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    bnez t4, .LBB18_151
 ; RV32I-NEXT:  .LBB18_138:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t4, .LBB18_152
+; RV32I-NEXT:    beq t1, t5, .LBB18_152
 ; RV32I-NEXT:  .LBB18_139:
-; RV32I-NEXT:    beq t1, s5, .LBB18_153
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    bnez t4, .LBB18_153
 ; RV32I-NEXT:  .LBB18_140:
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    bne t1, s0, .LBB18_154
+; RV32I-NEXT:    beq t1, s0, .LBB18_154
 ; RV32I-NEXT:  .LBB18_141:
-; RV32I-NEXT:    mv a5, t2
+; RV32I-NEXT:    mv s5, t2
 ; RV32I-NEXT:    bne t1, s10, .LBB18_155
 ; RV32I-NEXT:  .LBB18_142:
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    bne t1, s1, .LBB18_156
-; RV32I-NEXT:  .LBB18_143:
 ; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    bne t1, s8, .LBB18_157
+; RV32I-NEXT:    bne t1, s2, .LBB18_156
+; RV32I-NEXT:  .LBB18_143:
+; RV32I-NEXT:    mv s5, t2
+; RV32I-NEXT:    bne t1, s3, .LBB18_157
 ; RV32I-NEXT:  .LBB18_144:
-; RV32I-NEXT:    bnez t0, .LBB18_158
+; RV32I-NEXT:    mv a5, t2
+; RV32I-NEXT:    bne t1, s4, .LBB18_158
 ; RV32I-NEXT:  .LBB18_145:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t4, .LBB18_159
+; RV32I-NEXT:    bnez t0, .LBB18_159
 ; RV32I-NEXT:  .LBB18_146:
-; RV32I-NEXT:    beqz t1, .LBB18_160
+; RV32I-NEXT:    li a5, 0
+; RV32I-NEXT:    bnez t4, .LBB18_160
 ; RV32I-NEXT:  .LBB18_147:
-; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t4, .LBB18_161
-; RV32I-NEXT:    j .LBB18_162
+; RV32I-NEXT:    beqz t1, .LBB18_161
 ; RV32I-NEXT:  .LBB18_148:
-; RV32I-NEXT:    lw a3, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    bne t1, s2, .LBB18_136
+; RV32I-NEXT:    mv a5, t2
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    bnez t4, .LBB18_162
+; RV32I-NEXT:    j .LBB18_163
 ; RV32I-NEXT:  .LBB18_149:
-; RV32I-NEXT:    or a5, ra, a3
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t4, .LBB18_137
+; RV32I-NEXT:    mv s5, s9
+; RV32I-NEXT:    bne t1, s6, .LBB18_137
 ; RV32I-NEXT:  .LBB18_150:
-; RV32I-NEXT:    mv a3, s7
-; RV32I-NEXT:    bne t1, s3, .LBB18_138
+; RV32I-NEXT:    or a5, s1, s5
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    beqz t4, .LBB18_138
 ; RV32I-NEXT:  .LBB18_151:
-; RV32I-NEXT:    or a5, s9, a3
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t4, .LBB18_139
+; RV32I-NEXT:    mv s5, s7
+; RV32I-NEXT:    bne t1, t5, .LBB18_139
 ; RV32I-NEXT:  .LBB18_152:
-; RV32I-NEXT:    mv a3, s4
-; RV32I-NEXT:    bne t1, s5, .LBB18_140
+; RV32I-NEXT:    or a5, a3, s5
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    beqz t4, .LBB18_140
 ; RV32I-NEXT:  .LBB18_153:
-; RV32I-NEXT:    or a5, s6, a3
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    beq t1, s0, .LBB18_141
+; RV32I-NEXT:    mv s5, t6
+; RV32I-NEXT:    bne t1, s0, .LBB18_141
 ; RV32I-NEXT:  .LBB18_154:
-; RV32I-NEXT:    mv a3, a5
-; RV32I-NEXT:    mv a5, t2
+; RV32I-NEXT:    or a5, s8, s5
+; RV32I-NEXT:    mv s5, t2
 ; RV32I-NEXT:    beq t1, s10, .LBB18_142
 ; RV32I-NEXT:  .LBB18_155:
-; RV32I-NEXT:    mv a5, a3
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    beq t1, s1, .LBB18_143
-; RV32I-NEXT:  .LBB18_156:
-; RV32I-NEXT:    mv a3, a5
+; RV32I-NEXT:    mv s5, a5
 ; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    beq t1, s8, .LBB18_144
+; RV32I-NEXT:    beq t1, s2, .LBB18_143
+; RV32I-NEXT:  .LBB18_156:
+; RV32I-NEXT:    mv a5, s5
+; RV32I-NEXT:    mv s5, t2
+; RV32I-NEXT:    beq t1, s3, .LBB18_144
 ; RV32I-NEXT:  .LBB18_157:
-; RV32I-NEXT:    mv a5, a3
-; RV32I-NEXT:    beqz t0, .LBB18_145
+; RV32I-NEXT:    mv s5, a5
+; RV32I-NEXT:    mv a5, t2
+; RV32I-NEXT:    beq t1, s4, .LBB18_145
 ; RV32I-NEXT:  .LBB18_158:
-; RV32I-NEXT:    mv a7, a5
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t4, .LBB18_146
+; RV32I-NEXT:    mv a5, s5
+; RV32I-NEXT:    beqz t0, .LBB18_146
 ; RV32I-NEXT:  .LBB18_159:
-; RV32I-NEXT:    lw a3, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    bnez t1, .LBB18_147
+; RV32I-NEXT:    mv a7, a5
+; RV32I-NEXT:    li a5, 0
+; RV32I-NEXT:    beqz t4, .LBB18_147
 ; RV32I-NEXT:  .LBB18_160:
-; RV32I-NEXT:    or a5, ra, a3
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t4, .LBB18_162
+; RV32I-NEXT:    mv a5, s9
+; RV32I-NEXT:    bnez t1, .LBB18_148
 ; RV32I-NEXT:  .LBB18_161:
-; RV32I-NEXT:    mv a3, s7
+; RV32I-NEXT:    or a5, s1, a5
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    beqz t4, .LBB18_163
 ; RV32I-NEXT:  .LBB18_162:
-; RV32I-NEXT:    beq t1, s2, .LBB18_174
-; RV32I-NEXT:  # %bb.163:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t4, .LBB18_175
-; RV32I-NEXT:  .LBB18_164:
-; RV32I-NEXT:    beq t1, s3, .LBB18_176
+; RV32I-NEXT:    mv s5, s7
+; RV32I-NEXT:  .LBB18_163:
+; RV32I-NEXT:    beq t1, s6, .LBB18_175
+; RV32I-NEXT:  # %bb.164:
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    bnez t4, .LBB18_176
 ; RV32I-NEXT:  .LBB18_165:
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    bne t1, s5, .LBB18_177
+; RV32I-NEXT:    beq t1, t5, .LBB18_177
 ; RV32I-NEXT:  .LBB18_166:
-; RV32I-NEXT:    mv a5, t2
+; RV32I-NEXT:    mv s5, t2
 ; RV32I-NEXT:    bne t1, s0, .LBB18_178
 ; RV32I-NEXT:  .LBB18_167:
-; RV32I-NEXT:    mv a3, t2
+; RV32I-NEXT:    mv a5, t2
 ; RV32I-NEXT:    bne t1, s10, .LBB18_179
 ; RV32I-NEXT:  .LBB18_168:
-; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    bne t1, s1, .LBB18_180
+; RV32I-NEXT:    mv s5, t2
+; RV32I-NEXT:    bne t1, s2, .LBB18_180
 ; RV32I-NEXT:  .LBB18_169:
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    bne t1, s8, .LBB18_181
+; RV32I-NEXT:    mv a5, t2
+; RV32I-NEXT:    bne t1, s3, .LBB18_181
 ; RV32I-NEXT:  .LBB18_170:
-; RV32I-NEXT:    bnez t0, .LBB18_182
+; RV32I-NEXT:    mv s5, t2
+; RV32I-NEXT:    bne t1, s4, .LBB18_182
 ; RV32I-NEXT:  .LBB18_171:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t4, .LBB18_183
+; RV32I-NEXT:    bnez t0, .LBB18_183
 ; RV32I-NEXT:  .LBB18_172:
-; RV32I-NEXT:    beqz t1, .LBB18_184
+; RV32I-NEXT:    li a5, 0
+; RV32I-NEXT:    bnez t4, .LBB18_184
 ; RV32I-NEXT:  .LBB18_173:
-; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t4, .LBB18_185
-; RV32I-NEXT:    j .LBB18_186
+; RV32I-NEXT:    beqz t1, .LBB18_185
 ; RV32I-NEXT:  .LBB18_174:
-; RV32I-NEXT:    or a5, s9, a3
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t4, .LBB18_164
+; RV32I-NEXT:    mv a5, t2
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    bnez t4, .LBB18_186
+; RV32I-NEXT:    j .LBB18_187
 ; RV32I-NEXT:  .LBB18_175:
-; RV32I-NEXT:    mv a3, s4
-; RV32I-NEXT:    bne t1, s3, .LBB18_165
+; RV32I-NEXT:    or a5, a3, s5
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    beqz t4, .LBB18_165
 ; RV32I-NEXT:  .LBB18_176:
-; RV32I-NEXT:    or a5, s6, a3
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    beq t1, s5, .LBB18_166
+; RV32I-NEXT:    mv s5, t6
+; RV32I-NEXT:    bne t1, t5, .LBB18_166
 ; RV32I-NEXT:  .LBB18_177:
-; RV32I-NEXT:    mv a3, a5
-; RV32I-NEXT:    mv a5, t2
+; RV32I-NEXT:    or a5, s8, s5
+; RV32I-NEXT:    mv s5, t2
 ; RV32I-NEXT:    beq t1, s0, .LBB18_167
 ; RV32I-NEXT:  .LBB18_178:
-; RV32I-NEXT:    mv a5, a3
-; RV32I-NEXT:    mv a3, t2
+; RV32I-NEXT:    mv s5, a5
+; RV32I-NEXT:    mv a5, t2
 ; RV32I-NEXT:    beq t1, s10, .LBB18_168
 ; RV32I-NEXT:  .LBB18_179:
-; RV32I-NEXT:    mv a3, a5
-; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    beq t1, s1, .LBB18_169
+; RV32I-NEXT:    mv a5, s5
+; RV32I-NEXT:    mv s5, t2
+; RV32I-NEXT:    beq t1, s2, .LBB18_169
 ; RV32I-NEXT:  .LBB18_180:
-; RV32I-NEXT:    mv a5, a3
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    beq t1, s8, .LBB18_170
+; RV32I-NEXT:    mv s5, a5
+; RV32I-NEXT:    mv a5, t2
+; RV32I-NEXT:    beq t1, s3, .LBB18_170
 ; RV32I-NEXT:  .LBB18_181:
-; RV32I-NEXT:    mv a3, a5
-; RV32I-NEXT:    beqz t0, .LBB18_171
+; RV32I-NEXT:    mv a5, s5
+; RV32I-NEXT:    mv s5, t2
+; RV32I-NEXT:    beq t1, s4, .LBB18_171
 ; RV32I-NEXT:  .LBB18_182:
-; RV32I-NEXT:    mv a0, a3
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t4, .LBB18_172
+; RV32I-NEXT:    mv s5, a5
+; RV32I-NEXT:    beqz t0, .LBB18_172
 ; RV32I-NEXT:  .LBB18_183:
-; RV32I-NEXT:    mv a3, s7
-; RV32I-NEXT:    bnez t1, .LBB18_173
+; RV32I-NEXT:    mv a0, s5
+; RV32I-NEXT:    li a5, 0
+; RV32I-NEXT:    beqz t4, .LBB18_173
 ; RV32I-NEXT:  .LBB18_184:
-; RV32I-NEXT:    or a5, s9, a3
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t4, .LBB18_186
+; RV32I-NEXT:    mv a5, s7
+; RV32I-NEXT:    bnez t1, .LBB18_174
 ; RV32I-NEXT:  .LBB18_185:
-; RV32I-NEXT:    mv a3, s4
+; RV32I-NEXT:    or a5, a3, a5
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    beqz t4, .LBB18_187
 ; RV32I-NEXT:  .LBB18_186:
-; RV32I-NEXT:    beq t1, s2, .LBB18_197
-; RV32I-NEXT:  # %bb.187:
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    bne t1, s3, .LBB18_198
-; RV32I-NEXT:  .LBB18_188:
-; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    bne t1, s5, .LBB18_199
+; RV32I-NEXT:    mv s5, t6
+; RV32I-NEXT:  .LBB18_187:
+; RV32I-NEXT:    beq t1, s6, .LBB18_200
+; RV32I-NEXT:  # %bb.188:
+; RV32I-NEXT:    mv s5, t2
+; RV32I-NEXT:    bne t1, t5, .LBB18_201
 ; RV32I-NEXT:  .LBB18_189:
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    bne t1, s0, .LBB18_200
-; RV32I-NEXT:  .LBB18_190:
 ; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    bne t1, s10, .LBB18_201
+; RV32I-NEXT:    bne t1, s0, .LBB18_202
+; RV32I-NEXT:  .LBB18_190:
+; RV32I-NEXT:    mv s5, t2
+; RV32I-NEXT:    bne t1, s10, .LBB18_203
 ; RV32I-NEXT:  .LBB18_191:
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    bne t1, s1, .LBB18_202
-; RV32I-NEXT:  .LBB18_192:
 ; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    bne t1, s8, .LBB18_203
+; RV32I-NEXT:    bne t1, s2, .LBB18_204
+; RV32I-NEXT:  .LBB18_192:
+; RV32I-NEXT:    mv s5, t2
+; RV32I-NEXT:    bne t1, s3, .LBB18_205
 ; RV32I-NEXT:  .LBB18_193:
-; RV32I-NEXT:    bnez t0, .LBB18_204
+; RV32I-NEXT:    mv a5, t2
+; RV32I-NEXT:    bne t1, s4, .LBB18_206
 ; RV32I-NEXT:  .LBB18_194:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t4, .LBB18_205
+; RV32I-NEXT:    beqz t0, .LBB18_196
 ; RV32I-NEXT:  .LBB18_195:
-; RV32I-NEXT:    beqz t1, .LBB18_206
+; RV32I-NEXT:    mv t3, a5
 ; RV32I-NEXT:  .LBB18_196:
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    bne t1, s2, .LBB18_207
-; RV32I-NEXT:    j .LBB18_208
-; RV32I-NEXT:  .LBB18_197:
-; RV32I-NEXT:    or a5, s6, a3
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    beq t1, s3, .LBB18_188
+; RV32I-NEXT:    li a5, 0
+; RV32I-NEXT:    lw a3, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    beqz t4, .LBB18_198
+; RV32I-NEXT:  # %bb.197:
+; RV32I-NEXT:    mv a5, t6
 ; RV32I-NEXT:  .LBB18_198:
-; RV32I-NEXT:    mv a3, a5
+; RV32I-NEXT:    beqz t1, .LBB18_207
+; RV32I-NEXT:  # %bb.199:
 ; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    beq t1, s5, .LBB18_189
-; RV32I-NEXT:  .LBB18_199:
-; RV32I-NEXT:    mv a5, a3
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    beq t1, s0, .LBB18_190
+; RV32I-NEXT:    mv t4, t2
+; RV32I-NEXT:    bne t1, s6, .LBB18_208
+; RV32I-NEXT:    j .LBB18_209
 ; RV32I-NEXT:  .LBB18_200:
-; RV32I-NEXT:    mv a3, a5
-; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    beq t1, s10, .LBB18_191
+; RV32I-NEXT:    or a5, s8, s5
+; RV32I-NEXT:    mv s5, t2
+; RV32I-NEXT:    beq t1, t5, .LBB18_189
 ; RV32I-NEXT:  .LBB18_201:
-; RV32I-NEXT:    mv a5, a3
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    beq t1, s1, .LBB18_192
-; RV32I-NEXT:  .LBB18_202:
-; RV32I-NEXT:    mv a3, a5
+; RV32I-NEXT:    mv s5, a5
 ; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    beq t1, s8, .LBB18_193
+; RV32I-NEXT:    beq t1, s0, .LBB18_190
+; RV32I-NEXT:  .LBB18_202:
+; RV32I-NEXT:    mv a5, s5
+; RV32I-NEXT:    mv s5, t2
+; RV32I-NEXT:    beq t1, s10, .LBB18_191
 ; RV32I-NEXT:  .LBB18_203:
-; RV32I-NEXT:    mv a5, a3
-; RV32I-NEXT:    beqz t0, .LBB18_194
+; RV32I-NEXT:    mv s5, a5
+; RV32I-NEXT:    mv a5, t2
+; RV32I-NEXT:    beq t1, s2, .LBB18_192
 ; RV32I-NEXT:  .LBB18_204:
-; RV32I-NEXT:    mv t3, a5
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t4, .LBB18_195
+; RV32I-NEXT:    mv a5, s5
+; RV32I-NEXT:    mv s5, t2
+; RV32I-NEXT:    beq t1, s3, .LBB18_193
 ; RV32I-NEXT:  .LBB18_205:
-; RV32I-NEXT:    mv a3, s4
-; RV32I-NEXT:    bnez t1, .LBB18_196
-; RV32I-NEXT:  .LBB18_206:
-; RV32I-NEXT:    or a3, s6, a3
+; RV32I-NEXT:    mv s5, a5
 ; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    beq t1, s2, .LBB18_208
+; RV32I-NEXT:    beq t1, s4, .LBB18_194
+; RV32I-NEXT:  .LBB18_206:
+; RV32I-NEXT:    mv a5, s5
+; RV32I-NEXT:    bnez t0, .LBB18_195
+; RV32I-NEXT:    j .LBB18_196
 ; RV32I-NEXT:  .LBB18_207:
-; RV32I-NEXT:    mv a5, a3
+; RV32I-NEXT:    or a5, s8, a5
+; RV32I-NEXT:    mv t4, t2
+; RV32I-NEXT:    beq t1, s6, .LBB18_209
 ; RV32I-NEXT:  .LBB18_208:
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    bne t1, s3, .LBB18_217
-; RV32I-NEXT:  # %bb.209:
+; RV32I-NEXT:    mv t4, a5
+; RV32I-NEXT:  .LBB18_209:
 ; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    bne t1, s5, .LBB18_218
-; RV32I-NEXT:  .LBB18_210:
-; RV32I-NEXT:    mv a3, t2
+; RV32I-NEXT:    bne t1, t5, .LBB18_218
+; RV32I-NEXT:  # %bb.210:
+; RV32I-NEXT:    mv t4, t2
 ; RV32I-NEXT:    bne t1, s0, .LBB18_219
 ; RV32I-NEXT:  .LBB18_211:
 ; RV32I-NEXT:    mv a5, t2
 ; RV32I-NEXT:    bne t1, s10, .LBB18_220
 ; RV32I-NEXT:  .LBB18_212:
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    bne t1, s1, .LBB18_221
+; RV32I-NEXT:    mv t4, t2
+; RV32I-NEXT:    bne t1, s2, .LBB18_221
 ; RV32I-NEXT:  .LBB18_213:
-; RV32I-NEXT:    bne t1, s8, .LBB18_222
+; RV32I-NEXT:    mv a5, t2
+; RV32I-NEXT:    bne t1, s3, .LBB18_222
 ; RV32I-NEXT:  .LBB18_214:
-; RV32I-NEXT:    beqz t0, .LBB18_216
+; RV32I-NEXT:    bne t1, s4, .LBB18_223
 ; RV32I-NEXT:  .LBB18_215:
-; RV32I-NEXT:    mv a1, t2
+; RV32I-NEXT:    beqz t0, .LBB18_217
 ; RV32I-NEXT:  .LBB18_216:
-; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    mv a1, t2
+; RV32I-NEXT:  .LBB18_217:
 ; RV32I-NEXT:    srli a5, ra, 16
 ; RV32I-NEXT:    lui t4, 16
 ; RV32I-NEXT:    srli t2, ra, 24
-; RV32I-NEXT:    srli t0, s11, 16
-; RV32I-NEXT:    srli t6, s11, 24
+; RV32I-NEXT:    srli t0, a3, 16
+; RV32I-NEXT:    srli t6, a3, 24
 ; RV32I-NEXT:    srli t1, a4, 16
 ; RV32I-NEXT:    srli s2, a4, 24
 ; RV32I-NEXT:    srli t5, a6, 16
 ; RV32I-NEXT:    srli s3, a6, 24
 ; RV32I-NEXT:    srli s1, a7, 16
-; RV32I-NEXT:    srli a3, a7, 24
+; RV32I-NEXT:    srli s6, a7, 24
 ; RV32I-NEXT:    srli s0, a0, 16
 ; RV32I-NEXT:    srli s5, a0, 24
 ; RV32I-NEXT:    srli s4, t3, 16
-; RV32I-NEXT:    srli s6, t3, 24
-; RV32I-NEXT:    srli s7, a1, 16
-; RV32I-NEXT:    srli s8, a1, 24
+; RV32I-NEXT:    srli s7, t3, 24
+; RV32I-NEXT:    srli s8, a1, 16
+; RV32I-NEXT:    srli s9, a1, 24
 ; RV32I-NEXT:    addi t4, t4, -1
-; RV32I-NEXT:    and s9, ra, t4
-; RV32I-NEXT:    and s10, s11, t4
-; RV32I-NEXT:    srli s9, s9, 8
+; RV32I-NEXT:    and s10, ra, t4
+; RV32I-NEXT:    and s11, a3, t4
+; RV32I-NEXT:    srli s10, s10, 8
 ; RV32I-NEXT:    sb ra, 0(a2)
-; RV32I-NEXT:    sb s9, 1(a2)
+; RV32I-NEXT:    sb s10, 1(a2)
 ; RV32I-NEXT:    sb a5, 2(a2)
 ; RV32I-NEXT:    sb t2, 3(a2)
 ; RV32I-NEXT:    and a5, a4, t4
-; RV32I-NEXT:    srli t2, s10, 8
-; RV32I-NEXT:    sb s11, 4(a2)
+; RV32I-NEXT:    srli t2, s11, 8
+; RV32I-NEXT:    sb a3, 4(a2)
 ; RV32I-NEXT:    sb t2, 5(a2)
 ; RV32I-NEXT:    sb t0, 6(a2)
 ; RV32I-NEXT:    sb t6, 7(a2)
-; RV32I-NEXT:    and t0, a6, t4
+; RV32I-NEXT:    and a3, a6, t4
 ; RV32I-NEXT:    srli a5, a5, 8
 ; RV32I-NEXT:    sb a4, 8(a2)
 ; RV32I-NEXT:    sb a5, 9(a2)
 ; RV32I-NEXT:    sb t1, 10(a2)
 ; RV32I-NEXT:    sb s2, 11(a2)
 ; RV32I-NEXT:    and a4, a7, t4
-; RV32I-NEXT:    srli a5, t0, 8
+; RV32I-NEXT:    srli a3, a3, 8
 ; RV32I-NEXT:    sb a6, 12(a2)
-; RV32I-NEXT:    sb a5, 13(a2)
+; RV32I-NEXT:    sb a3, 13(a2)
 ; RV32I-NEXT:    sb t5, 14(a2)
 ; RV32I-NEXT:    sb s3, 15(a2)
-; RV32I-NEXT:    and a5, a0, t4
+; RV32I-NEXT:    and a3, a0, t4
 ; RV32I-NEXT:    srli a4, a4, 8
 ; RV32I-NEXT:    sb a7, 16(a2)
 ; RV32I-NEXT:    sb a4, 17(a2)
 ; RV32I-NEXT:    sb s1, 18(a2)
-; RV32I-NEXT:    sb a3, 19(a2)
-; RV32I-NEXT:    and a3, t3, t4
-; RV32I-NEXT:    and a4, a1, t4
-; RV32I-NEXT:    srli a5, a5, 8
+; RV32I-NEXT:    sb s6, 19(a2)
+; RV32I-NEXT:    and a4, t3, t4
+; RV32I-NEXT:    and a5, a1, t4
 ; RV32I-NEXT:    srli a3, a3, 8
 ; RV32I-NEXT:    srli a4, a4, 8
+; RV32I-NEXT:    srli a5, a5, 8
 ; RV32I-NEXT:    sb a0, 20(a2)
-; RV32I-NEXT:    sb a5, 21(a2)
+; RV32I-NEXT:    sb a3, 21(a2)
 ; RV32I-NEXT:    sb s0, 22(a2)
 ; RV32I-NEXT:    sb s5, 23(a2)
 ; RV32I-NEXT:    sb t3, 24(a2)
-; RV32I-NEXT:    sb a3, 25(a2)
+; RV32I-NEXT:    sb a4, 25(a2)
 ; RV32I-NEXT:    sb s4, 26(a2)
-; RV32I-NEXT:    sb s6, 27(a2)
+; RV32I-NEXT:    sb s7, 27(a2)
 ; RV32I-NEXT:    sb a1, 28(a2)
-; RV32I-NEXT:    sb a4, 29(a2)
-; RV32I-NEXT:    sb s7, 30(a2)
-; RV32I-NEXT:    sb s8, 31(a2)
+; RV32I-NEXT:    sb a5, 29(a2)
+; RV32I-NEXT:    sb s8, 30(a2)
+; RV32I-NEXT:    sb s9, 31(a2)
 ; RV32I-NEXT:    lw ra, 76(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 72(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s1, 68(sp) # 4-byte Folded Reload
@@ -10664,29 +10737,29 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
 ; RV32I-NEXT:    lw s11, 28(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 80
 ; RV32I-NEXT:    ret
-; RV32I-NEXT:  .LBB18_217:
-; RV32I-NEXT:    mv a3, a5
-; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    beq t1, s5, .LBB18_210
 ; RV32I-NEXT:  .LBB18_218:
-; RV32I-NEXT:    mv a5, a3
-; RV32I-NEXT:    mv a3, t2
+; RV32I-NEXT:    mv a5, t4
+; RV32I-NEXT:    mv t4, t2
 ; RV32I-NEXT:    beq t1, s0, .LBB18_211
 ; RV32I-NEXT:  .LBB18_219:
-; RV32I-NEXT:    mv a3, a5
+; RV32I-NEXT:    mv t4, a5
 ; RV32I-NEXT:    mv a5, t2
 ; RV32I-NEXT:    beq t1, s10, .LBB18_212
 ; RV32I-NEXT:  .LBB18_220:
-; RV32I-NEXT:    mv a5, a3
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    beq t1, s1, .LBB18_213
+; RV32I-NEXT:    mv a5, t4
+; RV32I-NEXT:    mv t4, t2
+; RV32I-NEXT:    beq t1, s2, .LBB18_213
 ; RV32I-NEXT:  .LBB18_221:
-; RV32I-NEXT:    mv a3, a5
-; RV32I-NEXT:    beq t1, s8, .LBB18_214
+; RV32I-NEXT:    mv t4, a5
+; RV32I-NEXT:    mv a5, t2
+; RV32I-NEXT:    beq t1, s3, .LBB18_214
 ; RV32I-NEXT:  .LBB18_222:
-; RV32I-NEXT:    mv t2, a3
-; RV32I-NEXT:    bnez t0, .LBB18_215
-; RV32I-NEXT:    j .LBB18_216
+; RV32I-NEXT:    mv a5, t4
+; RV32I-NEXT:    beq t1, s4, .LBB18_215
+; RV32I-NEXT:  .LBB18_223:
+; RV32I-NEXT:    mv t2, a5
+; RV32I-NEXT:    bnez t0, .LBB18_216
+; RV32I-NEXT:    j .LBB18_217
   %src = load i256, ptr %src.ptr, align 1
   %byteOff = load i256, ptr %byteOff.ptr, align 1
   %bitOff = shl i256 %byteOff, 3
@@ -11070,223 +11143,231 @@ define void @ashr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun
 ; RV32I-NEXT:    slli t4, t4, 16
 ; RV32I-NEXT:    slli t5, s0, 16
 ; RV32I-NEXT:    slli s4, s4, 16
-; RV32I-NEXT:    slli a3, a1, 16
+; RV32I-NEXT:    slli t0, a1, 16
 ; RV32I-NEXT:    or s5, t4, t1
 ; RV32I-NEXT:    or a1, s4, s1
-; RV32I-NEXT:    or t0, a3, s2
+; RV32I-NEXT:    or t0, t0, s2
 ; RV32I-NEXT:    slli t0, t0, 5
 ; RV32I-NEXT:    srli t1, t0, 5
 ; RV32I-NEXT:    andi t4, t0, 31
-; RV32I-NEXT:    neg a3, t4
+; RV32I-NEXT:    neg ra, t4
 ; RV32I-NEXT:    beqz t4, .LBB19_2
 ; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    sll a5, s5, a3
+; RV32I-NEXT:    sll a5, s5, ra
 ; RV32I-NEXT:  .LBB19_2:
-; RV32I-NEXT:    or s10, t6, a4
+; RV32I-NEXT:    or s2, t6, a4
 ; RV32I-NEXT:    lbu t6, 12(a0)
 ; RV32I-NEXT:    lbu s0, 19(a0)
 ; RV32I-NEXT:    slli s1, a7, 8
 ; RV32I-NEXT:    or a6, t3, a6
-; RV32I-NEXT:    or a4, t5, t2
+; RV32I-NEXT:    or a3, t5, t2
 ; RV32I-NEXT:    srai t2, a1, 31
 ; RV32I-NEXT:    beqz t1, .LBB19_4
 ; RV32I-NEXT:  # %bb.3:
+; RV32I-NEXT:    mv a4, s2
 ; RV32I-NEXT:    mv a5, t2
 ; RV32I-NEXT:    j .LBB19_5
 ; RV32I-NEXT:  .LBB19_4:
-; RV32I-NEXT:    srl a7, s10, t0
+; RV32I-NEXT:    mv a4, s2
+; RV32I-NEXT:    srl a7, s2, t0
 ; RV32I-NEXT:    or a5, a7, a5
 ; RV32I-NEXT:  .LBB19_5:
 ; RV32I-NEXT:    li a7, 0
-; RV32I-NEXT:    lbu s3, 17(a0)
+; RV32I-NEXT:    lbu s2, 17(a0)
 ; RV32I-NEXT:    lbu t3, 18(a0)
-; RV32I-NEXT:    slli s0, s0, 8
-; RV32I-NEXT:    or s4, s1, t6
+; RV32I-NEXT:    slli s4, s0, 8
+; RV32I-NEXT:    or s3, s1, t6
 ; RV32I-NEXT:    slli a6, a6, 16
-; RV32I-NEXT:    li s6, 1
-; RV32I-NEXT:    sll s2, a4, a3
+; RV32I-NEXT:    li t5, 1
+; RV32I-NEXT:    sll s8, a3, ra
 ; RV32I-NEXT:    beqz t4, .LBB19_7
 ; RV32I-NEXT:  # %bb.6:
-; RV32I-NEXT:    mv a7, s2
+; RV32I-NEXT:    mv a7, s8
 ; RV32I-NEXT:  .LBB19_7:
-; RV32I-NEXT:    lbu t5, 16(a0)
-; RV32I-NEXT:    lbu t6, 23(a0)
-; RV32I-NEXT:    slli s1, s3, 8
-; RV32I-NEXT:    or s0, s0, t3
-; RV32I-NEXT:    srl s3, s5, t0
-; RV32I-NEXT:    or a6, a6, s4
-; RV32I-NEXT:    bne t1, s6, .LBB19_9
+; RV32I-NEXT:    lbu t6, 16(a0)
+; RV32I-NEXT:    lbu s0, 23(a0)
+; RV32I-NEXT:    slli s2, s2, 8
+; RV32I-NEXT:    or s1, s4, t3
+; RV32I-NEXT:    srl t3, s5, t0
+; RV32I-NEXT:    or a6, a6, s3
+; RV32I-NEXT:    sw t3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    beq t1, t5, .LBB19_9
 ; RV32I-NEXT:  # %bb.8:
-; RV32I-NEXT:    or a5, s3, a7
+; RV32I-NEXT:    li t5, 1
+; RV32I-NEXT:    j .LBB19_10
 ; RV32I-NEXT:  .LBB19_9:
+; RV32I-NEXT:    li t5, 1
+; RV32I-NEXT:    or a5, t3, a7
+; RV32I-NEXT:  .LBB19_10:
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    lbu s6, 21(a0)
+; RV32I-NEXT:    lbu s3, 21(a0)
 ; RV32I-NEXT:    lbu a7, 22(a0)
-; RV32I-NEXT:    slli s4, t6, 8
-; RV32I-NEXT:    or s7, s1, t5
-; RV32I-NEXT:    slli s8, s0, 16
-; RV32I-NEXT:    li s9, 2
-; RV32I-NEXT:    sll s0, a6, a3
-; RV32I-NEXT:    beqz t4, .LBB19_11
-; RV32I-NEXT:  # %bb.10:
-; RV32I-NEXT:    mv t3, s0
-; RV32I-NEXT:  .LBB19_11:
-; RV32I-NEXT:    lbu t5, 20(a0)
-; RV32I-NEXT:    lbu t6, 27(a0)
-; RV32I-NEXT:    slli s6, s6, 8
-; RV32I-NEXT:    or s4, s4, a7
-; RV32I-NEXT:    srl s1, a4, t0
-; RV32I-NEXT:    or a7, s8, s7
-; RV32I-NEXT:    bne t1, s9, .LBB19_13
-; RV32I-NEXT:  # %bb.12:
-; RV32I-NEXT:    or a5, s1, t3
-; RV32I-NEXT:  .LBB19_13:
-; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    lbu s8, 25(a0)
-; RV32I-NEXT:    lbu s7, 26(a0)
-; RV32I-NEXT:    slli t6, t6, 8
-; RV32I-NEXT:    or s6, s6, t5
-; RV32I-NEXT:    slli s9, s4, 16
-; RV32I-NEXT:    li s11, 3
-; RV32I-NEXT:    sll t5, a7, a3
-; RV32I-NEXT:    beqz t4, .LBB19_15
-; RV32I-NEXT:  # %bb.14:
-; RV32I-NEXT:    mv t3, t5
-; RV32I-NEXT:  .LBB19_15:
-; RV32I-NEXT:    lbu s4, 24(a0)
-; RV32I-NEXT:    slli s8, s8, 8
-; RV32I-NEXT:    or s7, t6, s7
+; RV32I-NEXT:    slli s7, s0, 8
+; RV32I-NEXT:    or s4, s2, t6
+; RV32I-NEXT:    slli s9, s1, 16
+; RV32I-NEXT:    li t6, 2
+; RV32I-NEXT:    sll s6, a6, ra
+; RV32I-NEXT:    beqz t4, .LBB19_12
+; RV32I-NEXT:  # %bb.11:
+; RV32I-NEXT:    mv t3, s6
+; RV32I-NEXT:  .LBB19_12:
+; RV32I-NEXT:    lbu s0, 20(a0)
+; RV32I-NEXT:    lbu s1, 27(a0)
+; RV32I-NEXT:    slli s3, s3, 8
+; RV32I-NEXT:    or s2, s7, a7
+; RV32I-NEXT:    srl s7, a3, t0
+; RV32I-NEXT:    or a7, s9, s4
+; RV32I-NEXT:    sw a3, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s7, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    bne t1, t6, .LBB19_14
+; RV32I-NEXT:  # %bb.13:
+; RV32I-NEXT:    or a5, s7, t3
+; RV32I-NEXT:  .LBB19_14:
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    lbu s7, 25(a0)
+; RV32I-NEXT:    lbu s4, 26(a0)
+; RV32I-NEXT:    slli s11, s1, 8
+; RV32I-NEXT:    or s9, s3, s0
+; RV32I-NEXT:    slli s2, s2, 16
+; RV32I-NEXT:    li a3, 3
+; RV32I-NEXT:    sll s10, a7, ra
+; RV32I-NEXT:    beqz t4, .LBB19_16
+; RV32I-NEXT:  # %bb.15:
+; RV32I-NEXT:    mv t3, s10
+; RV32I-NEXT:  .LBB19_16:
+; RV32I-NEXT:    lbu s1, 24(a0)
+; RV32I-NEXT:    slli s7, s7, 8
+; RV32I-NEXT:    or s3, s11, s4
 ; RV32I-NEXT:    srl t6, a6, t0
-; RV32I-NEXT:    or a0, s9, s6
-; RV32I-NEXT:    sw s5, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    sw s10, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    bne t1, s11, .LBB19_17
-; RV32I-NEXT:  # %bb.16:
+; RV32I-NEXT:    or a0, s2, s9
+; RV32I-NEXT:    bne t1, a3, .LBB19_18
+; RV32I-NEXT:  # %bb.17:
 ; RV32I-NEXT:    or a5, t6, t3
-; RV32I-NEXT:  .LBB19_17:
-; RV32I-NEXT:    li s6, 0
-; RV32I-NEXT:    or t3, s8, s4
-; RV32I-NEXT:    slli s7, s7, 16
-; RV32I-NEXT:    li s10, 4
-; RV32I-NEXT:    sll s11, a0, a3
-; RV32I-NEXT:    beqz t4, .LBB19_19
-; RV32I-NEXT:  # %bb.18:
-; RV32I-NEXT:    mv s6, s11
-; RV32I-NEXT:  .LBB19_19:
-; RV32I-NEXT:    srl s4, a7, t0
-; RV32I-NEXT:    or t3, s7, t3
-; RV32I-NEXT:    sw s4, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    bne t1, s10, .LBB19_21
-; RV32I-NEXT:  # %bb.20:
-; RV32I-NEXT:    or a5, s4, s6
-; RV32I-NEXT:  .LBB19_21:
-; RV32I-NEXT:    li s4, 0
-; RV32I-NEXT:    li s5, 5
-; RV32I-NEXT:    sll s6, t3, a3
-; RV32I-NEXT:    sw s6, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    beqz t4, .LBB19_23
-; RV32I-NEXT:  # %bb.22:
-; RV32I-NEXT:    lw s4, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT:  .LBB19_23:
-; RV32I-NEXT:    srl s6, a0, t0
-; RV32I-NEXT:    beq t1, s5, .LBB19_25
-; RV32I-NEXT:  # %bb.24:
-; RV32I-NEXT:    mv ra, s6
-; RV32I-NEXT:    j .LBB19_26
-; RV32I-NEXT:  .LBB19_25:
-; RV32I-NEXT:    mv ra, s6
-; RV32I-NEXT:    or a5, s6, s4
+; RV32I-NEXT:  .LBB19_18:
+; RV32I-NEXT:    li s2, 0
+; RV32I-NEXT:    or t3, s7, s1
+; RV32I-NEXT:    slli s3, s3, 16
+; RV32I-NEXT:    sll s11, a0, ra
+; RV32I-NEXT:    beqz t4, .LBB19_20
+; RV32I-NEXT:  # %bb.19:
+; RV32I-NEXT:    mv s2, s11
+; RV32I-NEXT:  .LBB19_20:
+; RV32I-NEXT:    srl t6, a7, t0
+; RV32I-NEXT:    or t3, s3, t3
+; RV32I-NEXT:    li a3, 4
+; RV32I-NEXT:    bne t1, a3, .LBB19_22
+; RV32I-NEXT:  # %bb.21:
+; RV32I-NEXT:    or a5, t6, s2
+; RV32I-NEXT:  .LBB19_22:
+; RV32I-NEXT:    li s3, 0
+; RV32I-NEXT:    li s0, 5
+; RV32I-NEXT:    sll s9, t3, ra
+; RV32I-NEXT:    beqz t4, .LBB19_24
+; RV32I-NEXT:  # %bb.23:
+; RV32I-NEXT:    mv s3, s9
+; RV32I-NEXT:  .LBB19_24:
+; RV32I-NEXT:    srl a3, a0, t0
+; RV32I-NEXT:    beq t1, s0, .LBB19_26
+; RV32I-NEXT:  # %bb.25:
+; RV32I-NEXT:    mv s1, a3
+; RV32I-NEXT:    j .LBB19_27
 ; RV32I-NEXT:  .LBB19_26:
+; RV32I-NEXT:    mv s1, a3
+; RV32I-NEXT:    or a5, a3, s3
+; RV32I-NEXT:  .LBB19_27:
 ; RV32I-NEXT:    li s4, 0
-; RV32I-NEXT:    li s8, 6
-; RV32I-NEXT:    sll s7, a1, a3
-; RV32I-NEXT:    beqz t4, .LBB19_28
-; RV32I-NEXT:  # %bb.27:
+; RV32I-NEXT:    li s3, 6
+; RV32I-NEXT:    sll s7, a1, ra
+; RV32I-NEXT:    beqz t4, .LBB19_29
+; RV32I-NEXT:  # %bb.28:
 ; RV32I-NEXT:    mv s4, s7
-; RV32I-NEXT:  .LBB19_28:
-; RV32I-NEXT:    srl s5, t3, t0
-; RV32I-NEXT:    beq t1, s8, .LBB19_30
-; RV32I-NEXT:  # %bb.29:
-; RV32I-NEXT:    mv s9, s5
-; RV32I-NEXT:    j .LBB19_31
-; RV32I-NEXT:  .LBB19_30:
-; RV32I-NEXT:    mv s9, s5
-; RV32I-NEXT:    or a5, s5, s4
+; RV32I-NEXT:  .LBB19_29:
+; RV32I-NEXT:    srl s0, t3, t0
+; RV32I-NEXT:    sw s5, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    mv s2, t6
+; RV32I-NEXT:    bne t1, s3, .LBB19_31
+; RV32I-NEXT:  # %bb.30:
+; RV32I-NEXT:    or a5, s0, s4
 ; RV32I-NEXT:  .LBB19_31:
 ; RV32I-NEXT:    li s5, 0
-; RV32I-NEXT:    li s6, 7
-; RV32I-NEXT:    sll s4, t2, a3
+; RV32I-NEXT:    li s4, 7
+; RV32I-NEXT:    sll t6, t2, ra
 ; RV32I-NEXT:    beqz t4, .LBB19_33
 ; RV32I-NEXT:  # %bb.32:
-; RV32I-NEXT:    mv s5, s4
+; RV32I-NEXT:    mv s5, t6
 ; RV32I-NEXT:  .LBB19_33:
 ; RV32I-NEXT:    srl a3, a1, t0
-; RV32I-NEXT:    bne t1, s6, .LBB19_35
+; RV32I-NEXT:    mv ra, a4
+; RV32I-NEXT:    beq t1, s4, .LBB19_35
 ; RV32I-NEXT:  # %bb.34:
-; RV32I-NEXT:    or a5, a3, s5
+; RV32I-NEXT:    mv a4, a3
+; RV32I-NEXT:    mv a3, s0
+; RV32I-NEXT:    bnez t0, .LBB19_36
+; RV32I-NEXT:    j .LBB19_37
 ; RV32I-NEXT:  .LBB19_35:
-; RV32I-NEXT:    li s5, 3
-; RV32I-NEXT:    mv s6, a3
-; RV32I-NEXT:    bnez t0, .LBB19_39
-; RV32I-NEXT:  # %bb.36:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t4, .LBB19_40
+; RV32I-NEXT:    mv a4, a3
+; RV32I-NEXT:    or a5, a3, s5
+; RV32I-NEXT:    mv a3, s0
+; RV32I-NEXT:    beqz t0, .LBB19_37
+; RV32I-NEXT:  .LBB19_36:
+; RV32I-NEXT:    mv ra, a5
 ; RV32I-NEXT:  .LBB19_37:
+; RV32I-NEXT:    li a5, 0
+; RV32I-NEXT:    li s0, 3
+; RV32I-NEXT:    beqz t4, .LBB19_39
+; RV32I-NEXT:  # %bb.38:
+; RV32I-NEXT:    mv a5, s8
+; RV32I-NEXT:  .LBB19_39:
 ; RV32I-NEXT:    beqz t1, .LBB19_41
-; RV32I-NEXT:  .LBB19_38:
+; RV32I-NEXT:  # %bb.40:
 ; RV32I-NEXT:    mv a5, t2
 ; RV32I-NEXT:    j .LBB19_42
-; RV32I-NEXT:  .LBB19_39:
-; RV32I-NEXT:    sw a5, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t4, .LBB19_37
-; RV32I-NEXT:  .LBB19_40:
-; RV32I-NEXT:    mv a3, s2
-; RV32I-NEXT:    bnez t1, .LBB19_38
 ; RV32I-NEXT:  .LBB19_41:
-; RV32I-NEXT:    or a5, s3, a3
+; RV32I-NEXT:    lw s5, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    or a5, s5, a5
 ; RV32I-NEXT:  .LBB19_42:
-; RV32I-NEXT:    li s2, 1
-; RV32I-NEXT:    li s3, 2
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t4, .LBB19_61
+; RV32I-NEXT:    mv s8, a4
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    beqz t4, .LBB19_44
 ; RV32I-NEXT:  # %bb.43:
-; RV32I-NEXT:    beq t1, s2, .LBB19_62
+; RV32I-NEXT:    mv s5, s6
 ; RV32I-NEXT:  .LBB19_44:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t4, .LBB19_63
-; RV32I-NEXT:  .LBB19_45:
-; RV32I-NEXT:    beq t1, s3, .LBB19_64
+; RV32I-NEXT:    bne t1, t5, .LBB19_46
+; RV32I-NEXT:  # %bb.45:
+; RV32I-NEXT:    lw a4, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    or a5, a4, s5
 ; RV32I-NEXT:  .LBB19_46:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t4, .LBB19_65
-; RV32I-NEXT:  .LBB19_47:
-; RV32I-NEXT:    beq t1, s5, .LBB19_66
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    li t5, 2
+; RV32I-NEXT:    bnez t4, .LBB19_61
+; RV32I-NEXT:  # %bb.47:
+; RV32I-NEXT:    beq t1, t5, .LBB19_62
 ; RV32I-NEXT:  .LBB19_48:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t4, .LBB19_67
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    bnez t4, .LBB19_63
 ; RV32I-NEXT:  .LBB19_49:
-; RV32I-NEXT:    bne t1, s10, .LBB19_51
+; RV32I-NEXT:    beq t1, s0, .LBB19_64
 ; RV32I-NEXT:  .LBB19_50:
-; RV32I-NEXT:    or a5, ra, a3
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    bnez t4, .LBB19_65
 ; RV32I-NEXT:  .LBB19_51:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    li s10, 5
-; RV32I-NEXT:    bnez t4, .LBB19_68
-; RV32I-NEXT:  # %bb.52:
-; RV32I-NEXT:    beq t1, s10, .LBB19_69
+; RV32I-NEXT:    li a4, 4
+; RV32I-NEXT:    beq t1, a4, .LBB19_66
+; RV32I-NEXT:  .LBB19_52:
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    bnez t4, .LBB19_67
 ; RV32I-NEXT:  .LBB19_53:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t4, .LBB19_70
+; RV32I-NEXT:    li a4, 5
+; RV32I-NEXT:    beq t1, a4, .LBB19_68
 ; RV32I-NEXT:  .LBB19_54:
-; RV32I-NEXT:    bne t1, s8, .LBB19_56
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    bnez t4, .LBB19_69
 ; RV32I-NEXT:  .LBB19_55:
-; RV32I-NEXT:    or a5, s6, a3
+; RV32I-NEXT:    beq t1, s3, .LBB19_70
 ; RV32I-NEXT:  .LBB19_56:
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    li s8, 7
-; RV32I-NEXT:    bne t1, s8, .LBB19_71
-; RV32I-NEXT:  # %bb.57:
+; RV32I-NEXT:    mv s5, t2
+; RV32I-NEXT:    bne t1, s4, .LBB19_71
+; RV32I-NEXT:  .LBB19_57:
 ; RV32I-NEXT:    bnez t0, .LBB19_72
 ; RV32I-NEXT:  .LBB19_58:
 ; RV32I-NEXT:    li a5, 0
@@ -11297,565 +11378,570 @@ define void @ashr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun
 ; RV32I-NEXT:    mv a5, t2
 ; RV32I-NEXT:    j .LBB19_75
 ; RV32I-NEXT:  .LBB19_61:
-; RV32I-NEXT:    mv a3, s0
-; RV32I-NEXT:    bne t1, s2, .LBB19_44
+; RV32I-NEXT:    mv s5, s10
+; RV32I-NEXT:    bne t1, t5, .LBB19_48
 ; RV32I-NEXT:  .LBB19_62:
-; RV32I-NEXT:    or a5, s1, a3
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t4, .LBB19_45
+; RV32I-NEXT:    srl a4, a6, t0
+; RV32I-NEXT:    or a5, a4, s5
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    beqz t4, .LBB19_49
 ; RV32I-NEXT:  .LBB19_63:
-; RV32I-NEXT:    mv a3, t5
-; RV32I-NEXT:    bne t1, s3, .LBB19_46
+; RV32I-NEXT:    mv s5, s11
+; RV32I-NEXT:    bne t1, s0, .LBB19_50
 ; RV32I-NEXT:  .LBB19_64:
-; RV32I-NEXT:    or a5, t6, a3
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t4, .LBB19_47
+; RV32I-NEXT:    or a5, s2, s5
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    beqz t4, .LBB19_51
 ; RV32I-NEXT:  .LBB19_65:
-; RV32I-NEXT:    mv a3, s11
-; RV32I-NEXT:    bne t1, s5, .LBB19_48
+; RV32I-NEXT:    mv s5, s9
+; RV32I-NEXT:    li a4, 4
+; RV32I-NEXT:    bne t1, a4, .LBB19_52
 ; RV32I-NEXT:  .LBB19_66:
-; RV32I-NEXT:    lw a5, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    or a5, a5, a3
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t4, .LBB19_49
+; RV32I-NEXT:    or a5, s1, s5
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    beqz t4, .LBB19_53
 ; RV32I-NEXT:  .LBB19_67:
-; RV32I-NEXT:    lw a3, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    beq t1, s10, .LBB19_50
-; RV32I-NEXT:    j .LBB19_51
+; RV32I-NEXT:    mv s5, s7
+; RV32I-NEXT:    li a4, 5
+; RV32I-NEXT:    bne t1, a4, .LBB19_54
 ; RV32I-NEXT:  .LBB19_68:
-; RV32I-NEXT:    mv a3, s7
-; RV32I-NEXT:    bne t1, s10, .LBB19_53
+; RV32I-NEXT:    or a5, a3, s5
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    beqz t4, .LBB19_55
 ; RV32I-NEXT:  .LBB19_69:
-; RV32I-NEXT:    or a5, s9, a3
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t4, .LBB19_54
+; RV32I-NEXT:    mv s5, t6
+; RV32I-NEXT:    bne t1, s3, .LBB19_56
 ; RV32I-NEXT:  .LBB19_70:
-; RV32I-NEXT:    mv a3, s4
-; RV32I-NEXT:    beq t1, s8, .LBB19_55
-; RV32I-NEXT:    j .LBB19_56
+; RV32I-NEXT:    or a5, s8, s5
+; RV32I-NEXT:    mv s5, t2
+; RV32I-NEXT:    beq t1, s4, .LBB19_57
 ; RV32I-NEXT:  .LBB19_71:
-; RV32I-NEXT:    mv a3, a5
+; RV32I-NEXT:    mv s5, a5
 ; RV32I-NEXT:    beqz t0, .LBB19_58
 ; RV32I-NEXT:  .LBB19_72:
-; RV32I-NEXT:    sw a3, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s5, 24(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    li a5, 0
 ; RV32I-NEXT:    beqz t4, .LBB19_59
 ; RV32I-NEXT:  .LBB19_73:
-; RV32I-NEXT:    mv a5, s0
+; RV32I-NEXT:    mv a5, s6
 ; RV32I-NEXT:    bnez t1, .LBB19_60
 ; RV32I-NEXT:  .LBB19_74:
-; RV32I-NEXT:    or a5, s1, a5
+; RV32I-NEXT:    lw a4, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    or a5, a4, a5
 ; RV32I-NEXT:  .LBB19_75:
-; RV32I-NEXT:    li s0, 4
-; RV32I-NEXT:    li s1, 6
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    li s6, 1
+; RV32I-NEXT:    li s5, 0
 ; RV32I-NEXT:    bnez t4, .LBB19_91
 ; RV32I-NEXT:  # %bb.76:
-; RV32I-NEXT:    beq t1, s2, .LBB19_92
+; RV32I-NEXT:    beq t1, s6, .LBB19_92
 ; RV32I-NEXT:  .LBB19_77:
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    li s5, 0
 ; RV32I-NEXT:    bnez t4, .LBB19_93
 ; RV32I-NEXT:  .LBB19_78:
-; RV32I-NEXT:    beq t1, s3, .LBB19_94
+; RV32I-NEXT:    beq t1, t5, .LBB19_94
 ; RV32I-NEXT:  .LBB19_79:
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    li s5, 0
 ; RV32I-NEXT:    bnez t4, .LBB19_95
 ; RV32I-NEXT:  .LBB19_80:
-; RV32I-NEXT:    beq t1, s5, .LBB19_96
+; RV32I-NEXT:    beq t1, s0, .LBB19_96
 ; RV32I-NEXT:  .LBB19_81:
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    li s5, 0
 ; RV32I-NEXT:    bnez t4, .LBB19_97
 ; RV32I-NEXT:  .LBB19_82:
-; RV32I-NEXT:    beq t1, s0, .LBB19_98
+; RV32I-NEXT:    li a4, 4
+; RV32I-NEXT:    beq t1, a4, .LBB19_98
 ; RV32I-NEXT:  .LBB19_83:
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    li s5, 0
 ; RV32I-NEXT:    bnez t4, .LBB19_99
 ; RV32I-NEXT:  .LBB19_84:
-; RV32I-NEXT:    beq t1, s10, .LBB19_100
+; RV32I-NEXT:    li a4, 5
+; RV32I-NEXT:    beq t1, a4, .LBB19_100
 ; RV32I-NEXT:  .LBB19_85:
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    bne t1, s1, .LBB19_101
+; RV32I-NEXT:    mv s5, t2
+; RV32I-NEXT:    bne t1, s3, .LBB19_101
 ; RV32I-NEXT:  .LBB19_86:
 ; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    bne t1, s8, .LBB19_102
+; RV32I-NEXT:    bne t1, s4, .LBB19_102
 ; RV32I-NEXT:  .LBB19_87:
 ; RV32I-NEXT:    bnez t0, .LBB19_103
 ; RV32I-NEXT:  .LBB19_88:
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    li a5, 0
 ; RV32I-NEXT:    bnez t4, .LBB19_104
 ; RV32I-NEXT:  .LBB19_89:
 ; RV32I-NEXT:    beqz t1, .LBB19_105
 ; RV32I-NEXT:  .LBB19_90:
 ; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t4, .LBB19_106
-; RV32I-NEXT:    j .LBB19_107
+; RV32I-NEXT:    j .LBB19_106
 ; RV32I-NEXT:  .LBB19_91:
-; RV32I-NEXT:    mv a3, t5
-; RV32I-NEXT:    bne t1, s2, .LBB19_77
+; RV32I-NEXT:    mv s5, s10
+; RV32I-NEXT:    bne t1, s6, .LBB19_77
 ; RV32I-NEXT:  .LBB19_92:
-; RV32I-NEXT:    or a5, t6, a3
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    srl a4, a6, t0
+; RV32I-NEXT:    or a5, a4, s5
+; RV32I-NEXT:    li s5, 0
 ; RV32I-NEXT:    beqz t4, .LBB19_78
 ; RV32I-NEXT:  .LBB19_93:
-; RV32I-NEXT:    mv a3, s11
-; RV32I-NEXT:    bne t1, s3, .LBB19_79
+; RV32I-NEXT:    mv s5, s11
+; RV32I-NEXT:    bne t1, t5, .LBB19_79
 ; RV32I-NEXT:  .LBB19_94:
-; RV32I-NEXT:    lw a5, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    or a5, a5, a3
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    or a5, s2, s5
+; RV32I-NEXT:    li s5, 0
 ; RV32I-NEXT:    beqz t4, .LBB19_80
 ; RV32I-NEXT:  .LBB19_95:
-; RV32I-NEXT:    lw a3, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    bne t1, s5, .LBB19_81
+; RV32I-NEXT:    mv s5, s9
+; RV32I-NEXT:    bne t1, s0, .LBB19_81
 ; RV32I-NEXT:  .LBB19_96:
-; RV32I-NEXT:    or a5, ra, a3
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    or a5, s1, s5
+; RV32I-NEXT:    li s5, 0
 ; RV32I-NEXT:    beqz t4, .LBB19_82
 ; RV32I-NEXT:  .LBB19_97:
-; RV32I-NEXT:    mv a3, s7
-; RV32I-NEXT:    bne t1, s0, .LBB19_83
+; RV32I-NEXT:    mv s5, s7
+; RV32I-NEXT:    li a4, 4
+; RV32I-NEXT:    bne t1, a4, .LBB19_83
 ; RV32I-NEXT:  .LBB19_98:
-; RV32I-NEXT:    or a5, s9, a3
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    or a5, a3, s5
+; RV32I-NEXT:    li s5, 0
 ; RV32I-NEXT:    beqz t4, .LBB19_84
 ; RV32I-NEXT:  .LBB19_99:
-; RV32I-NEXT:    mv a3, s4
-; RV32I-NEXT:    bne t1, s10, .LBB19_85
+; RV32I-NEXT:    mv s5, t6
+; RV32I-NEXT:    li a4, 5
+; RV32I-NEXT:    bne t1, a4, .LBB19_85
 ; RV32I-NEXT:  .LBB19_100:
-; RV32I-NEXT:    or a5, s6, a3
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    beq t1, s1, .LBB19_86
+; RV32I-NEXT:    or a5, s8, s5
+; RV32I-NEXT:    mv s5, t2
+; RV32I-NEXT:    beq t1, s3, .LBB19_86
 ; RV32I-NEXT:  .LBB19_101:
-; RV32I-NEXT:    mv a3, a5
+; RV32I-NEXT:    mv s5, a5
 ; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    beq t1, s8, .LBB19_87
+; RV32I-NEXT:    beq t1, s4, .LBB19_87
 ; RV32I-NEXT:  .LBB19_102:
-; RV32I-NEXT:    mv a5, a3
+; RV32I-NEXT:    mv a5, s5
 ; RV32I-NEXT:    beqz t0, .LBB19_88
 ; RV32I-NEXT:  .LBB19_103:
-; RV32I-NEXT:    mv a4, a5
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    sw a5, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    li a5, 0
 ; RV32I-NEXT:    beqz t4, .LBB19_89
 ; RV32I-NEXT:  .LBB19_104:
-; RV32I-NEXT:    mv a3, t5
+; RV32I-NEXT:    mv a5, s10
 ; RV32I-NEXT:    bnez t1, .LBB19_90
 ; RV32I-NEXT:  .LBB19_105:
-; RV32I-NEXT:    or a5, t6, a3
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t4, .LBB19_107
+; RV32I-NEXT:    srl a4, a6, t0
+; RV32I-NEXT:    or a5, a4, a5
 ; RV32I-NEXT:  .LBB19_106:
-; RV32I-NEXT:    mv a3, s11
-; RV32I-NEXT:  .LBB19_107:
-; RV32I-NEXT:    beq t1, s2, .LBB19_121
-; RV32I-NEXT:  # %bb.108:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t4, .LBB19_122
+; RV32I-NEXT:    lw a4, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    li s10, 4
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    bnez t4, .LBB19_124
+; RV32I-NEXT:  # %bb.107:
+; RV32I-NEXT:    beq t1, s6, .LBB19_125
+; RV32I-NEXT:  .LBB19_108:
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    bnez t4, .LBB19_126
 ; RV32I-NEXT:  .LBB19_109:
-; RV32I-NEXT:    beq t1, s3, .LBB19_123
+; RV32I-NEXT:    beq t1, t5, .LBB19_127
 ; RV32I-NEXT:  .LBB19_110:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t4, .LBB19_124
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    bnez t4, .LBB19_128
 ; RV32I-NEXT:  .LBB19_111:
-; RV32I-NEXT:    beq t1, s5, .LBB19_125
+; RV32I-NEXT:    beq t1, s0, .LBB19_129
 ; RV32I-NEXT:  .LBB19_112:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t4, .LBB19_126
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    bnez t4, .LBB19_130
 ; RV32I-NEXT:  .LBB19_113:
-; RV32I-NEXT:    beq t1, s0, .LBB19_127
+; RV32I-NEXT:    mv s0, a3
+; RV32I-NEXT:    bne t1, s10, .LBB19_115
 ; RV32I-NEXT:  .LBB19_114:
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    bne t1, s10, .LBB19_128
+; RV32I-NEXT:    or a5, s8, s5
 ; RV32I-NEXT:  .LBB19_115:
-; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    bne t1, s1, .LBB19_129
-; RV32I-NEXT:  .LBB19_116:
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    bne t1, s8, .LBB19_130
+; RV32I-NEXT:    mv s5, t2
+; RV32I-NEXT:    li a3, 5
+; RV32I-NEXT:    beq t1, a3, .LBB19_117
+; RV32I-NEXT:  # %bb.116:
+; RV32I-NEXT:    mv s5, a5
 ; RV32I-NEXT:  .LBB19_117:
-; RV32I-NEXT:    bnez t0, .LBB19_131
-; RV32I-NEXT:  .LBB19_118:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t4, .LBB19_132
-; RV32I-NEXT:  .LBB19_119:
-; RV32I-NEXT:    beqz t1, .LBB19_133
-; RV32I-NEXT:  .LBB19_120:
 ; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    j .LBB19_134
+; RV32I-NEXT:    beq t1, s3, .LBB19_119
+; RV32I-NEXT:  # %bb.118:
+; RV32I-NEXT:    mv a5, s5
+; RV32I-NEXT:  .LBB19_119:
+; RV32I-NEXT:    mv s5, t2
+; RV32I-NEXT:    mv a3, s0
+; RV32I-NEXT:    bne t1, s4, .LBB19_131
+; RV32I-NEXT:  # %bb.120:
+; RV32I-NEXT:    li s0, 3
+; RV32I-NEXT:    bnez t0, .LBB19_132
 ; RV32I-NEXT:  .LBB19_121:
-; RV32I-NEXT:    lw a5, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    or a5, a5, a3
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t4, .LBB19_109
+; RV32I-NEXT:    li a5, 0
+; RV32I-NEXT:    bnez t4, .LBB19_133
 ; RV32I-NEXT:  .LBB19_122:
-; RV32I-NEXT:    lw a3, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    bne t1, s3, .LBB19_110
+; RV32I-NEXT:    beqz t1, .LBB19_134
 ; RV32I-NEXT:  .LBB19_123:
-; RV32I-NEXT:    or a5, ra, a3
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t4, .LBB19_111
+; RV32I-NEXT:    mv a5, t2
+; RV32I-NEXT:    j .LBB19_135
 ; RV32I-NEXT:  .LBB19_124:
-; RV32I-NEXT:    mv a3, s7
-; RV32I-NEXT:    bne t1, s5, .LBB19_112
+; RV32I-NEXT:    mv s5, s11
+; RV32I-NEXT:    bne t1, s6, .LBB19_108
 ; RV32I-NEXT:  .LBB19_125:
-; RV32I-NEXT:    or a5, s9, a3
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t4, .LBB19_113
+; RV32I-NEXT:    or a5, s2, s5
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    beqz t4, .LBB19_109
 ; RV32I-NEXT:  .LBB19_126:
-; RV32I-NEXT:    mv a3, s4
-; RV32I-NEXT:    bne t1, s0, .LBB19_114
+; RV32I-NEXT:    mv s5, s9
+; RV32I-NEXT:    bne t1, t5, .LBB19_110
 ; RV32I-NEXT:  .LBB19_127:
-; RV32I-NEXT:    or a5, s6, a3
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    beq t1, s10, .LBB19_115
+; RV32I-NEXT:    or a5, s1, s5
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    beqz t4, .LBB19_111
 ; RV32I-NEXT:  .LBB19_128:
-; RV32I-NEXT:    mv a3, a5
-; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    beq t1, s1, .LBB19_116
+; RV32I-NEXT:    mv s5, s7
+; RV32I-NEXT:    bne t1, s0, .LBB19_112
 ; RV32I-NEXT:  .LBB19_129:
-; RV32I-NEXT:    mv a5, a3
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    beq t1, s8, .LBB19_117
+; RV32I-NEXT:    or a5, a3, s5
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    beqz t4, .LBB19_113
 ; RV32I-NEXT:  .LBB19_130:
-; RV32I-NEXT:    mv a3, a5
-; RV32I-NEXT:    beqz t0, .LBB19_118
+; RV32I-NEXT:    mv s5, t6
+; RV32I-NEXT:    mv s0, a3
+; RV32I-NEXT:    beq t1, s10, .LBB19_114
+; RV32I-NEXT:    j .LBB19_115
 ; RV32I-NEXT:  .LBB19_131:
-; RV32I-NEXT:    mv a6, a3
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t4, .LBB19_119
+; RV32I-NEXT:    mv s5, a5
+; RV32I-NEXT:    li s0, 3
+; RV32I-NEXT:    beqz t0, .LBB19_121
 ; RV32I-NEXT:  .LBB19_132:
-; RV32I-NEXT:    mv a3, s11
-; RV32I-NEXT:    bnez t1, .LBB19_120
+; RV32I-NEXT:    mv a6, s5
+; RV32I-NEXT:    li a5, 0
+; RV32I-NEXT:    beqz t4, .LBB19_122
 ; RV32I-NEXT:  .LBB19_133:
-; RV32I-NEXT:    lw a5, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    or a5, a5, a3
+; RV32I-NEXT:    mv a5, s11
+; RV32I-NEXT:    bnez t1, .LBB19_123
 ; RV32I-NEXT:  .LBB19_134:
-; RV32I-NEXT:    lw s11, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t4, .LBB19_148
-; RV32I-NEXT:  # %bb.135:
-; RV32I-NEXT:    beq t1, s2, .LBB19_149
-; RV32I-NEXT:  .LBB19_136:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t4, .LBB19_150
+; RV32I-NEXT:    or a5, s2, a5
+; RV32I-NEXT:  .LBB19_135:
+; RV32I-NEXT:    li s2, 5
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    bnez t4, .LBB19_149
+; RV32I-NEXT:  # %bb.136:
+; RV32I-NEXT:    beq t1, s6, .LBB19_150
 ; RV32I-NEXT:  .LBB19_137:
-; RV32I-NEXT:    beq t1, s3, .LBB19_151
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    bnez t4, .LBB19_151
 ; RV32I-NEXT:  .LBB19_138:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t4, .LBB19_152
+; RV32I-NEXT:    beq t1, t5, .LBB19_152
 ; RV32I-NEXT:  .LBB19_139:
-; RV32I-NEXT:    beq t1, s5, .LBB19_153
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    bnez t4, .LBB19_153
 ; RV32I-NEXT:  .LBB19_140:
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    bne t1, s0, .LBB19_154
+; RV32I-NEXT:    beq t1, s0, .LBB19_154
 ; RV32I-NEXT:  .LBB19_141:
-; RV32I-NEXT:    mv a5, t2
+; RV32I-NEXT:    mv s5, t2
 ; RV32I-NEXT:    bne t1, s10, .LBB19_155
 ; RV32I-NEXT:  .LBB19_142:
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    bne t1, s1, .LBB19_156
-; RV32I-NEXT:  .LBB19_143:
 ; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    bne t1, s8, .LBB19_157
+; RV32I-NEXT:    bne t1, s2, .LBB19_156
+; RV32I-NEXT:  .LBB19_143:
+; RV32I-NEXT:    mv s5, t2
+; RV32I-NEXT:    bne t1, s3, .LBB19_157
 ; RV32I-NEXT:  .LBB19_144:
-; RV32I-NEXT:    bnez t0, .LBB19_158
+; RV32I-NEXT:    mv a5, t2
+; RV32I-NEXT:    bne t1, s4, .LBB19_158
 ; RV32I-NEXT:  .LBB19_145:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t4, .LBB19_159
+; RV32I-NEXT:    bnez t0, .LBB19_159
 ; RV32I-NEXT:  .LBB19_146:
-; RV32I-NEXT:    beqz t1, .LBB19_160
+; RV32I-NEXT:    li a5, 0
+; RV32I-NEXT:    bnez t4, .LBB19_160
 ; RV32I-NEXT:  .LBB19_147:
-; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t4, .LBB19_161
-; RV32I-NEXT:    j .LBB19_162
+; RV32I-NEXT:    beqz t1, .LBB19_161
 ; RV32I-NEXT:  .LBB19_148:
-; RV32I-NEXT:    lw a3, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    bne t1, s2, .LBB19_136
+; RV32I-NEXT:    mv a5, t2
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    bnez t4, .LBB19_162
+; RV32I-NEXT:    j .LBB19_163
 ; RV32I-NEXT:  .LBB19_149:
-; RV32I-NEXT:    or a5, ra, a3
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t4, .LBB19_137
+; RV32I-NEXT:    mv s5, s9
+; RV32I-NEXT:    bne t1, s6, .LBB19_137
 ; RV32I-NEXT:  .LBB19_150:
-; RV32I-NEXT:    mv a3, s7
-; RV32I-NEXT:    bne t1, s3, .LBB19_138
+; RV32I-NEXT:    or a5, s1, s5
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    beqz t4, .LBB19_138
 ; RV32I-NEXT:  .LBB19_151:
-; RV32I-NEXT:    or a5, s9, a3
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t4, .LBB19_139
+; RV32I-NEXT:    mv s5, s7
+; RV32I-NEXT:    bne t1, t5, .LBB19_139
 ; RV32I-NEXT:  .LBB19_152:
-; RV32I-NEXT:    mv a3, s4
-; RV32I-NEXT:    bne t1, s5, .LBB19_140
+; RV32I-NEXT:    or a5, a3, s5
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    beqz t4, .LBB19_140
 ; RV32I-NEXT:  .LBB19_153:
-; RV32I-NEXT:    or a5, s6, a3
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    beq t1, s0, .LBB19_141
+; RV32I-NEXT:    mv s5, t6
+; RV32I-NEXT:    bne t1, s0, .LBB19_141
 ; RV32I-NEXT:  .LBB19_154:
-; RV32I-NEXT:    mv a3, a5
-; RV32I-NEXT:    mv a5, t2
+; RV32I-NEXT:    or a5, s8, s5
+; RV32I-NEXT:    mv s5, t2
 ; RV32I-NEXT:    beq t1, s10, .LBB19_142
 ; RV32I-NEXT:  .LBB19_155:
-; RV32I-NEXT:    mv a5, a3
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    beq t1, s1, .LBB19_143
-; RV32I-NEXT:  .LBB19_156:
-; RV32I-NEXT:    mv a3, a5
+; RV32I-NEXT:    mv s5, a5
 ; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    beq t1, s8, .LBB19_144
+; RV32I-NEXT:    beq t1, s2, .LBB19_143
+; RV32I-NEXT:  .LBB19_156:
+; RV32I-NEXT:    mv a5, s5
+; RV32I-NEXT:    mv s5, t2
+; RV32I-NEXT:    beq t1, s3, .LBB19_144
 ; RV32I-NEXT:  .LBB19_157:
-; RV32I-NEXT:    mv a5, a3
-; RV32I-NEXT:    beqz t0, .LBB19_145
+; RV32I-NEXT:    mv s5, a5
+; RV32I-NEXT:    mv a5, t2
+; RV32I-NEXT:    beq t1, s4, .LBB19_145
 ; RV32I-NEXT:  .LBB19_158:
-; RV32I-NEXT:    mv a7, a5
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t4, .LBB19_146
+; RV32I-NEXT:    mv a5, s5
+; RV32I-NEXT:    beqz t0, .LBB19_146
 ; RV32I-NEXT:  .LBB19_159:
-; RV32I-NEXT:    lw a3, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    bnez t1, .LBB19_147
+; RV32I-NEXT:    mv a7, a5
+; RV32I-NEXT:    li a5, 0
+; RV32I-NEXT:    beqz t4, .LBB19_147
 ; RV32I-NEXT:  .LBB19_160:
-; RV32I-NEXT:    or a5, ra, a3
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t4, .LBB19_162
+; RV32I-NEXT:    mv a5, s9
+; RV32I-NEXT:    bnez t1, .LBB19_148
 ; RV32I-NEXT:  .LBB19_161:
-; RV32I-NEXT:    mv a3, s7
+; RV32I-NEXT:    or a5, s1, a5
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    beqz t4, .LBB19_163
 ; RV32I-NEXT:  .LBB19_162:
-; RV32I-NEXT:    beq t1, s2, .LBB19_174
-; RV32I-NEXT:  # %bb.163:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t4, .LBB19_175
-; RV32I-NEXT:  .LBB19_164:
-; RV32I-NEXT:    beq t1, s3, .LBB19_176
+; RV32I-NEXT:    mv s5, s7
+; RV32I-NEXT:  .LBB19_163:
+; RV32I-NEXT:    beq t1, s6, .LBB19_175
+; RV32I-NEXT:  # %bb.164:
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    bnez t4, .LBB19_176
 ; RV32I-NEXT:  .LBB19_165:
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    bne t1, s5, .LBB19_177
+; RV32I-NEXT:    beq t1, t5, .LBB19_177
 ; RV32I-NEXT:  .LBB19_166:
-; RV32I-NEXT:    mv a5, t2
+; RV32I-NEXT:    mv s5, t2
 ; RV32I-NEXT:    bne t1, s0, .LBB19_178
 ; RV32I-NEXT:  .LBB19_167:
-; RV32I-NEXT:    mv a3, t2
+; RV32I-NEXT:    mv a5, t2
 ; RV32I-NEXT:    bne t1, s10, .LBB19_179
 ; RV32I-NEXT:  .LBB19_168:
-; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    bne t1, s1, .LBB19_180
+; RV32I-NEXT:    mv s5, t2
+; RV32I-NEXT:    bne t1, s2, .LBB19_180
 ; RV32I-NEXT:  .LBB19_169:
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    bne t1, s8, .LBB19_181
+; RV32I-NEXT:    mv a5, t2
+; RV32I-NEXT:    bne t1, s3, .LBB19_181
 ; RV32I-NEXT:  .LBB19_170:
-; RV32I-NEXT:    bnez t0, .LBB19_182
+; RV32I-NEXT:    mv s5, t2
+; RV32I-NEXT:    bne t1, s4, .LBB19_182
 ; RV32I-NEXT:  .LBB19_171:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t4, .LBB19_183
+; RV32I-NEXT:    bnez t0, .LBB19_183
 ; RV32I-NEXT:  .LBB19_172:
-; RV32I-NEXT:    beqz t1, .LBB19_184
+; RV32I-NEXT:    li a5, 0
+; RV32I-NEXT:    bnez t4, .LBB19_184
 ; RV32I-NEXT:  .LBB19_173:
-; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t4, .LBB19_185
-; RV32I-NEXT:    j .LBB19_186
+; RV32I-NEXT:    beqz t1, .LBB19_185
 ; RV32I-NEXT:  .LBB19_174:
-; RV32I-NEXT:    or a5, s9, a3
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t4, .LBB19_164
+; RV32I-NEXT:    mv a5, t2
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    bnez t4, .LBB19_186
+; RV32I-NEXT:    j .LBB19_187
 ; RV32I-NEXT:  .LBB19_175:
-; RV32I-NEXT:    mv a3, s4
-; RV32I-NEXT:    bne t1, s3, .LBB19_165
+; RV32I-NEXT:    or a5, a3, s5
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    beqz t4, .LBB19_165
 ; RV32I-NEXT:  .LBB19_176:
-; RV32I-NEXT:    or a5, s6, a3
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    beq t1, s5, .LBB19_166
+; RV32I-NEXT:    mv s5, t6
+; RV32I-NEXT:    bne t1, t5, .LBB19_166
 ; RV32I-NEXT:  .LBB19_177:
-; RV32I-NEXT:    mv a3, a5
-; RV32I-NEXT:    mv a5, t2
+; RV32I-NEXT:    or a5, s8, s5
+; RV32I-NEXT:    mv s5, t2
 ; RV32I-NEXT:    beq t1, s0, .LBB19_167
 ; RV32I-NEXT:  .LBB19_178:
-; RV32I-NEXT:    mv a5, a3
-; RV32I-NEXT:    mv a3, t2
+; RV32I-NEXT:    mv s5, a5
+; RV32I-NEXT:    mv a5, t2
 ; RV32I-NEXT:    beq t1, s10, .LBB19_168
 ; RV32I-NEXT:  .LBB19_179:
-; RV32I-NEXT:    mv a3, a5
-; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    beq t1, s1, .LBB19_169
+; RV32I-NEXT:    mv a5, s5
+; RV32I-NEXT:    mv s5, t2
+; RV32I-NEXT:    beq t1, s2, .LBB19_169
 ; RV32I-NEXT:  .LBB19_180:
-; RV32I-NEXT:    mv a5, a3
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    beq t1, s8, .LBB19_170
+; RV32I-NEXT:    mv s5, a5
+; RV32I-NEXT:    mv a5, t2
+; RV32I-NEXT:    beq t1, s3, .LBB19_170
 ; RV32I-NEXT:  .LBB19_181:
-; RV32I-NEXT:    mv a3, a5
-; RV32I-NEXT:    beqz t0, .LBB19_171
+; RV32I-NEXT:    mv a5, s5
+; RV32I-NEXT:    mv s5, t2
+; RV32I-NEXT:    beq t1, s4, .LBB19_171
 ; RV32I-NEXT:  .LBB19_182:
-; RV32I-NEXT:    mv a0, a3
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t4, .LBB19_172
+; RV32I-NEXT:    mv s5, a5
+; RV32I-NEXT:    beqz t0, .LBB19_172
 ; RV32I-NEXT:  .LBB19_183:
-; RV32I-NEXT:    mv a3, s7
-; RV32I-NEXT:    bnez t1, .LBB19_173
+; RV32I-NEXT:    mv a0, s5
+; RV32I-NEXT:    li a5, 0
+; RV32I-NEXT:    beqz t4, .LBB19_173
 ; RV32I-NEXT:  .LBB19_184:
-; RV32I-NEXT:    or a5, s9, a3
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t4, .LBB19_186
+; RV32I-NEXT:    mv a5, s7
+; RV32I-NEXT:    bnez t1, .LBB19_174
 ; RV32I-NEXT:  .LBB19_185:
-; RV32I-NEXT:    mv a3, s4
+; RV32I-NEXT:    or a5, a3, a5
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    beqz t4, .LBB19_187
 ; RV32I-NEXT:  .LBB19_186:
-; RV32I-NEXT:    beq t1, s2, .LBB19_197
-; RV32I-NEXT:  # %bb.187:
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    bne t1, s3, .LBB19_198
-; RV32I-NEXT:  .LBB19_188:
-; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    bne t1, s5, .LBB19_199
+; RV32I-NEXT:    mv s5, t6
+; RV32I-NEXT:  .LBB19_187:
+; RV32I-NEXT:    beq t1, s6, .LBB19_200
+; RV32I-NEXT:  # %bb.188:
+; RV32I-NEXT:    mv s5, t2
+; RV32I-NEXT:    bne t1, t5, .LBB19_201
 ; RV32I-NEXT:  .LBB19_189:
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    bne t1, s0, .LBB19_200
-; RV32I-NEXT:  .LBB19_190:
 ; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    bne t1, s10, .LBB19_201
+; RV32I-NEXT:    bne t1, s0, .LBB19_202
+; RV32I-NEXT:  .LBB19_190:
+; RV32I-NEXT:    mv s5, t2
+; RV32I-NEXT:    bne t1, s10, .LBB19_203
 ; RV32I-NEXT:  .LBB19_191:
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    bne t1, s1, .LBB19_202
-; RV32I-NEXT:  .LBB19_192:
 ; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    bne t1, s8, .LBB19_203
+; RV32I-NEXT:    bne t1, s2, .LBB19_204
+; RV32I-NEXT:  .LBB19_192:
+; RV32I-NEXT:    mv s5, t2
+; RV32I-NEXT:    bne t1, s3, .LBB19_205
 ; RV32I-NEXT:  .LBB19_193:
-; RV32I-NEXT:    bnez t0, .LBB19_204
+; RV32I-NEXT:    mv a5, t2
+; RV32I-NEXT:    bne t1, s4, .LBB19_206
 ; RV32I-NEXT:  .LBB19_194:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t4, .LBB19_205
+; RV32I-NEXT:    beqz t0, .LBB19_196
 ; RV32I-NEXT:  .LBB19_195:
-; RV32I-NEXT:    beqz t1, .LBB19_206
+; RV32I-NEXT:    mv t3, a5
 ; RV32I-NEXT:  .LBB19_196:
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    bne t1, s2, .LBB19_207
-; RV32I-NEXT:    j .LBB19_208
-; RV32I-NEXT:  .LBB19_197:
-; RV32I-NEXT:    or a5, s6, a3
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    beq t1, s3, .LBB19_188
+; RV32I-NEXT:    li a5, 0
+; RV32I-NEXT:    lw a3, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    beqz t4, .LBB19_198
+; RV32I-NEXT:  # %bb.197:
+; RV32I-NEXT:    mv a5, t6
 ; RV32I-NEXT:  .LBB19_198:
-; RV32I-NEXT:    mv a3, a5
+; RV32I-NEXT:    beqz t1, .LBB19_207
+; RV32I-NEXT:  # %bb.199:
 ; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    beq t1, s5, .LBB19_189
-; RV32I-NEXT:  .LBB19_199:
-; RV32I-NEXT:    mv a5, a3
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    beq t1, s0, .LBB19_190
+; RV32I-NEXT:    mv t4, t2
+; RV32I-NEXT:    bne t1, s6, .LBB19_208
+; RV32I-NEXT:    j .LBB19_209
 ; RV32I-NEXT:  .LBB19_200:
-; RV32I-NEXT:    mv a3, a5
-; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    beq t1, s10, .LBB19_191
+; RV32I-NEXT:    or a5, s8, s5
+; RV32I-NEXT:    mv s5, t2
+; RV32I-NEXT:    beq t1, t5, .LBB19_189
 ; RV32I-NEXT:  .LBB19_201:
-; RV32I-NEXT:    mv a5, a3
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    beq t1, s1, .LBB19_192
-; RV32I-NEXT:  .LBB19_202:
-; RV32I-NEXT:    mv a3, a5
+; RV32I-NEXT:    mv s5, a5
 ; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    beq t1, s8, .LBB19_193
+; RV32I-NEXT:    beq t1, s0, .LBB19_190
+; RV32I-NEXT:  .LBB19_202:
+; RV32I-NEXT:    mv a5, s5
+; RV32I-NEXT:    mv s5, t2
+; RV32I-NEXT:    beq t1, s10, .LBB19_191
 ; RV32I-NEXT:  .LBB19_203:
-; RV32I-NEXT:    mv a5, a3
-; RV32I-NEXT:    beqz t0, .LBB19_194
+; RV32I-NEXT:    mv s5, a5
+; RV32I-NEXT:    mv a5, t2
+; RV32I-NEXT:    beq t1, s2, .LBB19_192
 ; RV32I-NEXT:  .LBB19_204:
-; RV32I-NEXT:    mv t3, a5
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t4, .LBB19_195
+; RV32I-NEXT:    mv a5, s5
+; RV32I-NEXT:    mv s5, t2
+; RV32I-NEXT:    beq t1, s3, .LBB19_193
 ; RV32I-NEXT:  .LBB19_205:
-; RV32I-NEXT:    mv a3, s4
-; RV32I-NEXT:    bnez t1, .LBB19_196
-; RV32I-NEXT:  .LBB19_206:
-; RV32I-NEXT:    or a3, s6, a3
+; RV32I-NEXT:    mv s5, a5
 ; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    beq t1, s2, .LBB19_208
+; RV32I-NEXT:    beq t1, s4, .LBB19_194
+; RV32I-NEXT:  .LBB19_206:
+; RV32I-NEXT:    mv a5, s5
+; RV32I-NEXT:    bnez t0, .LBB19_195
+; RV32I-NEXT:    j .LBB19_196
 ; RV32I-NEXT:  .LBB19_207:
-; RV32I-NEXT:    mv a5, a3
+; RV32I-NEXT:    or a5, s8, a5
+; RV32I-NEXT:    mv t4, t2
+; RV32I-NEXT:    beq t1, s6, .LBB19_209
 ; RV32I-NEXT:  .LBB19_208:
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    bne t1, s3, .LBB19_217
-; RV32I-NEXT:  # %bb.209:
+; RV32I-NEXT:    mv t4, a5
+; RV32I-NEXT:  .LBB19_209:
 ; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    bne t1, s5, .LBB19_218
-; RV32I-NEXT:  .LBB19_210:
-; RV32I-NEXT:    mv a3, t2
+; RV32I-NEXT:    bne t1, t5, .LBB19_218
+; RV32I-NEXT:  # %bb.210:
+; RV32I-NEXT:    mv t4, t2
 ; RV32I-NEXT:    bne t1, s0, .LBB19_219
 ; RV32I-NEXT:  .LBB19_211:
 ; RV32I-NEXT:    mv a5, t2
 ; RV32I-NEXT:    bne t1, s10, .LBB19_220
 ; RV32I-NEXT:  .LBB19_212:
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    bne t1, s1, .LBB19_221
+; RV32I-NEXT:    mv t4, t2
+; RV32I-NEXT:    bne t1, s2, .LBB19_221
 ; RV32I-NEXT:  .LBB19_213:
-; RV32I-NEXT:    bne t1, s8, .LBB19_222
+; RV32I-NEXT:    mv a5, t2
+; RV32I-NEXT:    bne t1, s3, .LBB19_222
 ; RV32I-NEXT:  .LBB19_214:
-; RV32I-NEXT:    beqz t0, .LBB19_216
+; RV32I-NEXT:    bne t1, s4, .LBB19_223
 ; RV32I-NEXT:  .LBB19_215:
-; RV32I-NEXT:    mv a1, t2
+; RV32I-NEXT:    beqz t0, .LBB19_217
 ; RV32I-NEXT:  .LBB19_216:
-; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    mv a1, t2
+; RV32I-NEXT:  .LBB19_217:
 ; RV32I-NEXT:    srli a5, ra, 16
 ; RV32I-NEXT:    lui t4, 16
 ; RV32I-NEXT:    srli t2, ra, 24
-; RV32I-NEXT:    srli t0, s11, 16
-; RV32I-NEXT:    srli t6, s11, 24
+; RV32I-NEXT:    srli t0, a3, 16
+; RV32I-NEXT:    srli t6, a3, 24
 ; RV32I-NEXT:    srli t1, a4, 16
 ; RV32I-NEXT:    srli s2, a4, 24
 ; RV32I-NEXT:    srli t5, a6, 16
 ; RV32I-NEXT:    srli s3, a6, 24
 ; RV32I-NEXT:    srli s1, a7, 16
-; RV32I-NEXT:    srli a3, a7, 24
+; RV32I-NEXT:    srli s6, a7, 24
 ; RV32I-NEXT:    srli s0, a0, 16
 ; RV32I-NEXT:    srli s5, a0, 24
 ; RV32I-NEXT:    srli s4, t3, 16
-; RV32I-NEXT:    srli s6, t3, 24
-; RV32I-NEXT:    srli s7, a1, 16
-; RV32I-NEXT:    srli s8, a1, 24
+; RV32I-NEXT:    srli s7, t3, 24
+; RV32I-NEXT:    srli s8, a1, 16
+; RV32I-NEXT:    srli s9, a1, 24
 ; RV32I-NEXT:    addi t4, t4, -1
-; RV32I-NEXT:    and s9, ra, t4
-; RV32I-NEXT:    and s10, s11, t4
-; RV32I-NEXT:    srli s9, s9, 8
+; RV32I-NEXT:    and s10, ra, t4
+; RV32I-NEXT:    and s11, a3, t4
+; RV32I-NEXT:    srli s10, s10, 8
 ; RV32I-NEXT:    sb ra, 0(a2)
-; RV32I-NEXT:    sb s9, 1(a2)
+; RV32I-NEXT:    sb s10, 1(a2)
 ; RV32I-NEXT:    sb a5, 2(a2)
 ; RV32I-NEXT:    sb t2, 3(a2)
 ; RV32I-NEXT:    and a5, a4, t4
-; RV32I-NEXT:    srli t2, s10, 8
-; RV32I-NEXT:    sb s11, 4(a2)
+; RV32I-NEXT:    srli t2, s11, 8
+; RV32I-NEXT:    sb a3, 4(a2)
 ; RV32I-NEXT:    sb t2, 5(a2)
 ; RV32I-NEXT:    sb t0, 6(a2)
 ; RV32I-NEXT:    sb t6, 7(a2)
-; RV32I-NEXT:    and t0, a6, t4
+; RV32I-NEXT:    and a3, a6, t4
 ; RV32I-NEXT:    srli a5, a5, 8
 ; RV32I-NEXT:    sb a4, 8(a2)
 ; RV32I-NEXT:    sb a5, 9(a2)
 ; RV32I-NEXT:    sb t1, 10(a2)
 ; RV32I-NEXT:    sb s2, 11(a2)
 ; RV32I-NEXT:    and a4, a7, t4
-; RV32I-NEXT:    srli a5, t0, 8
+; RV32I-NEXT:    srli a3, a3, 8
 ; RV32I-NEXT:    sb a6, 12(a2)
-; RV32I-NEXT:    sb a5, 13(a2)
+; RV32I-NEXT:    sb a3, 13(a2)
 ; RV32I-NEXT:    sb t5, 14(a2)
 ; RV32I-NEXT:    sb s3, 15(a2)
-; RV32I-NEXT:    and a5, a0, t4
+; RV32I-NEXT:    and a3, a0, t4
 ; RV32I-NEXT:    srli a4, a4, 8
 ; RV32I-NEXT:    sb a7, 16(a2)
 ; RV32I-NEXT:    sb a4, 17(a2)
 ; RV32I-NEXT:    sb s1, 18(a2)
-; RV32I-NEXT:    sb a3, 19(a2)
-; RV32I-NEXT:    and a3, t3, t4
-; RV32I-NEXT:    and a4, a1, t4
-; RV32I-NEXT:    srli a5, a5, 8
+; RV32I-NEXT:    sb s6, 19(a2)
+; RV32I-NEXT:    and a4, t3, t4
+; RV32I-NEXT:    and a5, a1, t4
 ; RV32I-NEXT:    srli a3, a3, 8
 ; RV32I-NEXT:    srli a4, a4, 8
+; RV32I-NEXT:    srli a5, a5, 8
 ; RV32I-NEXT:    sb a0, 20(a2)
-; RV32I-NEXT:    sb a5, 21(a2)
+; RV32I-NEXT:    sb a3, 21(a2)
 ; RV32I-NEXT:    sb s0, 22(a2)
 ; RV32I-NEXT:    sb s5, 23(a2)
 ; RV32I-NEXT:    sb t3, 24(a2)
-; RV32I-NEXT:    sb a3, 25(a2)
+; RV32I-NEXT:    sb a4, 25(a2)
 ; RV32I-NEXT:    sb s4, 26(a2)
-; RV32I-NEXT:    sb s6, 27(a2)
+; RV32I-NEXT:    sb s7, 27(a2)
 ; RV32I-NEXT:    sb a1, 28(a2)
-; RV32I-NEXT:    sb a4, 29(a2)
-; RV32I-NEXT:    sb s7, 30(a2)
-; RV32I-NEXT:    sb s8, 31(a2)
+; RV32I-NEXT:    sb a5, 29(a2)
+; RV32I-NEXT:    sb s8, 30(a2)
+; RV32I-NEXT:    sb s9, 31(a2)
 ; RV32I-NEXT:    lw ra, 76(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 72(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s1, 68(sp) # 4-byte Folded Reload
@@ -11871,29 +11957,29 @@ define void @ashr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun
 ; RV32I-NEXT:    lw s11, 28(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 80
 ; RV32I-NEXT:    ret
-; RV32I-NEXT:  .LBB19_217:
-; RV32I-NEXT:    mv a3, a5
-; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    beq t1, s5, .LBB19_210
 ; RV32I-NEXT:  .LBB19_218:
-; RV32I-NEXT:    mv a5, a3
-; RV32I-NEXT:    mv a3, t2
+; RV32I-NEXT:    mv a5, t4
+; RV32I-NEXT:    mv t4, t2
 ; RV32I-NEXT:    beq t1, s0, .LBB19_211
 ; RV32I-NEXT:  .LBB19_219:
-; RV32I-NEXT:    mv a3, a5
+; RV32I-NEXT:    mv t4, a5
 ; RV32I-NEXT:    mv a5, t2
 ; RV32I-NEXT:    beq t1, s10, .LBB19_212
 ; RV32I-NEXT:  .LBB19_220:
-; RV32I-NEXT:    mv a5, a3
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    beq t1, s1, .LBB19_213
+; RV32I-NEXT:    mv a5, t4
+; RV32I-NEXT:    mv t4, t2
+; RV32I-NEXT:    beq t1, s2, .LBB19_213
 ; RV32I-NEXT:  .LBB19_221:
-; RV32I-NEXT:    mv a3, a5
-; RV32I-NEXT:    beq t1, s8, .LBB19_214
+; RV32I-NEXT:    mv t4, a5
+; RV32I-NEXT:    mv a5, t2
+; RV32I-NEXT:    beq t1, s3, .LBB19_214
 ; RV32I-NEXT:  .LBB19_222:
-; RV32I-NEXT:    mv t2, a3
-; RV32I-NEXT:    bnez t0, .LBB19_215
-; RV32I-NEXT:    j .LBB19_216
+; RV32I-NEXT:    mv a5, t4
+; RV32I-NEXT:    beq t1, s4, .LBB19_215
+; RV32I-NEXT:  .LBB19_223:
+; RV32I-NEXT:    mv t2, a5
+; RV32I-NEXT:    bnez t0, .LBB19_216
+; RV32I-NEXT:    j .LBB19_217
   %src = load i256, ptr %src.ptr, align 1
   %wordOff = load i256, ptr %wordOff.ptr, align 1
   %bitOff = shl i256 %wordOff, 5
@@ -12277,223 +12363,231 @@ define void @ashr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no
 ; RV32I-NEXT:    slli t4, t4, 16
 ; RV32I-NEXT:    slli t5, s0, 16
 ; RV32I-NEXT:    slli s4, s4, 16
-; RV32I-NEXT:    slli a3, a1, 16
+; RV32I-NEXT:    slli t0, a1, 16
 ; RV32I-NEXT:    or s5, t4, t1
 ; RV32I-NEXT:    or a1, s4, s1
-; RV32I-NEXT:    or t0, a3, s2
+; RV32I-NEXT:    or t0, t0, s2
 ; RV32I-NEXT:    slli t0, t0, 6
 ; RV32I-NEXT:    srli t1, t0, 5
 ; RV32I-NEXT:    andi t4, t0, 31
-; RV32I-NEXT:    neg a3, t4
+; RV32I-NEXT:    neg ra, t4
 ; RV32I-NEXT:    beqz t4, .LBB20_2
 ; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    sll a5, s5, a3
+; RV32I-NEXT:    sll a5, s5, ra
 ; RV32I-NEXT:  .LBB20_2:
-; RV32I-NEXT:    or s10, t6, a4
+; RV32I-NEXT:    or s2, t6, a4
 ; RV32I-NEXT:    lbu t6, 12(a0)
 ; RV32I-NEXT:    lbu s0, 19(a0)
 ; RV32I-NEXT:    slli s1, a7, 8
 ; RV32I-NEXT:    or a6, t3, a6
-; RV32I-NEXT:    or a4, t5, t2
+; RV32I-NEXT:    or a3, t5, t2
 ; RV32I-NEXT:    srai t2, a1, 31
 ; RV32I-NEXT:    beqz t1, .LBB20_4
 ; RV32I-NEXT:  # %bb.3:
+; RV32I-NEXT:    mv a4, s2
 ; RV32I-NEXT:    mv a5, t2
 ; RV32I-NEXT:    j .LBB20_5
 ; RV32I-NEXT:  .LBB20_4:
-; RV32I-NEXT:    srl a7, s10, t0
+; RV32I-NEXT:    mv a4, s2
+; RV32I-NEXT:    srl a7, s2, t0
 ; RV32I-NEXT:    or a5, a7, a5
 ; RV32I-NEXT:  .LBB20_5:
 ; RV32I-NEXT:    li a7, 0
-; RV32I-NEXT:    lbu s3, 17(a0)
+; RV32I-NEXT:    lbu s2, 17(a0)
 ; RV32I-NEXT:    lbu t3, 18(a0)
-; RV32I-NEXT:    slli s0, s0, 8
-; RV32I-NEXT:    or s4, s1, t6
+; RV32I-NEXT:    slli s4, s0, 8
+; RV32I-NEXT:    or s3, s1, t6
 ; RV32I-NEXT:    slli a6, a6, 16
-; RV32I-NEXT:    li s6, 1
-; RV32I-NEXT:    sll s2, a4, a3
+; RV32I-NEXT:    li t5, 1
+; RV32I-NEXT:    sll s8, a3, ra
 ; RV32I-NEXT:    beqz t4, .LBB20_7
 ; RV32I-NEXT:  # %bb.6:
-; RV32I-NEXT:    mv a7, s2
+; RV32I-NEXT:    mv a7, s8
 ; RV32I-NEXT:  .LBB20_7:
-; RV32I-NEXT:    lbu t5, 16(a0)
-; RV32I-NEXT:    lbu t6, 23(a0)
-; RV32I-NEXT:    slli s1, s3, 8
-; RV32I-NEXT:    or s0, s0, t3
-; RV32I-NEXT:    srl s3, s5, t0
-; RV32I-NEXT:    or a6, a6, s4
-; RV32I-NEXT:    bne t1, s6, .LBB20_9
+; RV32I-NEXT:    lbu t6, 16(a0)
+; RV32I-NEXT:    lbu s0, 23(a0)
+; RV32I-NEXT:    slli s2, s2, 8
+; RV32I-NEXT:    or s1, s4, t3
+; RV32I-NEXT:    srl t3, s5, t0
+; RV32I-NEXT:    or a6, a6, s3
+; RV32I-NEXT:    sw t3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    beq t1, t5, .LBB20_9
 ; RV32I-NEXT:  # %bb.8:
-; RV32I-NEXT:    or a5, s3, a7
+; RV32I-NEXT:    li t5, 1
+; RV32I-NEXT:    j .LBB20_10
 ; RV32I-NEXT:  .LBB20_9:
+; RV32I-NEXT:    li t5, 1
+; RV32I-NEXT:    or a5, t3, a7
+; RV32I-NEXT:  .LBB20_10:
 ; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    lbu s6, 21(a0)
+; RV32I-NEXT:    lbu s3, 21(a0)
 ; RV32I-NEXT:    lbu a7, 22(a0)
-; RV32I-NEXT:    slli s4, t6, 8
-; RV32I-NEXT:    or s7, s1, t5
-; RV32I-NEXT:    slli s8, s0, 16
-; RV32I-NEXT:    li s9, 2
-; RV32I-NEXT:    sll s0, a6, a3
-; RV32I-NEXT:    beqz t4, .LBB20_11
-; RV32I-NEXT:  # %bb.10:
-; RV32I-NEXT:    mv t3, s0
-; RV32I-NEXT:  .LBB20_11:
-; RV32I-NEXT:    lbu t5, 20(a0)
-; RV32I-NEXT:    lbu t6, 27(a0)
-; RV32I-NEXT:    slli s6, s6, 8
-; RV32I-NEXT:    or s4, s4, a7
-; RV32I-NEXT:    srl s1, a4, t0
-; RV32I-NEXT:    or a7, s8, s7
-; RV32I-NEXT:    bne t1, s9, .LBB20_13
-; RV32I-NEXT:  # %bb.12:
-; RV32I-NEXT:    or a5, s1, t3
-; RV32I-NEXT:  .LBB20_13:
-; RV32I-NEXT:    li t3, 0
-; RV32I-NEXT:    lbu s8, 25(a0)
-; RV32I-NEXT:    lbu s7, 26(a0)
-; RV32I-NEXT:    slli t6, t6, 8
-; RV32I-NEXT:    or s6, s6, t5
-; RV32I-NEXT:    slli s9, s4, 16
-; RV32I-NEXT:    li s11, 3
-; RV32I-NEXT:    sll t5, a7, a3
-; RV32I-NEXT:    beqz t4, .LBB20_15
-; RV32I-NEXT:  # %bb.14:
-; RV32I-NEXT:    mv t3, t5
-; RV32I-NEXT:  .LBB20_15:
-; RV32I-NEXT:    lbu s4, 24(a0)
-; RV32I-NEXT:    slli s8, s8, 8
-; RV32I-NEXT:    or s7, t6, s7
+; RV32I-NEXT:    slli s7, s0, 8
+; RV32I-NEXT:    or s4, s2, t6
+; RV32I-NEXT:    slli s9, s1, 16
+; RV32I-NEXT:    li t6, 2
+; RV32I-NEXT:    sll s6, a6, ra
+; RV32I-NEXT:    beqz t4, .LBB20_12
+; RV32I-NEXT:  # %bb.11:
+; RV32I-NEXT:    mv t3, s6
+; RV32I-NEXT:  .LBB20_12:
+; RV32I-NEXT:    lbu s0, 20(a0)
+; RV32I-NEXT:    lbu s1, 27(a0)
+; RV32I-NEXT:    slli s3, s3, 8
+; RV32I-NEXT:    or s2, s7, a7
+; RV32I-NEXT:    srl s7, a3, t0
+; RV32I-NEXT:    or a7, s9, s4
+; RV32I-NEXT:    sw a3, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s7, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    bne t1, t6, .LBB20_14
+; RV32I-NEXT:  # %bb.13:
+; RV32I-NEXT:    or a5, s7, t3
+; RV32I-NEXT:  .LBB20_14:
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    lbu s7, 25(a0)
+; RV32I-NEXT:    lbu s4, 26(a0)
+; RV32I-NEXT:    slli s11, s1, 8
+; RV32I-NEXT:    or s9, s3, s0
+; RV32I-NEXT:    slli s2, s2, 16
+; RV32I-NEXT:    li a3, 3
+; RV32I-NEXT:    sll s10, a7, ra
+; RV32I-NEXT:    beqz t4, .LBB20_16
+; RV32I-NEXT:  # %bb.15:
+; RV32I-NEXT:    mv t3, s10
+; RV32I-NEXT:  .LBB20_16:
+; RV32I-NEXT:    lbu s1, 24(a0)
+; RV32I-NEXT:    slli s7, s7, 8
+; RV32I-NEXT:    or s3, s11, s4
 ; RV32I-NEXT:    srl t6, a6, t0
-; RV32I-NEXT:    or a0, s9, s6
-; RV32I-NEXT:    sw s5, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    sw s10, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    bne t1, s11, .LBB20_17
-; RV32I-NEXT:  # %bb.16:
+; RV32I-NEXT:    or a0, s2, s9
+; RV32I-NEXT:    bne t1, a3, .LBB20_18
+; RV32I-NEXT:  # %bb.17:
 ; RV32I-NEXT:    or a5, t6, t3
-; RV32I-NEXT:  .LBB20_17:
-; RV32I-NEXT:    li s6, 0
-; RV32I-NEXT:    or t3, s8, s4
-; RV32I-NEXT:    slli s7, s7, 16
-; RV32I-NEXT:    li s10, 4
-; RV32I-NEXT:    sll s11, a0, a3
-; RV32I-NEXT:    beqz t4, .LBB20_19
-; RV32I-NEXT:  # %bb.18:
-; RV32I-NEXT:    mv s6, s11
-; RV32I-NEXT:  .LBB20_19:
-; RV32I-NEXT:    srl s4, a7, t0
-; RV32I-NEXT:    or t3, s7, t3
-; RV32I-NEXT:    sw s4, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    bne t1, s10, .LBB20_21
-; RV32I-NEXT:  # %bb.20:
-; RV32I-NEXT:    or a5, s4, s6
-; RV32I-NEXT:  .LBB20_21:
-; RV32I-NEXT:    li s4, 0
-; RV32I-NEXT:    li s5, 5
-; RV32I-NEXT:    sll s6, t3, a3
-; RV32I-NEXT:    sw s6, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    beqz t4, .LBB20_23
-; RV32I-NEXT:  # %bb.22:
-; RV32I-NEXT:    lw s4, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT:  .LBB20_23:
-; RV32I-NEXT:    srl s6, a0, t0
-; RV32I-NEXT:    beq t1, s5, .LBB20_25
-; RV32I-NEXT:  # %bb.24:
-; RV32I-NEXT:    mv ra, s6
-; RV32I-NEXT:    j .LBB20_26
-; RV32I-NEXT:  .LBB20_25:
-; RV32I-NEXT:    mv ra, s6
-; RV32I-NEXT:    or a5, s6, s4
+; RV32I-NEXT:  .LBB20_18:
+; RV32I-NEXT:    li s2, 0
+; RV32I-NEXT:    or t3, s7, s1
+; RV32I-NEXT:    slli s3, s3, 16
+; RV32I-NEXT:    sll s11, a0, ra
+; RV32I-NEXT:    beqz t4, .LBB20_20
+; RV32I-NEXT:  # %bb.19:
+; RV32I-NEXT:    mv s2, s11
+; RV32I-NEXT:  .LBB20_20:
+; RV32I-NEXT:    srl t6, a7, t0
+; RV32I-NEXT:    or t3, s3, t3
+; RV32I-NEXT:    li a3, 4
+; RV32I-NEXT:    bne t1, a3, .LBB20_22
+; RV32I-NEXT:  # %bb.21:
+; RV32I-NEXT:    or a5, t6, s2
+; RV32I-NEXT:  .LBB20_22:
+; RV32I-NEXT:    li s3, 0
+; RV32I-NEXT:    li s0, 5
+; RV32I-NEXT:    sll s9, t3, ra
+; RV32I-NEXT:    beqz t4, .LBB20_24
+; RV32I-NEXT:  # %bb.23:
+; RV32I-NEXT:    mv s3, s9
+; RV32I-NEXT:  .LBB20_24:
+; RV32I-NEXT:    srl a3, a0, t0
+; RV32I-NEXT:    beq t1, s0, .LBB20_26
+; RV32I-NEXT:  # %bb.25:
+; RV32I-NEXT:    mv s1, a3
+; RV32I-NEXT:    j .LBB20_27
 ; RV32I-NEXT:  .LBB20_26:
+; RV32I-NEXT:    mv s1, a3
+; RV32I-NEXT:    or a5, a3, s3
+; RV32I-NEXT:  .LBB20_27:
 ; RV32I-NEXT:    li s4, 0
-; RV32I-NEXT:    li s8, 6
-; RV32I-NEXT:    sll s7, a1, a3
-; RV32I-NEXT:    beqz t4, .LBB20_28
-; RV32I-NEXT:  # %bb.27:
+; RV32I-NEXT:    li s3, 6
+; RV32I-NEXT:    sll s7, a1, ra
+; RV32I-NEXT:    beqz t4, .LBB20_29
+; RV32I-NEXT:  # %bb.28:
 ; RV32I-NEXT:    mv s4, s7
-; RV32I-NEXT:  .LBB20_28:
-; RV32I-NEXT:    srl s5, t3, t0
-; RV32I-NEXT:    beq t1, s8, .LBB20_30
-; RV32I-NEXT:  # %bb.29:
-; RV32I-NEXT:    mv s9, s5
-; RV32I-NEXT:    j .LBB20_31
-; RV32I-NEXT:  .LBB20_30:
-; RV32I-NEXT:    mv s9, s5
-; RV32I-NEXT:    or a5, s5, s4
+; RV32I-NEXT:  .LBB20_29:
+; RV32I-NEXT:    srl s0, t3, t0
+; RV32I-NEXT:    sw s5, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    mv s2, t6
+; RV32I-NEXT:    bne t1, s3, .LBB20_31
+; RV32I-NEXT:  # %bb.30:
+; RV32I-NEXT:    or a5, s0, s4
 ; RV32I-NEXT:  .LBB20_31:
 ; RV32I-NEXT:    li s5, 0
-; RV32I-NEXT:    li s6, 7
-; RV32I-NEXT:    sll s4, t2, a3
+; RV32I-NEXT:    li s4, 7
+; RV32I-NEXT:    sll t6, t2, ra
 ; RV32I-NEXT:    beqz t4, .LBB20_33
 ; RV32I-NEXT:  # %bb.32:
-; RV32I-NEXT:    mv s5, s4
+; RV32I-NEXT:    mv s5, t6
 ; RV32I-NEXT:  .LBB20_33:
 ; RV32I-NEXT:    srl a3, a1, t0
-; RV32I-NEXT:    bne t1, s6, .LBB20_35
+; RV32I-NEXT:    mv ra, a4
+; RV32I-NEXT:    beq t1, s4, .LBB20_35
 ; RV32I-NEXT:  # %bb.34:
-; RV32I-NEXT:    or a5, a3, s5
+; RV32I-NEXT:    mv a4, a3
+; RV32I-NEXT:    mv a3, s0
+; RV32I-NEXT:    bnez t0, .LBB20_36
+; RV32I-NEXT:    j .LBB20_37
 ; RV32I-NEXT:  .LBB20_35:
-; RV32I-NEXT:    li s5, 3
-; RV32I-NEXT:    mv s6, a3
-; RV32I-NEXT:    bnez t0, .LBB20_39
-; RV32I-NEXT:  # %bb.36:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t4, .LBB20_40
+; RV32I-NEXT:    mv a4, a3
+; RV32I-NEXT:    or a5, a3, s5
+; RV32I-NEXT:    mv a3, s0
+; RV32I-NEXT:    beqz t0, .LBB20_37
+; RV32I-NEXT:  .LBB20_36:
+; RV32I-NEXT:    mv ra, a5
 ; RV32I-NEXT:  .LBB20_37:
+; RV32I-NEXT:    li a5, 0
+; RV32I-NEXT:    li s0, 3
+; RV32I-NEXT:    beqz t4, .LBB20_39
+; RV32I-NEXT:  # %bb.38:
+; RV32I-NEXT:    mv a5, s8
+; RV32I-NEXT:  .LBB20_39:
 ; RV32I-NEXT:    beqz t1, .LBB20_41
-; RV32I-NEXT:  .LBB20_38:
+; RV32I-NEXT:  # %bb.40:
 ; RV32I-NEXT:    mv a5, t2
 ; RV32I-NEXT:    j .LBB20_42
-; RV32I-NEXT:  .LBB20_39:
-; RV32I-NEXT:    sw a5, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t4, .LBB20_37
-; RV32I-NEXT:  .LBB20_40:
-; RV32I-NEXT:    mv a3, s2
-; RV32I-NEXT:    bnez t1, .LBB20_38
 ; RV32I-NEXT:  .LBB20_41:
-; RV32I-NEXT:    or a5, s3, a3
+; RV32I-NEXT:    lw s5, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    or a5, s5, a5
 ; RV32I-NEXT:  .LBB20_42:
-; RV32I-NEXT:    li s2, 1
-; RV32I-NEXT:    li s3, 2
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t4, .LBB20_61
+; RV32I-NEXT:    mv s8, a4
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    beqz t4, .LBB20_44
 ; RV32I-NEXT:  # %bb.43:
-; RV32I-NEXT:    beq t1, s2, .LBB20_62
+; RV32I-NEXT:    mv s5, s6
 ; RV32I-NEXT:  .LBB20_44:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t4, .LBB20_63
-; RV32I-NEXT:  .LBB20_45:
-; RV32I-NEXT:    beq t1, s3, .LBB20_64
+; RV32I-NEXT:    bne t1, t5, .LBB20_46
+; RV32I-NEXT:  # %bb.45:
+; RV32I-NEXT:    lw a4, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    or a5, a4, s5
 ; RV32I-NEXT:  .LBB20_46:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t4, .LBB20_65
-; RV32I-NEXT:  .LBB20_47:
-; RV32I-NEXT:    beq t1, s5, .LBB20_66
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    li t5, 2
+; RV32I-NEXT:    bnez t4, .LBB20_61
+; RV32I-NEXT:  # %bb.47:
+; RV32I-NEXT:    beq t1, t5, .LBB20_62
 ; RV32I-NEXT:  .LBB20_48:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t4, .LBB20_67
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    bnez t4, .LBB20_63
 ; RV32I-NEXT:  .LBB20_49:
-; RV32I-NEXT:    bne t1, s10, .LBB20_51
+; RV32I-NEXT:    beq t1, s0, .LBB20_64
 ; RV32I-NEXT:  .LBB20_50:
-; RV32I-NEXT:    or a5, ra, a3
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    bnez t4, .LBB20_65
 ; RV32I-NEXT:  .LBB20_51:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    li s10, 5
-; RV32I-NEXT:    bnez t4, .LBB20_68
-; RV32I-NEXT:  # %bb.52:
-; RV32I-NEXT:    beq t1, s10, .LBB20_69
+; RV32I-NEXT:    li a4, 4
+; RV32I-NEXT:    beq t1, a4, .LBB20_66
+; RV32I-NEXT:  .LBB20_52:
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    bnez t4, .LBB20_67
 ; RV32I-NEXT:  .LBB20_53:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t4, .LBB20_70
+; RV32I-NEXT:    li a4, 5
+; RV32I-NEXT:    beq t1, a4, .LBB20_68
 ; RV32I-NEXT:  .LBB20_54:
-; RV32I-NEXT:    bne t1, s8, .LBB20_56
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    bnez t4, .LBB20_69
 ; RV32I-NEXT:  .LBB20_55:
-; RV32I-NEXT:    or a5, s6, a3
+; RV32I-NEXT:    beq t1, s3, .LBB20_70
 ; RV32I-NEXT:  .LBB20_56:
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    li s8, 7
-; RV32I-NEXT:    bne t1, s8, .LBB20_71
-; RV32I-NEXT:  # %bb.57:
+; RV32I-NEXT:    mv s5, t2
+; RV32I-NEXT:    bne t1, s4, .LBB20_71
+; RV32I-NEXT:  .LBB20_57:
 ; RV32I-NEXT:    bnez t0, .LBB20_72
 ; RV32I-NEXT:  .LBB20_58:
 ; RV32I-NEXT:    li a5, 0
@@ -12504,565 +12598,570 @@ define void @ashr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no
 ; RV32I-NEXT:    mv a5, t2
 ; RV32I-NEXT:    j .LBB20_75
 ; RV32I-NEXT:  .LBB20_61:
-; RV32I-NEXT:    mv a3, s0
-; RV32I-NEXT:    bne t1, s2, .LBB20_44
+; RV32I-NEXT:    mv s5, s10
+; RV32I-NEXT:    bne t1, t5, .LBB20_48
 ; RV32I-NEXT:  .LBB20_62:
-; RV32I-NEXT:    or a5, s1, a3
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t4, .LBB20_45
+; RV32I-NEXT:    srl a4, a6, t0
+; RV32I-NEXT:    or a5, a4, s5
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    beqz t4, .LBB20_49
 ; RV32I-NEXT:  .LBB20_63:
-; RV32I-NEXT:    mv a3, t5
-; RV32I-NEXT:    bne t1, s3, .LBB20_46
+; RV32I-NEXT:    mv s5, s11
+; RV32I-NEXT:    bne t1, s0, .LBB20_50
 ; RV32I-NEXT:  .LBB20_64:
-; RV32I-NEXT:    or a5, t6, a3
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t4, .LBB20_47
+; RV32I-NEXT:    or a5, s2, s5
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    beqz t4, .LBB20_51
 ; RV32I-NEXT:  .LBB20_65:
-; RV32I-NEXT:    mv a3, s11
-; RV32I-NEXT:    bne t1, s5, .LBB20_48
+; RV32I-NEXT:    mv s5, s9
+; RV32I-NEXT:    li a4, 4
+; RV32I-NEXT:    bne t1, a4, .LBB20_52
 ; RV32I-NEXT:  .LBB20_66:
-; RV32I-NEXT:    lw a5, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    or a5, a5, a3
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t4, .LBB20_49
+; RV32I-NEXT:    or a5, s1, s5
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    beqz t4, .LBB20_53
 ; RV32I-NEXT:  .LBB20_67:
-; RV32I-NEXT:    lw a3, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    beq t1, s10, .LBB20_50
-; RV32I-NEXT:    j .LBB20_51
+; RV32I-NEXT:    mv s5, s7
+; RV32I-NEXT:    li a4, 5
+; RV32I-NEXT:    bne t1, a4, .LBB20_54
 ; RV32I-NEXT:  .LBB20_68:
-; RV32I-NEXT:    mv a3, s7
-; RV32I-NEXT:    bne t1, s10, .LBB20_53
+; RV32I-NEXT:    or a5, a3, s5
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    beqz t4, .LBB20_55
 ; RV32I-NEXT:  .LBB20_69:
-; RV32I-NEXT:    or a5, s9, a3
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t4, .LBB20_54
+; RV32I-NEXT:    mv s5, t6
+; RV32I-NEXT:    bne t1, s3, .LBB20_56
 ; RV32I-NEXT:  .LBB20_70:
-; RV32I-NEXT:    mv a3, s4
-; RV32I-NEXT:    beq t1, s8, .LBB20_55
-; RV32I-NEXT:    j .LBB20_56
+; RV32I-NEXT:    or a5, s8, s5
+; RV32I-NEXT:    mv s5, t2
+; RV32I-NEXT:    beq t1, s4, .LBB20_57
 ; RV32I-NEXT:  .LBB20_71:
-; RV32I-NEXT:    mv a3, a5
+; RV32I-NEXT:    mv s5, a5
 ; RV32I-NEXT:    beqz t0, .LBB20_58
 ; RV32I-NEXT:  .LBB20_72:
-; RV32I-NEXT:    sw a3, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s5, 24(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    li a5, 0
 ; RV32I-NEXT:    beqz t4, .LBB20_59
 ; RV32I-NEXT:  .LBB20_73:
-; RV32I-NEXT:    mv a5, s0
+; RV32I-NEXT:    mv a5, s6
 ; RV32I-NEXT:    bnez t1, .LBB20_60
 ; RV32I-NEXT:  .LBB20_74:
-; RV32I-NEXT:    or a5, s1, a5
+; RV32I-NEXT:    lw a4, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    or a5, a4, a5
 ; RV32I-NEXT:  .LBB20_75:
-; RV32I-NEXT:    li s0, 4
-; RV32I-NEXT:    li s1, 6
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    li s6, 1
+; RV32I-NEXT:    li s5, 0
 ; RV32I-NEXT:    bnez t4, .LBB20_91
 ; RV32I-NEXT:  # %bb.76:
-; RV32I-NEXT:    beq t1, s2, .LBB20_92
+; RV32I-NEXT:    beq t1, s6, .LBB20_92
 ; RV32I-NEXT:  .LBB20_77:
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    li s5, 0
 ; RV32I-NEXT:    bnez t4, .LBB20_93
 ; RV32I-NEXT:  .LBB20_78:
-; RV32I-NEXT:    beq t1, s3, .LBB20_94
+; RV32I-NEXT:    beq t1, t5, .LBB20_94
 ; RV32I-NEXT:  .LBB20_79:
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    li s5, 0
 ; RV32I-NEXT:    bnez t4, .LBB20_95
 ; RV32I-NEXT:  .LBB20_80:
-; RV32I-NEXT:    beq t1, s5, .LBB20_96
+; RV32I-NEXT:    beq t1, s0, .LBB20_96
 ; RV32I-NEXT:  .LBB20_81:
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    li s5, 0
 ; RV32I-NEXT:    bnez t4, .LBB20_97
 ; RV32I-NEXT:  .LBB20_82:
-; RV32I-NEXT:    beq t1, s0, .LBB20_98
+; RV32I-NEXT:    li a4, 4
+; RV32I-NEXT:    beq t1, a4, .LBB20_98
 ; RV32I-NEXT:  .LBB20_83:
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    li s5, 0
 ; RV32I-NEXT:    bnez t4, .LBB20_99
 ; RV32I-NEXT:  .LBB20_84:
-; RV32I-NEXT:    beq t1, s10, .LBB20_100
+; RV32I-NEXT:    li a4, 5
+; RV32I-NEXT:    beq t1, a4, .LBB20_100
 ; RV32I-NEXT:  .LBB20_85:
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    bne t1, s1, .LBB20_101
+; RV32I-NEXT:    mv s5, t2
+; RV32I-NEXT:    bne t1, s3, .LBB20_101
 ; RV32I-NEXT:  .LBB20_86:
 ; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    bne t1, s8, .LBB20_102
+; RV32I-NEXT:    bne t1, s4, .LBB20_102
 ; RV32I-NEXT:  .LBB20_87:
 ; RV32I-NEXT:    bnez t0, .LBB20_103
 ; RV32I-NEXT:  .LBB20_88:
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    li a5, 0
 ; RV32I-NEXT:    bnez t4, .LBB20_104
 ; RV32I-NEXT:  .LBB20_89:
 ; RV32I-NEXT:    beqz t1, .LBB20_105
 ; RV32I-NEXT:  .LBB20_90:
 ; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t4, .LBB20_106
-; RV32I-NEXT:    j .LBB20_107
+; RV32I-NEXT:    j .LBB20_106
 ; RV32I-NEXT:  .LBB20_91:
-; RV32I-NEXT:    mv a3, t5
-; RV32I-NEXT:    bne t1, s2, .LBB20_77
+; RV32I-NEXT:    mv s5, s10
+; RV32I-NEXT:    bne t1, s6, .LBB20_77
 ; RV32I-NEXT:  .LBB20_92:
-; RV32I-NEXT:    or a5, t6, a3
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    srl a4, a6, t0
+; RV32I-NEXT:    or a5, a4, s5
+; RV32I-NEXT:    li s5, 0
 ; RV32I-NEXT:    beqz t4, .LBB20_78
 ; RV32I-NEXT:  .LBB20_93:
-; RV32I-NEXT:    mv a3, s11
-; RV32I-NEXT:    bne t1, s3, .LBB20_79
+; RV32I-NEXT:    mv s5, s11
+; RV32I-NEXT:    bne t1, t5, .LBB20_79
 ; RV32I-NEXT:  .LBB20_94:
-; RV32I-NEXT:    lw a5, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    or a5, a5, a3
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    or a5, s2, s5
+; RV32I-NEXT:    li s5, 0
 ; RV32I-NEXT:    beqz t4, .LBB20_80
 ; RV32I-NEXT:  .LBB20_95:
-; RV32I-NEXT:    lw a3, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    bne t1, s5, .LBB20_81
+; RV32I-NEXT:    mv s5, s9
+; RV32I-NEXT:    bne t1, s0, .LBB20_81
 ; RV32I-NEXT:  .LBB20_96:
-; RV32I-NEXT:    or a5, ra, a3
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    or a5, s1, s5
+; RV32I-NEXT:    li s5, 0
 ; RV32I-NEXT:    beqz t4, .LBB20_82
 ; RV32I-NEXT:  .LBB20_97:
-; RV32I-NEXT:    mv a3, s7
-; RV32I-NEXT:    bne t1, s0, .LBB20_83
+; RV32I-NEXT:    mv s5, s7
+; RV32I-NEXT:    li a4, 4
+; RV32I-NEXT:    bne t1, a4, .LBB20_83
 ; RV32I-NEXT:  .LBB20_98:
-; RV32I-NEXT:    or a5, s9, a3
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    or a5, a3, s5
+; RV32I-NEXT:    li s5, 0
 ; RV32I-NEXT:    beqz t4, .LBB20_84
 ; RV32I-NEXT:  .LBB20_99:
-; RV32I-NEXT:    mv a3, s4
-; RV32I-NEXT:    bne t1, s10, .LBB20_85
+; RV32I-NEXT:    mv s5, t6
+; RV32I-NEXT:    li a4, 5
+; RV32I-NEXT:    bne t1, a4, .LBB20_85
 ; RV32I-NEXT:  .LBB20_100:
-; RV32I-NEXT:    or a5, s6, a3
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    beq t1, s1, .LBB20_86
+; RV32I-NEXT:    or a5, s8, s5
+; RV32I-NEXT:    mv s5, t2
+; RV32I-NEXT:    beq t1, s3, .LBB20_86
 ; RV32I-NEXT:  .LBB20_101:
-; RV32I-NEXT:    mv a3, a5
+; RV32I-NEXT:    mv s5, a5
 ; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    beq t1, s8, .LBB20_87
+; RV32I-NEXT:    beq t1, s4, .LBB20_87
 ; RV32I-NEXT:  .LBB20_102:
-; RV32I-NEXT:    mv a5, a3
+; RV32I-NEXT:    mv a5, s5
 ; RV32I-NEXT:    beqz t0, .LBB20_88
 ; RV32I-NEXT:  .LBB20_103:
-; RV32I-NEXT:    mv a4, a5
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    sw a5, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    li a5, 0
 ; RV32I-NEXT:    beqz t4, .LBB20_89
 ; RV32I-NEXT:  .LBB20_104:
-; RV32I-NEXT:    mv a3, t5
+; RV32I-NEXT:    mv a5, s10
 ; RV32I-NEXT:    bnez t1, .LBB20_90
 ; RV32I-NEXT:  .LBB20_105:
-; RV32I-NEXT:    or a5, t6, a3
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t4, .LBB20_107
+; RV32I-NEXT:    srl a4, a6, t0
+; RV32I-NEXT:    or a5, a4, a5
 ; RV32I-NEXT:  .LBB20_106:
-; RV32I-NEXT:    mv a3, s11
-; RV32I-NEXT:  .LBB20_107:
-; RV32I-NEXT:    beq t1, s2, .LBB20_121
-; RV32I-NEXT:  # %bb.108:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t4, .LBB20_122
+; RV32I-NEXT:    lw a4, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    li s10, 4
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    bnez t4, .LBB20_124
+; RV32I-NEXT:  # %bb.107:
+; RV32I-NEXT:    beq t1, s6, .LBB20_125
+; RV32I-NEXT:  .LBB20_108:
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    bnez t4, .LBB20_126
 ; RV32I-NEXT:  .LBB20_109:
-; RV32I-NEXT:    beq t1, s3, .LBB20_123
+; RV32I-NEXT:    beq t1, t5, .LBB20_127
 ; RV32I-NEXT:  .LBB20_110:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t4, .LBB20_124
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    bnez t4, .LBB20_128
 ; RV32I-NEXT:  .LBB20_111:
-; RV32I-NEXT:    beq t1, s5, .LBB20_125
+; RV32I-NEXT:    beq t1, s0, .LBB20_129
 ; RV32I-NEXT:  .LBB20_112:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t4, .LBB20_126
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    bnez t4, .LBB20_130
 ; RV32I-NEXT:  .LBB20_113:
-; RV32I-NEXT:    beq t1, s0, .LBB20_127
+; RV32I-NEXT:    mv s0, a3
+; RV32I-NEXT:    bne t1, s10, .LBB20_115
 ; RV32I-NEXT:  .LBB20_114:
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    bne t1, s10, .LBB20_128
+; RV32I-NEXT:    or a5, s8, s5
 ; RV32I-NEXT:  .LBB20_115:
-; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    bne t1, s1, .LBB20_129
-; RV32I-NEXT:  .LBB20_116:
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    bne t1, s8, .LBB20_130
+; RV32I-NEXT:    mv s5, t2
+; RV32I-NEXT:    li a3, 5
+; RV32I-NEXT:    beq t1, a3, .LBB20_117
+; RV32I-NEXT:  # %bb.116:
+; RV32I-NEXT:    mv s5, a5
 ; RV32I-NEXT:  .LBB20_117:
-; RV32I-NEXT:    bnez t0, .LBB20_131
-; RV32I-NEXT:  .LBB20_118:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t4, .LBB20_132
-; RV32I-NEXT:  .LBB20_119:
-; RV32I-NEXT:    beqz t1, .LBB20_133
-; RV32I-NEXT:  .LBB20_120:
 ; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    j .LBB20_134
+; RV32I-NEXT:    beq t1, s3, .LBB20_119
+; RV32I-NEXT:  # %bb.118:
+; RV32I-NEXT:    mv a5, s5
+; RV32I-NEXT:  .LBB20_119:
+; RV32I-NEXT:    mv s5, t2
+; RV32I-NEXT:    mv a3, s0
+; RV32I-NEXT:    bne t1, s4, .LBB20_131
+; RV32I-NEXT:  # %bb.120:
+; RV32I-NEXT:    li s0, 3
+; RV32I-NEXT:    bnez t0, .LBB20_132
 ; RV32I-NEXT:  .LBB20_121:
-; RV32I-NEXT:    lw a5, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    or a5, a5, a3
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t4, .LBB20_109
+; RV32I-NEXT:    li a5, 0
+; RV32I-NEXT:    bnez t4, .LBB20_133
 ; RV32I-NEXT:  .LBB20_122:
-; RV32I-NEXT:    lw a3, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    bne t1, s3, .LBB20_110
+; RV32I-NEXT:    beqz t1, .LBB20_134
 ; RV32I-NEXT:  .LBB20_123:
-; RV32I-NEXT:    or a5, ra, a3
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t4, .LBB20_111
+; RV32I-NEXT:    mv a5, t2
+; RV32I-NEXT:    j .LBB20_135
 ; RV32I-NEXT:  .LBB20_124:
-; RV32I-NEXT:    mv a3, s7
-; RV32I-NEXT:    bne t1, s5, .LBB20_112
+; RV32I-NEXT:    mv s5, s11
+; RV32I-NEXT:    bne t1, s6, .LBB20_108
 ; RV32I-NEXT:  .LBB20_125:
-; RV32I-NEXT:    or a5, s9, a3
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t4, .LBB20_113
+; RV32I-NEXT:    or a5, s2, s5
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    beqz t4, .LBB20_109
 ; RV32I-NEXT:  .LBB20_126:
-; RV32I-NEXT:    mv a3, s4
-; RV32I-NEXT:    bne t1, s0, .LBB20_114
+; RV32I-NEXT:    mv s5, s9
+; RV32I-NEXT:    bne t1, t5, .LBB20_110
 ; RV32I-NEXT:  .LBB20_127:
-; RV32I-NEXT:    or a5, s6, a3
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    beq t1, s10, .LBB20_115
+; RV32I-NEXT:    or a5, s1, s5
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    beqz t4, .LBB20_111
 ; RV32I-NEXT:  .LBB20_128:
-; RV32I-NEXT:    mv a3, a5
-; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    beq t1, s1, .LBB20_116
+; RV32I-NEXT:    mv s5, s7
+; RV32I-NEXT:    bne t1, s0, .LBB20_112
 ; RV32I-NEXT:  .LBB20_129:
-; RV32I-NEXT:    mv a5, a3
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    beq t1, s8, .LBB20_117
+; RV32I-NEXT:    or a5, a3, s5
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    beqz t4, .LBB20_113
 ; RV32I-NEXT:  .LBB20_130:
-; RV32I-NEXT:    mv a3, a5
-; RV32I-NEXT:    beqz t0, .LBB20_118
+; RV32I-NEXT:    mv s5, t6
+; RV32I-NEXT:    mv s0, a3
+; RV32I-NEXT:    beq t1, s10, .LBB20_114
+; RV32I-NEXT:    j .LBB20_115
 ; RV32I-NEXT:  .LBB20_131:
-; RV32I-NEXT:    mv a6, a3
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t4, .LBB20_119
+; RV32I-NEXT:    mv s5, a5
+; RV32I-NEXT:    li s0, 3
+; RV32I-NEXT:    beqz t0, .LBB20_121
 ; RV32I-NEXT:  .LBB20_132:
-; RV32I-NEXT:    mv a3, s11
-; RV32I-NEXT:    bnez t1, .LBB20_120
+; RV32I-NEXT:    mv a6, s5
+; RV32I-NEXT:    li a5, 0
+; RV32I-NEXT:    beqz t4, .LBB20_122
 ; RV32I-NEXT:  .LBB20_133:
-; RV32I-NEXT:    lw a5, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    or a5, a5, a3
+; RV32I-NEXT:    mv a5, s11
+; RV32I-NEXT:    bnez t1, .LBB20_123
 ; RV32I-NEXT:  .LBB20_134:
-; RV32I-NEXT:    lw s11, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t4, .LBB20_148
-; RV32I-NEXT:  # %bb.135:
-; RV32I-NEXT:    beq t1, s2, .LBB20_149
-; RV32I-NEXT:  .LBB20_136:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t4, .LBB20_150
+; RV32I-NEXT:    or a5, s2, a5
+; RV32I-NEXT:  .LBB20_135:
+; RV32I-NEXT:    li s2, 5
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    bnez t4, .LBB20_149
+; RV32I-NEXT:  # %bb.136:
+; RV32I-NEXT:    beq t1, s6, .LBB20_150
 ; RV32I-NEXT:  .LBB20_137:
-; RV32I-NEXT:    beq t1, s3, .LBB20_151
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    bnez t4, .LBB20_151
 ; RV32I-NEXT:  .LBB20_138:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t4, .LBB20_152
+; RV32I-NEXT:    beq t1, t5, .LBB20_152
 ; RV32I-NEXT:  .LBB20_139:
-; RV32I-NEXT:    beq t1, s5, .LBB20_153
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    bnez t4, .LBB20_153
 ; RV32I-NEXT:  .LBB20_140:
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    bne t1, s0, .LBB20_154
+; RV32I-NEXT:    beq t1, s0, .LBB20_154
 ; RV32I-NEXT:  .LBB20_141:
-; RV32I-NEXT:    mv a5, t2
+; RV32I-NEXT:    mv s5, t2
 ; RV32I-NEXT:    bne t1, s10, .LBB20_155
 ; RV32I-NEXT:  .LBB20_142:
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    bne t1, s1, .LBB20_156
-; RV32I-NEXT:  .LBB20_143:
 ; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    bne t1, s8, .LBB20_157
+; RV32I-NEXT:    bne t1, s2, .LBB20_156
+; RV32I-NEXT:  .LBB20_143:
+; RV32I-NEXT:    mv s5, t2
+; RV32I-NEXT:    bne t1, s3, .LBB20_157
 ; RV32I-NEXT:  .LBB20_144:
-; RV32I-NEXT:    bnez t0, .LBB20_158
+; RV32I-NEXT:    mv a5, t2
+; RV32I-NEXT:    bne t1, s4, .LBB20_158
 ; RV32I-NEXT:  .LBB20_145:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t4, .LBB20_159
+; RV32I-NEXT:    bnez t0, .LBB20_159
 ; RV32I-NEXT:  .LBB20_146:
-; RV32I-NEXT:    beqz t1, .LBB20_160
+; RV32I-NEXT:    li a5, 0
+; RV32I-NEXT:    bnez t4, .LBB20_160
 ; RV32I-NEXT:  .LBB20_147:
-; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t4, .LBB20_161
-; RV32I-NEXT:    j .LBB20_162
+; RV32I-NEXT:    beqz t1, .LBB20_161
 ; RV32I-NEXT:  .LBB20_148:
-; RV32I-NEXT:    lw a3, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    bne t1, s2, .LBB20_136
+; RV32I-NEXT:    mv a5, t2
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    bnez t4, .LBB20_162
+; RV32I-NEXT:    j .LBB20_163
 ; RV32I-NEXT:  .LBB20_149:
-; RV32I-NEXT:    or a5, ra, a3
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t4, .LBB20_137
+; RV32I-NEXT:    mv s5, s9
+; RV32I-NEXT:    bne t1, s6, .LBB20_137
 ; RV32I-NEXT:  .LBB20_150:
-; RV32I-NEXT:    mv a3, s7
-; RV32I-NEXT:    bne t1, s3, .LBB20_138
+; RV32I-NEXT:    or a5, s1, s5
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    beqz t4, .LBB20_138
 ; RV32I-NEXT:  .LBB20_151:
-; RV32I-NEXT:    or a5, s9, a3
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t4, .LBB20_139
+; RV32I-NEXT:    mv s5, s7
+; RV32I-NEXT:    bne t1, t5, .LBB20_139
 ; RV32I-NEXT:  .LBB20_152:
-; RV32I-NEXT:    mv a3, s4
-; RV32I-NEXT:    bne t1, s5, .LBB20_140
+; RV32I-NEXT:    or a5, a3, s5
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    beqz t4, .LBB20_140
 ; RV32I-NEXT:  .LBB20_153:
-; RV32I-NEXT:    or a5, s6, a3
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    beq t1, s0, .LBB20_141
+; RV32I-NEXT:    mv s5, t6
+; RV32I-NEXT:    bne t1, s0, .LBB20_141
 ; RV32I-NEXT:  .LBB20_154:
-; RV32I-NEXT:    mv a3, a5
-; RV32I-NEXT:    mv a5, t2
+; RV32I-NEXT:    or a5, s8, s5
+; RV32I-NEXT:    mv s5, t2
 ; RV32I-NEXT:    beq t1, s10, .LBB20_142
 ; RV32I-NEXT:  .LBB20_155:
-; RV32I-NEXT:    mv a5, a3
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    beq t1, s1, .LBB20_143
-; RV32I-NEXT:  .LBB20_156:
-; RV32I-NEXT:    mv a3, a5
+; RV32I-NEXT:    mv s5, a5
 ; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    beq t1, s8, .LBB20_144
+; RV32I-NEXT:    beq t1, s2, .LBB20_143
+; RV32I-NEXT:  .LBB20_156:
+; RV32I-NEXT:    mv a5, s5
+; RV32I-NEXT:    mv s5, t2
+; RV32I-NEXT:    beq t1, s3, .LBB20_144
 ; RV32I-NEXT:  .LBB20_157:
-; RV32I-NEXT:    mv a5, a3
-; RV32I-NEXT:    beqz t0, .LBB20_145
+; RV32I-NEXT:    mv s5, a5
+; RV32I-NEXT:    mv a5, t2
+; RV32I-NEXT:    beq t1, s4, .LBB20_145
 ; RV32I-NEXT:  .LBB20_158:
-; RV32I-NEXT:    mv a7, a5
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t4, .LBB20_146
+; RV32I-NEXT:    mv a5, s5
+; RV32I-NEXT:    beqz t0, .LBB20_146
 ; RV32I-NEXT:  .LBB20_159:
-; RV32I-NEXT:    lw a3, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    bnez t1, .LBB20_147
+; RV32I-NEXT:    mv a7, a5
+; RV32I-NEXT:    li a5, 0
+; RV32I-NEXT:    beqz t4, .LBB20_147
 ; RV32I-NEXT:  .LBB20_160:
-; RV32I-NEXT:    or a5, ra, a3
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t4, .LBB20_162
+; RV32I-NEXT:    mv a5, s9
+; RV32I-NEXT:    bnez t1, .LBB20_148
 ; RV32I-NEXT:  .LBB20_161:
-; RV32I-NEXT:    mv a3, s7
+; RV32I-NEXT:    or a5, s1, a5
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    beqz t4, .LBB20_163
 ; RV32I-NEXT:  .LBB20_162:
-; RV32I-NEXT:    beq t1, s2, .LBB20_174
-; RV32I-NEXT:  # %bb.163:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t4, .LBB20_175
-; RV32I-NEXT:  .LBB20_164:
-; RV32I-NEXT:    beq t1, s3, .LBB20_176
+; RV32I-NEXT:    mv s5, s7
+; RV32I-NEXT:  .LBB20_163:
+; RV32I-NEXT:    beq t1, s6, .LBB20_175
+; RV32I-NEXT:  # %bb.164:
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    bnez t4, .LBB20_176
 ; RV32I-NEXT:  .LBB20_165:
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    bne t1, s5, .LBB20_177
+; RV32I-NEXT:    beq t1, t5, .LBB20_177
 ; RV32I-NEXT:  .LBB20_166:
-; RV32I-NEXT:    mv a5, t2
+; RV32I-NEXT:    mv s5, t2
 ; RV32I-NEXT:    bne t1, s0, .LBB20_178
 ; RV32I-NEXT:  .LBB20_167:
-; RV32I-NEXT:    mv a3, t2
+; RV32I-NEXT:    mv a5, t2
 ; RV32I-NEXT:    bne t1, s10, .LBB20_179
 ; RV32I-NEXT:  .LBB20_168:
-; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    bne t1, s1, .LBB20_180
+; RV32I-NEXT:    mv s5, t2
+; RV32I-NEXT:    bne t1, s2, .LBB20_180
 ; RV32I-NEXT:  .LBB20_169:
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    bne t1, s8, .LBB20_181
+; RV32I-NEXT:    mv a5, t2
+; RV32I-NEXT:    bne t1, s3, .LBB20_181
 ; RV32I-NEXT:  .LBB20_170:
-; RV32I-NEXT:    bnez t0, .LBB20_182
+; RV32I-NEXT:    mv s5, t2
+; RV32I-NEXT:    bne t1, s4, .LBB20_182
 ; RV32I-NEXT:  .LBB20_171:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t4, .LBB20_183
+; RV32I-NEXT:    bnez t0, .LBB20_183
 ; RV32I-NEXT:  .LBB20_172:
-; RV32I-NEXT:    beqz t1, .LBB20_184
+; RV32I-NEXT:    li a5, 0
+; RV32I-NEXT:    bnez t4, .LBB20_184
 ; RV32I-NEXT:  .LBB20_173:
-; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t4, .LBB20_185
-; RV32I-NEXT:    j .LBB20_186
+; RV32I-NEXT:    beqz t1, .LBB20_185
 ; RV32I-NEXT:  .LBB20_174:
-; RV32I-NEXT:    or a5, s9, a3
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t4, .LBB20_164
+; RV32I-NEXT:    mv a5, t2
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    bnez t4, .LBB20_186
+; RV32I-NEXT:    j .LBB20_187
 ; RV32I-NEXT:  .LBB20_175:
-; RV32I-NEXT:    mv a3, s4
-; RV32I-NEXT:    bne t1, s3, .LBB20_165
+; RV32I-NEXT:    or a5, a3, s5
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    beqz t4, .LBB20_165
 ; RV32I-NEXT:  .LBB20_176:
-; RV32I-NEXT:    or a5, s6, a3
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    beq t1, s5, .LBB20_166
+; RV32I-NEXT:    mv s5, t6
+; RV32I-NEXT:    bne t1, t5, .LBB20_166
 ; RV32I-NEXT:  .LBB20_177:
-; RV32I-NEXT:    mv a3, a5
-; RV32I-NEXT:    mv a5, t2
+; RV32I-NEXT:    or a5, s8, s5
+; RV32I-NEXT:    mv s5, t2
 ; RV32I-NEXT:    beq t1, s0, .LBB20_167
 ; RV32I-NEXT:  .LBB20_178:
-; RV32I-NEXT:    mv a5, a3
-; RV32I-NEXT:    mv a3, t2
+; RV32I-NEXT:    mv s5, a5
+; RV32I-NEXT:    mv a5, t2
 ; RV32I-NEXT:    beq t1, s10, .LBB20_168
 ; RV32I-NEXT:  .LBB20_179:
-; RV32I-NEXT:    mv a3, a5
-; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    beq t1, s1, .LBB20_169
+; RV32I-NEXT:    mv a5, s5
+; RV32I-NEXT:    mv s5, t2
+; RV32I-NEXT:    beq t1, s2, .LBB20_169
 ; RV32I-NEXT:  .LBB20_180:
-; RV32I-NEXT:    mv a5, a3
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    beq t1, s8, .LBB20_170
+; RV32I-NEXT:    mv s5, a5
+; RV32I-NEXT:    mv a5, t2
+; RV32I-NEXT:    beq t1, s3, .LBB20_170
 ; RV32I-NEXT:  .LBB20_181:
-; RV32I-NEXT:    mv a3, a5
-; RV32I-NEXT:    beqz t0, .LBB20_171
+; RV32I-NEXT:    mv a5, s5
+; RV32I-NEXT:    mv s5, t2
+; RV32I-NEXT:    beq t1, s4, .LBB20_171
 ; RV32I-NEXT:  .LBB20_182:
-; RV32I-NEXT:    mv a0, a3
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t4, .LBB20_172
+; RV32I-NEXT:    mv s5, a5
+; RV32I-NEXT:    beqz t0, .LBB20_172
 ; RV32I-NEXT:  .LBB20_183:
-; RV32I-NEXT:    mv a3, s7
-; RV32I-NEXT:    bnez t1, .LBB20_173
+; RV32I-NEXT:    mv a0, s5
+; RV32I-NEXT:    li a5, 0
+; RV32I-NEXT:    beqz t4, .LBB20_173
 ; RV32I-NEXT:  .LBB20_184:
-; RV32I-NEXT:    or a5, s9, a3
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t4, .LBB20_186
+; RV32I-NEXT:    mv a5, s7
+; RV32I-NEXT:    bnez t1, .LBB20_174
 ; RV32I-NEXT:  .LBB20_185:
-; RV32I-NEXT:    mv a3, s4
+; RV32I-NEXT:    or a5, a3, a5
+; RV32I-NEXT:    li s5, 0
+; RV32I-NEXT:    beqz t4, .LBB20_187
 ; RV32I-NEXT:  .LBB20_186:
-; RV32I-NEXT:    beq t1, s2, .LBB20_197
-; RV32I-NEXT:  # %bb.187:
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    bne t1, s3, .LBB20_198
-; RV32I-NEXT:  .LBB20_188:
-; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    bne t1, s5, .LBB20_199
+; RV32I-NEXT:    mv s5, t6
+; RV32I-NEXT:  .LBB20_187:
+; RV32I-NEXT:    beq t1, s6, .LBB20_200
+; RV32I-NEXT:  # %bb.188:
+; RV32I-NEXT:    mv s5, t2
+; RV32I-NEXT:    bne t1, t5, .LBB20_201
 ; RV32I-NEXT:  .LBB20_189:
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    bne t1, s0, .LBB20_200
-; RV32I-NEXT:  .LBB20_190:
 ; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    bne t1, s10, .LBB20_201
+; RV32I-NEXT:    bne t1, s0, .LBB20_202
+; RV32I-NEXT:  .LBB20_190:
+; RV32I-NEXT:    mv s5, t2
+; RV32I-NEXT:    bne t1, s10, .LBB20_203
 ; RV32I-NEXT:  .LBB20_191:
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    bne t1, s1, .LBB20_202
-; RV32I-NEXT:  .LBB20_192:
 ; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    bne t1, s8, .LBB20_203
+; RV32I-NEXT:    bne t1, s2, .LBB20_204
+; RV32I-NEXT:  .LBB20_192:
+; RV32I-NEXT:    mv s5, t2
+; RV32I-NEXT:    bne t1, s3, .LBB20_205
 ; RV32I-NEXT:  .LBB20_193:
-; RV32I-NEXT:    bnez t0, .LBB20_204
+; RV32I-NEXT:    mv a5, t2
+; RV32I-NEXT:    bne t1, s4, .LBB20_206
 ; RV32I-NEXT:  .LBB20_194:
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    bnez t4, .LBB20_205
+; RV32I-NEXT:    beqz t0, .LBB20_196
 ; RV32I-NEXT:  .LBB20_195:
-; RV32I-NEXT:    beqz t1, .LBB20_206
+; RV32I-NEXT:    mv t3, a5
 ; RV32I-NEXT:  .LBB20_196:
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    bne t1, s2, .LBB20_207
-; RV32I-NEXT:    j .LBB20_208
-; RV32I-NEXT:  .LBB20_197:
-; RV32I-NEXT:    or a5, s6, a3
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    beq t1, s3, .LBB20_188
+; RV32I-NEXT:    li a5, 0
+; RV32I-NEXT:    lw a3, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    beqz t4, .LBB20_198
+; RV32I-NEXT:  # %bb.197:
+; RV32I-NEXT:    mv a5, t6
 ; RV32I-NEXT:  .LBB20_198:
-; RV32I-NEXT:    mv a3, a5
+; RV32I-NEXT:    beqz t1, .LBB20_207
+; RV32I-NEXT:  # %bb.199:
 ; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    beq t1, s5, .LBB20_189
-; RV32I-NEXT:  .LBB20_199:
-; RV32I-NEXT:    mv a5, a3
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    beq t1, s0, .LBB20_190
+; RV32I-NEXT:    mv t4, t2
+; RV32I-NEXT:    bne t1, s6, .LBB20_208
+; RV32I-NEXT:    j .LBB20_209
 ; RV32I-NEXT:  .LBB20_200:
-; RV32I-NEXT:    mv a3, a5
-; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    beq t1, s10, .LBB20_191
+; RV32I-NEXT:    or a5, s8, s5
+; RV32I-NEXT:    mv s5, t2
+; RV32I-NEXT:    beq t1, t5, .LBB20_189
 ; RV32I-NEXT:  .LBB20_201:
-; RV32I-NEXT:    mv a5, a3
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    beq t1, s1, .LBB20_192
-; RV32I-NEXT:  .LBB20_202:
-; RV32I-NEXT:    mv a3, a5
+; RV32I-NEXT:    mv s5, a5
 ; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    beq t1, s8, .LBB20_193
+; RV32I-NEXT:    beq t1, s0, .LBB20_190
+; RV32I-NEXT:  .LBB20_202:
+; RV32I-NEXT:    mv a5, s5
+; RV32I-NEXT:    mv s5, t2
+; RV32I-NEXT:    beq t1, s10, .LBB20_191
 ; RV32I-NEXT:  .LBB20_203:
-; RV32I-NEXT:    mv a5, a3
-; RV32I-NEXT:    beqz t0, .LBB20_194
+; RV32I-NEXT:    mv s5, a5
+; RV32I-NEXT:    mv a5, t2
+; RV32I-NEXT:    beq t1, s2, .LBB20_192
 ; RV32I-NEXT:  .LBB20_204:
-; RV32I-NEXT:    mv t3, a5
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    beqz t4, .LBB20_195
+; RV32I-NEXT:    mv a5, s5
+; RV32I-NEXT:    mv s5, t2
+; RV32I-NEXT:    beq t1, s3, .LBB20_193
 ; RV32I-NEXT:  .LBB20_205:
-; RV32I-NEXT:    mv a3, s4
-; RV32I-NEXT:    bnez t1, .LBB20_196
-; RV32I-NEXT:  .LBB20_206:
-; RV32I-NEXT:    or a3, s6, a3
+; RV32I-NEXT:    mv s5, a5
 ; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    beq t1, s2, .LBB20_208
+; RV32I-NEXT:    beq t1, s4, .LBB20_194
+; RV32I-NEXT:  .LBB20_206:
+; RV32I-NEXT:    mv a5, s5
+; RV32I-NEXT:    bnez t0, .LBB20_195
+; RV32I-NEXT:    j .LBB20_196
 ; RV32I-NEXT:  .LBB20_207:
-; RV32I-NEXT:    mv a5, a3
+; RV32I-NEXT:    or a5, s8, a5
+; RV32I-NEXT:    mv t4, t2
+; RV32I-NEXT:    beq t1, s6, .LBB20_209
 ; RV32I-NEXT:  .LBB20_208:
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    bne t1, s3, .LBB20_217
-; RV32I-NEXT:  # %bb.209:
+; RV32I-NEXT:    mv t4, a5
+; RV32I-NEXT:  .LBB20_209:
 ; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    bne t1, s5, .LBB20_218
-; RV32I-NEXT:  .LBB20_210:
-; RV32I-NEXT:    mv a3, t2
+; RV32I-NEXT:    bne t1, t5, .LBB20_218
+; RV32I-NEXT:  # %bb.210:
+; RV32I-NEXT:    mv t4, t2
 ; RV32I-NEXT:    bne t1, s0, .LBB20_219
 ; RV32I-NEXT:  .LBB20_211:
 ; RV32I-NEXT:    mv a5, t2
 ; RV32I-NEXT:    bne t1, s10, .LBB20_220
 ; RV32I-NEXT:  .LBB20_212:
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    bne t1, s1, .LBB20_221
+; RV32I-NEXT:    mv t4, t2
+; RV32I-NEXT:    bne t1, s2, .LBB20_221
 ; RV32I-NEXT:  .LBB20_213:
-; RV32I-NEXT:    bne t1, s8, .LBB20_222
+; RV32I-NEXT:    mv a5, t2
+; RV32I-NEXT:    bne t1, s3, .LBB20_222
 ; RV32I-NEXT:  .LBB20_214:
-; RV32I-NEXT:    beqz t0, .LBB20_216
+; RV32I-NEXT:    bne t1, s4, .LBB20_223
 ; RV32I-NEXT:  .LBB20_215:
-; RV32I-NEXT:    mv a1, t2
+; RV32I-NEXT:    beqz t0, .LBB20_217
 ; RV32I-NEXT:  .LBB20_216:
-; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    mv a1, t2
+; RV32I-NEXT:  .LBB20_217:
 ; RV32I-NEXT:    srli a5, ra, 16
 ; RV32I-NEXT:    lui t4, 16
 ; RV32I-NEXT:    srli t2, ra, 24
-; RV32I-NEXT:    srli t0, s11, 16
-; RV32I-NEXT:    srli t6, s11, 24
+; RV32I-NEXT:    srli t0, a3, 16
+; RV32I-NEXT:    srli t6, a3, 24
 ; RV32I-NEXT:    srli t1, a4, 16
 ; RV32I-NEXT:    srli s2, a4, 24
 ; RV32I-NEXT:    srli t5, a6, 16
 ; RV32I-NEXT:    srli s3, a6, 24
 ; RV32I-NEXT:    srli s1, a7, 16
-; RV32I-NEXT:    srli a3, a7, 24
+; RV32I-NEXT:    srli s6, a7, 24
 ; RV32I-NEXT:    srli s0, a0, 16
 ; RV32I-NEXT:    srli s5, a0, 24
 ; RV32I-NEXT:    srli s4, t3, 16
-; RV32I-NEXT:    srli s6, t3, 24
-; RV32I-NEXT:    srli s7, a1, 16
-; RV32I-NEXT:    srli s8, a1, 24
+; RV32I-NEXT:    srli s7, t3, 24
+; RV32I-NEXT:    srli s8, a1, 16
+; RV32I-NEXT:    srli s9, a1, 24
 ; RV32I-NEXT:    addi t4, t4, -1
-; RV32I-NEXT:    and s9, ra, t4
-; RV32I-NEXT:    and s10, s11, t4
-; RV32I-NEXT:    srli s9, s9, 8
+; RV32I-NEXT:    and s10, ra, t4
+; RV32I-NEXT:    and s11, a3, t4
+; RV32I-NEXT:    srli s10, s10, 8
 ; RV32I-NEXT:    sb ra, 0(a2)
-; RV32I-NEXT:    sb s9, 1(a2)
+; RV32I-NEXT:    sb s10, 1(a2)
 ; RV32I-NEXT:    sb a5, 2(a2)
 ; RV32I-NEXT:    sb t2, 3(a2)
 ; RV32I-NEXT:    and a5, a4, t4
-; RV32I-NEXT:    srli t2, s10, 8
-; RV32I-NEXT:    sb s11, 4(a2)
+; RV32I-NEXT:    srli t2, s11, 8
+; RV32I-NEXT:    sb a3, 4(a2)
 ; RV32I-NEXT:    sb t2, 5(a2)
 ; RV32I-NEXT:    sb t0, 6(a2)
 ; RV32I-NEXT:    sb t6, 7(a2)
-; RV32I-NEXT:    and t0, a6, t4
+; RV32I-NEXT:    and a3, a6, t4
 ; RV32I-NEXT:    srli a5, a5, 8
 ; RV32I-NEXT:    sb a4, 8(a2)
 ; RV32I-NEXT:    sb a5, 9(a2)
 ; RV32I-NEXT:    sb t1, 10(a2)
 ; RV32I-NEXT:    sb s2, 11(a2)
 ; RV32I-NEXT:    and a4, a7, t4
-; RV32I-NEXT:    srli a5, t0, 8
+; RV32I-NEXT:    srli a3, a3, 8
 ; RV32I-NEXT:    sb a6, 12(a2)
-; RV32I-NEXT:    sb a5, 13(a2)
+; RV32I-NEXT:    sb a3, 13(a2)
 ; RV32I-NEXT:    sb t5, 14(a2)
 ; RV32I-NEXT:    sb s3, 15(a2)
-; RV32I-NEXT:    and a5, a0, t4
+; RV32I-NEXT:    and a3, a0, t4
 ; RV32I-NEXT:    srli a4, a4, 8
 ; RV32I-NEXT:    sb a7, 16(a2)
 ; RV32I-NEXT:    sb a4, 17(a2)
 ; RV32I-NEXT:    sb s1, 18(a2)
-; RV32I-NEXT:    sb a3, 19(a2)
-; RV32I-NEXT:    and a3, t3, t4
-; RV32I-NEXT:    and a4, a1, t4
-; RV32I-NEXT:    srli a5, a5, 8
+; RV32I-NEXT:    sb s6, 19(a2)
+; RV32I-NEXT:    and a4, t3, t4
+; RV32I-NEXT:    and a5, a1, t4
 ; RV32I-NEXT:    srli a3, a3, 8
 ; RV32I-NEXT:    srli a4, a4, 8
+; RV32I-NEXT:    srli a5, a5, 8
 ; RV32I-NEXT:    sb a0, 20(a2)
-; RV32I-NEXT:    sb a5, 21(a2)
+; RV32I-NEXT:    sb a3, 21(a2)
 ; RV32I-NEXT:    sb s0, 22(a2)
 ; RV32I-NEXT:    sb s5, 23(a2)
 ; RV32I-NEXT:    sb t3, 24(a2)
-; RV32I-NEXT:    sb a3, 25(a2)
+; RV32I-NEXT:    sb a4, 25(a2)
 ; RV32I-NEXT:    sb s4, 26(a2)
-; RV32I-NEXT:    sb s6, 27(a2)
+; RV32I-NEXT:    sb s7, 27(a2)
 ; RV32I-NEXT:    sb a1, 28(a2)
-; RV32I-NEXT:    sb a4, 29(a2)
-; RV32I-NEXT:    sb s7, 30(a2)
-; RV32I-NEXT:    sb s8, 31(a2)
+; RV32I-NEXT:    sb a5, 29(a2)
+; RV32I-NEXT:    sb s8, 30(a2)
+; RV32I-NEXT:    sb s9, 31(a2)
 ; RV32I-NEXT:    lw ra, 76(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 72(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s1, 68(sp) # 4-byte Folded Reload
@@ -13078,29 +13177,29 @@ define void @ashr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no
 ; RV32I-NEXT:    lw s11, 28(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 80
 ; RV32I-NEXT:    ret
-; RV32I-NEXT:  .LBB20_217:
-; RV32I-NEXT:    mv a3, a5
-; RV32I-NEXT:    mv a5, t2
-; RV32I-NEXT:    beq t1, s5, .LBB20_210
 ; RV32I-NEXT:  .LBB20_218:
-; RV32I-NEXT:    mv a5, a3
-; RV32I-NEXT:    mv a3, t2
+; RV32I-NEXT:    mv a5, t4
+; RV32I-NEXT:    mv t4, t2
 ; RV32I-NEXT:    beq t1, s0, .LBB20_211
 ; RV32I-NEXT:  .LBB20_219:
-; RV32I-NEXT:    mv a3, a5
+; RV32I-NEXT:    mv t4, a5
 ; RV32I-NEXT:    mv a5, t2
 ; RV32I-NEXT:    beq t1, s10, .LBB20_212
 ; RV32I-NEXT:  .LBB20_220:
-; RV32I-NEXT:    mv a5, a3
-; RV32I-NEXT:    mv a3, t2
-; RV32I-NEXT:    beq t1, s1, .LBB20_213
+; RV32I-NEXT:    mv a5, t4
+; RV32I-NEXT:    mv t4, t2
+; RV32I-NEXT:    beq t1, s2, .LBB20_213
 ; RV32I-NEXT:  .LBB20_221:
-; RV32I-NEXT:    mv a3, a5
-; RV32I-NEXT:    beq t1, s8, .LBB20_214
+; RV32I-NEXT:    mv t4, a5
+; RV32I-NEXT:    mv a5, t2
+; RV32I-NEXT:    beq t1, s3, .LBB20_214
 ; RV32I-NEXT:  .LBB20_222:
-; RV32I-NEXT:    mv t2, a3
-; RV32I-NEXT:    bnez t0, .LBB20_215
-; RV32I-NEXT:    j .LBB20_216
+; RV32I-NEXT:    mv a5, t4
+; RV32I-NEXT:    beq t1, s4, .LBB20_215
+; RV32I-NEXT:  .LBB20_223:
+; RV32I-NEXT:    mv t2, a5
+; RV32I-NEXT:    bnez t0, .LBB20_216
+; RV32I-NEXT:    j .LBB20_217
   %src = load i256, ptr %src.ptr, align 1
   %dwordOff = load i256, ptr %dwordOff.ptr, align 1
   %bitOff = shl i256 %dwordOff, 6
diff --git a/llvm/test/CodeGen/RISCV/add-before-shl.ll b/llvm/test/CodeGen/RISCV/add-before-shl.ll
index 35a39b89a2cb7..017a434eab50c 100644
--- a/llvm/test/CodeGen/RISCV/add-before-shl.ll
+++ b/llvm/test/CodeGen/RISCV/add-before-shl.ll
@@ -205,17 +205,17 @@ define i128 @add_wide_operand(i128 %a) nounwind {
 ; RV32C-NEXT:    c.lw a3, 4(a1)
 ; RV32C-NEXT:    c.lw a1, 8(a1)
 ; RV32C-NEXT:    c.lui a5, 16
-; RV32C-NEXT:    add a6, a4, a5
-; RV32C-NEXT:    srli a5, a2, 29
+; RV32C-NEXT:    add a7, a4, a5
+; RV32C-NEXT:    srli a6, a2, 29
 ; RV32C-NEXT:    slli a4, a3, 3
-; RV32C-NEXT:    c.or a4, a5
+; RV32C-NEXT:    or a4, a4, a6
 ; RV32C-NEXT:    srli a5, a1, 29
 ; RV32C-NEXT:    c.srli a3, 29
 ; RV32C-NEXT:    c.slli a1, 3
 ; RV32C-NEXT:    c.slli a2, 3
-; RV32C-NEXT:    c.slli a6, 3
+; RV32C-NEXT:    c.slli a7, 3
 ; RV32C-NEXT:    c.or a1, a3
-; RV32C-NEXT:    or a3, a6, a5
+; RV32C-NEXT:    or a3, a7, a5
 ; RV32C-NEXT:    c.sw a2, 0(a0)
 ; RV32C-NEXT:    c.sw a4, 4(a0)
 ; RV32C-NEXT:    c.sw a1, 8(a0)
diff --git a/llvm/test/CodeGen/RISCV/pr69586.ll b/llvm/test/CodeGen/RISCV/pr69586.ll
index 33b89a405d8e3..31fb4e3657540 100644
--- a/llvm/test/CodeGen/RISCV/pr69586.ll
+++ b/llvm/test/CodeGen/RISCV/pr69586.ll
@@ -251,12 +251,12 @@ define void @test(ptr %0, ptr %1, i64 %2) {
 ; NOREMAT-NEXT:    vle32.v v12, (a2)
 ; NOREMAT-NEXT:    vle32.v v4, (a2)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v26, v8
-; NOREMAT-NEXT:    lui a3, 4
-; NOREMAT-NEXT:    addi a2, a3, 512
-; NOREMAT-NEXT:    sd a2, 496(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT:    add a2, a0, a2
-; NOREMAT-NEXT:    vle32.v v8, (a2)
-; NOREMAT-NEXT:    vle32.v v26, (a2)
+; NOREMAT-NEXT:    lui a4, 4
+; NOREMAT-NEXT:    addi a3, a4, 512
+; NOREMAT-NEXT:    sd a3, 496(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    add a3, a0, a3
+; NOREMAT-NEXT:    vle32.v v8, (a3)
+; NOREMAT-NEXT:    vle32.v v26, (a3)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v6, v28
 ; NOREMAT-NEXT:    slli a2, s1, 10
 ; NOREMAT-NEXT:    sd a2, 488(sp) # 8-byte Folded Spill
@@ -264,9 +264,8 @@ define void @test(ptr %0, ptr %1, i64 %2) {
 ; NOREMAT-NEXT:    vle32.v v28, (a2)
 ; NOREMAT-NEXT:    vle32.v v6, (a2)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v30, v12
-; NOREMAT-NEXT:    addi a2, a3, 1536
+; NOREMAT-NEXT:    addi a2, a4, 1536
 ; NOREMAT-NEXT:    sd a2, 480(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT:    lui a4, 4
 ; NOREMAT-NEXT:    add a2, a0, a2
 ; NOREMAT-NEXT:    vle32.v v12, (a2)
 ; NOREMAT-NEXT:    vle32.v v30, (a2)
@@ -1143,27 +1142,27 @@ define void @test(ptr %0, ptr %1, i64 %2) {
 ; REMAT-NEXT:    add a2, sp, a2
 ; REMAT-NEXT:    addi a2, a2, 432
 ; REMAT-NEXT:    vs2r.v v18, (a2) # vscale x 16-byte Folded Spill
-; REMAT-NEXT:    li a2, 29
-; REMAT-NEXT:    slli a2, a2, 9
-; REMAT-NEXT:    add a2, a0, a2
-; REMAT-NEXT:    vle32.v v18, (a2)
+; REMAT-NEXT:    li a3, 29
+; REMAT-NEXT:    slli a3, a3, 9
+; REMAT-NEXT:    add a3, a0, a3
+; REMAT-NEXT:    vle32.v v18, (a3)
 ; REMAT-NEXT:    sf.vc.vv 3, 0, v26, v24
-; REMAT-NEXT:    vle32.v v20, (a2)
+; REMAT-NEXT:    vle32.v v20, (a3)
 ; REMAT-NEXT:    csrr a2, vlenb
 ; REMAT-NEXT:    li a3, 12
 ; REMAT-NEXT:    mul a2, a2, a3
 ; REMAT-NEXT:    add a2, sp, a2
 ; REMAT-NEXT:    addi a2, a2, 432
 ; REMAT-NEXT:    vs2r.v v20, (a2) # vscale x 16-byte Folded Spill
-; REMAT-NEXT:    li a2, 15
-; REMAT-NEXT:    slli a2, a2, 10
-; REMAT-NEXT:    add a2, a0, a2
+; REMAT-NEXT:    li a3, 15
+; REMAT-NEXT:    slli a3, a3, 10
+; REMAT-NEXT:    add a2, a0, a3
 ; REMAT-NEXT:    vle32.v v30, (a2)
 ; REMAT-NEXT:    sf.vc.vv 3, 0, v28, v8
 ; REMAT-NEXT:    vle32.v v8, (a2)
 ; REMAT-NEXT:    csrr a2, vlenb
-; REMAT-NEXT:    li a3, 10
-; REMAT-NEXT:    mul a2, a2, a3
+; REMAT-NEXT:    li a4, 10
+; REMAT-NEXT:    mul a2, a2, a4
 ; REMAT-NEXT:    add a2, sp, a2
 ; REMAT-NEXT:    addi a2, a2, 432
 ; REMAT-NEXT:    vs2r.v v8, (a2) # vscale x 16-byte Folded Spill
@@ -1171,11 +1170,11 @@ define void @test(ptr %0, ptr %1, i64 %2) {
 ; REMAT-NEXT:    slli a2, a2, 9
 ; REMAT-NEXT:    add a2, a0, a2
 ; REMAT-NEXT:    vle32.v v6, (a2)
-; REMAT-NEXT:    csrr a3, vlenb
-; REMAT-NEXT:    slli a3, a3, 3
-; REMAT-NEXT:    add a3, sp, a3
-; REMAT-NEXT:    addi a3, a3, 432
-; REMAT-NEXT:    vl2r.v v8, (a3) # vscale x 16-byte Folded Reload
+; REMAT-NEXT:    csrr a4, vlenb
+; REMAT-NEXT:    slli a4, a4, 3
+; REMAT-NEXT:    add a4, sp, a4
+; REMAT-NEXT:    addi a4, a4, 432
+; REMAT-NEXT:    vl2r.v v8, (a4) # vscale x 16-byte Folded Reload
 ; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v12
 ; REMAT-NEXT:    vle32.v v8, (a2)
 ; REMAT-NEXT:    csrr a2, vlenb
@@ -1189,8 +1188,8 @@ define void @test(ptr %0, ptr %1, i64 %2) {
 ; REMAT-NEXT:    sf.vc.vv 3, 0, v16, v2
 ; REMAT-NEXT:    vle32.v v8, (a2)
 ; REMAT-NEXT:    csrr a2, vlenb
-; REMAT-NEXT:    li a3, 6
-; REMAT-NEXT:    mul a2, a2, a3
+; REMAT-NEXT:    li a4, 6
+; REMAT-NEXT:    mul a2, a2, a4
 ; REMAT-NEXT:    add a2, sp, a2
 ; REMAT-NEXT:    addi a2, a2, 432
 ; REMAT-NEXT:    vs2r.v v8, (a2) # vscale x 16-byte Folded Spill
@@ -1198,93 +1197,93 @@ define void @test(ptr %0, ptr %1, i64 %2) {
 ; REMAT-NEXT:    addi a2, a2, 512
 ; REMAT-NEXT:    add a2, a0, a2
 ; REMAT-NEXT:    vle32.v v2, (a2)
-; REMAT-NEXT:    csrr a3, vlenb
-; REMAT-NEXT:    slli a3, a3, 1
-; REMAT-NEXT:    add a3, sp, a3
-; REMAT-NEXT:    addi a3, a3, 432
-; REMAT-NEXT:    vl2r.v v8, (a3) # vscale x 16-byte Folded Reload
+; REMAT-NEXT:    csrr a4, vlenb
+; REMAT-NEXT:    slli a4, a4, 1
+; REMAT-NEXT:    add a4, sp, a4
+; REMAT-NEXT:    addi a4, a4, 432
+; REMAT-NEXT:    vl2r.v v8, (a4) # vscale x 16-byte Folded Reload
 ; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v10
 ; REMAT-NEXT:    vle32.v v20, (a2)
 ; REMAT-NEXT:    li a2, 17
 ; REMAT-NEXT:    slli a2, a2, 10
 ; REMAT-NEXT:    add a2, a0, a2
 ; REMAT-NEXT:    vle32.v v0, (a2)
-; REMAT-NEXT:    csrr a3, vlenb
-; REMAT-NEXT:    slli a3, a3, 2
-; REMAT-NEXT:    add a3, sp, a3
-; REMAT-NEXT:    addi a3, a3, 432
-; REMAT-NEXT:    vl2r.v v8, (a3) # vscale x 16-byte Folded Reload
+; REMAT-NEXT:    csrr a4, vlenb
+; REMAT-NEXT:    slli a4, a4, 2
+; REMAT-NEXT:    add a4, sp, a4
+; REMAT-NEXT:    addi a4, a4, 432
+; REMAT-NEXT:    vl2r.v v8, (a4) # vscale x 16-byte Folded Reload
 ; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
 ; REMAT-NEXT:    vle32.v v22, (a2)
 ; REMAT-NEXT:    lui a2, 4
 ; REMAT-NEXT:    addi a2, a2, 1536
 ; REMAT-NEXT:    add a2, a0, a2
 ; REMAT-NEXT:    vle32.v v24, (a2)
-; REMAT-NEXT:    csrr a3, vlenb
-; REMAT-NEXT:    slli a3, a3, 4
-; REMAT-NEXT:    add a3, sp, a3
-; REMAT-NEXT:    addi a3, a3, 432
-; REMAT-NEXT:    vl2r.v v8, (a3) # vscale x 16-byte Folded Reload
-; REMAT-NEXT:    addi a3, sp, 432
-; REMAT-NEXT:    vl2r.v v10, (a3) # vscale x 16-byte Folded Reload
+; REMAT-NEXT:    csrr a4, vlenb
+; REMAT-NEXT:    slli a4, a4, 4
+; REMAT-NEXT:    add a4, sp, a4
+; REMAT-NEXT:    addi a4, a4, 432
+; REMAT-NEXT:    vl2r.v v8, (a4) # vscale x 16-byte Folded Reload
+; REMAT-NEXT:    addi a4, sp, 432
+; REMAT-NEXT:    vl2r.v v10, (a4) # vscale x 16-byte Folded Reload
 ; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v10
 ; REMAT-NEXT:    vle32.v v8, (a2)
 ; REMAT-NEXT:    li a2, 9
 ; REMAT-NEXT:    slli a2, a2, 11
 ; REMAT-NEXT:    add a2, a0, a2
 ; REMAT-NEXT:    vle32.v v26, (a2)
-; REMAT-NEXT:    csrr a3, vlenb
-; REMAT-NEXT:    li a4, 14
-; REMAT-NEXT:    mul a3, a3, a4
-; REMAT-NEXT:    add a3, sp, a3
-; REMAT-NEXT:    addi a3, a3, 432
-; REMAT-NEXT:    vl2r.v v10, (a3) # vscale x 16-byte Folded Reload
+; REMAT-NEXT:    csrr a4, vlenb
+; REMAT-NEXT:    li a6, 14
+; REMAT-NEXT:    mul a4, a4, a6
+; REMAT-NEXT:    add a4, sp, a4
+; REMAT-NEXT:    addi a4, a4, 432
+; REMAT-NEXT:    vl2r.v v10, (a4) # vscale x 16-byte Folded Reload
 ; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v18
 ; REMAT-NEXT:    vle32.v v10, (a2)
 ; REMAT-NEXT:    lui a2, 5
 ; REMAT-NEXT:    addi a2, a2, -1536
 ; REMAT-NEXT:    add a2, a0, a2
 ; REMAT-NEXT:    vle32.v v28, (a2)
-; REMAT-NEXT:    csrr a3, vlenb
-; REMAT-NEXT:    li a4, 12
-; REMAT-NEXT:    mul a3, a3, a4
-; REMAT-NEXT:    add a3, sp, a3
-; REMAT-NEXT:    addi a3, a3, 432
-; REMAT-NEXT:    vl2r.v v12, (a3) # vscale x 16-byte Folded Reload
+; REMAT-NEXT:    csrr a4, vlenb
+; REMAT-NEXT:    li a6, 12
+; REMAT-NEXT:    mul a4, a4, a6
+; REMAT-NEXT:    add a4, sp, a4
+; REMAT-NEXT:    addi a4, a4, 432
+; REMAT-NEXT:    vl2r.v v12, (a4) # vscale x 16-byte Folded Reload
 ; REMAT-NEXT:    sf.vc.vv 3, 0, v12, v30
 ; REMAT-NEXT:    vle32.v v12, (a2)
 ; REMAT-NEXT:    li a2, 19
 ; REMAT-NEXT:    slli a2, a2, 10
 ; REMAT-NEXT:    add a2, a0, a2
 ; REMAT-NEXT:    vle32.v v30, (a2)
-; REMAT-NEXT:    csrr a3, vlenb
-; REMAT-NEXT:    li a4, 10
-; REMAT-NEXT:    mul a3, a3, a4
-; REMAT-NEXT:    add a3, sp, a3
-; REMAT-NEXT:    addi a3, a3, 432
-; REMAT-NEXT:    vl2r.v v14, (a3) # vscale x 16-byte Folded Reload
+; REMAT-NEXT:    csrr a4, vlenb
+; REMAT-NEXT:    li a6, 10
+; REMAT-NEXT:    mul a4, a4, a6
+; REMAT-NEXT:    add a4, sp, a4
+; REMAT-NEXT:    addi a4, a4, 432
+; REMAT-NEXT:    vl2r.v v14, (a4) # vscale x 16-byte Folded Reload
 ; REMAT-NEXT:    sf.vc.vv 3, 0, v14, v6
 ; REMAT-NEXT:    vle32.v v14, (a2)
 ; REMAT-NEXT:    lui a2, 5
 ; REMAT-NEXT:    addi a2, a2, -512
 ; REMAT-NEXT:    add a2, a0, a2
 ; REMAT-NEXT:    vle32.v v6, (a2)
-; REMAT-NEXT:    csrr a3, vlenb
-; REMAT-NEXT:    slli a3, a3, 3
-; REMAT-NEXT:    add a3, sp, a3
-; REMAT-NEXT:    addi a3, a3, 432
-; REMAT-NEXT:    vl2r.v v16, (a3) # vscale x 16-byte Folded Reload
+; REMAT-NEXT:    csrr a4, vlenb
+; REMAT-NEXT:    slli a4, a4, 3
+; REMAT-NEXT:    add a4, sp, a4
+; REMAT-NEXT:    addi a4, a4, 432
+; REMAT-NEXT:    vl2r.v v16, (a4) # vscale x 16-byte Folded Reload
 ; REMAT-NEXT:    sf.vc.vv 3, 0, v16, v4
 ; REMAT-NEXT:    vle32.v v16, (a2)
 ; REMAT-NEXT:    lui a2, 5
 ; REMAT-NEXT:    add a2, a0, a2
 ; REMAT-NEXT:    vle32.v v4, (a2)
-; REMAT-NEXT:    csrr a3, vlenb
-; REMAT-NEXT:    li a4, 6
-; REMAT-NEXT:    mul a3, a3, a4
-; REMAT-NEXT:    add a3, sp, a3
-; REMAT-NEXT:    addi a3, a3, 432
-; REMAT-NEXT:    vl2r.v v18, (a3) # vscale x 16-byte Folded Reload
+; REMAT-NEXT:    csrr a4, vlenb
+; REMAT-NEXT:    li a6, 6
+; REMAT-NEXT:    mul a4, a4, a6
+; REMAT-NEXT:    add a4, sp, a4
+; REMAT-NEXT:    addi a4, a4, 432
+; REMAT-NEXT:    vl2r.v v18, (a4) # vscale x 16-byte Folded Reload
 ; REMAT-NEXT:    sf.vc.vv 3, 0, v18, v2
 ; REMAT-NEXT:    vle32.v v18, (a2)
 ; REMAT-NEXT:    lui a2, 5
@@ -1299,9 +1298,9 @@ define void @test(ptr %0, ptr %1, i64 %2) {
 ; REMAT-NEXT:    vle32.v v0, (a2)
 ; REMAT-NEXT:    sf.vc.vv 3, 0, v22, v24
 ; REMAT-NEXT:    vle32.v v22, (a2)
-; REMAT-NEXT:    lui s4, 5
-; REMAT-NEXT:    addi s4, s4, 1536
-; REMAT-NEXT:    add a2, a0, s4
+; REMAT-NEXT:    lui s5, 5
+; REMAT-NEXT:    addi s5, s5, 1536
+; REMAT-NEXT:    add a2, a0, s5
 ; REMAT-NEXT:    vle32.v v24, (a2)
 ; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v26
 ; REMAT-NEXT:    vle32.v v8, (a2)
@@ -1311,15 +1310,15 @@ define void @test(ptr %0, ptr %1, i64 %2) {
 ; REMAT-NEXT:    vle32.v v26, (a2)
 ; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v28
 ; REMAT-NEXT:    vle32.v v10, (a2)
-; REMAT-NEXT:    lui s3, 6
-; REMAT-NEXT:    addi s3, s3, -1536
-; REMAT-NEXT:    add a2, a0, s3
+; REMAT-NEXT:    lui s4, 6
+; REMAT-NEXT:    addi s4, s4, -1536
+; REMAT-NEXT:    add a2, a0, s4
 ; REMAT-NEXT:    vle32.v v28, (a2)
 ; REMAT-NEXT:    sf.vc.vv 3, 0, v12, v30
 ; REMAT-NEXT:    vle32.v v12, (a2)
-; REMAT-NEXT:    li s2, 23
-; REMAT-NEXT:    slli s2, s2, 10
-; REMAT-NEXT:    add a2, a0, s2
+; REMAT-NEXT:    li s3, 23
+; REMAT-NEXT:    slli s3, s3, 10
+; REMAT-NEXT:    add a2, a0, s3
 ; REMAT-NEXT:    vle32.v v30, (a2)
 ; REMAT-NEXT:    sf.vc.vv 3, 0, v14, v6
 ; REMAT-NEXT:    vle32.v v14, (a2)
@@ -1331,13 +1330,13 @@ define void @test(ptr %0, ptr %1, i64 %2) {
 ; REMAT-NEXT:    vle32.v v16, (a2)
 ; REMAT-NEXT:    lui a2, 6
 ; REMAT-NEXT:    add a2, a0, a2
-; REMAT-NEXT:    lui s1, 6
+; REMAT-NEXT:    lui s2, 6
 ; REMAT-NEXT:    vle32.v v4, (a2)
 ; REMAT-NEXT:    sf.vc.vv 3, 0, v18, v2
 ; REMAT-NEXT:    vle32.v v18, (a2)
-; REMAT-NEXT:    lui s0, 6
-; REMAT-NEXT:    addi s0, s0, 512
-; REMAT-NEXT:    add a2, a0, s0
+; REMAT-NEXT:    lui s1, 6
+; REMAT-NEXT:    addi s1, s1, 512
+; REMAT-NEXT:    add a2, a0, s1
 ; REMAT-NEXT:    vle32.v v2, (a2)
 ; REMAT-NEXT:    sf.vc.vv 3, 0, v20, v0
 ; REMAT-NEXT:    vle32.v v20, (a2)
@@ -1347,15 +1346,15 @@ define void @test(ptr %0, ptr %1, i64 %2) {
 ; REMAT-NEXT:    vle32.v v0, (a2)
 ; REMAT-NEXT:    sf.vc.vv 3, 0, v22, v24
 ; REMAT-NEXT:    vle32.v v22, (a2)
-; REMAT-NEXT:    lui t6, 6
-; REMAT-NEXT:    addi t6, t6, 1536
-; REMAT-NEXT:    add a2, a0, t6
+; REMAT-NEXT:    lui s0, 6
+; REMAT-NEXT:    addi s0, s0, 1536
+; REMAT-NEXT:    add a2, a0, s0
 ; REMAT-NEXT:    vle32.v v24, (a2)
 ; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v26
 ; REMAT-NEXT:    vle32.v v8, (a2)
-; REMAT-NEXT:    li t5, 13
-; REMAT-NEXT:    slli t5, t5, 11
-; REMAT-NEXT:    add a2, a0, t5
+; REMAT-NEXT:    li t6, 13
+; REMAT-NEXT:    slli t6, t6, 11
+; REMAT-NEXT:    add a2, a0, t6
 ; REMAT-NEXT:    vle32.v v26, (a2)
 ; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v28
 ; REMAT-NEXT:    vle32.v v10, (a2)
@@ -1365,9 +1364,9 @@ define void @test(ptr %0, ptr %1, i64 %2) {
 ; REMAT-NEXT:    vle32.v v28, (a2)
 ; REMAT-NEXT:    sf.vc.vv 3, 0, v12, v30
 ; REMAT-NEXT:    vle32.v v12, (a2)
-; REMAT-NEXT:    li t4, 27
-; REMAT-NEXT:    slli t4, t4, 10
-; REMAT-NEXT:    add a2, a0, t4
+; REMAT-NEXT:    li t5, 27
+; REMAT-NEXT:    slli t5, t5, 10
+; REMAT-NEXT:    add a2, a0, t5
 ; REMAT-NEXT:    vle32.v v30, (a2)
 ; REMAT-NEXT:    sf.vc.vv 3, 0, v14, v6
 ; REMAT-NEXT:    vle32.v v14, (a2)
@@ -1379,49 +1378,49 @@ define void @test(ptr %0, ptr %1, i64 %2) {
 ; REMAT-NEXT:    vle32.v v16, (a2)
 ; REMAT-NEXT:    lui a2, 7
 ; REMAT-NEXT:    add a2, a0, a2
-; REMAT-NEXT:    lui t3, 7
+; REMAT-NEXT:    lui t4, 7
 ; REMAT-NEXT:    vle32.v v4, (a2)
 ; REMAT-NEXT:    sf.vc.vv 3, 0, v18, v2
 ; REMAT-NEXT:    vle32.v v18, (a2)
-; REMAT-NEXT:    lui t2, 7
-; REMAT-NEXT:    addi t2, t2, 512
-; REMAT-NEXT:    add a2, a0, t2
+; REMAT-NEXT:    lui t3, 7
+; REMAT-NEXT:    addi t3, t3, 512
+; REMAT-NEXT:    add a2, a0, t3
 ; REMAT-NEXT:    vle32.v v2, (a2)
 ; REMAT-NEXT:    sf.vc.vv 3, 0, v20, v0
 ; REMAT-NEXT:    vle32.v v20, (a2)
-; REMAT-NEXT:    li t1, 29
-; REMAT-NEXT:    slli t1, t1, 10
-; REMAT-NEXT:    add a2, a0, t1
+; REMAT-NEXT:    li t2, 29
+; REMAT-NEXT:    slli t2, t2, 10
+; REMAT-NEXT:    add a2, a0, t2
 ; REMAT-NEXT:    vle32.v v0, (a2)
 ; REMAT-NEXT:    sf.vc.vv 3, 0, v22, v24
 ; REMAT-NEXT:    vle32.v v22, (a2)
-; REMAT-NEXT:    lui t0, 7
-; REMAT-NEXT:    addi t0, t0, 1536
-; REMAT-NEXT:    add a2, a0, t0
+; REMAT-NEXT:    lui t1, 7
+; REMAT-NEXT:    addi t1, t1, 1536
+; REMAT-NEXT:    add a2, a0, t1
 ; REMAT-NEXT:    vle32.v v24, (a2)
 ; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v26
 ; REMAT-NEXT:    vle32.v v8, (a2)
-; REMAT-NEXT:    li a7, 15
-; REMAT-NEXT:    slli a7, a7, 11
-; REMAT-NEXT:    add a2, a0, a7
+; REMAT-NEXT:    li t0, 15
+; REMAT-NEXT:    slli t0, t0, 11
+; REMAT-NEXT:    add a2, a0, t0
 ; REMAT-NEXT:    vle32.v v26, (a2)
 ; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v28
 ; REMAT-NEXT:    vle32.v v10, (a2)
-; REMAT-NEXT:    lui a6, 8
-; REMAT-NEXT:    addi a6, a6, -1536
-; REMAT-NEXT:    add a2, a0, a6
+; REMAT-NEXT:    lui a7, 8
+; REMAT-NEXT:    addi a7, a7, -1536
+; REMAT-NEXT:    add a2, a0, a7
 ; REMAT-NEXT:    vle32.v v28, (a2)
 ; REMAT-NEXT:    sf.vc.vv 3, 0, v12, v30
 ; REMAT-NEXT:    vle32.v v12, (a2)
-; REMAT-NEXT:    li a4, 31
-; REMAT-NEXT:    slli a4, a4, 10
-; REMAT-NEXT:    add a2, a0, a4
+; REMAT-NEXT:    li a6, 31
+; REMAT-NEXT:    slli a6, a6, 10
+; REMAT-NEXT:    add a2, a0, a6
 ; REMAT-NEXT:    vle32.v v30, (a2)
 ; REMAT-NEXT:    sf.vc.vv 3, 0, v14, v6
 ; REMAT-NEXT:    vle32.v v14, (a2)
-; REMAT-NEXT:    lui a3, 8
-; REMAT-NEXT:    addi a3, a3, -512
-; REMAT-NEXT:    add a2, a0, a3
+; REMAT-NEXT:    lui a4, 8
+; REMAT-NEXT:    addi a4, a4, -512
+; REMAT-NEXT:    add a2, a0, a4
 ; REMAT-NEXT:    vle32.v v6, (a2)
 ; REMAT-NEXT:    sf.vc.vv 3, 0, v16, v4
 ; REMAT-NEXT:    vle32.v v16, (a2)
@@ -1493,8 +1492,10 @@ define void @test(ptr %0, ptr %1, i64 %2) {
 ; REMAT-NEXT:    slli a0, a0, 9
 ; REMAT-NEXT:    add a0, a1, a0
 ; REMAT-NEXT:    sd a0, 312(sp) # 8-byte Folded Spill
-; REMAT-NEXT:    add s5, a1, s5
-; REMAT-NEXT:    sd s5, 304(sp) # 8-byte Folded Spill
+; REMAT-NEXT:    li a0, 9
+; REMAT-NEXT:    slli a0, a0, 10
+; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    sd a0, 304(sp) # 8-byte Folded Spill
 ; REMAT-NEXT:    add s6, a1, s6
 ; REMAT-NEXT:    sd s6, 296(sp) # 8-byte Folded Spill
 ; REMAT-NEXT:    li a0, 5
@@ -1525,10 +1526,8 @@ define void @test(ptr %0, ptr %1, i64 %2) {
 ; REMAT-NEXT:    slli a0, a0, 9
 ; REMAT-NEXT:    add a0, a1, a0
 ; REMAT-NEXT:    sd a0, 216(sp) # 8-byte Folded Spill
-; REMAT-NEXT:    li a0, 15
-; REMAT-NEXT:    slli a0, a0, 10
-; REMAT-NEXT:    add a0, a1, a0
-; REMAT-NEXT:    sd a0, 208(sp) # 8-byte Folded Spill
+; REMAT-NEXT:    add a3, a1, a3
+; REMAT-NEXT:    sd a3, 208(sp) # 8-byte Folded Spill
 ; REMAT-NEXT:    li a0, 31
 ; REMAT-NEXT:    slli a0, a0, 9
 ; REMAT-NEXT:    add a0, a1, a0
@@ -1573,49 +1572,49 @@ define void @test(ptr %0, ptr %1, i64 %2) {
 ; REMAT-NEXT:    sd a0, 120(sp) # 8-byte Folded Spill
 ; REMAT-NEXT:    add s7, a1, s7
 ; REMAT-NEXT:    sd s7, 112(sp) # 8-byte Folded Spill
-; REMAT-NEXT:    add s4, a1, s4
-; REMAT-NEXT:    sd s4, 104(sp) # 8-byte Folded Spill
+; REMAT-NEXT:    add s5, a1, s5
+; REMAT-NEXT:    sd s5, 104(sp) # 8-byte Folded Spill
 ; REMAT-NEXT:    li a0, 11
 ; REMAT-NEXT:    slli a0, a0, 11
 ; REMAT-NEXT:    add a0, a1, a0
 ; REMAT-NEXT:    sd a0, 96(sp) # 8-byte Folded Spill
+; REMAT-NEXT:    add s4, a1, s4
+; REMAT-NEXT:    sd s4, 88(sp) # 8-byte Folded Spill
 ; REMAT-NEXT:    add s3, a1, s3
-; REMAT-NEXT:    sd s3, 88(sp) # 8-byte Folded Spill
-; REMAT-NEXT:    add s2, a1, s2
-; REMAT-NEXT:    sd s2, 80(sp) # 8-byte Folded Spill
+; REMAT-NEXT:    sd s3, 80(sp) # 8-byte Folded Spill
 ; REMAT-NEXT:    lui a0, 6
 ; REMAT-NEXT:    addi a0, a0, -512
 ; REMAT-NEXT:    add a0, a1, a0
 ; REMAT-NEXT:    sd a0, 72(sp) # 8-byte Folded Spill
+; REMAT-NEXT:    add s2, a1, s2
+; REMAT-NEXT:    sd s2, 64(sp) # 8-byte Folded Spill
 ; REMAT-NEXT:    add s1, a1, s1
-; REMAT-NEXT:    sd s1, 64(sp) # 8-byte Folded Spill
-; REMAT-NEXT:    add s0, a1, s0
-; REMAT-NEXT:    sd s0, 56(sp) # 8-byte Folded Spill
+; REMAT-NEXT:    sd s1, 56(sp) # 8-byte Folded Spill
 ; REMAT-NEXT:    li a0, 25
 ; REMAT-NEXT:    slli a0, a0, 10
 ; REMAT-NEXT:    add a0, a1, a0
 ; REMAT-NEXT:    sd a0, 48(sp) # 8-byte Folded Spill
+; REMAT-NEXT:    add s0, a1, s0
+; REMAT-NEXT:    sd s0, 40(sp) # 8-byte Folded Spill
 ; REMAT-NEXT:    add t6, a1, t6
-; REMAT-NEXT:    sd t6, 40(sp) # 8-byte Folded Spill
-; REMAT-NEXT:    add t5, a1, t5
-; REMAT-NEXT:    sd t5, 32(sp) # 8-byte Folded Spill
+; REMAT-NEXT:    sd t6, 32(sp) # 8-byte Folded Spill
 ; REMAT-NEXT:    lui a0, 7
 ; REMAT-NEXT:    addi a0, a0, -1536
 ; REMAT-NEXT:    add a0, a1, a0
 ; REMAT-NEXT:    sd a0, 24(sp) # 8-byte Folded Spill
-; REMAT-NEXT:    add t4, a1, t4
-; REMAT-NEXT:    sd t4, 16(sp) # 8-byte Folded Spill
+; REMAT-NEXT:    add t5, a1, t5
+; REMAT-NEXT:    sd t5, 16(sp) # 8-byte Folded Spill
 ; REMAT-NEXT:    lui ra, 7
 ; REMAT-NEXT:    addi ra, ra, -512
 ; REMAT-NEXT:    add ra, a1, ra
-; REMAT-NEXT:    add s11, a1, t3
-; REMAT-NEXT:    add s10, a1, t2
-; REMAT-NEXT:    add s9, a1, t1
-; REMAT-NEXT:    add s8, a1, t0
-; REMAT-NEXT:    add s7, a1, a7
-; REMAT-NEXT:    add s6, a1, a6
-; REMAT-NEXT:    add s5, a1, a4
-; REMAT-NEXT:    add s4, a1, a3
+; REMAT-NEXT:    add s11, a1, t4
+; REMAT-NEXT:    add s10, a1, t3
+; REMAT-NEXT:    add s9, a1, t2
+; REMAT-NEXT:    add s8, a1, t1
+; REMAT-NEXT:    add s7, a1, t0
+; REMAT-NEXT:    add s6, a1, a7
+; REMAT-NEXT:    add s5, a1, a6
+; REMAT-NEXT:    add s4, a1, a4
 ; REMAT-NEXT:    add s3, a1, a2
 ; REMAT-NEXT:    lui s2, 8
 ; REMAT-NEXT:    addi s2, s2, 512
diff --git a/llvm/test/CodeGen/RISCV/rvv/nontemporal-vp-scalable.ll b/llvm/test/CodeGen/RISCV/rvv/nontemporal-vp-scalable.ll
index 1ee7e138654b9..61bf01ddc6e7b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/nontemporal-vp-scalable.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/nontemporal-vp-scalable.ll
@@ -34800,33 +34800,33 @@ define <vscale x 64 x i8> @test_nontemporal_vp_gather_nxv64i8_P1(<vscale x 64 x
 ; CHECK-RV64VC-NEXT:    li a1, 40
 ; CHECK-RV64VC-NEXT:    mv a0, s1
 ; CHECK-RV64VC-NEXT:    call __muldi3
-; CHECK-RV64VC-NEXT:    slli a7, s1, 2
-; CHECK-RV64VC-NEXT:    sub a1, s0, a7
+; CHECK-RV64VC-NEXT:    slli a6, s1, 2
+; CHECK-RV64VC-NEXT:    sub a1, s0, a6
 ; CHECK-RV64VC-NEXT:    sltu a2, s0, a1
 ; CHECK-RV64VC-NEXT:    addi a2, a2, -1
 ; CHECK-RV64VC-NEXT:    and a3, a2, a1
-; CHECK-RV64VC-NEXT:    slli a1, s1, 1
-; CHECK-RV64VC-NEXT:    sub a2, a3, a1
-; CHECK-RV64VC-NEXT:    sltu a4, a3, a2
-; CHECK-RV64VC-NEXT:    addi a4, a4, -1
-; CHECK-RV64VC-NEXT:    and a2, a2, a4
-; CHECK-RV64VC-NEXT:    sub t0, a2, s1
-; CHECK-RV64VC-NEXT:    mv a5, a2
-; CHECK-RV64VC-NEXT:    bltu a2, s1, .LBB910_2
+; CHECK-RV64VC-NEXT:    slli a7, s1, 1
+; CHECK-RV64VC-NEXT:    sub a1, a3, a7
+; CHECK-RV64VC-NEXT:    sltu a2, a3, a1
+; CHECK-RV64VC-NEXT:    addi a2, a2, -1
+; CHECK-RV64VC-NEXT:    and a1, a1, a2
+; CHECK-RV64VC-NEXT:    sub a4, a1, s1
+; CHECK-RV64VC-NEXT:    mv a5, a1
+; CHECK-RV64VC-NEXT:    bltu a1, s1, .LBB910_2
 ; CHECK-RV64VC-NEXT:  # %bb.1:
 ; CHECK-RV64VC-NEXT:    mv a5, s1
 ; CHECK-RV64VC-NEXT:  .LBB910_2:
-; CHECK-RV64VC-NEXT:    sltu a6, a2, t0
-; CHECK-RV64VC-NEXT:    bltu a3, a1, .LBB910_4
+; CHECK-RV64VC-NEXT:    sltu a2, a1, a4
+; CHECK-RV64VC-NEXT:    bltu a3, a7, .LBB910_4
 ; CHECK-RV64VC-NEXT:  # %bb.3:
-; CHECK-RV64VC-NEXT:    mv a3, a1
+; CHECK-RV64VC-NEXT:    mv a3, a7
 ; CHECK-RV64VC-NEXT:  .LBB910_4:
 ; CHECK-RV64VC-NEXT:    add a0, a0, s2
-; CHECK-RV64VC-NEXT:    addi a6, a6, -1
-; CHECK-RV64VC-NEXT:    sub a2, a3, s1
-; CHECK-RV64VC-NEXT:    sltu a4, a3, a2
-; CHECK-RV64VC-NEXT:    addi a4, a4, -1
-; CHECK-RV64VC-NEXT:    and a2, a2, a4
+; CHECK-RV64VC-NEXT:    addi t0, a2, -1
+; CHECK-RV64VC-NEXT:    sub a1, a3, s1
+; CHECK-RV64VC-NEXT:    sltu a2, a3, a1
+; CHECK-RV64VC-NEXT:    addi a2, a2, -1
+; CHECK-RV64VC-NEXT:    and a1, a1, a2
 ; CHECK-RV64VC-NEXT:    bltu a3, s1, .LBB910_6
 ; CHECK-RV64VC-NEXT:  # %bb.5:
 ; CHECK-RV64VC-NEXT:    mv a3, s1
@@ -34842,7 +34842,7 @@ define <vscale x 64 x i8> @test_nontemporal_vp_gather_nxv64i8_P1(<vscale x 64 x
 ; CHECK-RV64VC-NEXT:    add a0, a0, sp
 ; CHECK-RV64VC-NEXT:    addi a0, a0, 16
 ; CHECK-RV64VC-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; CHECK-RV64VC-NEXT:    vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; CHECK-RV64VC-NEXT:    c.ntl.p1
 ; CHECK-RV64VC-NEXT:    vluxei64.v v13, (zero), v24
 ; CHECK-RV64VC-NEXT:    csrr a0, vlenb
@@ -34853,44 +34853,44 @@ define <vscale x 64 x i8> @test_nontemporal_vp_gather_nxv64i8_P1(<vscale x 64 x
 ; CHECK-RV64VC-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64VC-NEXT:    c.ntl.p1
 ; CHECK-RV64VC-NEXT:    vluxei64.v v12, (zero), v24
-; CHECK-RV64VC-NEXT:    and a0, a6, t0
-; CHECK-RV64VC-NEXT:    bltu s0, a7, .LBB910_8
+; CHECK-RV64VC-NEXT:    and a2, t0, a4
+; CHECK-RV64VC-NEXT:    bltu s0, a6, .LBB910_8
 ; CHECK-RV64VC-NEXT:  # %bb.7:
-; CHECK-RV64VC-NEXT:    mv s0, a7
+; CHECK-RV64VC-NEXT:    mv s0, a6
 ; CHECK-RV64VC-NEXT:  .LBB910_8:
-; CHECK-RV64VC-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT:    vsetvli zero, a2, e8, m1, ta, ma
 ; CHECK-RV64VC-NEXT:    c.ntl.p1
 ; CHECK-RV64VC-NEXT:    vluxei64.v v15, (zero), v16
 ; CHECK-RV64VC-NEXT:    vl8re64.v v16, (s2)
-; CHECK-RV64VC-NEXT:    sub a0, s0, a1
-; CHECK-RV64VC-NEXT:    sltu a2, s0, a0
+; CHECK-RV64VC-NEXT:    sub a0, s0, a7
+; CHECK-RV64VC-NEXT:    sltu a1, s0, a0
+; CHECK-RV64VC-NEXT:    addi a1, a1, -1
+; CHECK-RV64VC-NEXT:    and a0, a0, a1
+; CHECK-RV64VC-NEXT:    sub a1, a0, s1
+; CHECK-RV64VC-NEXT:    sltu a2, a0, a1
 ; CHECK-RV64VC-NEXT:    addi a2, a2, -1
-; CHECK-RV64VC-NEXT:    and a0, a0, a2
-; CHECK-RV64VC-NEXT:    sub a2, a0, s1
-; CHECK-RV64VC-NEXT:    sltu a3, a0, a2
-; CHECK-RV64VC-NEXT:    addi a3, a3, -1
-; CHECK-RV64VC-NEXT:    and a2, a2, a3
+; CHECK-RV64VC-NEXT:    and a1, a1, a2
 ; CHECK-RV64VC-NEXT:    bltu a0, s1, .LBB910_10
 ; CHECK-RV64VC-NEXT:  # %bb.9:
 ; CHECK-RV64VC-NEXT:    mv a0, s1
 ; CHECK-RV64VC-NEXT:  .LBB910_10:
-; CHECK-RV64VC-NEXT:    csrr a3, vlenb
-; CHECK-RV64VC-NEXT:    slli a3, a3, 3
-; CHECK-RV64VC-NEXT:    mv a4, a3
-; CHECK-RV64VC-NEXT:    slli a3, a3, 1
-; CHECK-RV64VC-NEXT:    add a3, a3, a4
-; CHECK-RV64VC-NEXT:    add a3, a3, sp
-; CHECK-RV64VC-NEXT:    addi a3, a3, 16
-; CHECK-RV64VC-NEXT:    vl8r.v v24, (a3) # vscale x 64-byte Folded Reload
-; CHECK-RV64VC-NEXT:    vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT:    csrr a2, vlenb
+; CHECK-RV64VC-NEXT:    slli a2, a2, 3
+; CHECK-RV64VC-NEXT:    mv a3, a2
+; CHECK-RV64VC-NEXT:    slli a2, a2, 1
+; CHECK-RV64VC-NEXT:    add a2, a2, a3
+; CHECK-RV64VC-NEXT:    add a2, a2, sp
+; CHECK-RV64VC-NEXT:    addi a2, a2, 16
+; CHECK-RV64VC-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; CHECK-RV64VC-NEXT:    c.ntl.p1
 ; CHECK-RV64VC-NEXT:    vluxei64.v v11, (zero), v24
 ; CHECK-RV64VC-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
 ; CHECK-RV64VC-NEXT:    c.ntl.p1
 ; CHECK-RV64VC-NEXT:    vluxei64.v v10, (zero), v16
-; CHECK-RV64VC-NEXT:    bltu s0, a1, .LBB910_12
+; CHECK-RV64VC-NEXT:    bltu s0, a7, .LBB910_12
 ; CHECK-RV64VC-NEXT:  # %bb.11:
-; CHECK-RV64VC-NEXT:    mv s0, a1
+; CHECK-RV64VC-NEXT:    mv s0, a7
 ; CHECK-RV64VC-NEXT:  .LBB910_12:
 ; CHECK-RV64VC-NEXT:    sub a0, s0, s1
 ; CHECK-RV64VC-NEXT:    sltu a1, s0, a0
@@ -35353,33 +35353,33 @@ define <vscale x 64 x i8> @test_nontemporal_vp_gather_nxv64i8_PALL(<vscale x 64
 ; CHECK-RV64VC-NEXT:    li a1, 40
 ; CHECK-RV64VC-NEXT:    mv a0, s1
 ; CHECK-RV64VC-NEXT:    call __muldi3
-; CHECK-RV64VC-NEXT:    slli a7, s1, 2
-; CHECK-RV64VC-NEXT:    sub a1, s0, a7
+; CHECK-RV64VC-NEXT:    slli a6, s1, 2
+; CHECK-RV64VC-NEXT:    sub a1, s0, a6
 ; CHECK-RV64VC-NEXT:    sltu a2, s0, a1
 ; CHECK-RV64VC-NEXT:    addi a2, a2, -1
 ; CHECK-RV64VC-NEXT:    and a3, a2, a1
-; CHECK-RV64VC-NEXT:    slli a1, s1, 1
-; CHECK-RV64VC-NEXT:    sub a2, a3, a1
-; CHECK-RV64VC-NEXT:    sltu a4, a3, a2
-; CHECK-RV64VC-NEXT:    addi a4, a4, -1
-; CHECK-RV64VC-NEXT:    and a2, a2, a4
-; CHECK-RV64VC-NEXT:    sub t0, a2, s1
-; CHECK-RV64VC-NEXT:    mv a5, a2
-; CHECK-RV64VC-NEXT:    bltu a2, s1, .LBB911_2
+; CHECK-RV64VC-NEXT:    slli a7, s1, 1
+; CHECK-RV64VC-NEXT:    sub a1, a3, a7
+; CHECK-RV64VC-NEXT:    sltu a2, a3, a1
+; CHECK-RV64VC-NEXT:    addi a2, a2, -1
+; CHECK-RV64VC-NEXT:    and a1, a1, a2
+; CHECK-RV64VC-NEXT:    sub a4, a1, s1
+; CHECK-RV64VC-NEXT:    mv a5, a1
+; CHECK-RV64VC-NEXT:    bltu a1, s1, .LBB911_2
 ; CHECK-RV64VC-NEXT:  # %bb.1:
 ; CHECK-RV64VC-NEXT:    mv a5, s1
 ; CHECK-RV64VC-NEXT:  .LBB911_2:
-; CHECK-RV64VC-NEXT:    sltu a6, a2, t0
-; CHECK-RV64VC-NEXT:    bltu a3, a1, .LBB911_4
+; CHECK-RV64VC-NEXT:    sltu a2, a1, a4
+; CHECK-RV64VC-NEXT:    bltu a3, a7, .LBB911_4
 ; CHECK-RV64VC-NEXT:  # %bb.3:
-; CHECK-RV64VC-NEXT:    mv a3, a1
+; CHECK-RV64VC-NEXT:    mv a3, a7
 ; CHECK-RV64VC-NEXT:  .LBB911_4:
 ; CHECK-RV64VC-NEXT:    add a0, a0, s2
-; CHECK-RV64VC-NEXT:    addi a6, a6, -1
-; CHECK-RV64VC-NEXT:    sub a2, a3, s1
-; CHECK-RV64VC-NEXT:    sltu a4, a3, a2
-; CHECK-RV64VC-NEXT:    addi a4, a4, -1
-; CHECK-RV64VC-NEXT:    and a2, a2, a4
+; CHECK-RV64VC-NEXT:    addi t0, a2, -1
+; CHECK-RV64VC-NEXT:    sub a1, a3, s1
+; CHECK-RV64VC-NEXT:    sltu a2, a3, a1
+; CHECK-RV64VC-NEXT:    addi a2, a2, -1
+; CHECK-RV64VC-NEXT:    and a1, a1, a2
 ; CHECK-RV64VC-NEXT:    bltu a3, s1, .LBB911_6
 ; CHECK-RV64VC-NEXT:  # %bb.5:
 ; CHECK-RV64VC-NEXT:    mv a3, s1
@@ -35395,7 +35395,7 @@ define <vscale x 64 x i8> @test_nontemporal_vp_gather_nxv64i8_PALL(<vscale x 64
 ; CHECK-RV64VC-NEXT:    add a0, a0, sp
 ; CHECK-RV64VC-NEXT:    addi a0, a0, 16
 ; CHECK-RV64VC-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; CHECK-RV64VC-NEXT:    vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; CHECK-RV64VC-NEXT:    c.ntl.pall
 ; CHECK-RV64VC-NEXT:    vluxei64.v v13, (zero), v24
 ; CHECK-RV64VC-NEXT:    csrr a0, vlenb
@@ -35406,44 +35406,44 @@ define <vscale x 64 x i8> @test_nontemporal_vp_gather_nxv64i8_PALL(<vscale x 64
 ; CHECK-RV64VC-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64VC-NEXT:    c.ntl.pall
 ; CHECK-RV64VC-NEXT:    vluxei64.v v12, (zero), v24
-; CHECK-RV64VC-NEXT:    and a0, a6, t0
-; CHECK-RV64VC-NEXT:    bltu s0, a7, .LBB911_8
+; CHECK-RV64VC-NEXT:    and a2, t0, a4
+; CHECK-RV64VC-NEXT:    bltu s0, a6, .LBB911_8
 ; CHECK-RV64VC-NEXT:  # %bb.7:
-; CHECK-RV64VC-NEXT:    mv s0, a7
+; CHECK-RV64VC-NEXT:    mv s0, a6
 ; CHECK-RV64VC-NEXT:  .LBB911_8:
-; CHECK-RV64VC-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT:    vsetvli zero, a2, e8, m1, ta, ma
 ; CHECK-RV64VC-NEXT:    c.ntl.pall
 ; CHECK-RV64VC-NEXT:    vluxei64.v v15, (zero), v16
 ; CHECK-RV64VC-NEXT:    vl8re64.v v16, (s2)
-; CHECK-RV64VC-NEXT:    sub a0, s0, a1
-; CHECK-RV64VC-NEXT:    sltu a2, s0, a0
+; CHECK-RV64VC-NEXT:    sub a0, s0, a7
+; CHECK-RV64VC-NEXT:    sltu a1, s0, a0
+; CHECK-RV64VC-NEXT:    addi a1, a1, -1
+; CHECK-RV64VC-NEXT:    and a0, a0, a1
+; CHECK-RV64VC-NEXT:    sub a1, a0, s1
+; CHECK-RV64VC-NEXT:    sltu a2, a0, a1
 ; CHECK-RV64VC-NEXT:    addi a2, a2, -1
-; CHECK-RV64VC-NEXT:    and a0, a0, a2
-; CHECK-RV64VC-NEXT:    sub a2, a0, s1
-; CHECK-RV64VC-NEXT:    sltu a3, a0, a2
-; CHECK-RV64VC-NEXT:    addi a3, a3, -1
-; CHECK-RV64VC-NEXT:    and a2, a2, a3
+; CHECK-RV64VC-NEXT:    and a1, a1, a2
 ; CHECK-RV64VC-NEXT:    bltu a0, s1, .LBB911_10
 ; CHECK-RV64VC-NEXT:  # %bb.9:
 ; CHECK-RV64VC-NEXT:    mv a0, s1
 ; CHECK-RV64VC-NEXT:  .LBB911_10:
-; CHECK-RV64VC-NEXT:    csrr a3, vlenb
-; CHECK-RV64VC-NEXT:    slli a3, a3, 3
-; CHECK-RV64VC-NEXT:    mv a4, a3
-; CHECK-RV64VC-NEXT:    slli a3, a3, 1
-; CHECK-RV64VC-NEXT:    add a3, a3, a4
-; CHECK-RV64VC-NEXT:    add a3, a3, sp
-; CHECK-RV64VC-NEXT:    addi a3, a3, 16
-; CHECK-RV64VC-NEXT:    vl8r.v v24, (a3) # vscale x 64-byte Folded Reload
-; CHECK-RV64VC-NEXT:    vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT:    csrr a2, vlenb
+; CHECK-RV64VC-NEXT:    slli a2, a2, 3
+; CHECK-RV64VC-NEXT:    mv a3, a2
+; CHECK-RV64VC-NEXT:    slli a2, a2, 1
+; CHECK-RV64VC-NEXT:    add a2, a2, a3
+; CHECK-RV64VC-NEXT:    add a2, a2, sp
+; CHECK-RV64VC-NEXT:    addi a2, a2, 16
+; CHECK-RV64VC-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; CHECK-RV64VC-NEXT:    c.ntl.pall
 ; CHECK-RV64VC-NEXT:    vluxei64.v v11, (zero), v24
 ; CHECK-RV64VC-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
 ; CHECK-RV64VC-NEXT:    c.ntl.pall
 ; CHECK-RV64VC-NEXT:    vluxei64.v v10, (zero), v16
-; CHECK-RV64VC-NEXT:    bltu s0, a1, .LBB911_12
+; CHECK-RV64VC-NEXT:    bltu s0, a7, .LBB911_12
 ; CHECK-RV64VC-NEXT:  # %bb.11:
-; CHECK-RV64VC-NEXT:    mv s0, a1
+; CHECK-RV64VC-NEXT:    mv s0, a7
 ; CHECK-RV64VC-NEXT:  .LBB911_12:
 ; CHECK-RV64VC-NEXT:    sub a0, s0, s1
 ; CHECK-RV64VC-NEXT:    sltu a1, s0, a0
@@ -35906,33 +35906,33 @@ define <vscale x 64 x i8> @test_nontemporal_vp_gather_nxv64i8_S1(<vscale x 64 x
 ; CHECK-RV64VC-NEXT:    li a1, 40
 ; CHECK-RV64VC-NEXT:    mv a0, s1
 ; CHECK-RV64VC-NEXT:    call __muldi3
-; CHECK-RV64VC-NEXT:    slli a7, s1, 2
-; CHECK-RV64VC-NEXT:    sub a1, s0, a7
+; CHECK-RV64VC-NEXT:    slli a6, s1, 2
+; CHECK-RV64VC-NEXT:    sub a1, s0, a6
 ; CHECK-RV64VC-NEXT:    sltu a2, s0, a1
 ; CHECK-RV64VC-NEXT:    addi a2, a2, -1
 ; CHECK-RV64VC-NEXT:    and a3, a2, a1
-; CHECK-RV64VC-NEXT:    slli a1, s1, 1
-; CHECK-RV64VC-NEXT:    sub a2, a3, a1
-; CHECK-RV64VC-NEXT:    sltu a4, a3, a2
-; CHECK-RV64VC-NEXT:    addi a4, a4, -1
-; CHECK-RV64VC-NEXT:    and a2, a2, a4
-; CHECK-RV64VC-NEXT:    sub t0, a2, s1
-; CHECK-RV64VC-NEXT:    mv a5, a2
-; CHECK-RV64VC-NEXT:    bltu a2, s1, .LBB912_2
+; CHECK-RV64VC-NEXT:    slli a7, s1, 1
+; CHECK-RV64VC-NEXT:    sub a1, a3, a7
+; CHECK-RV64VC-NEXT:    sltu a2, a3, a1
+; CHECK-RV64VC-NEXT:    addi a2, a2, -1
+; CHECK-RV64VC-NEXT:    and a1, a1, a2
+; CHECK-RV64VC-NEXT:    sub a4, a1, s1
+; CHECK-RV64VC-NEXT:    mv a5, a1
+; CHECK-RV64VC-NEXT:    bltu a1, s1, .LBB912_2
 ; CHECK-RV64VC-NEXT:  # %bb.1:
 ; CHECK-RV64VC-NEXT:    mv a5, s1
 ; CHECK-RV64VC-NEXT:  .LBB912_2:
-; CHECK-RV64VC-NEXT:    sltu a6, a2, t0
-; CHECK-RV64VC-NEXT:    bltu a3, a1, .LBB912_4
+; CHECK-RV64VC-NEXT:    sltu a2, a1, a4
+; CHECK-RV64VC-NEXT:    bltu a3, a7, .LBB912_4
 ; CHECK-RV64VC-NEXT:  # %bb.3:
-; CHECK-RV64VC-NEXT:    mv a3, a1
+; CHECK-RV64VC-NEXT:    mv a3, a7
 ; CHECK-RV64VC-NEXT:  .LBB912_4:
 ; CHECK-RV64VC-NEXT:    add a0, a0, s2
-; CHECK-RV64VC-NEXT:    addi a6, a6, -1
-; CHECK-RV64VC-NEXT:    sub a2, a3, s1
-; CHECK-RV64VC-NEXT:    sltu a4, a3, a2
-; CHECK-RV64VC-NEXT:    addi a4, a4, -1
-; CHECK-RV64VC-NEXT:    and a2, a2, a4
+; CHECK-RV64VC-NEXT:    addi t0, a2, -1
+; CHECK-RV64VC-NEXT:    sub a1, a3, s1
+; CHECK-RV64VC-NEXT:    sltu a2, a3, a1
+; CHECK-RV64VC-NEXT:    addi a2, a2, -1
+; CHECK-RV64VC-NEXT:    and a1, a1, a2
 ; CHECK-RV64VC-NEXT:    bltu a3, s1, .LBB912_6
 ; CHECK-RV64VC-NEXT:  # %bb.5:
 ; CHECK-RV64VC-NEXT:    mv a3, s1
@@ -35948,7 +35948,7 @@ define <vscale x 64 x i8> @test_nontemporal_vp_gather_nxv64i8_S1(<vscale x 64 x
 ; CHECK-RV64VC-NEXT:    add a0, a0, sp
 ; CHECK-RV64VC-NEXT:    addi a0, a0, 16
 ; CHECK-RV64VC-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; CHECK-RV64VC-NEXT:    vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; CHECK-RV64VC-NEXT:    c.ntl.s1
 ; CHECK-RV64VC-NEXT:    vluxei64.v v13, (zero), v24
 ; CHECK-RV64VC-NEXT:    csrr a0, vlenb
@@ -35959,44 +35959,44 @@ define <vscale x 64 x i8> @test_nontemporal_vp_gather_nxv64i8_S1(<vscale x 64 x
 ; CHECK-RV64VC-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64VC-NEXT:    c.ntl.s1
 ; CHECK-RV64VC-NEXT:    vluxei64.v v12, (zero), v24
-; CHECK-RV64VC-NEXT:    and a0, a6, t0
-; CHECK-RV64VC-NEXT:    bltu s0, a7, .LBB912_8
+; CHECK-RV64VC-NEXT:    and a2, t0, a4
+; CHECK-RV64VC-NEXT:    bltu s0, a6, .LBB912_8
 ; CHECK-RV64VC-NEXT:  # %bb.7:
-; CHECK-RV64VC-NEXT:    mv s0, a7
+; CHECK-RV64VC-NEXT:    mv s0, a6
 ; CHECK-RV64VC-NEXT:  .LBB912_8:
-; CHECK-RV64VC-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT:    vsetvli zero, a2, e8, m1, ta, ma
 ; CHECK-RV64VC-NEXT:    c.ntl.s1
 ; CHECK-RV64VC-NEXT:    vluxei64.v v15, (zero), v16
 ; CHECK-RV64VC-NEXT:    vl8re64.v v16, (s2)
-; CHECK-RV64VC-NEXT:    sub a0, s0, a1
-; CHECK-RV64VC-NEXT:    sltu a2, s0, a0
+; CHECK-RV64VC-NEXT:    sub a0, s0, a7
+; CHECK-RV64VC-NEXT:    sltu a1, s0, a0
+; CHECK-RV64VC-NEXT:    addi a1, a1, -1
+; CHECK-RV64VC-NEXT:    and a0, a0, a1
+; CHECK-RV64VC-NEXT:    sub a1, a0, s1
+; CHECK-RV64VC-NEXT:    sltu a2, a0, a1
 ; CHECK-RV64VC-NEXT:    addi a2, a2, -1
-; CHECK-RV64VC-NEXT:    and a0, a0, a2
-; CHECK-RV64VC-NEXT:    sub a2, a0, s1
-; CHECK-RV64VC-NEXT:    sltu a3, a0, a2
-; CHECK-RV64VC-NEXT:    addi a3, a3, -1
-; CHECK-RV64VC-NEXT:    and a2, a2, a3
+; CHECK-RV64VC-NEXT:    and a1, a1, a2
 ; CHECK-RV64VC-NEXT:    bltu a0, s1, .LBB912_10
 ; CHECK-RV64VC-NEXT:  # %bb.9:
 ; CHECK-RV64VC-NEXT:    mv a0, s1
 ; CHECK-RV64VC-NEXT:  .LBB912_10:
-; CHECK-RV64VC-NEXT:    csrr a3, vlenb
-; CHECK-RV64VC-NEXT:    slli a3, a3, 3
-; CHECK-RV64VC-NEXT:    mv a4, a3
-; CHECK-RV64VC-NEXT:    slli a3, a3, 1
-; CHECK-RV64VC-NEXT:    add a3, a3, a4
-; CHECK-RV64VC-NEXT:    add a3, a3, sp
-; CHECK-RV64VC-NEXT:    addi a3, a3, 16
-; CHECK-RV64VC-NEXT:    vl8r.v v24, (a3) # vscale x 64-byte Folded Reload
-; CHECK-RV64VC-NEXT:    vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT:    csrr a2, vlenb
+; CHECK-RV64VC-NEXT:    slli a2, a2, 3
+; CHECK-RV64VC-NEXT:    mv a3, a2
+; CHECK-RV64VC-NEXT:    slli a2, a2, 1
+; CHECK-RV64VC-NEXT:    add a2, a2, a3
+; CHECK-RV64VC-NEXT:    add a2, a2, sp
+; CHECK-RV64VC-NEXT:    addi a2, a2, 16
+; CHECK-RV64VC-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; CHECK-RV64VC-NEXT:    c.ntl.s1
 ; CHECK-RV64VC-NEXT:    vluxei64.v v11, (zero), v24
 ; CHECK-RV64VC-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
 ; CHECK-RV64VC-NEXT:    c.ntl.s1
 ; CHECK-RV64VC-NEXT:    vluxei64.v v10, (zero), v16
-; CHECK-RV64VC-NEXT:    bltu s0, a1, .LBB912_12
+; CHECK-RV64VC-NEXT:    bltu s0, a7, .LBB912_12
 ; CHECK-RV64VC-NEXT:  # %bb.11:
-; CHECK-RV64VC-NEXT:    mv s0, a1
+; CHECK-RV64VC-NEXT:    mv s0, a7
 ; CHECK-RV64VC-NEXT:  .LBB912_12:
 ; CHECK-RV64VC-NEXT:    sub a0, s0, s1
 ; CHECK-RV64VC-NEXT:    sltu a1, s0, a0
@@ -36459,33 +36459,33 @@ define <vscale x 64 x i8> @test_nontemporal_vp_gather_nxv64i8_ALL(<vscale x 64 x
 ; CHECK-RV64VC-NEXT:    li a1, 40
 ; CHECK-RV64VC-NEXT:    mv a0, s1
 ; CHECK-RV64VC-NEXT:    call __muldi3
-; CHECK-RV64VC-NEXT:    slli a7, s1, 2
-; CHECK-RV64VC-NEXT:    sub a1, s0, a7
+; CHECK-RV64VC-NEXT:    slli a6, s1, 2
+; CHECK-RV64VC-NEXT:    sub a1, s0, a6
 ; CHECK-RV64VC-NEXT:    sltu a2, s0, a1
 ; CHECK-RV64VC-NEXT:    addi a2, a2, -1
 ; CHECK-RV64VC-NEXT:    and a3, a2, a1
-; CHECK-RV64VC-NEXT:    slli a1, s1, 1
-; CHECK-RV64VC-NEXT:    sub a2, a3, a1
-; CHECK-RV64VC-NEXT:    sltu a4, a3, a2
-; CHECK-RV64VC-NEXT:    addi a4, a4, -1
-; CHECK-RV64VC-NEXT:    and a2, a2, a4
-; CHECK-RV64VC-NEXT:    sub t0, a2, s1
-; CHECK-RV64VC-NEXT:    mv a5, a2
-; CHECK-RV64VC-NEXT:    bltu a2, s1, .LBB913_2
+; CHECK-RV64VC-NEXT:    slli a7, s1, 1
+; CHECK-RV64VC-NEXT:    sub a1, a3, a7
+; CHECK-RV64VC-NEXT:    sltu a2, a3, a1
+; CHECK-RV64VC-NEXT:    addi a2, a2, -1
+; CHECK-RV64VC-NEXT:    and a1, a1, a2
+; CHECK-RV64VC-NEXT:    sub a4, a1, s1
+; CHECK-RV64VC-NEXT:    mv a5, a1
+; CHECK-RV64VC-NEXT:    bltu a1, s1, .LBB913_2
 ; CHECK-RV64VC-NEXT:  # %bb.1:
 ; CHECK-RV64VC-NEXT:    mv a5, s1
 ; CHECK-RV64VC-NEXT:  .LBB913_2:
-; CHECK-RV64VC-NEXT:    sltu a6, a2, t0
-; CHECK-RV64VC-NEXT:    bltu a3, a1, .LBB913_4
+; CHECK-RV64VC-NEXT:    sltu a2, a1, a4
+; CHECK-RV64VC-NEXT:    bltu a3, a7, .LBB913_4
 ; CHECK-RV64VC-NEXT:  # %bb.3:
-; CHECK-RV64VC-NEXT:    mv a3, a1
+; CHECK-RV64VC-NEXT:    mv a3, a7
 ; CHECK-RV64VC-NEXT:  .LBB913_4:
 ; CHECK-RV64VC-NEXT:    add a0, a0, s2
-; CHECK-RV64VC-NEXT:    addi a6, a6, -1
-; CHECK-RV64VC-NEXT:    sub a2, a3, s1
-; CHECK-RV64VC-NEXT:    sltu a4, a3, a2
-; CHECK-RV64VC-NEXT:    addi a4, a4, -1
-; CHECK-RV64VC-NEXT:    and a2, a2, a4
+; CHECK-RV64VC-NEXT:    addi t0, a2, -1
+; CHECK-RV64VC-NEXT:    sub a1, a3, s1
+; CHECK-RV64VC-NEXT:    sltu a2, a3, a1
+; CHECK-RV64VC-NEXT:    addi a2, a2, -1
+; CHECK-RV64VC-NEXT:    and a1, a1, a2
 ; CHECK-RV64VC-NEXT:    bltu a3, s1, .LBB913_6
 ; CHECK-RV64VC-NEXT:  # %bb.5:
 ; CHECK-RV64VC-NEXT:    mv a3, s1
@@ -36501,7 +36501,7 @@ define <vscale x 64 x i8> @test_nontemporal_vp_gather_nxv64i8_ALL(<vscale x 64 x
 ; CHECK-RV64VC-NEXT:    add a0, a0, sp
 ; CHECK-RV64VC-NEXT:    addi a0, a0, 16
 ; CHECK-RV64VC-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; CHECK-RV64VC-NEXT:    vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; CHECK-RV64VC-NEXT:    c.ntl.all
 ; CHECK-RV64VC-NEXT:    vluxei64.v v13, (zero), v24
 ; CHECK-RV64VC-NEXT:    csrr a0, vlenb
@@ -36512,44 +36512,44 @@ define <vscale x 64 x i8> @test_nontemporal_vp_gather_nxv64i8_ALL(<vscale x 64 x
 ; CHECK-RV64VC-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64VC-NEXT:    c.ntl.all
 ; CHECK-RV64VC-NEXT:    vluxei64.v v12, (zero), v24
-; CHECK-RV64VC-NEXT:    and a0, a6, t0
-; CHECK-RV64VC-NEXT:    bltu s0, a7, .LBB913_8
+; CHECK-RV64VC-NEXT:    and a2, t0, a4
+; CHECK-RV64VC-NEXT:    bltu s0, a6, .LBB913_8
 ; CHECK-RV64VC-NEXT:  # %bb.7:
-; CHECK-RV64VC-NEXT:    mv s0, a7
+; CHECK-RV64VC-NEXT:    mv s0, a6
 ; CHECK-RV64VC-NEXT:  .LBB913_8:
-; CHECK-RV64VC-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT:    vsetvli zero, a2, e8, m1, ta, ma
 ; CHECK-RV64VC-NEXT:    c.ntl.all
 ; CHECK-RV64VC-NEXT:    vluxei64.v v15, (zero), v16
 ; CHECK-RV64VC-NEXT:    vl8re64.v v16, (s2)
-; CHECK-RV64VC-NEXT:    sub a0, s0, a1
-; CHECK-RV64VC-NEXT:    sltu a2, s0, a0
+; CHECK-RV64VC-NEXT:    sub a0, s0, a7
+; CHECK-RV64VC-NEXT:    sltu a1, s0, a0
+; CHECK-RV64VC-NEXT:    addi a1, a1, -1
+; CHECK-RV64VC-NEXT:    and a0, a0, a1
+; CHECK-RV64VC-NEXT:    sub a1, a0, s1
+; CHECK-RV64VC-NEXT:    sltu a2, a0, a1
 ; CHECK-RV64VC-NEXT:    addi a2, a2, -1
-; CHECK-RV64VC-NEXT:    and a0, a0, a2
-; CHECK-RV64VC-NEXT:    sub a2, a0, s1
-; CHECK-RV64VC-NEXT:    sltu a3, a0, a2
-; CHECK-RV64VC-NEXT:    addi a3, a3, -1
-; CHECK-RV64VC-NEXT:    and a2, a2, a3
+; CHECK-RV64VC-NEXT:    and a1, a1, a2
 ; CHECK-RV64VC-NEXT:    bltu a0, s1, .LBB913_10
 ; CHECK-RV64VC-NEXT:  # %bb.9:
 ; CHECK-RV64VC-NEXT:    mv a0, s1
 ; CHECK-RV64VC-NEXT:  .LBB913_10:
-; CHECK-RV64VC-NEXT:    csrr a3, vlenb
-; CHECK-RV64VC-NEXT:    slli a3, a3, 3
-; CHECK-RV64VC-NEXT:    mv a4, a3
-; CHECK-RV64VC-NEXT:    slli a3, a3, 1
-; CHECK-RV64VC-NEXT:    add a3, a3, a4
-; CHECK-RV64VC-NEXT:    add a3, a3, sp
-; CHECK-RV64VC-NEXT:    addi a3, a3, 16
-; CHECK-RV64VC-NEXT:    vl8r.v v24, (a3) # vscale x 64-byte Folded Reload
-; CHECK-RV64VC-NEXT:    vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT:    csrr a2, vlenb
+; CHECK-RV64VC-NEXT:    slli a2, a2, 3
+; CHECK-RV64VC-NEXT:    mv a3, a2
+; CHECK-RV64VC-NEXT:    slli a2, a2, 1
+; CHECK-RV64VC-NEXT:    add a2, a2, a3
+; CHECK-RV64VC-NEXT:    add a2, a2, sp
+; CHECK-RV64VC-NEXT:    addi a2, a2, 16
+; CHECK-RV64VC-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; CHECK-RV64VC-NEXT:    c.ntl.all
 ; CHECK-RV64VC-NEXT:    vluxei64.v v11, (zero), v24
 ; CHECK-RV64VC-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
 ; CHECK-RV64VC-NEXT:    c.ntl.all
 ; CHECK-RV64VC-NEXT:    vluxei64.v v10, (zero), v16
-; CHECK-RV64VC-NEXT:    bltu s0, a1, .LBB913_12
+; CHECK-RV64VC-NEXT:    bltu s0, a7, .LBB913_12
 ; CHECK-RV64VC-NEXT:  # %bb.11:
-; CHECK-RV64VC-NEXT:    mv s0, a1
+; CHECK-RV64VC-NEXT:    mv s0, a7
 ; CHECK-RV64VC-NEXT:  .LBB913_12:
 ; CHECK-RV64VC-NEXT:    sub a0, s0, s1
 ; CHECK-RV64VC-NEXT:    sltu a1, s0, a0
@@ -37011,33 +37011,33 @@ define <vscale x 64 x i8> @test_nontemporal_vp_gather_nxv64i8_DEFAULT(<vscale x
 ; CHECK-RV64VC-NEXT:    li a1, 40
 ; CHECK-RV64VC-NEXT:    mv a0, s1
 ; CHECK-RV64VC-NEXT:    call __muldi3
-; CHECK-RV64VC-NEXT:    slli a7, s1, 2
-; CHECK-RV64VC-NEXT:    sub a1, s0, a7
+; CHECK-RV64VC-NEXT:    slli a6, s1, 2
+; CHECK-RV64VC-NEXT:    sub a1, s0, a6
 ; CHECK-RV64VC-NEXT:    sltu a2, s0, a1
 ; CHECK-RV64VC-NEXT:    addi a2, a2, -1
 ; CHECK-RV64VC-NEXT:    and a3, a2, a1
-; CHECK-RV64VC-NEXT:    slli a1, s1, 1
-; CHECK-RV64VC-NEXT:    sub a2, a3, a1
-; CHECK-RV64VC-NEXT:    sltu a4, a3, a2
-; CHECK-RV64VC-NEXT:    addi a4, a4, -1
-; CHECK-RV64VC-NEXT:    and a2, a2, a4
-; CHECK-RV64VC-NEXT:    sub t0, a2, s1
-; CHECK-RV64VC-NEXT:    mv a5, a2
-; CHECK-RV64VC-NEXT:    bltu a2, s1, .LBB914_2
+; CHECK-RV64VC-NEXT:    slli a7, s1, 1
+; CHECK-RV64VC-NEXT:    sub a1, a3, a7
+; CHECK-RV64VC-NEXT:    sltu a2, a3, a1
+; CHECK-RV64VC-NEXT:    addi a2, a2, -1
+; CHECK-RV64VC-NEXT:    and a1, a1, a2
+; CHECK-RV64VC-NEXT:    sub a4, a1, s1
+; CHECK-RV64VC-NEXT:    mv a5, a1
+; CHECK-RV64VC-NEXT:    bltu a1, s1, .LBB914_2
 ; CHECK-RV64VC-NEXT:  # %bb.1:
 ; CHECK-RV64VC-NEXT:    mv a5, s1
 ; CHECK-RV64VC-NEXT:  .LBB914_2:
-; CHECK-RV64VC-NEXT:    sltu a6, a2, t0
-; CHECK-RV64VC-NEXT:    bltu a3, a1, .LBB914_4
+; CHECK-RV64VC-NEXT:    sltu a2, a1, a4
+; CHECK-RV64VC-NEXT:    bltu a3, a7, .LBB914_4
 ; CHECK-RV64VC-NEXT:  # %bb.3:
-; CHECK-RV64VC-NEXT:    mv a3, a1
+; CHECK-RV64VC-NEXT:    mv a3, a7
 ; CHECK-RV64VC-NEXT:  .LBB914_4:
 ; CHECK-RV64VC-NEXT:    add a0, a0, s2
-; CHECK-RV64VC-NEXT:    addi a6, a6, -1
-; CHECK-RV64VC-NEXT:    sub a2, a3, s1
-; CHECK-RV64VC-NEXT:    sltu a4, a3, a2
-; CHECK-RV64VC-NEXT:    addi a4, a4, -1
-; CHECK-RV64VC-NEXT:    and a2, a2, a4
+; CHECK-RV64VC-NEXT:    addi t0, a2, -1
+; CHECK-RV64VC-NEXT:    sub a1, a3, s1
+; CHECK-RV64VC-NEXT:    sltu a2, a3, a1
+; CHECK-RV64VC-NEXT:    addi a2, a2, -1
+; CHECK-RV64VC-NEXT:    and a1, a1, a2
 ; CHECK-RV64VC-NEXT:    bltu a3, s1, .LBB914_6
 ; CHECK-RV64VC-NEXT:  # %bb.5:
 ; CHECK-RV64VC-NEXT:    mv a3, s1
@@ -37053,7 +37053,7 @@ define <vscale x 64 x i8> @test_nontemporal_vp_gather_nxv64i8_DEFAULT(<vscale x
 ; CHECK-RV64VC-NEXT:    add a0, a0, sp
 ; CHECK-RV64VC-NEXT:    addi a0, a0, 16
 ; CHECK-RV64VC-NEXT:    vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
-; CHECK-RV64VC-NEXT:    vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; CHECK-RV64VC-NEXT:    c.ntl.all
 ; CHECK-RV64VC-NEXT:    vluxei64.v v13, (zero), v24
 ; CHECK-RV64VC-NEXT:    csrr a0, vlenb
@@ -37064,44 +37064,44 @@ define <vscale x 64 x i8> @test_nontemporal_vp_gather_nxv64i8_DEFAULT(<vscale x
 ; CHECK-RV64VC-NEXT:    vsetvli zero, a3, e8, m1, ta, ma
 ; CHECK-RV64VC-NEXT:    c.ntl.all
 ; CHECK-RV64VC-NEXT:    vluxei64.v v12, (zero), v24
-; CHECK-RV64VC-NEXT:    and a0, a6, t0
-; CHECK-RV64VC-NEXT:    bltu s0, a7, .LBB914_8
+; CHECK-RV64VC-NEXT:    and a2, t0, a4
+; CHECK-RV64VC-NEXT:    bltu s0, a6, .LBB914_8
 ; CHECK-RV64VC-NEXT:  # %bb.7:
-; CHECK-RV64VC-NEXT:    mv s0, a7
+; CHECK-RV64VC-NEXT:    mv s0, a6
 ; CHECK-RV64VC-NEXT:  .LBB914_8:
-; CHECK-RV64VC-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT:    vsetvli zero, a2, e8, m1, ta, ma
 ; CHECK-RV64VC-NEXT:    c.ntl.all
 ; CHECK-RV64VC-NEXT:    vluxei64.v v15, (zero), v16
 ; CHECK-RV64VC-NEXT:    vl8re64.v v16, (s2)
-; CHECK-RV64VC-NEXT:    sub a0, s0, a1
-; CHECK-RV64VC-NEXT:    sltu a2, s0, a0
+; CHECK-RV64VC-NEXT:    sub a0, s0, a7
+; CHECK-RV64VC-NEXT:    sltu a1, s0, a0
+; CHECK-RV64VC-NEXT:    addi a1, a1, -1
+; CHECK-RV64VC-NEXT:    and a0, a0, a1
+; CHECK-RV64VC-NEXT:    sub a1, a0, s1
+; CHECK-RV64VC-NEXT:    sltu a2, a0, a1
 ; CHECK-RV64VC-NEXT:    addi a2, a2, -1
-; CHECK-RV64VC-NEXT:    and a0, a0, a2
-; CHECK-RV64VC-NEXT:    sub a2, a0, s1
-; CHECK-RV64VC-NEXT:    sltu a3, a0, a2
-; CHECK-RV64VC-NEXT:    addi a3, a3, -1
-; CHECK-RV64VC-NEXT:    and a2, a2, a3
+; CHECK-RV64VC-NEXT:    and a1, a1, a2
 ; CHECK-RV64VC-NEXT:    bltu a0, s1, .LBB914_10
 ; CHECK-RV64VC-NEXT:  # %bb.9:
 ; CHECK-RV64VC-NEXT:    mv a0, s1
 ; CHECK-RV64VC-NEXT:  .LBB914_10:
-; CHECK-RV64VC-NEXT:    csrr a3, vlenb
-; CHECK-RV64VC-NEXT:    slli a3, a3, 3
-; CHECK-RV64VC-NEXT:    mv a4, a3
-; CHECK-RV64VC-NEXT:    slli a3, a3, 1
-; CHECK-RV64VC-NEXT:    add a3, a3, a4
-; CHECK-RV64VC-NEXT:    add a3, a3, sp
-; CHECK-RV64VC-NEXT:    addi a3, a3, 16
-; CHECK-RV64VC-NEXT:    vl8r.v v24, (a3) # vscale x 64-byte Folded Reload
-; CHECK-RV64VC-NEXT:    vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-RV64VC-NEXT:    csrr a2, vlenb
+; CHECK-RV64VC-NEXT:    slli a2, a2, 3
+; CHECK-RV64VC-NEXT:    mv a3, a2
+; CHECK-RV64VC-NEXT:    slli a2, a2, 1
+; CHECK-RV64VC-NEXT:    add a2, a2, a3
+; CHECK-RV64VC-NEXT:    add a2, a2, sp
+; CHECK-RV64VC-NEXT:    addi a2, a2, 16
+; CHECK-RV64VC-NEXT:    vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
+; CHECK-RV64VC-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 ; CHECK-RV64VC-NEXT:    c.ntl.all
 ; CHECK-RV64VC-NEXT:    vluxei64.v v11, (zero), v24
 ; CHECK-RV64VC-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
 ; CHECK-RV64VC-NEXT:    c.ntl.all
 ; CHECK-RV64VC-NEXT:    vluxei64.v v10, (zero), v16
-; CHECK-RV64VC-NEXT:    bltu s0, a1, .LBB914_12
+; CHECK-RV64VC-NEXT:    bltu s0, a7, .LBB914_12
 ; CHECK-RV64VC-NEXT:  # %bb.11:
-; CHECK-RV64VC-NEXT:    mv s0, a1
+; CHECK-RV64VC-NEXT:    mv s0, a7
 ; CHECK-RV64VC-NEXT:  .LBB914_12:
 ; CHECK-RV64VC-NEXT:    sub a0, s0, s1
 ; CHECK-RV64VC-NEXT:    sltu a1, s0, a0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll b/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll
index ead79fcf53d8b..1bbce1021f5d8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll
@@ -308,31 +308,31 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
 ; RV64X60-NEXT:    addi s1, a7, -1
 ; RV64X60-NEXT:    zext.w s1, s1
 ; RV64X60-NEXT:    mul t3, a1, s1
-; RV64X60-NEXT:    mul t4, a3, s1
-; RV64X60-NEXT:    mul t5, a5, s1
-; RV64X60-NEXT:    add s0, a0, a6
+; RV64X60-NEXT:    mul s0, a3, s1
+; RV64X60-NEXT:    mul s1, a5, s1
+; RV64X60-NEXT:    add t4, a0, a6
 ; RV64X60-NEXT:    csrr t2, vlenb
-; RV64X60-NEXT:    add s1, a2, a6
-; RV64X60-NEXT:    add t3, t3, s0
-; RV64X60-NEXT:    add s0, a4, a6
-; RV64X60-NEXT:    add t4, t4, s1
-; RV64X60-NEXT:    li t6, 32
+; RV64X60-NEXT:    add t5, a2, a6
+; RV64X60-NEXT:    add t3, t3, t4
+; RV64X60-NEXT:    add t4, a4, a6
 ; RV64X60-NEXT:    add t5, t5, s0
-; RV64X60-NEXT:    sltu s0, a0, t4
-; RV64X60-NEXT:    sltu s1, a2, t3
-; RV64X60-NEXT:    and t4, s0, s1
-; RV64X60-NEXT:    or s2, a1, a3
-; RV64X60-NEXT:    sltu s0, a0, t5
-; RV64X60-NEXT:    sltu s1, a4, t3
-; RV64X60-NEXT:    srli t3, s2, 63
-; RV64X60-NEXT:    and s0, s0, s1
-; RV64X60-NEXT:    or s1, a1, a5
-; RV64X60-NEXT:    or t4, t4, t3
+; RV64X60-NEXT:    li t6, 32
+; RV64X60-NEXT:    add t4, t4, s1
+; RV64X60-NEXT:    sltu s1, a0, t5
+; RV64X60-NEXT:    sltu s0, a2, t3
+; RV64X60-NEXT:    and s2, s1, s0
+; RV64X60-NEXT:    or t5, a1, a3
+; RV64X60-NEXT:    sltu s1, a0, t4
+; RV64X60-NEXT:    sltu s0, a4, t3
+; RV64X60-NEXT:    srli t3, t5, 63
+; RV64X60-NEXT:    and t5, s1, s0
+; RV64X60-NEXT:    or t4, a1, a5
+; RV64X60-NEXT:    or s2, s2, t3
 ; RV64X60-NEXT:    slli t3, t2, 1
-; RV64X60-NEXT:    srli s1, s1, 63
-; RV64X60-NEXT:    or s0, s0, s1
+; RV64X60-NEXT:    srli s0, t4, 63
+; RV64X60-NEXT:    or s0, t5, s0
 ; RV64X60-NEXT:    maxu s1, t3, t6
-; RV64X60-NEXT:    or s0, t4, s0
+; RV64X60-NEXT:    or s0, s2, s0
 ; RV64X60-NEXT:    sltu s1, a6, s1
 ; RV64X60-NEXT:    or s0, s0, s1
 ; RV64X60-NEXT:    add t4, a0, a6


